From 582fc32574a3b158c81e49cb00e6ae59205e66ba Mon Sep 17 00:00:00 2001
From: Antonin Kral
Date: Thu, 17 Mar 2011 00:05:43 +0100
Subject: Imported Upstream version 1.8.0

---
 .gitignore | 4 +-
 SConstruct | 720 ++-
 bson/bson-inl.h | 665 +++
 bson/bson.h | 30 +-
 bson/bson_db.h | 28 +-
 bson/bsondemo/bsondemo.cpp | 26 +-
 bson/bsonelement.h | 662 +--
 bson/bsoninlines.h | 588 ---
 bson/bsonmisc.h | 78 +-
 bson/bsonobj.h | 212 +-
 bson/bsonobjbuilder.h | 265 +-
 bson/bsonobjiterator.h | 70 +-
 bson/bsontypes.h | 112 +-
 bson/inline_decls.h | 2 +-
 bson/oid.cpp | 154 +
 bson/oid.h | 95 +-
 bson/ordering.h | 10 +-
 bson/stringdata.h | 37 +-
 bson/util/atomic_int.h | 40 +-
 bson/util/builder.h | 149 +-
 bson/util/misc.h | 4 +-
 buildscripts/distmirror.py | 2 +-
 buildscripts/errorcodes.py | 21 +-
 buildscripts/frob_version.py | 2 +-
 buildscripts/hacks_ubuntu.py | 2 +-
 buildscripts/makealldists.py | 20 +-
 buildscripts/makedist.py | 32 +-
 buildscripts/mergerepositories.py | 2 +-
 buildscripts/s3del.py | 36 +
 buildscripts/smoke.py | 425 +-
 buildscripts/utils.py | 21 +-
 client/clientOnly.cpp | 16 +-
 client/connpool.cpp | 223 +-
 client/connpool.h | 182 +-
 client/constants.h | 20 +-
 client/dbclient.cpp | 587 +--
 client/dbclient.h | 506 +-
 client/dbclient_rs.cpp | 594 +++
 client/dbclient_rs.h | 276 ++
 client/dbclientcursor.cpp | 102 +-
 client/dbclientcursor.h | 132 +-
 client/dbclientmockcursor.h | 40 +
 client/distlock.cpp | 272 +-
 client/distlock.h | 55 +-
 client/distlock_test.cpp | 58 +-
 client/examples/authTest.cpp | 11 +-
 client/examples/clientTest.cpp | 35 +-
 client/examples/first.cpp | 11 +-
 client/examples/httpClientTest.cpp | 8 +-
 client/examples/rs.cpp | 58 +
 client/examples/second.cpp | 2 +-
 client/examples/tail.cpp | 40 +-
 client/examples/tutorial.cpp | 64 +-
 client/examples/whereExample.cpp | 10 +-
 client/gridfs.cpp | 60 +-
 client/gridfs.h | 28 +-
 client/model.cpp | 44 +-
 client/model.h | 8 +-
 client/mongo_client_lib.cpp | 66 +
 client/parallel.cpp | 376 +-
 client/parallel.h | 98 +-
 client/redef_macros.h | 3 +
 client/simple_client_demo.cpp | 36 +
 client/syncclusterconnection.cpp | 195 +-
 client/syncclusterconnection.h | 41 +-
 client/undef_macros.h | 3 +
 db/background.h | 12 +-
 db/btree.cpp | 1242 +++--
 db/btree.h | 585 ++-
 db/btreecursor.cpp | 145 +-
 db/cap.cpp | 239 +-
 db/client.cpp | 396 +-
 db/client.h | 277 +-
 db/clientcursor.cpp | 310 +-
 db/clientcursor.h | 296 +-
 db/cloner.cpp | 169 +-
 db/cmdline.cpp | 191 +-
 db/cmdline.h | 114 +-
 db/commands.cpp | 26 +-
 db/commands.h | 20 +-
 db/commands/distinct.cpp | 150 +
 db/commands/group.cpp | 202 +
 db/commands/isself.cpp | 220 +
 db/commands/mr.cpp | 1074 ++++
 db/commands/mr.h | 291 ++
 db/common.cpp | 4 +
 db/compact.cpp | 199 +
 db/concurrency.h | 249 +-
 db/curop-inl.h | 42 +
 db/curop.h | 358 +-
 db/cursor.cpp | 23 +-
 db/cursor.h | 98 +-
 db/database.cpp | 211 +-
 db/database.h | 214 +-
 db/db.cpp | 747 ++-
 db/db.h | 119 +-
 db/db.sln | 86 -
 db/db.vcproj | 1885 -------
 db/db.vcxproj | 72 +-
 db/db.vcxproj.filters | 1229 ++---
 db/db_10.sln | 8 -
 db/dbcommands.cpp | 936 ++--
 db/dbcommands_admin.cpp | 233 +-
 db/dbcommands_generic.cpp | 198 +-
 db/dbeval.cpp | 23 +-
 db/dbhelpers.cpp | 166 +-
 db/dbhelpers.h | 38 +-
 db/dbmessage.h | 39 +-
 db/dbwebserver.cpp | 316 +-
 db/dbwebserver.h | 31 +-
 db/diskloc.h | 101 +-
 db/driverHelpers.cpp | 16 +-
 db/dur.cpp | 635 +++
 db/dur.h | 201 +
 db/dur_commitjob.cpp | 210 +
 db/dur_commitjob.h | 221 +
 db/dur_journal.cpp | 576 +++
 db/dur_journal.h | 68 +
 db/dur_journalformat.h | 166 +
 db/dur_journalimpl.h | 101 +
 db/dur_preplogbuffer.cpp | 192 +
db/dur_recover.cpp | 457 ++ db/dur_recover.h | 45 + db/dur_stats.h | 46 + db/dur_writetodatafiles.cpp | 99 + db/durop.cpp | 160 + db/durop.h | 111 + db/extsort.cpp | 147 +- db/extsort.h | 50 +- db/filever.h | 8 +- db/geo/2d.cpp | 949 ++-- db/geo/core.h | 153 +- db/geo/haystack.cpp | 146 +- db/helpers/dblogger.h | 4 +- db/index.cpp | 148 +- db/index.h | 46 +- db/indexkey.cpp | 161 +- db/indexkey.h | 67 +- db/instance.cpp | 557 ++- db/instance.h | 50 +- db/introspect.cpp | 3 +- db/jsobj.cpp | 409 +- db/jsobj.h | 4 +- db/jsobjmanipulator.h | 44 +- db/json.cpp | 69 +- db/lasterror.cpp | 109 +- db/lasterror.h | 40 +- db/matcher.cpp | 495 +- db/matcher.h | 71 +- db/matcher_covered.cpp | 53 +- db/minilex.h | 190 +- db/module.cpp | 16 +- db/module.h | 10 +- db/modules/mms.cpp | 88 +- db/mongommf.cpp | 391 ++ db/mongommf.h | 140 + db/mongomutex.h | 239 + db/mr.cpp | 721 --- db/namespace-inl.h | 130 + db/namespace.cpp | 398 +- db/namespace.h | 517 +- db/nonce.cpp | 54 +- db/nonce.h | 22 +- db/oplog.cpp | 228 +- db/oplog.h | 133 +- db/oplogreader.h | 46 +- db/pdfile.cpp | 800 +-- db/pdfile.h | 236 +- db/projection.cpp | 301 ++ db/projection.h | 127 + db/query.cpp | 560 ++- db/query.h | 106 +- db/queryoptimizer.cpp | 657 +-- db/queryoptimizer.h | 224 +- db/queryutil.cpp | 840 ++-- db/queryutil.h | 209 +- db/rec.h | 137 - db/reccache.cpp | 419 -- db/reccache.h | 262 - db/reci.h | 64 - db/recstore.h | 126 - db/repl.cpp | 631 +-- db/repl.h | 70 +- db/repl/connections.h | 49 +- db/repl/consensus.cpp | 124 +- db/repl/health.cpp | 161 +- db/repl/health.h | 8 +- db/repl/heartbeat.cpp | 71 +- db/repl/manager.cpp | 70 +- db/repl/multicmd.h | 29 +- db/repl/replset_commands.cpp | 106 +- db/repl/rs.cpp | 282 +- db/repl/rs.h | 115 +- db/repl/rs_config.cpp | 174 +- db/repl/rs_config.h | 20 +- db/repl/rs_exception.h | 18 +- db/repl/rs_initialsync.cpp | 205 +- db/repl/rs_initiate.cpp | 66 +- db/repl/rs_member.h | 35 +- db/repl/rs_optime.h | 114 +- db/repl/rs_rollback.cpp | 661 +-- db/repl/rs_sync.cpp | 368 +- db/repl_block.cpp | 92 +- db/repl_block.h | 10 +- db/replpair.h | 30 +- db/resource.h | 32 +- db/restapi.cpp | 60 +- db/restapi.h | 34 + db/scanandorder.h | 36 +- db/security.cpp | 20 +- db/security.h | 28 +- db/security_commands.cpp | 89 +- db/security_key.cpp | 105 + db/security_key.h | 47 + db/stats/counters.cpp | 110 +- db/stats/counters.h | 77 +- db/stats/fine_clock.h | 13 +- db/stats/service_stats.cpp | 6 +- db/stats/snapshots.cpp | 121 +- db/stats/snapshots.h | 20 +- db/stats/top.cpp | 115 +- db/stats/top.h | 57 +- db/storage.cpp | 81 - db/taskqueue.h | 106 + db/tests.cpp | 2 +- db/update.cpp | 617 ++- db/update.h | 276 +- dbtests/background_job_test.cpp | 109 + dbtests/balancer_policy_tests.cpp | 203 + dbtests/basictests.cpp | 277 +- dbtests/btreetests.cpp | 1412 +++++- dbtests/clienttests.cpp | 77 +- dbtests/commandtests.cpp | 18 +- dbtests/cursortests.cpp | 33 +- dbtests/d_chunk_manager_tests.cpp | 467 ++ dbtests/dbtests.cpp | 4 +- dbtests/directclienttests.cpp | 80 + dbtests/framework.cpp | 142 +- dbtests/framework.h | 52 +- dbtests/histogram_test.cpp | 20 +- dbtests/jsobjtests.cpp | 370 +- dbtests/jsontests.cpp | 74 +- dbtests/jstests.cpp | 363 +- dbtests/matchertests.cpp | 66 +- dbtests/mmaptests.cpp | 219 + dbtests/mockdbclient.h | 4 +- dbtests/namespacetests.cpp | 56 +- dbtests/pairingtests.cpp | 24 +- dbtests/pdfiletests.cpp | 131 +- dbtests/perf/btreeperf.cpp | 442 ++ dbtests/perf/perftest.cpp | 88 +- dbtests/perftests.cpp | 336 ++ dbtests/queryoptimizertests.cpp | 555 ++- dbtests/querytests.cpp 
| 302 +- dbtests/repltests.cpp | 411 +- dbtests/sharding.cpp | 12 +- dbtests/socktests.cpp | 13 +- dbtests/spin_lock_test.cpp | 68 +- dbtests/test.vcproj | 1453 ------ dbtests/test.vcxproj | 57 +- dbtests/test.vcxproj.filters | 141 +- dbtests/threadedtests.cpp | 154 +- dbtests/updatetests.cpp | 195 +- debian/changelog | 134 - debian/compat | 1 - debian/control | 29 - debian/copyright | 23 - debian/dirs | 3 - debian/init.d | 243 - debian/lintian-overrides | 11 - debian/mongo.1 | 62 - debian/mongod.1 | 16 - debian/mongodb.conf | 95 - debian/mongodump.1 | 36 - debian/mongoexport.1 | 51 - debian/mongofiles.1 | 52 - debian/mongoimport.1 | 63 - debian/mongorestore.1 | 36 - debian/mongos.1 | 39 - debian/mongosniff.1 | 30 - debian/mongostat.1 | 39 - debian/postinst | 55 - debian/postrm | 39 - debian/prerm | 41 - debian/rules | 107 - debian/watch | 10 - distsrc/client/SConstruct | 48 +- doxygenConfig | 4 +- jstests/_tst.js | 41 + jstests/apitest_db.js | 5 + jstests/array4.js | 30 + jstests/arrayfind3.js | 21 + jstests/auth/auth1.js | 2 +- jstests/basic3.js | 32 +- jstests/big_object1.js | 46 + jstests/capped3.js | 6 +- jstests/capped6.js | 25 + jstests/capped7.js | 19 +- jstests/capped8.js | 86 + jstests/check_shard_index.js | 45 + jstests/conc_update.js | 45 - jstests/coveredIndex1.js | 59 + jstests/coveredIndex2.js | 18 + jstests/cursora.js | 41 +- jstests/datasize3.js | 8 +- jstests/dbcase.js | 4 +- jstests/disk/directoryperdb.js | 4 +- jstests/disk/diskfull.js | 12 +- jstests/disk/killall.js | 42 + jstests/disk/preallocate.js | 8 +- jstests/disk/preallocate2.js | 6 +- jstests/disk/preallocate_directoryperdb.js | 50 + jstests/distinct1.js | 2 + jstests/distinct_array1.js | 1 + jstests/distinct_index1.js | 50 + jstests/distinct_index2.js | 35 + jstests/drop2.js | 43 + jstests/dropIndex.js | 16 - jstests/drop_index.js | 16 + jstests/dur/a_quick.js | 123 + jstests/dur/closeall.js | 80 + jstests/dur/diskfull.js | 136 + jstests/dur/dropdb.js | 163 + jstests/dur/dur1.js | 154 + jstests/dur/dur2.js | 92 + jstests/dur/lsn.js | 126 + jstests/dur/manyRestart.js | 191 + jstests/dur/md5.js | 101 + jstests/dur/oplog.js | 159 + jstests/error5.js | 2 +- jstests/eval_nolock.js | 16 + jstests/evalc.js | 14 - jstests/evald.js | 68 + jstests/evale.js | 5 + jstests/evalf.js | 26 + jstests/exists.js | 3 +- jstests/explain1.js | 2 +- jstests/explain2.js | 6 +- jstests/explain3.js | 24 + jstests/find_and_modify3.js | 4 +- jstests/geo_borders.js | 189 + jstests/geo_center_sphere1.js | 93 + jstests/geo_circle2.js | 3 + jstests/geo_circle2a.js | 36 + jstests/geo_near_random1.js | 12 + jstests/geo_near_random2.js | 21 + jstests/geo_sort1.js | 22 + jstests/geo_update1.js | 38 + jstests/geo_update2.js | 40 + jstests/geof.js | 19 + jstests/group6.js | 1 + jstests/in3.js | 2 +- jstests/in4.js | 4 +- jstests/index11.js | 13 + jstests/index_check6.js | 45 +- jstests/index_check7.js | 2 +- jstests/index_many2.js | 2 + jstests/index_sparse1.js | 46 + jstests/index_sparse2.js | 21 + jstests/indexh.js | 7 + jstests/indexi.js | 16 + jstests/indexj.js | 44 + jstests/insert2.js | 8 + jstests/jni2.js | 4 +- jstests/killop.js | 43 + jstests/libs/concurrent.js | 30 + jstests/libs/fun.js | 32 + jstests/libs/geo_near_random.js | 78 + jstests/libs/grid.js | 172 + jstests/libs/network.js | 37 + jstests/misc/biginsert.js | 18 + jstests/mr1.js | 22 +- jstests/mr2.js | 27 +- jstests/mr3.js | 10 +- jstests/mr4.js | 4 +- jstests/mr5.js | 4 +- jstests/mr_bigobject.js | 13 +- jstests/mr_comments.js | 28 + jstests/mr_errorhandling.js | 8 +- 
jstests/mr_index.js | 43 + jstests/mr_index2.js | 22 + jstests/mr_index3.js | 50 + jstests/mr_killop.js | 127 + jstests/mr_merge.js | 51 + jstests/mr_optim.js | 47 + jstests/mr_outreduce.js | 41 + jstests/mr_outreduce2.js | 27 + jstests/mr_replaceIntoDB.js | 45 + jstests/mr_sort.js | 6 +- jstests/multiClient/rsDurKillRestart1.js | 139 + jstests/ne2.js | 21 + jstests/ne3.js | 12 + jstests/not2.js | 5 +- jstests/notablescan.js | 22 + jstests/objid5.js | 9 +- jstests/or4.js | 11 +- jstests/or6.js | 14 +- jstests/orc.js | 29 + jstests/ord.js | 34 + jstests/ore.js | 13 + jstests/orf.js | 15 + jstests/parallel/del.js | 79 + jstests/parallel/repl.js | 4 +- jstests/perf/geo_near1.js | 11 + jstests/profile1.js | 7 + jstests/proj_key1.js | 28 + jstests/pull_remove1.js | 14 + jstests/push2.js | 2 + jstests/queryoptimizer2.js | 62 + jstests/regex3.js | 2 +- jstests/regex6.js | 4 +- jstests/regex9.js | 2 +- jstests/remove_undefined.js | 28 + jstests/rename4.js | 121 + jstests/repl/basic1.js | 4 +- jstests/repl/block2.js | 15 +- jstests/repl/mastermaster1.js | 8 +- jstests/repl/pair1.js | 4 +- jstests/repl/repl1.js | 2 + jstests/repl/repl11.js | 4 + jstests/repl/repl2.js | 2 + jstests/repl/snapshot3.js | 4 +- jstests/replsets/auth1.js | 184 + jstests/replsets/buildindexes.js | 86 + jstests/replsets/cloneDb.js | 52 + jstests/replsets/config1.js | 21 + jstests/replsets/fastsync.js | 117 + jstests/replsets/getlasterror_w2.js | 36 + jstests/replsets/groupAndMapReduce.js | 105 + jstests/replsets/initial_sync1.js | 129 + jstests/replsets/initial_sync2.js | 179 + jstests/replsets/initial_sync3.js | 87 + jstests/replsets/ismaster1.js | 36 + jstests/replsets/key1 | 1 + jstests/replsets/key2 | 1 + jstests/replsets/remove1.js | 132 + jstests/replsets/replset2.js | 252 +- jstests/replsets/replset3.js | 130 +- jstests/replsets/replset5.js | 42 +- jstests/replsets/replset_remove_node.js | 9 +- jstests/replsets/replsetarb2.js | 2 + jstests/replsets/replsetarb3.js | 144 + jstests/replsets/replsetfreeze.js | 105 + jstests/replsets/rollback.js | 333 +- jstests/replsets/rollback2.js | 423 +- jstests/replsets/rollback3.js | 39 +- jstests/replsets/rslib.js | 63 + jstests/replsets/slaveDelay2.js | 104 + jstests/replsets/slavedelay1.js | 127 + jstests/replsets/sync1.js | 30 +- jstests/replsets/sync_passive.js | 89 + jstests/replsets/sync_passive2.js | 120 + jstests/replsets/toostale.js | 121 + jstests/replsets/two_initsync.js | 1 + jstests/replsets/twosets.js | 19 +- jstests/rs/rs_basic.js | 88 +- jstests/set_param1.js | 9 + jstests/sharding/addshard3.js | 9 + jstests/sharding/addshard4.js | 24 + jstests/sharding/auto1.js | 5 + jstests/sharding/bigMapReduce.js | 62 +- jstests/sharding/count1.js | 10 +- jstests/sharding/cursor1.js | 2 +- jstests/sharding/features1.js | 24 +- jstests/sharding/features2.js | 29 +- jstests/sharding/features3.js | 3 +- jstests/sharding/geo_near_random1.js | 37 + jstests/sharding/geo_near_random2.js | 44 + jstests/sharding/key_many.js | 6 +- jstests/sharding/key_string.js | 13 +- jstests/sharding/limit_push.js | 47 + jstests/sharding/migrateBig.js | 45 + jstests/sharding/multi_mongos1.js | 70 + jstests/sharding/rename.js | 1 + jstests/sharding/shard1.js | 1 + jstests/sharding/shard3.js | 36 +- jstests/sharding/shard_insert_getlasterror_w2.js | 89 + jstests/sharding/sort1.js | 46 +- jstests/sharding/splitpick.js | 39 - jstests/sharding/sync1.js | 15 +- jstests/sharding/update1.js | 7 + jstests/sharding/version1.js | 40 +- jstests/sharding/version2.js | 35 +- jstests/shellkillop.js | 83 +- 
jstests/shellspawn.js | 2 + jstests/shellstartparallel.js | 17 + jstests/slowNightly/32bit.js | 125 + jstests/slowNightly/btreedel.js | 43 + jstests/slowNightly/capped4.js | 2 +- jstests/slowNightly/command_line_parsing.js | 9 + jstests/slowNightly/dur_big_atomic_update.js | 31 + jstests/slowNightly/dur_passthrough.js | 89 + jstests/slowNightly/dur_remove_old_journals.js | 53 + jstests/slowNightly/geo_near_random1.js | 13 + jstests/slowNightly/geo_near_random2.js | 21 + jstests/slowNightly/index_check9.js | 118 + jstests/slowNightly/large_chunk.js | 51 + jstests/slowNightly/moveprimary-replset.js | 67 + jstests/slowNightly/newcollection2.js | 11 + jstests/slowNightly/run_sharding_passthrough.js | 94 - jstests/slowNightly/sharding_balance1.js | 2 +- jstests/slowNightly/sharding_balance2.js | 2 +- jstests/slowNightly/sharding_balance3.js | 4 +- jstests/slowNightly/sharding_balance4.js | 43 +- .../slowNightly/sharding_balance_randomorder1.js | 54 + jstests/slowNightly/sharding_cursors1.js | 6 +- .../slowNightly/sharding_multiple_collections.js | 53 + jstests/slowNightly/sharding_passthrough.js | 94 + jstests/slowNightly/sharding_rs1.js | 13 +- jstests/slowNightly/sharding_rs2.js | 163 + jstests/slowNightly/unix_socket1.js | 26 + jstests/slowWeekly/conc_update.js | 29 +- jstests/slowWeekly/disk_reuse1.js | 41 + jstests/slowWeekly/dur_passthrough.js | 44 + jstests/slowWeekly/geo_near_random1.js | 13 + jstests/slowWeekly/geo_near_random2.js | 21 + jstests/slowWeekly/indexbg_dur.js | 67 + jstests/slowWeekly/query_yield1.js | 6 +- jstests/slowWeekly/query_yield2.js | 6 +- jstests/slowWeekly/update_yield1.js | 21 +- jstests/sort2.js | 2 +- jstests/splitvector.js | 144 +- jstests/tempCleanup.js | 16 - jstests/temp_cleanup.js | 16 + jstests/tool/dumprestore2.js | 3 + jstests/tool/dumprestore3.js | 60 + jstests/tool/dumprestore4.js | 42 + jstests/tool/tool1.js | 2 +- jstests/ts1.js | 38 + jstests/update_addToSet3.js | 18 + jstests/update_arraymatch6.js | 14 + jstests/update_multi6.js | 10 + lib/libboost_thread-gcc41-mt-d-1_34_1.a | Bin 0 -> 692920 bytes mongo.xcodeproj/project.pbxproj | 1879 ------- mongo_astyle | 16 + pch.cpp | 2 +- pch.h | 40 +- rpm/init.d-mongod | 3 +- rpm/mongo.spec | 3 +- rpm/mongod.conf | 11 - s/balance.cpp | 197 +- s/balance.h | 69 +- s/balancer_policy.cpp | 227 +- s/balancer_policy.h | 28 +- s/chunk.cpp | 1097 ++-- s/chunk.h | 279 +- s/client.cpp | 292 ++ s/client.h | 120 + s/commands_admin.cpp | 568 ++- s/commands_public.cpp | 541 +- s/config.cpp | 409 +- s/config.h | 120 +- s/config_migrate.cpp | 76 +- s/cursors.cpp | 153 +- s/cursors.h | 37 +- s/d_chunk_manager.cpp | 328 ++ s/d_chunk_manager.h | 150 + s/d_logic.cpp | 37 +- s/d_logic.h | 213 +- s/d_migrate.cpp | 1197 +++-- s/d_split.cpp | 722 ++- s/d_state.cpp | 694 +-- s/d_util.cpp | 41 - s/d_writeback.cpp | 97 +- s/d_writeback.h | 75 + s/dbgrid.vcproj | 1048 ---- s/dbgrid.vcxproj | 18 + s/dbgrid.vcxproj.filters | 36 + s/grid.cpp | 257 +- s/grid.h | 33 +- s/request.cpp | 168 +- s/request.h | 63 +- s/s_only.cpp | 63 +- s/server.cpp | 193 +- s/server.h | 4 +- s/shard.cpp | 271 +- s/shard.h | 97 +- s/shard_version.cpp | 151 + s/shard_version.h | 31 + s/shardconnection.cpp | 173 +- s/shardkey.cpp | 152 +- s/shardkey.h | 40 +- s/stats.cpp | 2 +- s/stats.h | 2 +- s/strategy.cpp | 308 +- s/strategy.h | 13 +- s/strategy_shard.cpp | 196 +- s/strategy_single.cpp | 184 +- s/util.h | 68 +- s/writeback_listener.cpp | 254 + s/writeback_listener.h | 67 + scripting/bench.cpp | 173 + scripting/engine.cpp | 253 +- scripting/engine.h | 134 +- 
scripting/engine_java.cpp | 77 +- scripting/engine_java.h | 16 +- scripting/engine_none.cpp | 2 +- scripting/engine_spidermonkey.cpp | 624 +-- scripting/engine_spidermonkey.h | 16 +- scripting/engine_v8.cpp | 323 +- scripting/engine_v8.h | 55 +- scripting/sm_db.cpp | 628 +-- scripting/utils.cpp | 23 +- scripting/v8_db.cpp | 498 +- scripting/v8_db.h | 92 +- scripting/v8_utils.cpp | 141 +- scripting/v8_utils.h | 4 +- scripting/v8_wrapper.cpp | 282 +- scripting/v8_wrapper.h | 4 +- shell/collection.js | 79 +- shell/db.js | 144 +- shell/dbshell.cpp | 351 +- shell/mongo.js | 9 +- shell/mongo_vstudio.cpp | 5223 +++++++++++++------- shell/msvc/mongo.vcxproj | 7 +- shell/msvc/mongo.vcxproj.filters | 13 +- shell/query.js | 55 +- shell/servers.js | 110 +- shell/shell_utils.cpp | 383 +- shell/utils.h | 8 +- shell/utils.js | 495 +- tools/bridge.cpp | 80 +- tools/bsondump.cpp | 57 +- tools/dump.cpp | 286 +- tools/export.cpp | 69 +- tools/files.cpp | 54 +- tools/import.cpp | 181 +- tools/restore.cpp | 148 +- tools/sniffer.cpp | 200 +- tools/stat.cpp | 653 ++- tools/tool.cpp | 218 +- tools/tool.h | 58 +- util/admin_access.h | 52 + util/alignedbuilder.cpp | 102 + util/alignedbuilder.h | 123 + util/allocator.h | 8 +- util/array.h | 52 +- util/assert_util.cpp | 60 +- util/assert_util.h | 133 +- util/background.cpp | 146 +- util/background.h | 135 +- util/base64.cpp | 40 +- util/base64.h | 25 +- util/bufreader.h | 98 + util/concurrency/README | 19 + util/concurrency/list.h | 96 +- util/concurrency/msg.h | 8 +- util/concurrency/mutex.h | 129 +- util/concurrency/mvar.h | 28 +- util/concurrency/race.h | 72 + util/concurrency/readme.txt | 15 - util/concurrency/rwlock.h | 170 +- util/concurrency/shared_mutex_win.hpp | 573 +++ util/concurrency/spin_lock.cpp | 34 +- util/concurrency/spin_lock.h | 26 +- util/concurrency/synchronization.cpp | 56 + util/concurrency/synchronization.h | 73 + util/concurrency/task.cpp | 56 +- util/concurrency/task.h | 14 +- util/concurrency/thread_pool.cpp | 45 +- util/concurrency/thread_pool.h | 110 +- util/concurrency/value.h | 24 +- util/concurrency/vars.cpp | 24 +- util/debug_util.cpp | 9 +- util/debug_util.h | 19 +- util/embedded_builder.h | 16 +- util/file.h | 225 +- util/file_allocator.cpp | 282 ++ util/file_allocator.h | 278 +- util/goodies.h | 367 +- util/hashtab.h | 58 +- util/heapcheck.h | 33 + util/hex.h | 12 +- util/histogram.cpp | 40 +- util/histogram.h | 8 +- util/hostandport.h | 67 +- util/httpclient.cpp | 52 +- util/httpclient.h | 22 +- util/log.cpp | 45 +- util/log.h | 166 +- util/logfile.cpp | 157 + util/logfile.h | 50 + util/lruishmap.h | 4 +- util/md5.h | 16 +- util/md5.hpp | 9 +- util/md5main.cpp | 104 +- util/message.cpp | 342 +- util/message.h | 200 +- util/message_server.h | 10 +- util/message_server_asio.cpp | 112 +- util/message_server_port.cpp | 55 +- util/miniwebserver.cpp | 32 +- util/miniwebserver.h | 2 +- util/mmap.cpp | 113 +- util/mmap.h | 289 +- util/mmap_mm.cpp | 4 +- util/mmap_posix.cpp | 132 +- util/mmap_win.cpp | 158 +- util/mongoutils/README | 8 +- util/mongoutils/checksum.h | 4 +- util/mongoutils/hash.h | 41 + util/mongoutils/html.h | 44 +- util/mongoutils/mongoutils.vcxproj | 2 + util/mongoutils/str.h | 126 +- util/mongoutils/test.cpp | 79 +- util/moveablebuffer.h | 51 + util/ntservice.cpp | 418 +- util/ntservice.h | 17 +- util/optime.h | 31 +- util/password.h | 4 +- util/paths.h | 79 + util/processinfo.cpp | 33 +- util/processinfo.h | 12 +- util/processinfo_darwin.cpp | 48 +- util/processinfo_linux2.cpp | 195 +- util/processinfo_none.cpp | 24 +- 
util/processinfo_win32.cpp | 28 +- util/queue.h | 54 +- util/ramlog.h | 24 +- util/ramstore.cpp | 93 - util/ramstore.h | 86 - util/signal_handlers.cpp | 122 + util/signal_handlers.h | 34 + util/sock.cpp | 70 +- util/sock.h | 124 +- util/stringutils.cpp | 8 +- util/stringutils.h | 12 +- util/text.cpp | 92 +- util/text.h | 48 +- util/time_support.h | 201 + util/timer.h | 67 + util/unittest.h | 3 + util/util.cpp | 93 +- util/version.cpp | 84 +- util/version.h | 3 +- 735 files changed, 60850 insertions(+), 37351 deletions(-) create mode 100644 bson/bson-inl.h delete mode 100644 bson/bsoninlines.h create mode 100644 bson/oid.cpp create mode 100644 buildscripts/s3del.py create mode 100644 client/dbclient_rs.cpp create mode 100644 client/dbclient_rs.h create mode 100644 client/dbclientmockcursor.h create mode 100644 client/examples/rs.cpp create mode 100644 client/mongo_client_lib.cpp create mode 100644 client/simple_client_demo.cpp create mode 100644 db/commands/distinct.cpp create mode 100644 db/commands/group.cpp create mode 100644 db/commands/isself.cpp create mode 100644 db/commands/mr.cpp create mode 100644 db/commands/mr.h create mode 100644 db/compact.cpp create mode 100644 db/curop-inl.h delete mode 100644 db/db.sln delete mode 100644 db/db.vcproj mode change 100644 => 100755 db/db_10.sln create mode 100644 db/dur.cpp create mode 100644 db/dur.h create mode 100644 db/dur_commitjob.cpp create mode 100644 db/dur_commitjob.h create mode 100644 db/dur_journal.cpp create mode 100644 db/dur_journal.h create mode 100644 db/dur_journalformat.h create mode 100644 db/dur_journalimpl.h create mode 100644 db/dur_preplogbuffer.cpp create mode 100644 db/dur_recover.cpp create mode 100644 db/dur_recover.h create mode 100644 db/dur_stats.h create mode 100644 db/dur_writetodatafiles.cpp create mode 100644 db/durop.cpp create mode 100644 db/durop.h create mode 100644 db/mongommf.cpp create mode 100644 db/mongommf.h create mode 100644 db/mongomutex.h delete mode 100644 db/mr.cpp create mode 100644 db/namespace-inl.h create mode 100644 db/projection.cpp create mode 100644 db/projection.h delete mode 100644 db/rec.h delete mode 100644 db/reccache.cpp delete mode 100644 db/reccache.h delete mode 100644 db/reci.h delete mode 100644 db/recstore.h mode change 100755 => 100644 db/repl/rs_exception.h mode change 100755 => 100644 db/resource.h create mode 100644 db/restapi.h create mode 100644 db/security_key.cpp create mode 100644 db/security_key.h delete mode 100644 db/storage.cpp create mode 100644 db/taskqueue.h create mode 100644 dbtests/background_job_test.cpp create mode 100644 dbtests/balancer_policy_tests.cpp create mode 100644 dbtests/d_chunk_manager_tests.cpp create mode 100644 dbtests/directclienttests.cpp create mode 100644 dbtests/mmaptests.cpp create mode 100644 dbtests/perf/btreeperf.cpp create mode 100644 dbtests/perftests.cpp delete mode 100644 dbtests/test.vcproj delete mode 100644 debian/changelog delete mode 100644 debian/compat delete mode 100644 debian/control delete mode 100644 debian/copyright delete mode 100644 debian/dirs delete mode 100644 debian/init.d delete mode 100644 debian/lintian-overrides delete mode 100644 debian/mongo.1 delete mode 100644 debian/mongod.1 delete mode 100644 debian/mongodb.conf delete mode 100644 debian/mongodump.1 delete mode 100644 debian/mongoexport.1 delete mode 100644 debian/mongofiles.1 delete mode 100644 debian/mongoimport.1 delete mode 100644 debian/mongorestore.1 delete mode 100644 debian/mongos.1 delete mode 100644 debian/mongosniff.1 delete mode 100644 
debian/mongostat.1 delete mode 100644 debian/postinst delete mode 100644 debian/postrm delete mode 100644 debian/prerm delete mode 100644 debian/rules delete mode 100644 debian/watch create mode 100644 jstests/_tst.js create mode 100644 jstests/array4.js create mode 100644 jstests/arrayfind3.js create mode 100644 jstests/big_object1.js create mode 100644 jstests/capped8.js create mode 100644 jstests/check_shard_index.js delete mode 100644 jstests/conc_update.js create mode 100644 jstests/coveredIndex1.js create mode 100644 jstests/coveredIndex2.js create mode 100644 jstests/disk/killall.js create mode 100644 jstests/disk/preallocate_directoryperdb.js create mode 100644 jstests/distinct_index1.js create mode 100644 jstests/distinct_index2.js create mode 100644 jstests/drop2.js delete mode 100644 jstests/dropIndex.js create mode 100644 jstests/drop_index.js create mode 100755 jstests/dur/a_quick.js create mode 100644 jstests/dur/closeall.js create mode 100644 jstests/dur/diskfull.js create mode 100644 jstests/dur/dropdb.js create mode 100755 jstests/dur/dur1.js create mode 100644 jstests/dur/dur2.js create mode 100755 jstests/dur/lsn.js create mode 100755 jstests/dur/manyRestart.js create mode 100644 jstests/dur/md5.js create mode 100755 jstests/dur/oplog.js create mode 100644 jstests/eval_nolock.js create mode 100644 jstests/evald.js create mode 100644 jstests/evale.js create mode 100644 jstests/evalf.js create mode 100644 jstests/explain3.js create mode 100644 jstests/geo_borders.js create mode 100644 jstests/geo_center_sphere1.js create mode 100644 jstests/geo_circle2a.js create mode 100644 jstests/geo_near_random1.js create mode 100644 jstests/geo_near_random2.js create mode 100644 jstests/geo_sort1.js create mode 100644 jstests/geo_update1.js create mode 100644 jstests/geo_update2.js create mode 100644 jstests/geof.js create mode 100644 jstests/index11.js create mode 100644 jstests/index_sparse1.js create mode 100644 jstests/index_sparse2.js create mode 100644 jstests/indexi.js create mode 100644 jstests/indexj.js create mode 100644 jstests/insert2.js create mode 100644 jstests/killop.js create mode 100644 jstests/libs/concurrent.js create mode 100644 jstests/libs/fun.js create mode 100644 jstests/libs/geo_near_random.js create mode 100644 jstests/libs/grid.js create mode 100644 jstests/libs/network.js create mode 100755 jstests/misc/biginsert.js create mode 100644 jstests/mr_comments.js create mode 100644 jstests/mr_index.js create mode 100644 jstests/mr_index2.js create mode 100644 jstests/mr_index3.js create mode 100644 jstests/mr_killop.js create mode 100644 jstests/mr_merge.js create mode 100644 jstests/mr_optim.js create mode 100644 jstests/mr_outreduce.js create mode 100644 jstests/mr_outreduce2.js create mode 100644 jstests/mr_replaceIntoDB.js create mode 100644 jstests/multiClient/rsDurKillRestart1.js create mode 100644 jstests/ne2.js create mode 100644 jstests/ne3.js create mode 100644 jstests/notablescan.js create mode 100644 jstests/orc.js create mode 100644 jstests/ord.js create mode 100644 jstests/ore.js create mode 100644 jstests/orf.js create mode 100644 jstests/parallel/del.js create mode 100644 jstests/perf/geo_near1.js create mode 100644 jstests/proj_key1.js create mode 100644 jstests/pull_remove1.js create mode 100644 jstests/queryoptimizer2.js create mode 100644 jstests/remove_undefined.js create mode 100644 jstests/rename4.js create mode 100644 jstests/replsets/auth1.js create mode 100644 jstests/replsets/buildindexes.js create mode 100644 
jstests/replsets/cloneDb.js create mode 100644 jstests/replsets/config1.js create mode 100644 jstests/replsets/fastsync.js create mode 100644 jstests/replsets/getlasterror_w2.js create mode 100644 jstests/replsets/groupAndMapReduce.js create mode 100644 jstests/replsets/initial_sync1.js create mode 100644 jstests/replsets/initial_sync2.js create mode 100644 jstests/replsets/initial_sync3.js create mode 100644 jstests/replsets/ismaster1.js create mode 100644 jstests/replsets/key1 create mode 100644 jstests/replsets/key2 create mode 100644 jstests/replsets/remove1.js create mode 100644 jstests/replsets/replsetarb3.js create mode 100644 jstests/replsets/replsetfreeze.js create mode 100644 jstests/replsets/rslib.js create mode 100644 jstests/replsets/slaveDelay2.js create mode 100644 jstests/replsets/slavedelay1.js create mode 100644 jstests/replsets/sync_passive.js create mode 100644 jstests/replsets/sync_passive2.js create mode 100644 jstests/replsets/toostale.js create mode 100644 jstests/set_param1.js create mode 100644 jstests/sharding/addshard3.js create mode 100644 jstests/sharding/addshard4.js create mode 100644 jstests/sharding/geo_near_random1.js create mode 100644 jstests/sharding/geo_near_random2.js create mode 100644 jstests/sharding/limit_push.js create mode 100644 jstests/sharding/migrateBig.js create mode 100644 jstests/sharding/multi_mongos1.js create mode 100644 jstests/sharding/shard_insert_getlasterror_w2.js delete mode 100644 jstests/sharding/splitpick.js create mode 100644 jstests/shellstartparallel.js create mode 100755 jstests/slowNightly/32bit.js create mode 100644 jstests/slowNightly/btreedel.js create mode 100644 jstests/slowNightly/command_line_parsing.js create mode 100644 jstests/slowNightly/dur_big_atomic_update.js create mode 100644 jstests/slowNightly/dur_passthrough.js create mode 100644 jstests/slowNightly/dur_remove_old_journals.js create mode 100644 jstests/slowNightly/geo_near_random1.js create mode 100644 jstests/slowNightly/geo_near_random2.js create mode 100644 jstests/slowNightly/index_check9.js create mode 100644 jstests/slowNightly/large_chunk.js create mode 100755 jstests/slowNightly/moveprimary-replset.js create mode 100644 jstests/slowNightly/newcollection2.js delete mode 100644 jstests/slowNightly/run_sharding_passthrough.js create mode 100644 jstests/slowNightly/sharding_balance_randomorder1.js create mode 100644 jstests/slowNightly/sharding_multiple_collections.js create mode 100644 jstests/slowNightly/sharding_passthrough.js create mode 100644 jstests/slowNightly/sharding_rs2.js create mode 100644 jstests/slowNightly/unix_socket1.js create mode 100644 jstests/slowWeekly/disk_reuse1.js create mode 100644 jstests/slowWeekly/dur_passthrough.js create mode 100644 jstests/slowWeekly/geo_near_random1.js create mode 100644 jstests/slowWeekly/geo_near_random2.js create mode 100644 jstests/slowWeekly/indexbg_dur.js delete mode 100644 jstests/tempCleanup.js create mode 100644 jstests/temp_cleanup.js create mode 100644 jstests/tool/dumprestore3.js create mode 100644 jstests/tool/dumprestore4.js create mode 100644 jstests/ts1.js create mode 100644 jstests/update_addToSet3.js create mode 100644 jstests/update_arraymatch6.js create mode 100644 jstests/update_multi6.js create mode 100644 lib/libboost_thread-gcc41-mt-d-1_34_1.a delete mode 100644 mongo.xcodeproj/project.pbxproj create mode 100644 mongo_astyle create mode 100644 s/client.cpp create mode 100644 s/client.h create mode 100644 s/d_chunk_manager.cpp create mode 100644 s/d_chunk_manager.h delete 
mode 100644 s/d_util.cpp create mode 100644 s/d_writeback.h delete mode 100644 s/dbgrid.vcproj create mode 100644 s/shard_version.cpp create mode 100644 s/shard_version.h create mode 100644 s/writeback_listener.cpp create mode 100644 s/writeback_listener.h create mode 100644 scripting/bench.cpp create mode 100644 util/admin_access.h create mode 100644 util/alignedbuilder.cpp create mode 100644 util/alignedbuilder.h create mode 100644 util/bufreader.h create mode 100644 util/concurrency/README create mode 100644 util/concurrency/race.h delete mode 100644 util/concurrency/readme.txt create mode 100755 util/concurrency/shared_mutex_win.hpp create mode 100644 util/concurrency/synchronization.cpp create mode 100644 util/concurrency/synchronization.h create mode 100644 util/file_allocator.cpp create mode 100644 util/heapcheck.h create mode 100644 util/logfile.cpp create mode 100644 util/logfile.h create mode 100644 util/mongoutils/hash.h mode change 100755 => 100644 util/mongoutils/test.cpp create mode 100644 util/moveablebuffer.h create mode 100644 util/paths.h delete mode 100644 util/ramstore.cpp delete mode 100644 util/ramstore.h create mode 100644 util/signal_handlers.cpp create mode 100644 util/signal_handlers.h create mode 100644 util/time_support.h create mode 100644 util/timer.h diff --git a/.gitignore b/.gitignore index 2c7d1bd..3847ca4 100644 --- a/.gitignore +++ b/.gitignore @@ -95,6 +95,8 @@ libmongoshellfiles.* firstExample secondExample whereExample +bsondemo +rsExample #tests test @@ -119,4 +121,4 @@ debian/mongodb *.creator.user *.files *.includes - +*.orig diff --git a/SConstruct b/SConstruct index e3046ff..41383b1 100644 --- a/SConstruct +++ b/SConstruct @@ -10,6 +10,8 @@ # scons --distname=0.8 s3dist # all s3 pushes require settings.py and simples3 +EnsureSConsVersion(0, 98, 4) # this is a common version known to work + import os import sys import imp @@ -24,241 +26,137 @@ from buildscripts import utils buildscripts.bb.checkOk() +def findSettingsSetup(): + sys.path.append( "." ) + sys.path.append( ".." 
) + sys.path.append( "../../" ) + + + # --- options ---- -AddOption('--prefix', - dest='prefix', - type='string', - nargs=1, - action='store', - metavar='DIR', - help='installation prefix') - -AddOption('--distname', - dest='distname', - type='string', - nargs=1, - action='store', - metavar='DIR', - help='dist name (0.8.0)') - -AddOption('--distmod', - dest='distmod', - type='string', - nargs=1, - action='store', - metavar='DIR', - help='additional piece for full dist name') - -AddOption( "--64", - dest="force64", - type="string", - nargs=0, - action="store", - help="whether to force 64 bit" ) - - -AddOption( "--32", - dest="force32", - type="string", - nargs=0, - action="store", - help="whether to force 32 bit" ) - - -AddOption( "--mm", - dest="mm", - type="string", - nargs=0, - action="store", - help="use main memory instead of memory mapped files" ) - - -AddOption( "--release", - dest="release", - type="string", - nargs=0, - action="store", - help="relase build") - - -AddOption( "--static", - dest="static", - type="string", - nargs=0, - action="store", - help="fully static build") - - -AddOption('--usesm', - dest='usesm', - type="string", - nargs=0, - action="store", - help="use spider monkey for javascript" ) - -AddOption('--usev8', - dest='usev8', - type="string", - nargs=0, - action="store", - help="use v8 for javascript" ) - -AddOption('--asio', - dest='asio', - type="string", - nargs=0, - action="store", - help="Use Asynchronous IO (NOT READY YET)" ) - -AddOption( "--d", - dest="debugBuild", - type="string", - nargs=0, - action="store", - help="debug build no optimization, etc..." ) - -AddOption( "--dd", - dest="debugBuildAndLogging", - type="string", - nargs=0, - action="store", - help="debug build no optimization, additional debug logging, etc..." ) - -AddOption( "--recstore", - dest="recstore", - type="string", - nargs=0, - action="store", - help="use new recstore" ) - -AddOption( "--noshell", - dest="noshell", - type="string", - nargs=0, - action="store", - help="don't build shell" ) - -AddOption( "--safeshell", - dest="safeshell", - type="string", - nargs=0, - action="store", - help="don't let shell scripts run programs (still, don't run untrusted scripts)" ) - -AddOption( "--extrapath", - dest="extrapath", - type="string", - nargs=1, - action="store", - help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" ) - -AddOption( "--extrapathdyn", - dest="extrapathdyn", - type="string", - nargs=1, - action="store", - help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) dynamic linking" ) - - -AddOption( "--extralib", - dest="extralib", - type="string", - nargs=1, - action="store", - help="comma separated list of libraries (--extralib js_static,readline" ) - -AddOption( "--staticlib", - dest="staticlib", - type="string", - nargs=1, - action="store", - help="comma separated list of libs to link statically (--staticlib js_static,boost_program_options-mt,..." 
) - -AddOption( "--staticlibpath", - dest="staticlibpath", - type="string", - nargs=1, - action="store", - help="comma separated list of dirs to search for staticlib arguments" ) - -AddOption( "--cxx", - dest="cxx", - type="string", - nargs=1, - action="store", - help="compiler to use" ) - - -AddOption( "--boost-compiler", - dest="boostCompiler", - type="string", - nargs=1, - action="store", - help="compiler used for boost (gcc41)" ) - -AddOption( "--boost-version", - dest="boostVersion", - type="string", - nargs=1, - action="store", - help="boost version for linking(1_38)" ) - -AddOption( "--cpppath", - dest="cpppath", - type="string", - nargs=1, - action="store", - help="Include path if you have headers in a nonstandard directory" ) - -AddOption( "--libpath", - dest="libpath", - type="string", - nargs=1, - action="store", - help="Library path if you have libraries in a nonstandard directory" ) - -# + +options = {} + +def add_option( name, help , nargs , contibutesToVariantDir , dest=None ): + + if dest is None: + dest = name + + AddOption( "--" + name , + dest=dest, + type="string", + nargs=nargs, + action="store", + help=help ) + + options[name] = { "help" : help , + "nargs" : nargs , + "contibutesToVariantDir" : contibutesToVariantDir , + "dest" : dest } + +def get_option( name ): + return GetOption( name ) + +def has_option( name ): + x = get_option( name ) + if x is None: + return False + + if x == False: + return False + + if x == "": + return False + + return True + +def get_variant_dir(): + + a = [] + + for name in options: + o = options[name] + if not has_option( o["dest"] ): + continue + if not o["contibutesToVariantDir"]: + continue + + if o["nargs"] == 0: + a.append( name ) + else: + a.append( name + "-" + get_option( name ) ) + + s = "build/" + + if len(a) > 0: + a.sort() + s += "/".join( a ) + "/" + + return s + + + +# installation/packaging +add_option( "prefix" , "installation prefix" , 1 , False ) +add_option( "distname" , "dist name (0.8.0)" , 1 , False ) +add_option( "distmod", "additional piece for full dist name" , 1 , False ) +add_option( "nostrip", "do not strip installed binaries" , 0 , False ) + +add_option( "sharedclient", "build a libmongoclient.so/.dll" , 0 , False ) +add_option( "full", "include client and headers when doing scons install", 0 , False ) + +# linking options +add_option( "release" , "release build" , 0 , True ) +add_option( "static" , "fully static build" , 0 , True ) + +# base compile flags +add_option( "64" , "whether to force 64 bit" , 0 , True , "force64" ) +add_option( "32" , "whether to force 32 bit" , 0 , True , "force32" ) + +add_option( "cxx", "compiler to use" , 1 , True ) + +add_option( "cpppath", "Include path if you have headers in a nonstandard directory" , 1 , True ) +add_option( "libpath", "Library path if you have libraries in a nonstandard directory" , 1 , True ) + +add_option( "extrapath", "comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" , 1 , True ) +add_option( "extrapathdyn", "comma separated list of add'l paths (--extrapath /opt/foo/,/foo) dynamic linking" , 1 , True ) +add_option( "extralib", "comma separated list of libraries (--extralib js_static,readline" , 1 , True ) +add_option( "staticlib", "comma separated list of libs to link statically (--staticlib js_static,boost_program_options-mt,..." 
, 1 , True ) +add_option( "staticlibpath", "comma separated list of dirs to search for staticlib arguments" , 1 , True ) + +add_option( "boost-compiler", "compiler used for boost (gcc41)" , 1 , True , "boostCompiler" ) +add_option( "boost-version", "boost version for linking(1_38)" , 1 , True , "boostVersion" ) + + +# experimental features +add_option( "mm", "use main memory instead of memory mapped files" , 0 , True ) +add_option( "asio" , "Use Asynchronous IO (NOT READY YET)" , 0 , True ) + +# library choices +add_option( "usesm" , "use spider monkey for javascript" , 0 , True ) +add_option( "usev8" , "use v8 for javascript" , 0 , True ) + +# mongo feature options +add_option( "noshell", "don't build shell" , 0 , True ) +add_option( "safeshell", "don't let shell scripts run programs (still, don't run untrusted scripts)" , 0 , True ) + +# dev tools +add_option( "d", "debug build no optimization, etc..." , 0 , True , "debugBuild" ) +add_option( "dd", "debug build no optimization, additional debug logging, etc..." , 0 , False , "debugBuildAndLogging" ) +add_option( "durableDefaultOn" , "have durable default to on" , 0 , True ) + +add_option( "pch" , "use precompiled headers to speed up the build (experimental)" , 0 , True , "usePCH" ) +add_option( "distcc" , "use distcc for distributing builds" , 0 , False ) + +# debugging/profiling help + # to use CPUPROFILE=/tmp/profile # to view pprof -gv mongod /tmp/profile -# -AddOption( "--pg", - dest="profile", - type="string", - nargs=0, - action="store" ) - -AddOption( "--gdbserver", - dest="gdbserver", - type="string", - nargs=0, - action="store" ) - -AddOption("--nostrip", - dest="nostrip", - action="store_true", - help="do not strip installed binaries") - -AddOption("--sharedclient", - dest="sharedclient", - action="store_true", - help="build a libmongoclient.so/.dll") - -AddOption("--full", - dest="full", - action="store_true", - help="include client and headers when doing scons install") - -AddOption("--smokedbprefix", - dest="smokedbprefix", - action="store", - help="prefix to dbpath et al. for smoke tests") - -AddOption( "--pch", - dest="usePCH", - type="string", - nargs=0, - action="store", - help="use precompiled headers to speed up the build (experimental)" ) +add_option( "pg", "link against profiler" , 0 , False , "profile" ) +add_option( "gdbserver" , "build in gdb server support" , 0 , True ) +add_option( "heapcheck", "link to heap-checking malloc-lib and look for memory leaks during tests" , 0 , False ) + +add_option("smokedbprefix", "prefix to dbpath et al. 
for smoke tests", 1 , False ) # --- environment setup --- @@ -284,7 +182,7 @@ windows = False freebsd = False openbsd = False solaris = False -force64 = not GetOption( "force64" ) is None +force64 = has_option( "force64" ) if not force64 and os.getcwd().endswith( "mongo-64" ): force64 = True print( "*** assuming you want a 64-bit build b/c of directory *** " ) @@ -292,44 +190,45 @@ msarch = None if force64: msarch = "amd64" -force32 = not GetOption( "force32" ) is None -release = not GetOption( "release" ) is None -static = not GetOption( "static" ) is None +force32 = has_option( "force32" ) +release = has_option( "release" ) +static = has_option( "static" ) -debugBuild = ( not GetOption( "debugBuild" ) is None ) or ( not GetOption( "debugBuildAndLogging" ) is None ) -debugLogging = not GetOption( "debugBuildAndLogging" ) is None -noshell = not GetOption( "noshell" ) is None +debugBuild = has_option( "debugBuild" ) or has_option( "debugBuildAndLogging" ) +debugLogging = has_option( "debugBuildAndLogging" ) +noshell = has_option( "noshell" ) -usesm = not GetOption( "usesm" ) is None -usev8 = not GetOption( "usev8" ) is None +usesm = has_option( "usesm" ) +usev8 = has_option( "usev8" ) -asio = not GetOption( "asio" ) is None +asio = has_option( "asio" ) -usePCH = not GetOption( "usePCH" ) is None +usePCH = has_option( "usePCH" ) justClientLib = (COMMAND_LINE_TARGETS == ['mongoclient']) env = Environment( MSVS_ARCH=msarch , tools = ["default", "gch"], toolpath = '.' ) -if GetOption( "cxx" ) is not None: - env["CC"] = GetOption( "cxx" ) - env["CXX"] = GetOption( "cxx" ) +if has_option( "cxx" ): + env["CC"] = get_option( "cxx" ) + env["CXX"] = get_option( "cxx" ) env["LIBPATH"] = [] -if GetOption( "libpath" ) is not None: - env["LIBPATH"] = [GetOption( "libpath" )] +if has_option( "libpath" ): + env["LIBPATH"] = [get_option( "libpath" )] -if GetOption( "cpppath" ) is not None: - env["CPPPATH"] = [GetOption( "cpppath" )] +if has_option( "cpppath" ): + env["CPPPATH"] = [get_option( "cpppath" )] -if GetOption( "recstore" ) != None: - env.Append( CPPDEFINES=[ "_RECSTORE" ] ) env.Append( CPPDEFINES=[ "_SCONS" , "MONGO_EXPOSE_MACROS" ] ) env.Append( CPPPATH=[ "." 
] ) -if GetOption( "safeshell" ) != None: +if has_option( "safeshell" ): env.Append( CPPDEFINES=[ "MONGO_SAFE_SHELL" ] ) +if has_option( "durableDefaultOn" ): + env.Append( CPPDEFINES=[ "_DURABLEDEFAULTON" ] ) + boostCompiler = GetOption( "boostCompiler" ) if boostCompiler is None: boostCompiler = "" @@ -356,14 +255,14 @@ def addExtraLibs( s ): env.Append( LIBPATH=[ x + "/lib64" ] ) extraLibPlaces.append( x + "/lib" ) -if GetOption( "extrapath" ) is not None: +if has_option( "extrapath" ): addExtraLibs( GetOption( "extrapath" ) ) - release = True + release = True # this is so we force using .a -if GetOption( "extrapathdyn" ) is not None: +if has_option( "extrapathdyn" ): addExtraLibs( GetOption( "extrapathdyn" ) ) -if GetOption( "extralib" ) is not None: +if has_option( "extralib" ): for x in GetOption( "extralib" ).split( "," ): env.Append( LIBS=[ x ] ) @@ -399,53 +298,61 @@ installSetup = InstallSetup() if distBuild: installSetup.bannerDir = "distsrc" -if GetOption( "full" ): +if has_option( "full" ): installSetup.headers = True installSetup.libraries = True # ------ SOURCE FILE SETUP ----------- -commonFiles = Split( "pch.cpp buildinfo.cpp db/common.cpp db/jsobj.cpp db/json.cpp db/lasterror.cpp db/nonce.cpp db/queryutil.cpp shell/mongo.cpp" ) -commonFiles += [ "util/background.cpp" , "util/mmap.cpp" , "util/ramstore.cpp", "util/sock.cpp" , "util/util.cpp" , "util/message.cpp" , +commonFiles = Split( "pch.cpp buildinfo.cpp db/common.cpp db/indexkey.cpp db/jsobj.cpp bson/oid.cpp db/json.cpp db/lasterror.cpp db/nonce.cpp db/queryutil.cpp db/projection.cpp shell/mongo.cpp db/security_key.cpp" ) +commonFiles += [ "util/background.cpp" , "util/mmap.cpp" , "util/sock.cpp" , "util/util.cpp" , "util/file_allocator.cpp" , "util/message.cpp" , "util/assert_util.cpp" , "util/log.cpp" , "util/httpclient.cpp" , "util/md5main.cpp" , "util/base64.cpp", "util/concurrency/vars.cpp", "util/concurrency/task.cpp", "util/debug_util.cpp", - "util/concurrency/thread_pool.cpp", "util/password.cpp", "util/version.cpp", - "util/histogram.cpp", "util/concurrency/spin_lock.cpp", "util/text.cpp" , "util/stringutils.cpp" , "util/processinfo.cpp" ] + "util/concurrency/thread_pool.cpp", "util/password.cpp", "util/version.cpp", "util/signal_handlers.cpp", + "util/histogram.cpp", "util/concurrency/spin_lock.cpp", "util/text.cpp" , "util/stringutils.cpp" , + "util/concurrency/synchronization.cpp" ] commonFiles += Glob( "util/*.c" ) -commonFiles += Split( "client/connpool.cpp client/dbclient.cpp client/dbclientcursor.cpp client/model.cpp client/syncclusterconnection.cpp client/distlock.cpp s/shardconnection.cpp" ) +commonFiles += Split( "client/connpool.cpp client/dbclient.cpp client/dbclient_rs.cpp client/dbclientcursor.cpp client/model.cpp client/syncclusterconnection.cpp client/distlock.cpp s/shardconnection.cpp" ) #mmap stuff -if GetOption( "mm" ) != None: +if has_option( "mm" ): commonFiles += [ "util/mmap_mm.cpp" ] elif os.sys.platform == "win32": commonFiles += [ "util/mmap_win.cpp" ] else: commonFiles += [ "util/mmap_posix.cpp" ] -if os.path.exists( "util/processinfo_" + os.sys.platform + ".cpp" ): - commonFiles += [ "util/processinfo_" + os.sys.platform + ".cpp" ] -else: - commonFiles += [ "util/processinfo_none.cpp" ] - coreDbFiles = [ "db/commands.cpp" ] coreServerFiles = [ "util/message_server_port.cpp" , "client/parallel.cpp" , "util/miniwebserver.cpp" , "db/dbwebserver.cpp" , - "db/matcher.cpp" , "db/indexkey.cpp" , "db/dbcommands_generic.cpp" ] + "db/matcher.cpp" , "db/dbcommands_generic.cpp" ] + 
+processInfoFiles = [ "util/processinfo.cpp" ] + +if os.path.exists( "util/processinfo_" + os.sys.platform + ".cpp" ): + processInfoFiles += [ "util/processinfo_" + os.sys.platform + ".cpp" ] +else: + processInfoFiles += [ "util/processinfo_none.cpp" ] + +coreServerFiles += processInfoFiles + + -if GetOption( "asio" ) != None: +if has_option( "asio" ): coreServerFiles += [ "util/message_server_asio.cpp" ] -serverOnlyFiles = Split( "db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/storage.cpp db/queryoptimizer.cpp db/extsort.cpp db/mr.cpp s/d_util.cpp db/cmdline.cpp" ) +serverOnlyFiles = Split( "util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/query.cpp db/update.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/cursor.cpp db/security_commands.cpp db/security.cpp db/queryoptimizer.cpp db/extsort.cpp db/cmdline.cpp" ) serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/geo/*.cpp" ) serverOnlyFiles += [ "db/dbcommands.cpp" , "db/dbcommands_admin.cpp" ] +serverOnlyFiles += Glob( "db/commands/*.cpp" ) coreServerFiles += Glob( "db/stats/*.cpp" ) serverOnlyFiles += [ "db/driverHelpers.cpp" ] -scriptingFiles = [ "scripting/engine.cpp" , "scripting/utils.cpp" ] +scriptingFiles = [ "scripting/engine.cpp" , "scripting/utils.cpp" , "scripting/bench.cpp" ] if usesm: scriptingFiles += [ "scripting/engine_spidermonkey.cpp" ] @@ -457,8 +364,8 @@ else: coreServerFiles += scriptingFiles coreShardFiles = [ "s/config.cpp" , "s/grid.cpp" , "s/chunk.cpp" , "s/shard.cpp" , "s/shardkey.cpp" ] -shardServerFiles = coreShardFiles + Glob( "s/strategy*.cpp" ) + [ "s/commands_admin.cpp" , "s/commands_public.cpp" , "s/request.cpp" , "s/cursors.cpp" , "s/server.cpp" , "s/config_migrate.cpp" , "s/s_only.cpp" , "s/stats.cpp" , "s/balance.cpp" , "s/balancer_policy.cpp" , "db/cmdline.cpp" ] -serverOnlyFiles += coreShardFiles + [ "s/d_logic.cpp" , "s/d_writeback.cpp" , "s/d_migrate.cpp" , "s/d_state.cpp" , "s/d_split.cpp" , "client/distlock_test.cpp" ] +shardServerFiles = coreShardFiles + Glob( "s/strategy*.cpp" ) + [ "s/commands_admin.cpp" , "s/commands_public.cpp" , "s/request.cpp" , "s/client.cpp" , "s/cursors.cpp" , "s/server.cpp" , "s/config_migrate.cpp" , "s/s_only.cpp" , "s/stats.cpp" , "s/balance.cpp" , "s/balancer_policy.cpp" , "db/cmdline.cpp" , "s/writeback_listener.cpp" , "s/shard_version.cpp" ] +serverOnlyFiles += 
coreShardFiles + [ "s/d_logic.cpp" , "s/d_writeback.cpp" , "s/d_migrate.cpp" , "s/d_state.cpp" , "s/d_split.cpp" , "client/distlock_test.cpp" , "s/d_chunk_manager.cpp" ] serverOnlyFiles += [ "db/module.cpp" ] + Glob( "db/modules/*.cpp" ) @@ -471,12 +378,20 @@ for x in os.listdir( "db/modules/" ): print( "adding module: " + x ) moduleNames.append( x ) modRoot = "db/modules/" + x + "/" - serverOnlyFiles += Glob( modRoot + "src/*.cpp" ) + modBuildFile = modRoot + "build.py" + myModule = None if os.path.exists( modBuildFile ): - modules += [ imp.load_module( "module_" + x , open( modBuildFile , "r" ) , modBuildFile , ( ".py" , "r" , imp.PY_SOURCE ) ) ] + myModule = imp.load_module( "module_" + x , open( modBuildFile , "r" ) , modBuildFile , ( ".py" , "r" , imp.PY_SOURCE ) ) + modules.append( myModule ) + + if myModule and "customIncludes" in dir(myModule) and myModule.customIncludes: + pass + else: + serverOnlyFiles += Glob( modRoot + "src/*.cpp" ) -allClientFiles = commonFiles + coreDbFiles + [ "client/clientOnly.cpp" , "client/gridfs.cpp" , "s/d_util.cpp" ]; + +allClientFiles = commonFiles + coreDbFiles + [ "client/clientOnly.cpp" , "client/gridfs.cpp" ]; # ---- other build setup ----- @@ -504,7 +419,7 @@ if distBuild: def isDriverBuild(): return GetOption( "prefix" ) and GetOption( "prefix" ).find( "mongo-cxx-driver" ) >= 0 -if GetOption( "prefix" ): +if has_option( "prefix" ): installDir = GetOption( "prefix" ) if isDriverBuild(): installSetup.justClient() @@ -533,6 +448,7 @@ if "darwin" == os.sys.platform: platform = "osx" # prettier than darwin if env["CXX"] is None: + print( "YO" ) if os.path.exists( "/usr/bin/g++-4.2" ): env["CXX"] = "g++-4.2" @@ -623,8 +539,6 @@ elif "win32" == os.sys.platform: else: print( "boost found at '" + boostDir + "'" ) - serverOnlyFiles += [ "util/ntservice.cpp" ] - boostLibs = [] env.Append(CPPPATH=[ "js/src/" ]) @@ -651,7 +565,7 @@ elif "win32" == os.sys.platform: # some warnings we don't like: env.Append( CPPFLAGS=" /wd4355 /wd4800 /wd4267 /wd4244 " ) - env.Append( CPPDEFINES=["WIN32","_CONSOLE","_CRT_SECURE_NO_WARNINGS","HAVE_CONFIG_H","PCRE_STATIC","SUPPORT_UCP","SUPPORT_UTF8,PSAPI_VERSION=1" ] ) + env.Append( CPPDEFINES=["WIN32","_CONSOLE","_CRT_SECURE_NO_WARNINGS","HAVE_CONFIG_H","PCRE_STATIC","SUPPORT_UCP","SUPPORT_UTF8","PSAPI_VERSION=1" ] ) #env.Append( CPPFLAGS=' /Yu"pch.h" ' ) # this would be for pre-compiled headers, could play with it later @@ -667,14 +581,20 @@ elif "win32" == os.sys.platform: env.Append( CPPFLAGS= " /GL " ) env.Append( LINKFLAGS=" /LTCG " ) else: - env.Append( CPPDEFINES=[ "_DEBUG" ] ) + # /Od disable optimization # /ZI debug info w/edit & continue # /TP it's a c++ file # RTC1 /GZ (Enable Stack Frame Run-Time Error Checking) - env.Append( CPPFLAGS=" /Od /RTC1 /MDd /Z7 /TP /errorReport:none " ) + env.Append( CPPFLAGS=" /RTC1 /MDd /Z7 /TP /errorReport:none " ) env.Append( CPPFLAGS=' /Fd"mongod.pdb" ' ) - env.Append( LINKFLAGS=" /debug " ) + + if debugBuild: + env.Append( LINKFLAGS=" /debug " ) + env.Append( CPPFLAGS=" /Od " ) + + if debugLogging: + env.Append( CPPDEFINES=[ "_DEBUG" ] ) if os.path.exists("../readline/lib") : env.Append( LIBPATH=["../readline/lib"] ) @@ -744,14 +664,24 @@ else: print( "No special config for [" + os.sys.platform + "] which probably means it won't work" ) if nix: + + if has_option( "distcc" ): + env["CXX"] = "distcc " + env["CXX"] + env.Append( CPPFLAGS="-fPIC -fno-strict-aliasing -ggdb -pthread -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch" ) + # env.Append( " -Wconversion" ) TODO: this 
doesn't really work yet if linux: env.Append( CPPFLAGS=" -Werror " ) + env.Append( CPPFLAGS=" -fno-builtin-memcmp " ) # glibc's memcmp is faster than gcc's env.Append( CXXFLAGS=" -Wnon-virtual-dtor " ) env.Append( LINKFLAGS=" -fPIC -pthread -rdynamic" ) env.Append( LIBS=[] ) - if linux and GetOption( "sharedclient" ): + #make scons colorgcc friendly + env['ENV']['HOME'] = os.environ['HOME'] + env['ENV']['TERM'] = os.environ['TERM'] + + if linux and has_option( "sharedclient" ): env.Append( LINKFLAGS=" -Wl,--as-needed -Wl,-zdefs " ) if debugBuild: @@ -759,6 +689,11 @@ if nix: env['ENV']['GLIBCXX_FORCE_NEW'] = 1; # play nice with valgrind else: env.Append( CPPFLAGS=" -O3" ) + #env.Append( CPPFLAGS=" -fprofile-generate" ) + #env.Append( LINKFLAGS=" -fprofile-generate" ) + # then: + #env.Append( CPPFLAGS=" -fprofile-use" ) + #env.Append( LINKFLAGS=" -fprofile-use" ) if debugLogging: env.Append( CPPFLAGS=" -D_DEBUG" ); @@ -773,10 +708,10 @@ if nix: env.Append( CXXFLAGS="-m32" ) env.Append( LINKFLAGS="-m32" ) - if GetOption( "profile" ) is not None: + if has_option( "profile" ): env.Append( LIBS=[ "profiler" ] ) - if GetOption( "gdbserver" ) is not None: + if has_option( "gdbserver" ): env.Append( CPPDEFINES=["USE_GDBSERVER"] ) # pre-compiled headers @@ -940,8 +875,12 @@ def doConfigure( myenv , needPcre=True , shell=False ): removeIfInList( myenv["LIBS"] , "wpcap" ) for m in modules: - m.configure( conf , myenv ) + if "customIncludes" in dir(m) and m.customIncludes: + m.configure( conf , myenv , serverOnlyFiles ) + else: + m.configure( conf , myenv ) + # XP_* is for spidermonkey. # this is outside of usesm block so don't have to rebuild for java if windows: myenv.Append( CPPDEFINES=[ "XP_WIN" ] ) @@ -1018,14 +957,14 @@ def doConfigure( myenv , needPcre=True , shell=False ): # Handle staticlib,staticlibpath options. staticlibfiles = [] - if GetOption( "staticlib" ) is not None: + if has_option( "staticlib" ): # FIXME: probably this loop ought to do something clever # depending on whether we want to use 32bit or 64bit # libraries. For now, we sort of rely on the user supplying a # sensible staticlibpath option. (myCheckLib implements an # analogous search, but it also does other things I don't # understand, so I'm not using it.) - if GetOption ( "staticlibpath" ) is not None: + if has_option ( "staticlibpath" ): dirs = GetOption ( "staticlibpath" ).split( "," ) else: dirs = [ "/usr/lib64", "/usr/lib" ] @@ -1042,6 +981,28 @@ def doConfigure( myenv , needPcre=True , shell=False ): if not found: raise "can't find a static %s" % l + # 'tcmalloc' needs to be the last library linked. Please, add new libraries before this + # point. + if has_option( "heapcheck" ) and not shell: + if ( not debugBuild ) and ( not debugLogging ): + print( "--heapcheck needs --d or --dd" ) + Exit( 1 ) + + if not conf.CheckCXXHeader( "google/heap-checker.h" ): + print( "--heapcheck neads header 'google/heap-checker.h'" ) + Exit( 1 ) + + myCheckLib( "tcmalloc" , True ); # if successful, appedded 'tcmalloc' to myenv[ LIBS ] + myenv.Append( CPPDEFINES=[ "HEAP_CHECKING" ] ) + myenv.Append( CPPFLAGS="-fno-omit-frame-pointer" ) + + # FIXME doConfigure() is being called twice, in the case of the shell. So if it is called + # with shell==True, it'd be on its second call and it would need to rearrange the libraries' + # order. The following removes tcmalloc from the LIB's list and reinserts it at the end. 
+ if has_option( "heapcheck" ) and shell: + removeIfInList( myenv["LIBS"] , "tcmalloc" ) + myenv.Append( LIBS="tcmalloc" ) + myenv.Append(LINKCOM=" $STATICFILES") myenv.Append(STATICFILES=staticlibfiles) @@ -1049,90 +1010,60 @@ def doConfigure( myenv , needPcre=True , shell=False ): env = doConfigure( env ) -# --- js concat --- -def concatjs(target, source, env): +# --- jsh --- + +def jsToH(target, source, env): outFile = str( target[0] ) - fullSource = "" + h = ['#include "bson/stringdata.h"' + ,'namespace mongo {' + ,'struct JSFile{ const char* name; const StringData& source; };' + ,'namespace JSFiles{' + ] - first = True + def cppEscape(s): + s = s.strip() + s = s.replace( '\\' , '\\\\' ) + s = s.replace( '"' , r'\"' ) + return s for s in source: - f = open( str(s) , 'r' ) - for l in f: - - #strip comments. special case if // is potentially in a string - parts = l.split("//", 1) - if (len(parts) > 1) and ('"' not in parts[1]) and ('"' not in parts[1]): - l = parts[0] + filename = str(s) + objname = os.path.split(filename)[1].split('.')[0] + stringname = '_jscode_raw_' + objname - l = l.strip() - if len ( l ) == 0: - continue - - if l == "}": - fullSource += "}" - continue + h.append('const StringData ' + stringname + " = ") - if first: - first = False - else: - fullSource += "\n" + for l in open( filename , 'r' ): + h.append( '"' + cppEscape(l) + r'\n" ' ) - fullSource += l + h.append(";") + h.append('extern const JSFile %s;'%objname) #symbols aren't exported w/o this + h.append('const JSFile %s = { "%s" , %s };'%(objname, filename.replace('\\', '/'), stringname)) - fullSource += "\n" - - fullSource = re.compile( r'/\*\*.*?\*/' , re.M | re.S ).sub( "" , fullSource ) - - out = open( outFile , 'w' ) - out.write( fullSource ) - - return None - -jsBuilder = Builder(action = concatjs, - suffix = '.jsall', - src_suffix = '.js') + h.append("} // namespace JSFiles") + h.append("} // namespace mongo") + h.append("") -env.Append( BUILDERS={'JSConcat' : jsBuilder}) + text = '\n'.join(h); -# --- jsh --- - -def jsToH(target, source, env): - - outFile = str( target[0] ) - if len( source ) != 1: - raise Exception( "wrong" ) - - h = "const char * jsconcatcode" + outFile.split( "mongo" )[-1].replace( "-" , "_").split( ".cpp")[0] + " = \n" - - for l in open( str(source[0]) , 'r' ): - l = l.strip() - l = l.replace( '\\' , "\\\\" ) - l = l.replace( '"' , "\\\"" ) - - - h += '"' + l + "\\n\"\n " - - h += ";\n\n" - - out = open( outFile , 'w' ) - out.write( h ) + out = open( outFile , 'wb' ) + out.write( text ) out.close() # mongo_vstudio.cpp is in git as the .vcproj doesn't generate this file. 
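# Illustrative only: for an input such as shell/utils.js, the text written out
# above has roughly this shape (the escaped .js source lines themselves are
# elided here; only the structure is shown):
#
#   #include "bson/stringdata.h"
#   namespace mongo {
#   struct JSFile{ const char* name; const StringData& source; };
#   namespace JSFiles{
#   const StringData _jscode_raw_utils =
#   "... one cppEscape()d line of utils.js per string literal ...\n"
#   ;
#   extern const JSFile utils;
#   const JSFile utils = { "shell/utils.js" , _jscode_raw_utils };
#   } // namespace JSFiles
#   } // namespace mongo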
if outFile.find( "mongo.cpp" ) >= 0: - out = open( outFile.replace( "mongo" , "mongo_vstudio" ) , 'w' ) - out.write( h ) + out = open( outFile.replace( "mongo" , "mongo_vstudio" ) , 'wb' ) + out.write( text ) out.close() return None jshBuilder = Builder(action = jsToH, suffix = '.cpp', - src_suffix = '.jsall') + src_suffix = '.js') env.Append( BUILDERS={'JSHeader' : jshBuilder}) @@ -1143,7 +1074,7 @@ clientEnv = env.Clone(); clientEnv.Append( CPPPATH=["../"] ) clientEnv.Prepend( LIBS=[ "mongoclient"] ) clientEnv.Prepend( LIBPATH=["."] ) -#clientEnv["CPPDEFINES"].remove( "MONGO_EXPOSE_MACROS" ) +clientEnv["CPPDEFINES"].remove( "MONGO_EXPOSE_MACROS" ) l = clientEnv[ "LIBS" ] removeIfInList( l , "pcre" ) removeIfInList( l , "pcrecpp" ) @@ -1164,7 +1095,10 @@ def checkErrorCodes(): checkErrorCodes() # main db target -mongod = env.Program( "mongod" , commonFiles + coreDbFiles + coreServerFiles + serverOnlyFiles + [ "db/db.cpp" ] ) +mongodOnlyFiles = [ "db/db.cpp" ] +if windows: + mongodOnlyFiles.append( "util/ntservice.cpp" ) +mongod = env.Program( "mongod" , commonFiles + coreDbFiles + coreServerFiles + serverOnlyFiles + mongodOnlyFiles ) Default( mongod ) # tools @@ -1183,7 +1117,7 @@ mongos = env.Program( "mongos" , commonFiles + coreDbFiles + coreServerFiles + s # c++ library clientLibName = str( env.Library( "mongoclient" , allClientFiles )[0] ) -if GetOption( "sharedclient" ): +if has_option( "sharedclient" ): sharedClientLibName = str( env.SharedLibrary( "mongoclient" , allClientFiles )[0] ) env.Library( "mongotestfiles" , commonFiles + coreDbFiles + coreServerFiles + serverOnlyFiles + ["client/gridfs.cpp"]) env.Library( "mongoshellfiles" , allClientFiles + coreServerFiles ) @@ -1192,11 +1126,12 @@ clientTests = [] # examples clientTests += [ clientEnv.Program( "firstExample" , [ "client/examples/first.cpp" ] ) ] +clientTests += [ clientEnv.Program( "rsExample" , [ "client/examples/rs.cpp" ] ) ] clientTests += [ clientEnv.Program( "secondExample" , [ "client/examples/second.cpp" ] ) ] clientTests += [ clientEnv.Program( "whereExample" , [ "client/examples/whereExample.cpp" ] ) ] clientTests += [ clientEnv.Program( "authTest" , [ "client/examples/authTest.cpp" ] ) ] clientTests += [ clientEnv.Program( "httpClientTest" , [ "client/examples/httpClientTest.cpp" ] ) ] -# clientTests += [ clientEnv.Program( "bsondemo" , [ "bson/bsondemo/bsondemo.cpp" ] ) ] #TODO +clientTests += [ clientEnv.Program( "bsondemo" , [ "bson/bsondemo/bsondemo.cpp" ] ) ] # testing test = testEnv.Program( "test" , Glob( "dbtests/*.cpp" ) ) @@ -1210,6 +1145,7 @@ mongosniff_built = False if darwin or clientEnv["_HAVEPCAP"]: mongosniff_built = True sniffEnv = clientEnv.Clone() + sniffEnv.Append( CPPDEFINES="MONGO_EXPOSE_MACROS" ) if not windows: sniffEnv.Append( LIBS=[ "pcap" ] ) else: @@ -1218,11 +1154,9 @@ if darwin or clientEnv["_HAVEPCAP"]: # --- shell --- -env.JSConcat( "shell/mongo.jsall" , ["shell/utils.js","shell/db.js","shell/mongo.js","shell/mr.js","shell/query.js","shell/collection.js"] ) -env.JSHeader( "shell/mongo.jsall" ) +env.JSHeader( "shell/mongo.cpp" , ["shell/utils.js","shell/db.js","shell/mongo.js","shell/mr.js","shell/query.js","shell/collection.js"] ) -env.JSConcat( "shell/mongo-server.jsall" , [ "shell/servers.js"] ) -env.JSHeader( "shell/mongo-server.jsall" ) +env.JSHeader( "shell/mongo-server.cpp" , [ "shell/servers.js"] ) shellEnv = env.Clone(); @@ -1261,8 +1195,9 @@ elif not onlyServer: shell32BitFiles.append( "32bit/" + str( f ) ) for f in scriptingFiles: shell32BitFiles.append( "32bit/" + 
str( f ) ) - shellEnv.VariantDir( "32bit" , "." ) - shellEnv.Append( CPPPATH=["32bit/"] ) + for f in processInfoFiles: + shell32BitFiles.append( "32bit/" + str( f ) ) + shellEnv.VariantDir( "32bit" , "." , duplicate=1 ) else: shellEnv.Prepend( LIBPATH=[ "." ] ) @@ -1289,7 +1224,7 @@ smokeFlags = [] # Ugh. Frobbing the smokeFlags must precede using them to construct # actions, I think. -if GetOption( 'smokedbprefix') is not None: +if has_option( 'smokedbprefix'): smokeFlags += ['--smoke-db-prefix', GetOption( 'smokedbprefix')] if 'startMongodSmallOplog' in COMMAND_LINE_TARGETS: @@ -1302,7 +1237,15 @@ def addTest(name, deps, actions): smokeEnv.SideEffect( "dummySmokeSideEffect", name ) def addSmoketest( name, deps ): - addTest(name, deps, [ "python buildscripts/smoke.py " + " ".join(smokeFlags) + ' ' + name ]) + # Convert from smoke to test, smokeJs to js, and foo to foo + target = name + if name.startswith("smoke"): + if name == "smoke": + target = "test" + else: + target = name[5].lower() + name[6:] + + addTest(name, deps, [ "python buildscripts/smoke.py " + " ".join(smokeFlags) + ' ' + target ]) addSmoketest( "smoke", [ add_exe( "test" ) ] ) addSmoketest( "smokePerf", [ "perftest" ] ) @@ -1315,15 +1258,16 @@ if not onlyServer and not noshell: addSmoketest( "smokeClone", [ "mongo", "mongod" ] ) addSmoketest( "smokeRepl", [ "mongo", "mongod", "mongobridge" ] ) addSmoketest( "smokeReplSets", [ "mongo", "mongod", "mongobridge" ] ) - addSmoketest( "smokeDisk", [ add_exe( "mongo" ), add_exe( "mongod" ) ] ) + addSmoketest( "smokeDur", [ add_exe( "mongo" ) , add_exe( "mongod" ) ] ) + addSmoketest( "smokeDisk", [ add_exe( "mongo" ), add_exe( "mongod" ), add_exe( "mongodump" ), add_exe( "mongorestore" ) ] ) addSmoketest( "smokeAuth", [ add_exe( "mongo" ), add_exe( "mongod" ) ] ) addSmoketest( "smokeParallel", [ add_exe( "mongo" ), add_exe( "mongod" ) ] ) addSmoketest( "smokeSharding", [ "mongo", "mongod", "mongos" ] ) addSmoketest( "smokeJsPerf", [ "mongo" ] ) - addSmoketest("smokeJsSlowNightly", [add_exe("mongo")]) - addSmoketest("smokeJsSlowWeekly", [add_exe("mongo")]) + addSmoketest( "smokeJsSlowNightly", [add_exe("mongo")]) + addSmoketest( "smokeJsSlowWeekly", [add_exe("mongo")]) addSmoketest( "smokeQuota", [ "mongo" ] ) - addSmoketest( "smokeTool", [ add_exe( "mongo" ) ] ) + addSmoketest( "smokeTool", [ add_exe( "mongo" ), add_exe("mongod"), "tools" ] ) # Note: although the test running logic has been moved to # buildscripts/smoke.py, the interface to running the tests has been @@ -1408,13 +1352,38 @@ def build_docs(env, target, source): env.Alias("docs", [], [build_docs]) env.AlwaysBuild("docs") +# ---- astyle ---- + +def doStyling( env , target , source ): + + res = utils.execsys( "astyle --version" ) + res = " ".join(res) + if res.count( "2." ) == 0: + print( "astyle 2.x needed, found:" + res ) + Exit(-1) + + files = utils.getAllSourceFiles() + files = filter( lambda x: not x.endswith( ".c" ) , files ) + files.remove( "./shell/mongo_vstudio.cpp" ) + + cmd = "astyle --options=mongo_astyle " + " ".join( files ) + res = utils.execsys( cmd ) + print( res[0] ) + print( res[1] ) + + +env.Alias( "style" , [] , [ doStyling ] ) +env.AlwaysBuild( "style" ) + + + # ---- INSTALL ------- def getSystemInstallName(): n = platform + "-" + processor if static: n += "-static" - if GetOption("nostrip"): + if has_option("nostrip"): n += "-debugsymbols" if nix and os.uname()[2].startswith( "8." 
): n += "-tiger" @@ -1423,6 +1392,7 @@ def getSystemInstallName(): n += "-" + "-".join( moduleNames ) try: + findSettingsSetup() import settings if "distmod" in dir( settings ): n = n + "-" + str( settings.distmod ) @@ -1503,7 +1473,7 @@ def installBinary( e , name ): fullInstallName = installDir + "/bin/" + name allBinaries += [ name ] - if (solaris or linux) and (not GetOption("nostrip")): + if (solaris or linux) and (not has_option("nostrip")): e.AddPostAction( inst, e.Action( 'strip ' + fullInstallName ) ) if linux and len( COMMAND_LINE_TARGETS ) == 1 and str( COMMAND_LINE_TARGETS[0] ) == "s3dist": @@ -1542,7 +1512,7 @@ if installSetup.clientSrc: #lib if installSetup.libraries: env.Install( installDir + "/" + nixLibPrefix, clientLibName ) - if GetOption( "sharedclient" ): + if has_option( "sharedclient" ): env.Install( installDir + "/" + nixLibPrefix, sharedClientLibName ) @@ -1569,7 +1539,7 @@ if installSetup.clientTestsDir: env.Alias( "install" , installDir ) # aliases -env.Alias( "mongoclient" , GetOption( "sharedclient" ) and sharedClientLibName or clientLibName ) +env.Alias( "mongoclient" , has_option( "sharedclient" ) and sharedClientLibName or clientLibName ) # ---- CONVENIENCE ---- @@ -1605,9 +1575,7 @@ def s3push( localName , remoteName=None , remotePrefix=None , fixName=True , pla else: remotePrefix = "-" + distName - sys.path.append( "." ) - sys.path.append( ".." ) - sys.path.append( "../../" ) + findSettingsSetup() import simples3 import settings @@ -1676,7 +1644,7 @@ def build_and_test_client(env, target, source): call(scons_command + ["libmongoclient.a", "clientTests"], cwd=installDir) return bool(call(["python", "buildscripts/smoke.py", - "--test-path", installDir, "smokeClient"])) + "--test-path", installDir, "client"])) env.Alias("clientBuild", [mongod, installDir], [build_and_test_client]) env.AlwaysBuild("clientBuild") diff --git a/bson/bson-inl.h b/bson/bson-inl.h new file mode 100644 index 0000000..5b4c490 --- /dev/null +++ b/bson/bson-inl.h @@ -0,0 +1,665 @@ +// bsoninlines.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include "util/atomic_int.h" +#include "util/misc.h" +#include "../util/hex.h" + +namespace mongo { + + inline BSONObjIterator BSONObj::begin() { + return BSONObjIterator(*this); + } + + inline BSONObj BSONElement::embeddedObjectUserCheck() const { + if ( isABSONObj() ) + return BSONObj(value()); + stringstream ss; + ss << "invalid parameter: expected an object (" << fieldName() << ")"; + uasserted( 10065 , ss.str() ); + return BSONObj(); // never reachable + } + + inline BSONObj BSONElement::embeddedObject() const { + assert( isABSONObj() ); + return BSONObj(value()); + } + + inline BSONObj BSONElement::codeWScopeObject() const { + assert( type() == CodeWScope ); + int strSizeWNull = *(int *)( value() + 4 ); + return BSONObj( value() + 4 + 4 + strSizeWNull ); + } + + inline NOINLINE_DECL void BSONObj::_assertInvalid() const { + StringBuilder ss; + int os = objsize(); + ss << "Invalid BSONObj size: " << os << " (0x" << toHex( &os, 4 ) << ')'; + try { + BSONElement e = firstElement(); + ss << " first element: " << e.toString(); + } + catch ( ... ) { } + massert( 10334 , ss.str() , 0 ); + } + + /* the idea with NOINLINE_DECL here is to keep this from inlining in the + getOwned() method. the presumption being that is better. + */ + inline NOINLINE_DECL BSONObj BSONObj::copy() const { + char *p = (char*) malloc(objsize()); + memcpy(p, objdata(), objsize()); + return BSONObj(p, true); + } + + inline BSONObj BSONObj::getOwned() const { + if ( isOwned() ) + return *this; + return copy(); + } + + // wrap this element up as a singleton object. + inline BSONObj BSONElement::wrap() const { + BSONObjBuilder b(size()+6); + b.append(*this); + return b.obj(); + } + + inline BSONObj BSONElement::wrap( const char * newName ) const { + BSONObjBuilder b(size()+6+(int)strlen(newName)); + b.appendAs(*this,newName); + return b.obj(); + } + + inline bool BSONObj::hasElement(const char *name) const { + if ( !isEmpty() ) { + BSONObjIterator it(*this); + while ( it.moreWithEOO() ) { + BSONElement e = it.next(); + if ( strcmp(name, e.fieldName()) == 0 ) + return true; + } + } + return false; + } + + inline BSONElement BSONObj::getField(const StringData& name) const { + BSONObjIterator i(*this); + while ( i.more() ) { + BSONElement e = i.next(); + if ( strcmp(e.fieldName(), name.data()) == 0 ) + return e; + } + return BSONElement(); + } + + /* add all the fields from the object specified to this object */ + inline BSONObjBuilder& BSONObjBuilder::appendElements(BSONObj x) { + BSONObjIterator it(x); + while ( it.moreWithEOO() ) { + BSONElement e = it.next(); + if ( e.eoo() ) break; + append(e); + } + return *this; + } + + /* add all the fields from the object specified to this object if they don't exist */ + inline BSONObjBuilder& BSONObjBuilder::appendElementsUnique(BSONObj x) { + set have; + { + BSONObjIterator i = iterator(); + while ( i.more() ) + have.insert( i.next().fieldName() ); + } + + BSONObjIterator it(x); + while ( it.more() ) { + BSONElement e = it.next(); + if ( have.count( e.fieldName() ) ) + continue; + append(e); + } + return *this; + } + + + inline bool BSONObj::isValid() { + int x = objsize(); + return x > 0 && x <= BSONObjMaxInternalSize; + } + + inline bool BSONObj::getObjectID(BSONElement& e) const { + BSONElement f = getField("_id"); + if( !f.eoo() ) { + e = f; + return true; + } + return false; + } + + inline BSONObjBuilderValueStream::BSONObjBuilderValueStream( BSONObjBuilder * builder ) { + _fieldName = 0; + _builder = builder; + } + + template + inline 
BSONObjBuilder& BSONObjBuilderValueStream::operator<<( T value ) { + _builder->append(_fieldName, value); + _fieldName = 0; + return *_builder; + } + + inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const BSONElement& e ) { + _builder->appendAs( e , _fieldName ); + _fieldName = 0; + return *_builder; + } + + inline Labeler BSONObjBuilderValueStream::operator<<( const Labeler::Label &l ) { + return Labeler( l, this ); + } + + inline void BSONObjBuilderValueStream::endField( const char *nextFieldName ) { + if ( _fieldName && haveSubobj() ) { + _builder->append( _fieldName, subobj()->done() ); + } + _subobj.reset(); + _fieldName = nextFieldName; + } + + inline BSONObjBuilder *BSONObjBuilderValueStream::subobj() { + if ( !haveSubobj() ) + _subobj.reset( new BSONObjBuilder() ); + return _subobj.get(); + } + + template inline + BSONObjBuilder& Labeler::operator<<( T value ) { + s_->subobj()->append( l_.l_, value ); + return *s_->_builder; + } + + inline + BSONObjBuilder& Labeler::operator<<( const BSONElement& e ) { + s_->subobj()->appendAs( e, l_.l_ ); + return *s_->_builder; + } + + // {a: {b:1}} -> {a.b:1} + void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base=""); + inline BSONObj nested2dotted(const BSONObj& obj) { + BSONObjBuilder b; + nested2dotted(b, obj); + return b.obj(); + } + + // {a.b:1} -> {a: {b:1}} + void dotted2nested(BSONObjBuilder& b, const BSONObj& obj); + inline BSONObj dotted2nested(const BSONObj& obj) { + BSONObjBuilder b; + dotted2nested(b, obj); + return b.obj(); + } + + inline BSONObjIterator BSONObjBuilder::iterator() const { + const char * s = _b.buf() + _offset; + const char * e = _b.buf() + _b.len(); + return BSONObjIterator( s , e ); + } + + inline bool BSONObjBuilder::hasField( const StringData& name ) const { + BSONObjIterator i = iterator(); + while ( i.more() ) + if ( strcmp( name.data() , i.next().fieldName() ) == 0 ) + return true; + return false; + } + + /* WARNING: nested/dotted conversions are not 100% reversible + * nested2dotted(dotted2nested({a.b: {c:1}})) -> {a.b.c: 1} + * also, dotted2nested ignores order + */ + + typedef map BSONMap; + inline BSONMap bson2map(const BSONObj& obj) { + BSONMap m; + BSONObjIterator it(obj); + while (it.more()) { + BSONElement e = it.next(); + m[e.fieldName()] = e; + } + return m; + } + + struct BSONElementFieldNameCmp { + bool operator()( const BSONElement &l, const BSONElement &r ) const { + return strcmp( l.fieldName() , r.fieldName() ) <= 0; + } + }; + + typedef set BSONSortedElements; + inline BSONSortedElements bson2set( const BSONObj& obj ) { + BSONSortedElements s; + BSONObjIterator it(obj); + while ( it.more() ) + s.insert( it.next() ); + return s; + } + + inline string BSONObj::toString( bool isArray, bool full ) const { + if ( isEmpty() ) return "{}"; + StringBuilder s; + toString(s, isArray, full); + return s.str(); + } + inline void BSONObj::toString(StringBuilder& s, bool isArray, bool full ) const { + if ( isEmpty() ) { + s << "{}"; + return; + } + + s << ( isArray ? 
"[ " : "{ " ); + BSONObjIterator i(*this); + bool first = true; + while ( 1 ) { + massert( 10327 , "Object does not end with EOO", i.moreWithEOO() ); + BSONElement e = i.next( true ); + massert( 10328 , "Invalid element size", e.size() > 0 ); + massert( 10329 , "Element too large", e.size() < ( 1 << 30 ) ); + int offset = (int) (e.rawdata() - this->objdata()); + massert( 10330 , "Element extends past end of object", + e.size() + offset <= this->objsize() ); + e.validate(); + bool end = ( e.size() + offset == this->objsize() ); + if ( e.eoo() ) { + massert( 10331 , "EOO Before end of object", end ); + break; + } + if ( first ) + first = false; + else + s << ", "; + e.toString(s, !isArray, full ); + } + s << ( isArray ? " ]" : " }" ); + } + + extern unsigned getRandomNumber(); + + inline void BSONElement::validate() const { + const BSONType t = type(); + + switch( t ) { + case DBRef: + case Code: + case Symbol: + case mongo::String: { + unsigned x = (unsigned) valuestrsize(); + bool lenOk = x > 0 && x < (unsigned) BSONObjMaxInternalSize; + if( lenOk && valuestr()[x-1] == 0 ) + return; + StringBuilder buf; + buf << "Invalid dbref/code/string/symbol size: " << x; + if( lenOk ) + buf << " strnlen:" << mongo::strnlen( valuestr() , x ); + msgasserted( 10321 , buf.str() ); + break; + } + case CodeWScope: { + int totalSize = *( int * )( value() ); + massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 ); + int strSizeWNull = *( int * )( value() + 4 ); + massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 ); + massert( 10324 , "Invalid CodeWScope string size", + strSizeWNull > 0 && + (strSizeWNull - 1) == mongo::strnlen( codeWScopeCode(), strSizeWNull ) ); + massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 ); + int objSize = *( int * )( value() + 4 + 4 + strSizeWNull ); + massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize ); + // Subobject validation handled elsewhere. + } + case Object: + // We expect Object size validation to be handled elsewhere. + default: + break; + } + } + + inline int BSONElement::size( int maxLen ) const { + if ( totalSize >= 0 ) + return totalSize; + + int remain = maxLen - fieldNameSize() - 1; + + int x = 0; + switch ( type() ) { + case EOO: + case Undefined: + case jstNULL: + case MaxKey: + case MinKey: + break; + case mongo::Bool: + x = 1; + break; + case NumberInt: + x = 4; + break; + case Timestamp: + case mongo::Date: + case NumberDouble: + case NumberLong: + x = 8; + break; + case jstOID: + x = 12; + break; + case Symbol: + case Code: + case mongo::String: + massert( 10313 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = valuestrsize() + 4; + break; + case CodeWScope: + massert( 10314 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = objsize(); + break; + + case DBRef: + massert( 10315 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = valuestrsize() + 4 + 12; + break; + case Object: + case mongo::Array: + massert( 10316 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = objsize(); + break; + case BinData: + massert( 10317 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); + x = valuestrsize() + 4 + 1/*subtype*/; + break; + case RegEx: { + const char *p = value(); + size_t len1 = ( maxLen == -1 ) ? 
strlen( p ) : mongo::strnlen( p, remain ); + //massert( 10318 , "Invalid regex string", len1 != -1 ); // ERH - 4/28/10 - don't think this does anything + p = p + len1 + 1; + size_t len2; + if( maxLen == -1 ) + len2 = strlen( p ); + else { + size_t x = remain - len1 - 1; + assert( x <= 0x7fffffff ); + len2 = mongo::strnlen( p, (int) x ); + } + //massert( 10319 , "Invalid regex options string", len2 != -1 ); // ERH - 4/28/10 - don't think this does anything + x = (int) (len1 + 1 + len2 + 1); + } + break; + default: { + StringBuilder ss; + ss << "BSONElement: bad type " << (int) type(); + string msg = ss.str(); + massert( 10320 , msg.c_str(),false); + } + } + totalSize = x + fieldNameSize() + 1; // BSONType + + return totalSize; + } + + inline string BSONElement::toString( bool includeFieldName, bool full ) const { + StringBuilder s; + toString(s, includeFieldName, full); + return s.str(); + } + inline void BSONElement::toString(StringBuilder& s, bool includeFieldName, bool full ) const { + if ( includeFieldName && type() != EOO ) + s << fieldName() << ": "; + switch ( type() ) { + case EOO: + s << "EOO"; + break; + case mongo::Date: + s << "new Date(" << date() << ')'; + break; + case RegEx: { + s << "/" << regex() << '/'; + const char *p = regexFlags(); + if ( p ) s << p; + } + break; + case NumberDouble: + s.appendDoubleNice( number() ); + break; + case NumberLong: + s << _numberLong(); + break; + case NumberInt: + s << _numberInt(); + break; + case mongo::Bool: + s << ( boolean() ? "true" : "false" ); + break; + case Object: + embeddedObject().toString(s, false, full); + break; + case mongo::Array: + embeddedObject().toString(s, true, full); + break; + case Undefined: + s << "undefined"; + break; + case jstNULL: + s << "null"; + break; + case MaxKey: + s << "MaxKey"; + break; + case MinKey: + s << "MinKey"; + break; + case CodeWScope: + s << "CodeWScope( " + << codeWScopeCode() << ", " << codeWScopeObject().toString(false, full) << ")"; + break; + case Code: + if ( !full && valuestrsize() > 80 ) { + s.write(valuestr(), 70); + s << "..."; + } + else { + s.write(valuestr(), valuestrsize()-1); + } + break; + case Symbol: + case mongo::String: + s << '"'; + if ( !full && valuestrsize() > 80 ) { + s.write(valuestr(), 70); + s << "...\""; + } + else { + s.write(valuestr(), valuestrsize()-1); + s << '"'; + } + break; + case DBRef: + s << "DBRef('" << valuestr() << "',"; + { + mongo::OID *x = (mongo::OID *) (valuestr() + valuestrsize()); + s << *x << ')'; + } + break; + case jstOID: + s << "ObjectId('"; + s << __oid() << "')"; + break; + case BinData: + s << "BinData"; + if (full) { + int len; + const char* data = binDataClean(len); + s << '(' << binDataType() << ", " << toHex(data, len) << ')'; + } + break; + case Timestamp: + s << "Timestamp " << timestampTime() << "|" << timestampInc(); + break; + default: + s << "?type=" << type(); + break; + } + } + + /* return has eoo() true if no match + supports "." notation to reach into embedded objects + */ + inline BSONElement BSONObj::getFieldDotted(const char *name) const { + BSONElement e = getField( name ); + if ( e.eoo() ) { + const char *p = strchr(name, '.'); + if ( p ) { + string left(name, p-name); + BSONObj sub = getObjectField(left.c_str()); + return sub.isEmpty() ? BSONElement() : sub.getFieldDotted(p+1); + } + } + + return e; + } + + inline BSONObj BSONObj::getObjectField(const char *name) const { + BSONElement e = getField(name); + BSONType t = e.type(); + return t == Object || t == Array ? 
e.embeddedObject() : BSONObj(); + } + + inline int BSONObj::nFields() const { + int n = 0; + BSONObjIterator i(*this); + while ( i.moreWithEOO() ) { + BSONElement e = i.next(); + if ( e.eoo() ) + break; + n++; + } + return n; + } + + inline BSONObj::BSONObj() { + /* little endian ordering here, but perhaps that is ok regardless as BSON is spec'd + to be little endian external to the system. (i.e. the rest of the implementation of bson, + not this part, fails to support big endian) + */ + static char p[] = { /*size*/5, 0, 0, 0, /*eoo*/0 }; + _objdata = p; + } + + inline BSONObj BSONElement::Obj() const { return embeddedObjectUserCheck(); } + + inline BSONElement BSONElement::operator[] (const string& field) const { + BSONObj o = Obj(); + return o[field]; + } + + inline void BSONObj::elems(vector &v) const { + BSONObjIterator i(*this); + while( i.more() ) + v.push_back(i.next()); + } + + inline void BSONObj::elems(list &v) const { + BSONObjIterator i(*this); + while( i.more() ) + v.push_back(i.next()); + } + + template + void BSONObj::Vals(vector& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + T t; + i.next().Val(t); + v.push_back(t); + } + } + template + void BSONObj::Vals(list& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + T t; + i.next().Val(t); + v.push_back(t); + } + } + + template + void BSONObj::vals(vector& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + try { + T t; + i.next().Val(t); + v.push_back(t); + } + catch(...) { } + } + } + template + void BSONObj::vals(list& v) const { + BSONObjIterator i(*this); + while( i.more() ) { + try { + T t; + i.next().Val(t); + v.push_back(t); + } + catch(...) { } + } + } + + inline ostream& operator<<( ostream &s, const BSONObj &o ) { + return s << o.toString(); + } + + inline ostream& operator<<( ostream &s, const BSONElement &e ) { + return s << e.toString(); + } + + inline StringBuilder& operator<<( StringBuilder &s, const BSONObj &o ) { + o.toString( s ); + return s; + } + inline StringBuilder& operator<<( StringBuilder &s, const BSONElement &e ) { + e.toString( s ); + return s; + } + + + inline void BSONElement::Val(BSONObj& v) const { v = Obj(); } + + template + inline BSONFieldValue BSONField::query( const char * q , const T& t ) const { + BSONObjBuilder b; + b.append( q , t ); + return BSONFieldValue( _name , b.obj() ); + } +} diff --git a/bson/bson.h b/bson/bson.h index 3d92831..ba1b751 100644 --- a/bson/bson.h +++ b/bson/bson.h @@ -1,10 +1,10 @@ -/* NOTE: Standalone bson header for when not using MongoDB. +/* NOTE: Standalone bson header for when not using MongoDB. See also: bsondemo. MongoDB includes ../db/jsobj.h instead. This file, however, pulls in much less code / dependencies. */ -/** @file bson.h +/** @file bson.h BSON classes */ @@ -25,7 +25,7 @@ */ /** - BSONObj and its helpers + bo and its helpers "BSON" stands for "binary JSON" -- ie a binary way to represent objects that would be represented in JSON (plus a few extensions useful for databases & other languages). 
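A minimal usage sketch of this standalone header, modeled on bsondemo.cpp further below; the include path and the bo/bob shorthand for BSONObj/BSONObjBuilder are taken as given here, not introduced by this patch:

    #include <iostream>
    #include "bson/bson.h"            // assumed location of the standalone header
    using namespace bson;             // bo = BSONObj, bob = BSONObjBuilder

    int main() {
        bob b;                        // build { name: "joe", age: 33.7 }
        b.append("name", "joe");
        b.append("age", 33.7);
        bo o = b.obj();

        std::cout << o["name"].String() << ' ' << o["age"].Number() << '\n';

        for ( bo::iterator i(o); i.more(); )      // element iteration
            std::cout << ' ' << i.next().toString() << '\n';
        return 0;
    }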
@@ -47,15 +47,15 @@ #include #include "util/builder.h" -namespace bson { +namespace bson { using std::string; using std::stringstream; - class assertion : public std::exception { + class assertion : public std::exception { public: assertion( unsigned u , const string& s ) - : id( u ) , msg( s ){ + : id( u ) , msg( s ) { mongo::StringBuilder ss; ss << "BsonAssertion id: " << u << " " << s; full = ss.str(); @@ -64,7 +64,7 @@ namespace bson { virtual ~assertion() throw() {} virtual const char* what() const throw() { return full.c_str(); } - + unsigned id; string msg; string full; @@ -72,9 +72,9 @@ namespace bson { } namespace mongo { -#if !defined(assert) +#if !defined(assert) inline void assert(bool expr) { - if(!expr) { + if(!expr) { throw bson::assertion( 0 , "assertion failure in bson library" ); } } @@ -88,12 +88,12 @@ namespace mongo { if( !expr ) uasserted( msgid , msg ); } - inline void msgasserted(int msgid, const char *msg) { + inline void msgasserted(int msgid, const char *msg) { throw bson::assertion( msgid , msg ); } inline void msgasserted(int msgid, const std::string &msg) { msgasserted(msgid, msg.c_str()); } - inline void massert(unsigned msgid, std::string msg, bool expr) { - if(!expr) { + inline void massert(unsigned msgid, std::string msg, bool expr) { + if(!expr) { std::cout << "assertion failure in bson library: " << msgid << ' ' << msg << std::endl; throw bson::assertion( msgid , msg ); } @@ -108,15 +108,15 @@ namespace mongo { #include "../bson/bsonmisc.h" #include "../bson/bsonobjbuilder.h" #include "../bson/bsonobjiterator.h" -#include "../bson/bsoninlines.h" +#include "../bson/bson-inl.h" -namespace mongo { +namespace mongo { inline unsigned getRandomNumber() { #if defined(_WIN32) return rand(); #else - return random(); + return random(); #endif } diff --git a/bson/bson_db.h b/bson/bson_db.h index 18cd59f..71f92aa 100644 --- a/bson/bson_db.h +++ b/bson/bson_db.h @@ -1,10 +1,10 @@ -/** @file bson_db.h +/** @file bson_db.h - This file contains the implementation of BSON-related methods that are required + This file contains the implementation of BSON-related methods that are required by the MongoDB database server. - Normally, for standalone BSON usage, you do not want this file - it will tend to - pull in some other files from the MongoDB project. Thus, bson.h (the main file + Normally, for standalone BSON usage, you do not want this file - it will tend to + pull in some other files from the MongoDB project. Thus, bson.h (the main file one would use) does not include this file. */ @@ -26,6 +26,7 @@ #pragma once #include "../util/optime.h" +#include "../util/time_support.h" namespace mongo { @@ -34,10 +35,10 @@ namespace mongo { Append a timestamp element to the object being ebuilt. 
@param time - in millis (but stored in seconds) */ - inline BSONObjBuilder& BSONObjBuilder::appendTimestamp( const StringData& fieldName , unsigned long long time , unsigned int inc ){ + inline BSONObjBuilder& BSONObjBuilder::appendTimestamp( const StringData& fieldName , unsigned long long time , unsigned int inc ) { OpTime t( (unsigned) (time / 1000) , inc ); appendTimestamp( fieldName , t.asDate() ); - return *this; + return *this; } inline OpTime BSONElement::_opTime() const { @@ -47,7 +48,7 @@ namespace mongo { } inline string BSONElement::_asCode() const { - switch( type() ){ + switch( type() ) { case mongo::String: case Code: return string(valuestr(), valuestrsize()-1); @@ -60,11 +61,22 @@ namespace mongo { return ""; } - inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<(DateNowLabeler& id){ + inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<(DateNowLabeler& id) { _builder->appendDate(_fieldName, jsTime()); _fieldName = 0; return *_builder; } + inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<(MinKeyLabeler& id) { + _builder->appendMinKey(_fieldName); + _fieldName = 0; + return *_builder; + } + + inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<(MaxKeyLabeler& id) { + _builder->appendMaxKey(_fieldName); + _fieldName = 0; + return *_builder; + } } diff --git a/bson/bsondemo/bsondemo.cpp b/bson/bsondemo/bsondemo.cpp index b0da1b8..ec83f5e 100644 --- a/bson/bsondemo/bsondemo.cpp +++ b/bson/bsondemo/bsondemo.cpp @@ -1,4 +1,4 @@ -/** @file bsondemo.cpp +/** @file bsondemo.cpp Example of use of BSON from C++. @@ -29,17 +29,16 @@ using namespace std; using namespace bson; -void iter(bo o) { +void iter(bo o) { /* iterator example */ cout << "\niter()\n"; - for( bo::iterator i(o); i.more(); ) { + for( bo::iterator i(o); i.more(); ) { cout << ' ' << i.next().toString() << '\n'; } } -int main() -{ - cout << "build bits: " << 8 * sizeof(char *) << '\n' << endl; +int main() { + cout << "build bits: " << 8 * sizeof(char *) << '\n' << endl; /* a bson object defaults on construction to { } */ bo empty; @@ -47,7 +46,7 @@ int main() /* make a simple { name : 'joe', age : 33.7 } object */ { - bob b; + bob b; b.append("name", "joe"); b.append("age", 33.7); b.obj(); @@ -73,7 +72,7 @@ int main() /* reach in and get subobj.z */ cout << "subobj.z: " << y.getFieldDotted("subobj.z").Number() << endl; - + /* alternate syntax: */ cout << "subobj.z: " << y["subobj"]["z"].Number() << endl; @@ -83,19 +82,19 @@ int main() cout << v[0] << endl; /* into an array */ - list L; + list L; y.elems(L); bo sub = y["subobj"].Obj(); - /* grab all the int's that were in subobj. if it had elements that were not ints, we throw an exception - (capital V on Vals() means exception if wrong type found + /* grab all the int's that were in subobj. if it had elements that were not ints, we throw an exception + (capital V on Vals() means exception if wrong type found */ vector myints; sub.Vals(myints); cout << "my ints: " << myints[0] << ' ' << myints[1] << endl; - /* grab all the string values from x. if the field isn't of string type, just skip it -- + /* grab all the string values from x. if the field isn't of string type, just skip it -- lowercase v on vals() indicates skip don't throw. 
*/ vector strs; @@ -103,5 +102,6 @@ int main() cout << strs.size() << " strings, first one: " << strs[0] << endl; iter(y); - return 0; + return 0; } + diff --git a/bson/bsonelement.h b/bson/bsonelement.h index 534c773..23d59fa 100644 --- a/bson/bsonelement.h +++ b/bson/bsonelement.h @@ -36,378 +36,384 @@ namespace mongo { int compareElementValues(const BSONElement& l, const BSONElement& r); -/** BSONElement represents an "element" in a BSONObj. So for the object { a : 3, b : "abc" }, - 'a : 3' is the first element (key+value). - - The BSONElement object points into the BSONObj's data. Thus the BSONObj must stay in scope - for the life of the BSONElement. - - internals: - - -------- size() ------------ - -fieldNameSize- - value() - type() -*/ -class BSONElement { -public: - /** These functions, which start with a capital letter, throw a UserException if the - element is not of the required type. Example: - - string foo = obj["foo"].String(); // exception if not a string type or DNE + /** BSONElement represents an "element" in a BSONObj. So for the object { a : 3, b : "abc" }, + 'a : 3' is the first element (key+value). + + The BSONElement object points into the BSONObj's data. Thus the BSONObj must stay in scope + for the life of the BSONElement. + + internals: + + -------- size() ------------ + -fieldNameSize- + value() + type() */ - string String() const { return chk(mongo::String).valuestr(); } - Date_t Date() const { return chk(mongo::Date).date(); } - double Number() const { return chk(isNumber()).number(); } - double Double() const { return chk(NumberDouble)._numberDouble(); } - long long Long() const { return chk(NumberLong)._numberLong(); } - int Int() const { return chk(NumberInt)._numberInt(); } - bool Bool() const { return chk(mongo::Bool).boolean(); } - BSONObj Obj() const; - vector Array() const; // see implementation for detailed comments - mongo::OID OID() const { return chk(jstOID).__oid(); } - void Null() const { chk(isNull()); } - void OK() const { chk(ok()); } - - /** populate v with the value of the element. If type does not match, throw exception. - useful in templates -- see also BSONObj::Vals(). + class BSONElement { + public: + /** These functions, which start with a capital letter, throw a UserException if the + element is not of the required type. Example: + + string foo = obj["foo"].String(); // exception if not a string type or DNE */ - void Val(Date_t& v) const { v = Date(); } - void Val(long long& v) const { v = Long(); } - void Val(bool& v) const { v = Bool(); } - void Val(BSONObj& v) const; - void Val(mongo::OID& v) const { v = OID(); } - void Val(int& v) const { v = Int(); } - void Val(double& v) const { v = Double(); } - void Val(string& v) const { v = String(); } - - /** Use ok() to check if a value is assigned: - if( myObj["foo"].ok() ) ... 
- */ - bool ok() const { return !eoo(); } + string String() const { return chk(mongo::String).valuestr(); } + Date_t Date() const { return chk(mongo::Date).date(); } + double Number() const { return chk(isNumber()).number(); } + double Double() const { return chk(NumberDouble)._numberDouble(); } + long long Long() const { return chk(NumberLong)._numberLong(); } + int Int() const { return chk(NumberInt)._numberInt(); } + bool Bool() const { return chk(mongo::Bool).boolean(); } + vector Array() const; // see implementation for detailed comments + mongo::OID OID() const { return chk(jstOID).__oid(); } + void Null() const { chk(isNull()); } // throw UserException if not null + void OK() const { chk(ok()); } // throw UserException if element DNE + + /** @return the embedded object associated with this field. + Note the returned object is a reference to within the parent bson object. If that + object is out of scope, this pointer will no longer be valid. Call getOwned() on the + returned BSONObj if you need your own copy. + throws UserException if the element is not of type object. + */ + BSONObj Obj() const; + + /** populate v with the value of the element. If type does not match, throw exception. + useful in templates -- see also BSONObj::Vals(). + */ + void Val(Date_t& v) const { v = Date(); } + void Val(long long& v) const { v = Long(); } + void Val(bool& v) const { v = Bool(); } + void Val(BSONObj& v) const; + void Val(mongo::OID& v) const { v = OID(); } + void Val(int& v) const { v = Int(); } + void Val(double& v) const { v = Double(); } + void Val(string& v) const { v = String(); } + + /** Use ok() to check if a value is assigned: + if( myObj["foo"].ok() ) ... + */ + bool ok() const { return !eoo(); } - string toString( bool includeFieldName = true, bool full=false) const; - void toString(StringBuilder& s, bool includeFieldName = true, bool full=false) const; - string jsonString( JsonStringFormat format, bool includeFieldNames = true, int pretty = 0 ) const; - operator string() const { return toString(); } + string toString( bool includeFieldName = true, bool full=false) const; + void toString(StringBuilder& s, bool includeFieldName = true, bool full=false) const; + string jsonString( JsonStringFormat format, bool includeFieldNames = true, int pretty = 0 ) const; + operator string() const { return toString(); } - /** Returns the type of the element */ - BSONType type() const { return (BSONType) *data; } + /** Returns the type of the element */ + BSONType type() const { return (BSONType) *data; } - /** retrieve a field within this element - throws exception if *this is not an embedded object - */ - BSONElement operator[] (const string& field) const; - - /** returns the tyoe of the element fixed for the main type - the main purpose is numbers. any numeric type will return NumberDouble - Note: if the order changes, indexes have to be re-built or than can be corruption - */ - int canonicalType() const; + /** retrieve a field within this element + throws exception if *this is not an embedded object + */ + BSONElement operator[] (const string& field) const; - /** Indicates if it is the end-of-object element, which is present at the end of - every BSON object. - */ - bool eoo() const { return type() == EOO; } + /** returns the tyoe of the element fixed for the main type + the main purpose is numbers. any numeric type will return NumberDouble + Note: if the order changes, indexes have to be re-built or than can be corruption + */ + int canonicalType() const; - /** Size of the element. 
- @param maxLen If maxLen is specified, don't scan more than maxLen bytes to calculate size. - */ - int size( int maxLen = -1 ) const; + /** Indicates if it is the end-of-object element, which is present at the end of + every BSON object. + */ + bool eoo() const { return type() == EOO; } - /** Wrap this element up as a singleton object. */ - BSONObj wrap() const; + /** Size of the element. + @param maxLen If maxLen is specified, don't scan more than maxLen bytes to calculate size. + */ + int size( int maxLen = -1 ) const; - /** Wrap this element up as a singleton object with a new name. */ - BSONObj wrap( const char* newName) const; + /** Wrap this element up as a singleton object. */ + BSONObj wrap() const; - /** field name of the element. e.g., for - name : "Joe" - "name" is the fieldname - */ - const char * fieldName() const { - if ( eoo() ) return ""; // no fieldname for it. - return data + 1; - } + /** Wrap this element up as a singleton object with a new name. */ + BSONObj wrap( const char* newName) const; - /** raw data of the element's value (so be careful). */ - const char * value() const { - return (data + fieldNameSize() + 1); - } - /** size in bytes of the element's value (when applicable). */ - int valuesize() const { - return size() - fieldNameSize() - 1; - } + /** field name of the element. e.g., for + name : "Joe" + "name" is the fieldname + */ + const char * fieldName() const { + if ( eoo() ) return ""; // no fieldname for it. + return data + 1; + } - bool isBoolean() const { return type() == mongo::Bool; } + /** raw data of the element's value (so be careful). */ + const char * value() const { + return (data + fieldNameSize() + 1); + } + /** size in bytes of the element's value (when applicable). */ + int valuesize() const { + return size() - fieldNameSize() - 1; + } - /** @return value of a boolean element. - You must assure element is a boolean before - calling. */ - bool boolean() const { - return *value() ? true : false; - } + bool isBoolean() const { return type() == mongo::Bool; } - /** Retrieve a java style date value from the element. - Ensure element is of type Date before calling. - */ - Date_t date() const { - return *reinterpret_cast< const Date_t* >( value() ); - } + /** @return value of a boolean element. + You must assure element is a boolean before + calling. */ + bool boolean() const { + return *value() ? true : false; + } - /** Convert the value to boolean, regardless of its type, in a javascript-like fashion - (i.e., treat zero and null as false). - */ - bool trueValue() const; + /** Retrieve a java style date value from the element. + Ensure element is of type Date before calling. + */ + Date_t date() const { + return *reinterpret_cast< const Date_t* >( value() ); + } - /** True if number, string, bool, date, OID */ - bool isSimpleType() const; + /** Convert the value to boolean, regardless of its type, in a javascript-like fashion + (i.e., treat zero and null as false). + */ + bool trueValue() const; + + /** True if number, string, bool, date, OID */ + bool isSimpleType() const; + + /** True if element is of a numeric type. */ + bool isNumber() const; + + /** Return double value for this field. MUST be NumberDouble type. */ + double _numberDouble() const {return *reinterpret_cast< const double* >( value() ); } + /** Return double value for this field. MUST be NumberInt type. */ + int _numberInt() const {return *reinterpret_cast< const int* >( value() ); } + /** Return double value for this field. MUST be NumberLong type. 
*/ + long long _numberLong() const {return *reinterpret_cast< const long long* >( value() ); } + + /** Retrieve int value for the element safely. Zero returned if not a number. */ + int numberInt() const; + /** Retrieve long value for the element safely. Zero returned if not a number. */ + long long numberLong() const; + /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. + Note: casts to double, data loss may occur with large (>52 bit) NumberLong values. + */ + double numberDouble() const; + /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. + Note: casts to double, data loss may occur with large (>52 bit) NumberLong values. + */ + double number() const { return numberDouble(); } - /** True if element is of a numeric type. */ - bool isNumber() const; + /** Retrieve the object ID stored in the object. + You must ensure the element is of type jstOID first. */ + const mongo::OID &__oid() const { return *reinterpret_cast< const mongo::OID* >( value() ); } - /** Return double value for this field. MUST be NumberDouble type. */ - double _numberDouble() const {return *reinterpret_cast< const double* >( value() ); } - /** Return double value for this field. MUST be NumberInt type. */ - int _numberInt() const {return *reinterpret_cast< const int* >( value() ); } - /** Return double value for this field. MUST be NumberLong type. */ - long long _numberLong() const {return *reinterpret_cast< const long long* >( value() ); } + /** True if element is null. */ + bool isNull() const { + return type() == jstNULL; + } - /** Retrieve int value for the element safely. Zero returned if not a number. */ - int numberInt() const; - /** Retrieve long value for the element safely. Zero returned if not a number. */ - long long numberLong() const; - /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. - Note: casts to double, data loss may occur with large (>52 bit) NumberLong values. - */ - double numberDouble() const; - /** Retrieve the numeric value of the element. If not of a numeric type, returns 0. - Note: casts to double, data loss may occur with large (>52 bit) NumberLong values. - */ - double number() const { return numberDouble(); } + /** Size (length) of a string element. + You must assure of type String first. */ + int valuestrsize() const { + return *reinterpret_cast< const int* >( value() ); + } - /** Retrieve the object ID stored in the object. - You must ensure the element is of type jstOID first. */ - const mongo::OID &__oid() const { return *reinterpret_cast< const mongo::OID* >( value() ); } + // for objects the size *includes* the size of the size field + int objsize() const { + return *reinterpret_cast< const int* >( value() ); + } - /** True if element is null. */ - bool isNull() const { - return type() == jstNULL; - } - - /** Size (length) of a string element. - You must assure of type String first. */ - int valuestrsize() const { - return *reinterpret_cast< const int* >( value() ); - } + /** Get a string's value. Also gives you start of the real data for an embedded object. + You must assure data is of an appropriate type first -- see also valuestrsafe(). + */ + const char * valuestr() const { + return value() + 4; + } - // for objects the size *includes* the size of the size field - int objsize() const { - return *reinterpret_cast< const int* >( value() ); - } + /** Get the string value of the element. If not a string returns "". 
*/ + const char *valuestrsafe() const { + return type() == mongo::String ? valuestr() : ""; + } + /** Get the string value of the element. If not a string returns "". */ + string str() const { + return type() == mongo::String ? string(valuestr(), valuestrsize()-1) : string(); + } - /** Get a string's value. Also gives you start of the real data for an embedded object. - You must assure data is of an appropriate type first -- see also valuestrsafe(). - */ - const char * valuestr() const { - return value() + 4; - } + /** Get javascript code of a CodeWScope data element. */ + const char * codeWScopeCode() const { + return value() + 8; + } + /** Get the scope SavedContext of a CodeWScope data element. */ + const char * codeWScopeScopeData() const { + // TODO fix + return codeWScopeCode() + strlen( codeWScopeCode() ) + 1; + } - /** Get the string value of the element. If not a string returns "". */ - const char *valuestrsafe() const { - return type() == mongo::String ? valuestr() : ""; - } - /** Get the string value of the element. If not a string returns "". */ - string str() const { - return type() == mongo::String ? string(valuestr(), valuestrsize()-1) : string(); - } + /** Get the embedded object this element holds. */ + BSONObj embeddedObject() const; - /** Get javascript code of a CodeWScope data element. */ - const char * codeWScopeCode() const { - return value() + 8; - } - /** Get the scope SavedContext of a CodeWScope data element. */ - const char * codeWScopeScopeData() const { - // TODO fix - return codeWScopeCode() + strlen( codeWScopeCode() ) + 1; - } + /* uasserts if not an object */ + BSONObj embeddedObjectUserCheck() const; - /** Get the embedded object this element holds. */ - BSONObj embeddedObject() const; + BSONObj codeWScopeObject() const; - /* uasserts if not an object */ - BSONObj embeddedObjectUserCheck() const; + /** Get raw binary data. Element must be of type BinData. Doesn't handle type 2 specially */ + const char *binData(int& len) const { + // BinData: + assert( type() == BinData ); + len = valuestrsize(); + return value() + 5; + } + /** Get binary data. Element must be of type BinData. Handles type 2 */ + const char *binDataClean(int& len) const { + // BinData: + if (binDataType() != ByteArrayDeprecated) { + return binData(len); + } + else { + // Skip extra size + len = valuestrsize() - 4; + return value() + 5 + 4; + } + } - BSONObj codeWScopeObject() const; + BinDataType binDataType() const { + // BinData: + assert( type() == BinData ); + unsigned char c = (value() + 4)[0]; + return (BinDataType)c; + } - /** Get raw binary data. Element must be of type BinData. Doesn't handle type 2 specially */ - const char *binData(int& len) const { - // BinData: - assert( type() == BinData ); - len = valuestrsize(); - return value() + 5; - } - /** Get binary data. Element must be of type BinData. 
Handles type 2 */ - const char *binDataClean(int& len) const { - // BinData: - if (binDataType() != ByteArrayDeprecated){ - return binData(len); - } else { - // Skip extra size - len = valuestrsize() - 4; - return value() + 5 + 4; + /** Retrieve the regex string for a Regex element */ + const char *regex() const { + assert(type() == RegEx); + return value(); } - } - - BinDataType binDataType() const { - // BinData: - assert( type() == BinData ); - unsigned char c = (value() + 4)[0]; - return (BinDataType)c; - } - /** Retrieve the regex string for a Regex element */ - const char *regex() const { - assert(type() == RegEx); - return value(); - } + /** Retrieve the regex flags (options) for a Regex element */ + const char *regexFlags() const { + const char *p = regex(); + return p + strlen(p) + 1; + } - /** Retrieve the regex flags (options) for a Regex element */ - const char *regexFlags() const { - const char *p = regex(); - return p + strlen(p) + 1; - } + /** like operator== but doesn't check the fieldname, + just the value. + */ + bool valuesEqual(const BSONElement& r) const { + return woCompare( r , false ) == 0; + } - /** like operator== but doesn't check the fieldname, - just the value. - */ - bool valuesEqual(const BSONElement& r) const { - return woCompare( r , false ) == 0; - } + /** Returns true if elements are equal. */ + bool operator==(const BSONElement& r) const { + return woCompare( r , true ) == 0; + } - /** Returns true if elements are equal. */ - bool operator==(const BSONElement& r) const { - return woCompare( r , true ) == 0; - } + /** Well ordered comparison. + @return <0: l0:l>r + order by type, field name, and field value. + If considerFieldName is true, pay attention to the field name. + */ + int woCompare( const BSONElement &e, bool considerFieldName = true ) const; - /** Well ordered comparison. - @return <0: l0:l>r - order by type, field name, and field value. - If considerFieldName is true, pay attention to the field name. - */ - int woCompare( const BSONElement &e, bool considerFieldName = true ) const; + const char * rawdata() const { return data; } - const char * rawdata() const { - return data; - } - - /** 0 == Equality, just not defined yet */ - int getGtLtOp( int def = 0 ) const; - - /** Constructs an empty element */ - BSONElement(); - - /** Check that data is internally consistent. */ - void validate() const; - - /** True if this element may contain subobjects. */ - bool mayEncapsulate() const { - switch ( type() ){ - case Object: - case mongo::Array: - case CodeWScope: - return true; - default: - return false; + /** 0 == Equality, just not defined yet */ + int getGtLtOp( int def = 0 ) const; + + /** Constructs an empty element */ + BSONElement(); + + /** Check that data is internally consistent. */ + void validate() const; + + /** True if this element may contain subobjects. 
*/ + bool mayEncapsulate() const { + switch ( type() ) { + case Object: + case mongo::Array: + case CodeWScope: + return true; + default: + return false; + } } - } - /** True if this element can be a BSONObj */ - bool isABSONObj() const { - switch( type() ){ - case Object: - case mongo::Array: - return true; - default: - return false; + /** True if this element can be a BSONObj */ + bool isABSONObj() const { + switch( type() ) { + case Object: + case mongo::Array: + return true; + default: + return false; + } } - } - Date_t timestampTime() const{ - unsigned long long t = ((unsigned int*)(value() + 4 ))[0]; - return t * 1000; - } - unsigned int timestampInc() const{ - return ((unsigned int*)(value() ))[0]; - } + Date_t timestampTime() const { + unsigned long long t = ((unsigned int*)(value() + 4 ))[0]; + return t * 1000; + } + unsigned int timestampInc() const { + return ((unsigned int*)(value() ))[0]; + } - const char * dbrefNS() const { - uassert( 10063 , "not a dbref" , type() == DBRef ); - return value() + 4; - } + const char * dbrefNS() const { + uassert( 10063 , "not a dbref" , type() == DBRef ); + return value() + 4; + } - const mongo::OID& dbrefOID() const { - uassert( 10064 , "not a dbref" , type() == DBRef ); - const char * start = value(); - start += 4 + *reinterpret_cast< const int* >( start ); - return *reinterpret_cast< const mongo::OID* >( start ); - } + const mongo::OID& dbrefOID() const { + uassert( 10064 , "not a dbref" , type() == DBRef ); + const char * start = value(); + start += 4 + *reinterpret_cast< const int* >( start ); + return *reinterpret_cast< const mongo::OID* >( start ); + } - bool operator<( const BSONElement& other ) const { - int x = (int)canonicalType() - (int)other.canonicalType(); - if ( x < 0 ) return true; - else if ( x > 0 ) return false; - return compareElementValues(*this,other) < 0; - } - - // If maxLen is specified, don't scan more than maxLen bytes. - explicit BSONElement(const char *d, int maxLen = -1) : data(d) { - fieldNameSize_ = -1; - if ( eoo() ) - fieldNameSize_ = 0; - else { - if ( maxLen != -1 ) { - int size = (int) strnlen( fieldName(), maxLen - 1 ); - massert( 10333 , "Invalid field name", size != -1 ); - fieldNameSize_ = size + 1; - } + bool operator<( const BSONElement& other ) const { + int x = (int)canonicalType() - (int)other.canonicalType(); + if ( x < 0 ) return true; + else if ( x > 0 ) return false; + return compareElementValues(*this,other) < 0; } - totalSize = -1; - } - string _asCode() const; - OpTime _opTime() const; + // If maxLen is specified, don't scan more than maxLen bytes. 
+ explicit BSONElement(const char *d, int maxLen = -1) : data(d) { + fieldNameSize_ = -1; + if ( eoo() ) + fieldNameSize_ = 0; + else { + if ( maxLen != -1 ) { + int size = (int) strnlen( fieldName(), maxLen - 1 ); + massert( 10333 , "Invalid field name", size != -1 ); + fieldNameSize_ = size + 1; + } + } + totalSize = -1; + } -private: - const char *data; - mutable int fieldNameSize_; // cached value - int fieldNameSize() const { - if ( fieldNameSize_ == -1 ) - fieldNameSize_ = (int)strlen( fieldName() ) + 1; - return fieldNameSize_; - } - mutable int totalSize; /* caches the computed size */ + string _asCode() const; + OpTime _opTime() const; - friend class BSONObjIterator; - friend class BSONObj; - const BSONElement& chk(int t) const { - if ( t != type() ){ - StringBuilder ss; - ss << "wrong type for BSONElement (" << fieldName() << ") " << type() << " != " << t; - uasserted(13111, ss.str() ); + private: + const char *data; + mutable int fieldNameSize_; // cached value + int fieldNameSize() const { + if ( fieldNameSize_ == -1 ) + fieldNameSize_ = (int)strlen( fieldName() ) + 1; + return fieldNameSize_; } - return *this; - } - const BSONElement& chk(bool expr) const { - uassert(13118, "unexpected or missing type value in BSON object", expr); - return *this; - } -}; + mutable int totalSize; /* caches the computed size */ + + friend class BSONObjIterator; + friend class BSONObj; + const BSONElement& chk(int t) const { + if ( t != type() ) { + StringBuilder ss; + ss << "wrong type for BSONElement (" << fieldName() << ") " << type() << " != " << t; + uasserted(13111, ss.str() ); + } + return *this; + } + const BSONElement& chk(bool expr) const { + uassert(13118, "unexpected or missing type value in BSON object", expr); + return *this; + } + }; inline int BSONElement::canonicalType() const { BSONType t = type(); - switch ( t ){ + switch ( t ) { case MinKey: case MaxKey: return t; @@ -448,7 +454,7 @@ private: assert(0); return -1; } - } + } inline bool BSONElement::trueValue() const { switch( type() ) { @@ -464,7 +470,7 @@ private: case jstNULL: case Undefined: return false; - + default: ; } @@ -478,13 +484,13 @@ private: case NumberDouble: case NumberInt: return true; - default: + default: return false; } } inline bool BSONElement::isSimpleType() const { - switch( type() ){ + switch( type() ) { case NumberLong: case NumberDouble: case NumberInt: @@ -493,7 +499,7 @@ private: case mongo::Date: case jstOID: return true; - default: + default: return false; } } @@ -512,7 +518,7 @@ private: } /** Retrieve int value for the element safely. Zero returned if not a number. Converted to int if another numeric type. */ - inline int BSONElement::numberInt() const { + inline int BSONElement::numberInt() const { switch( type() ) { case NumberDouble: return (int) _numberDouble(); @@ -526,7 +532,7 @@ private: } /** Retrieve long value for the element safely. Zero returned if not a number. */ - inline long long BSONElement::numberLong() const { + inline long long BSONElement::numberLong() const { switch( type() ) { case NumberDouble: return (long long) _numberDouble(); @@ -537,7 +543,7 @@ private: default: return 0; } - } + } inline BSONElement::BSONElement() { static char z = 0; diff --git a/bson/bsoninlines.h b/bson/bsoninlines.h deleted file mode 100644 index 0a2e59b..0000000 --- a/bson/bsoninlines.h +++ /dev/null @@ -1,588 +0,0 @@ -// bsoninlines.h - -/* Copyright 2009 10gen Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include "util/atomic_int.h" -#include "util/misc.h" -#include "../util/hex.h" - -namespace mongo { - - inline BSONObjIterator BSONObj::begin() { - return BSONObjIterator(*this); - } - - inline BSONObj BSONElement::embeddedObjectUserCheck() const { - uassert( 10065 , "invalid parameter: expected an object", isABSONObj() ); - return BSONObj(value()); - } - - inline BSONObj BSONElement::embeddedObject() const { - assert( isABSONObj() ); - return BSONObj(value()); - } - - inline BSONObj BSONElement::codeWScopeObject() const { - assert( type() == CodeWScope ); - int strSizeWNull = *(int *)( value() + 4 ); - return BSONObj( value() + 4 + 4 + strSizeWNull ); - } - - inline BSONObj BSONObj::copy() const { - char *p = (char*) malloc(objsize()); - memcpy(p, objdata(), objsize()); - return BSONObj(p, true); - } - - // wrap this element up as a singleton object. - inline BSONObj BSONElement::wrap() const { - BSONObjBuilder b(size()+6); - b.append(*this); - return b.obj(); - } - - inline BSONObj BSONElement::wrap( const char * newName ) const { - BSONObjBuilder b(size()+6+(int)strlen(newName)); - b.appendAs(*this,newName); - return b.obj(); - } - - - inline bool BSONObj::hasElement(const char *name) const { - if ( !isEmpty() ) { - BSONObjIterator it(*this); - while ( it.moreWithEOO() ) { - BSONElement e = it.next(); - if ( strcmp(name, e.fieldName()) == 0 ) - return true; - } - } - return false; - } - - inline BSONElement BSONObj::getField(const StringData& name) const { - BSONObjIterator i(*this); - while ( i.more() ) { - BSONElement e = i.next(); - if ( strcmp(e.fieldName(), name.data()) == 0 ) - return e; - } - return BSONElement(); - } - - /* add all the fields from the object specified to this object */ - inline BSONObjBuilder& BSONObjBuilder::appendElements(BSONObj x) { - BSONObjIterator it(x); - while ( it.moreWithEOO() ) { - BSONElement e = it.next(); - if ( e.eoo() ) break; - append(e); - } - return *this; - } - - inline bool BSONObj::isValid(){ - int x = objsize(); - return x > 0 && x <= 1024 * 1024 * 8; - } - - inline bool BSONObj::getObjectID(BSONElement& e) const { - BSONElement f = getField("_id"); - if( !f.eoo() ) { - e = f; - return true; - } - return false; - } - - inline BSONObjBuilderValueStream::BSONObjBuilderValueStream( BSONObjBuilder * builder ) { - _fieldName = 0; - _builder = builder; - } - - template - inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( T value ) { - _builder->append(_fieldName, value); - _fieldName = 0; - return *_builder; - } - - inline BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const BSONElement& e ) { - _builder->appendAs( e , _fieldName ); - _fieldName = 0; - return *_builder; - } - - inline Labeler BSONObjBuilderValueStream::operator<<( const Labeler::Label &l ) { - return Labeler( l, this ); - } - - inline void BSONObjBuilderValueStream::endField( const char *nextFieldName ) { - if ( _fieldName && haveSubobj() ) { - _builder->append( 
_fieldName, subobj()->done() ); - } - _subobj.reset(); - _fieldName = nextFieldName; - } - - inline BSONObjBuilder *BSONObjBuilderValueStream::subobj() { - if ( !haveSubobj() ) - _subobj.reset( new BSONObjBuilder() ); - return _subobj.get(); - } - - template inline - BSONObjBuilder& Labeler::operator<<( T value ) { - s_->subobj()->append( l_.l_, value ); - return *s_->_builder; - } - - inline - BSONObjBuilder& Labeler::operator<<( const BSONElement& e ) { - s_->subobj()->appendAs( e, l_.l_ ); - return *s_->_builder; - } - - // {a: {b:1}} -> {a.b:1} - void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base=""); - inline BSONObj nested2dotted(const BSONObj& obj){ - BSONObjBuilder b; - nested2dotted(b, obj); - return b.obj(); - } - - // {a.b:1} -> {a: {b:1}} - void dotted2nested(BSONObjBuilder& b, const BSONObj& obj); - inline BSONObj dotted2nested(const BSONObj& obj){ - BSONObjBuilder b; - dotted2nested(b, obj); - return b.obj(); - } - - inline BSONObjIterator BSONObjBuilder::iterator() const { - const char * s = _b.buf() + _offset; - const char * e = _b.buf() + _b.len(); - return BSONObjIterator( s , e ); - } - - /* WARNING: nested/dotted conversions are not 100% reversible - * nested2dotted(dotted2nested({a.b: {c:1}})) -> {a.b.c: 1} - * also, dotted2nested ignores order - */ - - typedef map BSONMap; - inline BSONMap bson2map(const BSONObj& obj){ - BSONMap m; - BSONObjIterator it(obj); - while (it.more()){ - BSONElement e = it.next(); - m[e.fieldName()] = e; - } - return m; - } - - struct BSONElementFieldNameCmp { - bool operator()( const BSONElement &l, const BSONElement &r ) const { - return strcmp( l.fieldName() , r.fieldName() ) <= 0; - } - }; - - typedef set BSONSortedElements; - inline BSONSortedElements bson2set( const BSONObj& obj ){ - BSONSortedElements s; - BSONObjIterator it(obj); - while ( it.more() ) - s.insert( it.next() ); - return s; - } - - inline string BSONObj::toString( bool isArray, bool full ) const { - if ( isEmpty() ) return "{}"; - StringBuilder s; - toString(s, isArray, full); - return s.str(); - } - inline void BSONObj::toString(StringBuilder& s, bool isArray, bool full ) const { - if ( isEmpty() ){ - s << "{}"; - return; - } - - s << ( isArray ? "[ " : "{ " ); - BSONObjIterator i(*this); - bool first = true; - while ( 1 ) { - massert( 10327 , "Object does not end with EOO", i.moreWithEOO() ); - BSONElement e = i.next( true ); - massert( 10328 , "Invalid element size", e.size() > 0 ); - massert( 10329 , "Element too large", e.size() < ( 1 << 30 ) ); - int offset = (int) (e.rawdata() - this->objdata()); - massert( 10330 , "Element extends past end of object", - e.size() + offset <= this->objsize() ); - e.validate(); - bool end = ( e.size() + offset == this->objsize() ); - if ( e.eoo() ) { - massert( 10331 , "EOO Before end of object", end ); - break; - } - if ( first ) - first = false; - else - s << ", "; - e.toString(s, !isArray, full ); - } - s << ( isArray ? 
" ]" : " }" ); - } - - extern unsigned getRandomNumber(); - - inline void BSONElement::validate() const { - const BSONType t = type(); - - switch( t ) { - case DBRef: - case Code: - case Symbol: - case mongo::String: { - int x = valuestrsize(); - if ( x > 0 && valuestr()[x-1] == 0 ) - return; - StringBuilder buf; - buf << "Invalid dbref/code/string/symbol size: " << x << " strnlen:" << mongo::strnlen( valuestr() , x ); - msgasserted( 10321 , buf.str() ); - break; - } - case CodeWScope: { - int totalSize = *( int * )( value() ); - massert( 10322 , "Invalid CodeWScope size", totalSize >= 8 ); - int strSizeWNull = *( int * )( value() + 4 ); - massert( 10323 , "Invalid CodeWScope string size", totalSize >= strSizeWNull + 4 + 4 ); - massert( 10324 , "Invalid CodeWScope string size", - strSizeWNull > 0 && - (strSizeWNull - 1) == mongo::strnlen( codeWScopeCode(), strSizeWNull ) ); - massert( 10325 , "Invalid CodeWScope size", totalSize >= strSizeWNull + 4 + 4 + 4 ); - int objSize = *( int * )( value() + 4 + 4 + strSizeWNull ); - massert( 10326 , "Invalid CodeWScope object size", totalSize == 4 + 4 + strSizeWNull + objSize ); - // Subobject validation handled elsewhere. - } - case Object: - // We expect Object size validation to be handled elsewhere. - default: - break; - } - } - - inline int BSONElement::size( int maxLen ) const { - if ( totalSize >= 0 ) - return totalSize; - - int remain = maxLen - fieldNameSize() - 1; - - int x = 0; - switch ( type() ) { - case EOO: - case Undefined: - case jstNULL: - case MaxKey: - case MinKey: - break; - case mongo::Bool: - x = 1; - break; - case NumberInt: - x = 4; - break; - case Timestamp: - case mongo::Date: - case NumberDouble: - case NumberLong: - x = 8; - break; - case jstOID: - x = 12; - break; - case Symbol: - case Code: - case mongo::String: - massert( 10313 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = valuestrsize() + 4; - break; - case CodeWScope: - massert( 10314 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = objsize(); - break; - - case DBRef: - massert( 10315 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = valuestrsize() + 4 + 12; - break; - case Object: - case mongo::Array: - massert( 10316 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = objsize(); - break; - case BinData: - massert( 10317 , "Insufficient bytes to calculate element size", maxLen == -1 || remain > 3 ); - x = valuestrsize() + 4 + 1/*subtype*/; - break; - case RegEx: - { - const char *p = value(); - size_t len1 = ( maxLen == -1 ) ? strlen( p ) : mongo::strnlen( p, remain ); - //massert( 10318 , "Invalid regex string", len1 != -1 ); // ERH - 4/28/10 - don't think this does anything - p = p + len1 + 1; - size_t len2 = ( maxLen == -1 ) ? 
strlen( p ) : mongo::strnlen( p, remain - len1 - 1 ); - //massert( 10319 , "Invalid regex options string", len2 != -1 ); // ERH - 4/28/10 - don't think this does anything - x = (int) (len1 + 1 + len2 + 1); - } - break; - default: { - StringBuilder ss; - ss << "BSONElement: bad type " << (int) type(); - string msg = ss.str(); - massert( 10320 , msg.c_str(),false); - } - } - totalSize = x + fieldNameSize() + 1; // BSONType - - return totalSize; - } - - inline string BSONElement::toString( bool includeFieldName, bool full ) const { - StringBuilder s; - toString(s, includeFieldName, full); - return s.str(); - } - inline void BSONElement::toString(StringBuilder& s, bool includeFieldName, bool full ) const { - if ( includeFieldName && type() != EOO ) - s << fieldName() << ": "; - switch ( type() ) { - case EOO: - s << "EOO"; - break; - case mongo::Date: - s << "new Date(" << date() << ')'; - break; - case RegEx: - { - s << "/" << regex() << '/'; - const char *p = regexFlags(); - if ( p ) s << p; - } - break; - case NumberDouble: - s.appendDoubleNice( number() ); - break; - case NumberLong: - s << _numberLong(); - break; - case NumberInt: - s << _numberInt(); - break; - case mongo::Bool: - s << ( boolean() ? "true" : "false" ); - break; - case Object: - embeddedObject().toString(s, false, full); - break; - case mongo::Array: - embeddedObject().toString(s, true, full); - break; - case Undefined: - s << "undefined"; - break; - case jstNULL: - s << "null"; - break; - case MaxKey: - s << "MaxKey"; - break; - case MinKey: - s << "MinKey"; - break; - case CodeWScope: - s << "CodeWScope( " - << codeWScopeCode() << ", " << codeWScopeObject().toString(false, full) << ")"; - break; - case Code: - if ( !full && valuestrsize() > 80 ) { - s.write(valuestr(), 70); - s << "..."; - } else { - s.write(valuestr(), valuestrsize()-1); - } - break; - case Symbol: - case mongo::String: - s << '"'; - if ( !full && valuestrsize() > 80 ) { - s.write(valuestr(), 70); - s << "...\""; - } else { - s.write(valuestr(), valuestrsize()-1); - s << '"'; - } - break; - case DBRef: - s << "DBRef('" << valuestr() << "',"; - { - mongo::OID *x = (mongo::OID *) (valuestr() + valuestrsize()); - s << *x << ')'; - } - break; - case jstOID: - s << "ObjectId('"; - s << __oid() << "')"; - break; - case BinData: - s << "BinData"; - if (full){ - int len; - const char* data = binDataClean(len); - s << '(' << binDataType() << ", " << toHex(data, len) << ')'; - } - break; - case Timestamp: - s << "Timestamp " << timestampTime() << "|" << timestampInc(); - break; - default: - s << "?type=" << type(); - break; - } - } - - /* return has eoo() true if no match - supports "." notation to reach into embedded objects - */ - inline BSONElement BSONObj::getFieldDotted(const char *name) const { - BSONElement e = getField( name ); - if ( e.eoo() ) { - const char *p = strchr(name, '.'); - if ( p ) { - string left(name, p-name); - BSONObj sub = getObjectField(left.c_str()); - return sub.isEmpty() ? BSONElement() : sub.getFieldDotted(p+1); - } - } - - return e; - } - - inline BSONObj BSONObj::getObjectField(const char *name) const { - BSONElement e = getField(name); - BSONType t = e.type(); - return t == Object || t == Array ? 
e.embeddedObject() : BSONObj(); - } - - inline int BSONObj::nFields() const { - int n = 0; - BSONObjIterator i(*this); - while ( i.moreWithEOO() ) { - BSONElement e = i.next(); - if ( e.eoo() ) - break; - n++; - } - return n; - } - - inline BSONObj::BSONObj() { - /* LITTLE ENDIAN */ - static char p[] = { 5, 0, 0, 0, 0 }; - _objdata = p; - } - - inline BSONObj BSONElement::Obj() const { return embeddedObjectUserCheck(); } - - inline BSONElement BSONElement::operator[] (const string& field) const { - BSONObj o = Obj(); - return o[field]; - } - - inline void BSONObj::elems(vector &v) const { - BSONObjIterator i(*this); - while( i.more() ) - v.push_back(i.next()); - } - - inline void BSONObj::elems(list &v) const { - BSONObjIterator i(*this); - while( i.more() ) - v.push_back(i.next()); - } - - template - void BSONObj::Vals(vector& v) const { - BSONObjIterator i(*this); - while( i.more() ) { - T t; - i.next().Val(t); - v.push_back(t); - } - } - template - void BSONObj::Vals(list& v) const { - BSONObjIterator i(*this); - while( i.more() ) { - T t; - i.next().Val(t); - v.push_back(t); - } - } - - template - void BSONObj::vals(vector& v) const { - BSONObjIterator i(*this); - while( i.more() ) { - try { - T t; - i.next().Val(t); - v.push_back(t); - } catch(...) { } - } - } - template - void BSONObj::vals(list& v) const { - BSONObjIterator i(*this); - while( i.more() ) { - try { - T t; - i.next().Val(t); - v.push_back(t); - } catch(...) { } - } - } - - inline ostream& operator<<( ostream &s, const BSONObj &o ) { - return s << o.toString(); - } - - inline ostream& operator<<( ostream &s, const BSONElement &e ) { - return s << e.toString(); - } - - inline void BSONElement::Val(BSONObj& v) const { v = Obj(); } - - template - inline BSONFieldValue BSONField::query( const char * q , const T& t ) const { - BSONObjBuilder b; - b.append( q , t ); - return BSONFieldValue( _name , b.obj() ); - } -} diff --git a/bson/bsonmisc.h b/bson/bsonmisc.h index 40ec6d3..96be12a 100644 --- a/bson/bsonmisc.h +++ b/bson/bsonmisc.h @@ -26,7 +26,7 @@ namespace mongo { return l.woCompare( r, false ) < 0; } }; - + class BSONObjCmp { public: BSONObjCmp( const BSONObj &_order = BSONObj() ) : order( _order ) {} @@ -54,26 +54,26 @@ namespace mongo { FieldCompareResult compareDottedFieldNames( const string& l , const string& r ); -/** Use BSON macro to build a BSONObj from a stream + /** Use BSON macro to build a BSONObj from a stream + + e.g., + BSON( "name" << "joe" << "age" << 33 ) - e.g., - BSON( "name" << "joe" << "age" << 33 ) + with auto-generated object id: + BSON( GENOID << "name" << "joe" << "age" << 33 ) - with auto-generated object id: - BSON( GENOID << "name" << "joe" << "age" << 33 ) - - The labels GT, GTE, LT, LTE, NE can be helpful for stream-oriented construction - of a BSONObj, particularly when assembling a Query. For example, - BSON( "a" << GT << 23.4 << NE << 30 << "b" << 2 ) produces the object - { a: { \$gt: 23.4, \$ne: 30 }, b: 2 }. -*/ + The labels GT, GTE, LT, LTE, NE can be helpful for stream-oriented construction + of a BSONObj, particularly when assembling a Query. For example, + BSON( "a" << GT << 23.4 << NE << 30 << "b" << 2 ) produces the object + { a: { \$gt: 23.4, \$ne: 30 }, b: 2 }. 
+ */ #define BSON(x) (( mongo::BSONObjBuilder(64) << x ).obj()) -/** Use BSON_ARRAY macro like BSON macro, but without keys + /** Use BSON_ARRAY macro like BSON macro, but without keys - BSONArray arr = BSON_ARRAY( "hello" << 1 << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) ); + BSONArray arr = BSON_ARRAY( "hello" << 1 << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) ); - */ + */ #define BSON_ARRAY(x) (( mongo::BSONArrayBuilder() << x ).arr()) /* Utility class to auto assign object IDs. @@ -83,11 +83,18 @@ namespace mongo { extern struct GENOIDLabeler { } GENOID; /* Utility class to add a Date element with the current time - Example: + Example: cout << BSON( "created" << DATENOW ); // { created : "2009-10-09 11:41:42" } */ extern struct DateNowLabeler { } DATENOW; + /* Utility class to add the minKey (minus infinity) to a given attribute + Example: + cout << BSON( "a" << MINKEY ); // { "a" : { "$minKey" : 1 } } + */ + extern struct MinKeyLabeler { } MINKEY; + extern struct MaxKeyLabeler { } MAXKEY; + // Utility class to implement GT, GTE, etc as described above. class Labeler { public: @@ -99,17 +106,17 @@ namespace mongo { template BSONObjBuilder& operator<<( T value ); - /* the value of the element e is appended i.e. for + /* the value of the element e is appended i.e. for "age" << GT << someElement - one gets - { age : { $gt : someElement's value } } + one gets + { age : { $gt : someElement's value } } */ BSONObjBuilder& operator<<( const BSONElement& e ); private: const Label &l_; BSONObjBuilderValueStream *s_; }; - + extern Labeler::Label GT; extern Labeler::Label GTE; extern Labeler::Label LT; @@ -126,7 +133,7 @@ namespace mongo { inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e); inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e, const BSONObj& f); // definitions in bsonobjbuilder.h b/c of incomplete types - + // Utility class to implement BSON( key << val ) as described above. 
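To make the stream-style construction documented above concrete, a minimal usage sketch follows (assuming the standalone bson headers are reachable as "bson/bson.h" and using only the names documented above -- BSON, BSON_ARRAY, GT, NE, MINKEY, OR):

    #include <iostream>
    #include "bson/bson.h"   // assumed include path for the standalone bson headers

    using namespace mongo;

    int main() {
        // { name: "joe", age: 33 }
        BSONObj person = BSON( "name" << "joe" << "age" << 33 );

        // { a: { $gt: 23.4, $ne: 30 }, b: 2 } -- GT/NE labelers expand into a subobject
        BSONObj query = BSON( "a" << GT << 23.4 << NE << 30 << "b" << 2 );

        // { $or: [ { x: 1 }, { y: 2 } ] }
        BSONObj either = OR( BSON( "x" << 1 ), BSON( "y" << 2 ) );

        // arrays get "0", "1", ... style field names automatically
        BSONArray arr = BSON_ARRAY( "hello" << 1 << BSON( "foo" << "bar" ) );

        // { a: { $minKey: 1 } } -- MINKEY/MAXKEY labelers are new in this version
        BSONObj low = BSON( "a" << MINKEY );

        std::cout << person << '\n' << query << '\n' << either << '\n'
                  << arr << '\n' << low << std::endl;
        return 0;
    }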
class BSONObjBuilderValueStream : public boost::noncopyable { public: @@ -134,17 +141,20 @@ namespace mongo { BSONObjBuilderValueStream( BSONObjBuilder * builder ); BSONObjBuilder& operator<<( const BSONElement& e ); - - template + + template BSONObjBuilder& operator<<( T value ); BSONObjBuilder& operator<<(DateNowLabeler& id); - + + BSONObjBuilder& operator<<(MinKeyLabeler& id); + BSONObjBuilder& operator<<(MaxKeyLabeler& id); + Labeler operator<<( const Labeler::Label &l ); void endField( const char *nextFieldName = 0 ); bool subobjStarted() const { return _fieldName != 0; } - + private: const char * _fieldName; BSONObjBuilder * _builder; @@ -153,39 +163,39 @@ namespace mongo { BSONObjBuilder *subobj(); auto_ptr< BSONObjBuilder > _subobj; }; - + /** used in conjuction with BSONObjBuilder, allows for proper buffer size to prevent crazy memory usage */ class BSONSizeTracker { public: - BSONSizeTracker(){ + BSONSizeTracker() { _pos = 0; for ( int i=0; i= SIZE ) _pos = 0; } - + /** * right now choosing largest size */ int getSize() const { int x = 16; // sane min - for ( int i=0; i x ) x = _sizes[i]; } return x; } - + private: enum { SIZE = 10 }; int _pos; diff --git a/bson/bsonobj.h b/bson/bsonobj.h index a802526..3ca6b8c 100644 --- a/bson/bsonobj.h +++ b/bson/bsonobj.h @@ -28,23 +28,23 @@ namespace mongo { typedef set< BSONElement, BSONElementCmpWithoutField > BSONElementSet; /** - C++ representation of a "BSON" object -- that is, an extended JSON-style + C++ representation of a "BSON" object -- that is, an extended JSON-style object in a binary representation. See bsonspec.org. - Note that BSONObj's have a smart pointer capability built in -- so you can + Note that BSONObj's have a smart pointer capability built in -- so you can pass them around by value. The reference counts used to implement this do not use locking, so copying and destroying BSONObj's are not thread-safe operations. BSON object format: - + code {}* EOO - + totalSize includes itself. - + Data: Bool: EOO: nothing follows @@ -67,31 +67,65 @@ namespace mongo { */ class BSONObj { public: - /** Construct a BSONObj from data in the proper format. - @param ifree true if the BSONObj should free() the msgdata when - it destructs. - */ + + /** Construct a BSONObj from data in the proper format. + @param ifree true if the BSONObj should free() the msgdata when + it destructs. + */ explicit BSONObj(const char *msgdata, bool ifree = false) { init(msgdata, ifree); } - BSONObj(const Record *r); + + explicit BSONObj(const Record *r); + /** Construct an empty BSONObj -- that is, {}. */ BSONObj(); - // defensive - ~BSONObj() { _objdata = 0; } - void appendSelfToBufBuilder(BufBuilder& b) const { - assert( objsize() ); - b.appendBuf(reinterpret_cast( objdata() ), objsize()); - } + ~BSONObj() { /*defensive:*/ _objdata = 0; } + + /** + A BSONObj can use a buffer it "owns" or one it does not. + + OWNED CASE + If the BSONObj owns the buffer, the buffer can be shared among several BSONObj's (by assignment). + In this case the buffer is basically implemented as a shared_ptr. + Since BSONObj's are typically immutable, this works well. + + UNOWNED CASE + A BSONObj can also point to BSON data in some other data structure it does not "own" or free later. + For example, in a memory mapped file. In this case, it is important the original data stays in + scope for as long as the BSONObj is in use. If you think the original data may go out of scope, + call BSONObj::getOwned() to promote your BSONObj to having its own copy. 
+ + On a BSONObj assignment, if the source is unowned, both the source and dest will have unowned + pointers to the original buffer after the assignment. - /** Readable representation of a BSON object in an extended JSON-style notation. + If you are not sure about ownership but need the buffer to last as long as the BSONObj, call + getOwned(). getOwned() is a no-op if the buffer is already owned. If not already owned, a malloc + and memcpy will result. + + Most ways to create BSONObj's create 'owned' variants. Unowned versions can be created with: + (1) specifying true for the ifree parameter in the constructor + (2) calling BSONObjBuilder::done(). Use BSONObjBuilder::obj() to get an owned copy + (3) retrieving a subobject retrieves an unowned pointer into the parent BSON object + + @return true if this is in owned mode + */ + bool isOwned() const { return _holder.get() != 0; } + + /* make sure the data buffer is under the control of this BSONObj and not a remote buffer */ + BSONObj getOwned() const; + + /** @return a new full (and owned) copy of the object. */ + BSONObj copy() const; + + /** Readable representation of a BSON object in an extended JSON-style notation. This is an abbreviated representation which might be used for logging. */ string toString( bool isArray = false, bool full=false ) const; void toString(StringBuilder& s, bool isArray = false, bool full=false ) const; - - /** Properly formatted JSON string. + + /** Properly formatted JSON string. @param pretty if true we try to add some lf's and indentation */ string jsonString( JsonStringFormat format = Strict, int pretty = 0 ) const; @@ -126,38 +160,36 @@ namespace mongo { names with respect to the returned element. */ BSONElement getFieldDottedOrArray(const char *&name) const; - /** Get the field of the specified name. eoo() is true on the returned - element if not found. + /** Get the field of the specified name. eoo() is true on the returned + element if not found. */ BSONElement getField(const StringData& name) const; - /** Get the field of the specified name. eoo() is true on the returned - element if not found. + /** Get the field of the specified name. eoo() is true on the returned + element if not found. */ - BSONElement operator[] (const char *field) const { + BSONElement operator[] (const char *field) const { return getField(field); } - BSONElement operator[] (const string& field) const { + BSONElement operator[] (const string& field) const { return getField(field); } - BSONElement operator[] (int field) const { + BSONElement operator[] (int field) const { StringBuilder ss; ss << field; string s = ss.str(); return getField(s.c_str()); } - /** @return true if field exists */ - bool hasField( const char * name )const { - return ! getField( name ).eoo(); - } + /** @return true if field exists */ + bool hasField( const char * name ) const { return ! getField( name ).eoo(); } /** @return "" if DNE or wrong type */ const char * getStringField(const char *name) const; - /** @return subobject of the given name */ + /** @return subobject of the given name */ BSONObj getObjectField(const char *name) const; /** @return INT_MIN if not present - does some type conversions */ @@ -172,26 +204,24 @@ namespace mongo { object. */ BSONObj extractFieldsUnDotted(BSONObj pattern) const; - + /** extract items from object which match a pattern object. - e.g., if pattern is { x : 1, y : 1 }, builds an object with - x and y elements of this object, if they are present. 
+ e.g., if pattern is { x : 1, y : 1 }, builds an object with + x and y elements of this object, if they are present. returns elements with original field names */ BSONObj extractFields(const BSONObj &pattern , bool fillWithNull=false) const; - + BSONObj filterFieldsUndotted(const BSONObj &filter, bool inFilter) const; BSONElement getFieldUsingIndexNames(const char *fieldName, const BSONObj &indexKey) const; - + /** @return the raw data of the object */ const char *objdata() const { return _objdata; } /** @return total size of the BSON object in bytes */ - int objsize() const { - return *(reinterpret_cast(objdata())); - } + int objsize() const { return *(reinterpret_cast(objdata())); } /** performs a cursory check on the object's size only. */ bool isValid(); @@ -201,32 +231,30 @@ namespace mongo { */ bool okForStorage() const; - /** @return true if object is empty -- i.e., {} */ - bool isEmpty() const { - return objsize() <= 5; - } + /** @return true if object is empty -- i.e., {} */ + bool isEmpty() const { return objsize() <= 5; } void dump() const; /** Alternative output format */ string hexDump() const; - + /**wo='well ordered'. fields must be in same order in each object. - Ordering is with respect to the signs of the elements + Ordering is with respect to the signs of the elements and allows ascending / descending key mixing. - @return <0 if l0 if l>r + @return <0 if l0 if l>r */ int woCompare(const BSONObj& r, const Ordering &o, bool considerFieldName=true) const; /**wo='well ordered'. fields must be in same order in each object. - Ordering is with respect to the signs of the elements + Ordering is with respect to the signs of the elements and allows ascending / descending key mixing. - @return <0 if l0 if l>r + @return <0 if l0 if l>r */ int woCompare(const BSONObj& r, const BSONObj &ordering = BSONObj(), bool considerFieldName=true) const; - + bool operator<( const BSONObj& other ) const { return woCompare( other ) < 0; } bool operator<=( const BSONObj& other ) const { return woCompare( other ) <= 0; } @@ -249,31 +277,18 @@ namespace mongo { return false; } - /** @return first field of the object */ - BSONElement firstElement() const { - return BSONElement(objdata() + 4); - } + /** @return first field of the object */ + BSONElement firstElement() const { return BSONElement(objdata() + 4); } - /** @return true if field exists in the object */ + /** @return true if field exists in the object */ bool hasElement(const char *name) const; - /** Get the _id field from the object. For good performance drivers should - assure that _id is the first element of the object; however, correct operation + /** Get the _id field from the object. For good performance drivers should + assure that _id is the first element of the object; however, correct operation is assured regardless. @return true if found - */ - bool getObjectID(BSONElement& e) const; - - /** makes a copy of the object. */ - BSONObj copy() const; - - /* make sure the data buffer is under the control of this BSONObj and not a remote buffer */ - BSONObj getOwned() const{ - if ( !isOwned() ) - return copy(); - return *this; - } - bool isOwned() const { return _holder.get() != 0; } + */ + bool getObjectID(BSONElement& e) const; /** @return A hash code for the object */ int hash() const { @@ -289,18 +304,18 @@ namespace mongo { // string identifier equivalents. // TODO Support conversion of element types other than min and max. 
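The owned/unowned buffer rules described above for isOwned()/getOwned() are easy to get wrong; here is a minimal sketch, assuming only the members documented above (the helper name safeView is illustrative):

    #include <cassert>
    #include "bson/bson.h"   // assumed include path

    using namespace mongo;

    // The caller's buffer may go out of scope; promote the view to an owned copy first.
    BSONObj safeView( const char *raw ) {
        BSONObj view( raw );        // ifree = false: unowned, points into the caller's buffer
        assert( !view.isOwned() );
        return view.getOwned();     // malloc + memcpy; stays valid after 'raw' disappears
    }

    int main() {
        BSONObjBuilder b;
        b.append( "x", 1 );
        BSONObj owned = b.obj();    // obj() hands ownership of the buffer to 'owned'
        assert( owned.isOwned() );

        BSONObj copy = safeView( owned.objdata() );
        assert( copy.isOwned() && copy == owned );
        return 0;
    }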
BSONObj clientReadable() const; - + /** Return new object with the field names replaced by those in the passed object. */ BSONObj replaceFieldNames( const BSONObj &obj ) const; - + /** true unless corrupt */ bool valid() const; - + /** @return an md5 value for this object. */ string md5() const; - - bool operator==( const BSONObj& other ) const{ + + bool operator==( const BSONObj& other ) const { return woCompare( other ) == 0; } @@ -324,14 +339,21 @@ namespace mongo { opNEAR = 0x13, opWITHIN = 0x14, opMAX_DISTANCE=0x15 - }; + }; /** add all elements of the object to the specified vector */ void elems(vector &) const; /** add all elements of the object to the specified list */ void elems(list &) const; - /** add all values of the object to the specified vector. If type mismatches, exception. */ + /** add all values of the object to the specified vector. If type mismatches, exception. + this is most useful when the BSONObj is an array, but can be used with non-arrays too in theory. + + example: + bo sub = y["subobj"].Obj(); + vector myints; + sub.Vals(myints); + */ template void Vals(vector &) const; /** add all values of the object to the specified list. If type mismatches, exception. */ @@ -347,13 +369,25 @@ namespace mongo { friend class BSONObjIterator; typedef BSONObjIterator iterator; + + /** use something like this: + for( BSONObj::iterator i = myObj.begin(); i.more(); ) { + BSONElement e = i.next(); + ... + } + */ BSONObjIterator begin(); -private: + void appendSelfToBufBuilder(BufBuilder& b) const { + assert( objsize() ); + b.appendBuf(reinterpret_cast( objdata() ), objsize()); + } + + private: class Holder { public: Holder( const char *objdata ) : - _objdata( objdata ) { + _objdata( objdata ) { } ~Holder() { free((void *)_objdata); @@ -362,29 +396,27 @@ private: private: const char *_objdata; }; + const char *_objdata; boost::shared_ptr< Holder > _holder; + + void _assertInvalid() const; void init(const char *data, bool ifree) { if ( ifree ) _holder.reset( new Holder( data ) ); _objdata = data; - if ( ! isValid() ){ - StringBuilder ss; - int os = objsize(); - ss << "Invalid BSONObj spec size: " << os << " (" << toHex( &os, 4 ) << ")"; - try { - BSONElement e = firstElement(); - ss << " first element:" << e.toString() << " "; - } - catch ( ... ){} - string s = ss.str(); - massert( 10334 , s , 0 ); - } + if ( !isValid() ) + _assertInvalid(); } }; + ostream& operator<<( ostream &s, const BSONObj &o ); ostream& operator<<( ostream &s, const BSONElement &e ); + StringBuilder& operator<<( StringBuilder &s, const BSONObj &o ); + StringBuilder& operator<<( StringBuilder &s, const BSONElement &e ); + + struct BSONArray : BSONObj { // Don't add anything other than forwarding constructors!!! 
BSONArray(): BSONObj() {} diff --git a/bson/bsonobjbuilder.h b/bson/bsonobjbuilder.h index fdfe4de..a39b529 100644 --- a/bson/bsonobjbuilder.h +++ b/bson/bsonobjbuilder.h @@ -36,7 +36,7 @@ namespace mongo { template class BSONFieldValue { public: - BSONFieldValue( const string& name , const T& t ){ + BSONFieldValue( const string& name , const T& t ) { _name = name; _t = t; } @@ -52,8 +52,8 @@ namespace mongo { template class BSONField { public: - BSONField( const string& name , const string& longName="" ) - : _name(name), _longName(longName){} + BSONField( const string& name , const string& longName="" ) + : _name(name), _longName(longName) {} const string& name() const { return _name; } operator string() const { return _name; } @@ -65,11 +65,11 @@ namespace mongo { BSONFieldValue lt( const T& t ) const { return query( "$lt" , t ); } BSONFieldValue query( const char * q , const T& t ) const; - + BSONFieldValue operator()( const T& t ) const { return BSONFieldValue( _name , t ); } - + private: string _name; string _longName; @@ -85,17 +85,18 @@ namespace mongo { _b.skip(4); /*leave room for size field*/ } + /* dm why do we have this/need this? not clear to me, comment please tx. */ /** @param baseBuilder construct a BSONObjBuilder using an existing BufBuilder */ BSONObjBuilder( BufBuilder &baseBuilder ) : _b( baseBuilder ), _buf( 0 ), _offset( baseBuilder.len() ), _s( this ) , _tracker(0) , _doneCalled(false) { _b.skip( 4 ); } - + BSONObjBuilder( const BSONSizeTracker & tracker ) : _b(_buf) , _buf(tracker.getSize() ), _offset(0), _s( this ) , _tracker( (BSONSizeTracker*)(&tracker) ) , _doneCalled(false) { _b.skip( 4 ); } - ~BSONObjBuilder(){ - if ( !_doneCalled && _b.buf() && _buf.getSize() == 0 ){ + ~BSONObjBuilder() { + if ( !_doneCalled && _b.buf() && _buf.getSize() == 0 ) { _done(); } } @@ -103,6 +104,9 @@ namespace mongo { /** add all the fields from the object specified to this object */ BSONObjBuilder& appendElements(BSONObj x); + /** add all the fields from the object specified to this object if they don't exist already */ + BSONObjBuilder& appendElementsUnique( BSONObj x ); + /** append element to the object we are building */ BSONObjBuilder& append( const BSONElement& e) { assert( !e.eoo() ); // do not append eoo, that would corrupt us. the builder auto appends when done() is called. @@ -111,7 +115,7 @@ namespace mongo { } /** append an element but with a new name */ - BSONObjBuilder& appendAs(const BSONElement& e, const StringData& fieldName) { + BSONObjBuilder& appendAs(const BSONElement& e, const StringData& fieldName) { assert( !e.eoo() ); // do not append eoo, that would corrupt us. the builder auto appends when done() is called. _b.appendNum((char) e.type()); _b.appendStr(fieldName); @@ -128,14 +132,14 @@ namespace mongo { } /** add a subobject as a member */ - BSONObjBuilder& appendObject(const StringData& fieldName, const char * objdata , int size = 0 ){ + BSONObjBuilder& appendObject(const StringData& fieldName, const char * objdata , int size = 0 ) { assert( objdata ); - if ( size == 0 ){ + if ( size == 0 ) { size = *((int*)objdata); } - + assert( size > 4 && size < 100000000 ); - + _b.appendNum((char) Object); _b.appendStr(fieldName); _b.appendBuf((void*)objdata, size ); @@ -150,7 +154,7 @@ namespace mongo { _b.appendStr(fieldName); return _b; } - + /** add a subobject as a member with type Array. Thus arr object should have "0", "1", ... style fields in it. 
*/ @@ -160,9 +164,9 @@ namespace mongo { _b.appendBuf((void *) subObj.objdata(), subObj.objsize()); return *this; } - BSONObjBuilder& append(const StringData& fieldName, BSONArray arr) { - return appendArray(fieldName, arr); - } + BSONObjBuilder& append(const StringData& fieldName, BSONArray arr) { + return appendArray(fieldName, arr); + } /** add header for a new subarray and return bufbuilder for writing to the subarray's body */ @@ -171,7 +175,7 @@ namespace mongo { _b.appendStr(fieldName); return _b; } - + /** Append a boolean element */ BSONObjBuilder& appendBool(const StringData& fieldName, int val) { _b.appendNum((char) Bool); @@ -184,10 +188,10 @@ namespace mongo { BSONObjBuilder& append(const StringData& fieldName, bool val) { _b.appendNum((char) Bool); _b.appendStr(fieldName); - _b.appendNum((char) (val?1:0)); + _b.appendNum((char) (val?1:0)); return *this; } - + /** Append a 32 bit integer element */ BSONObjBuilder& append(const StringData& fieldName, int n) { _b.appendNum((char) NumberInt); @@ -197,20 +201,20 @@ namespace mongo { } /** Append a 32 bit unsigned element - cast to a signed int. */ - BSONObjBuilder& append(const StringData& fieldName, unsigned n) { - return append(fieldName, (int) n); + BSONObjBuilder& append(const StringData& fieldName, unsigned n) { + return append(fieldName, (int) n); } /** Append a NumberLong */ - BSONObjBuilder& append(const StringData& fieldName, long long n) { + BSONObjBuilder& append(const StringData& fieldName, long long n) { _b.appendNum((char) NumberLong); _b.appendStr(fieldName); _b.appendNum(n); - return *this; + return *this; } /** appends a number. if n < max(int)/2 then uses int, otherwise long long */ - BSONObjBuilder& appendIntOrLL( const StringData& fieldName , long long n ){ + BSONObjBuilder& appendIntOrLL( const StringData& fieldName , long long n ) { long long x = n; if ( x < 0 ) x = x * -1; @@ -225,15 +229,26 @@ namespace mongo { * appendNumber is a series of method for appending the smallest sensible type * mostly for JS */ - BSONObjBuilder& appendNumber( const StringData& fieldName , int n ){ + BSONObjBuilder& appendNumber( const StringData& fieldName , int n ) { return append( fieldName , n ); } - BSONObjBuilder& appendNumber( const StringData& fieldName , double d ){ + BSONObjBuilder& appendNumber( const StringData& fieldName , double d ) { return append( fieldName , d ); } - BSONObjBuilder& appendNumber( const StringData& fieldName , long long l ){ + BSONObjBuilder& appendNumber( const StringData& fieldName , size_t n ) { + static size_t maxInt = (size_t)pow( 2.0 , 30.0 ); + + if ( n < maxInt ) + append( fieldName , (int)n ); + else + append( fieldName , (long long)n ); + return *this; + } + + + BSONObjBuilder& appendNumber( const StringData& fieldName , long long l ) { static long long maxInt = (int)pow( 2.0 , 30.0 ); static long long maxDouble = (long long)pow( 2.0 , 40.0 ); @@ -245,7 +260,7 @@ namespace mongo { append( fieldName , l ); return *this; } - + /** Append a double element */ BSONObjBuilder& append(const StringData& fieldName, double n) { _b.appendNum((char) NumberDouble); @@ -259,8 +274,8 @@ namespace mongo { */ bool appendAsNumber( const StringData& fieldName , const string& data ); - /** Append a BSON Object ID (OID type). - @deprecated Generally, it is preferred to use the append append(name, oid) + /** Append a BSON Object ID (OID type). + @deprecated Generally, it is preferred to use the append append(name, oid) method for this. 
*/ BSONObjBuilder& appendOID(const StringData& fieldName, OID *oid = 0 , bool generateIfBlank = false ) { @@ -279,8 +294,8 @@ namespace mongo { return *this; } - /** - Append a BSON Object ID. + /** + Append a BSON Object ID. @param fieldName Field name, e.g., "_id". @returns the builder object */ @@ -309,14 +324,14 @@ namespace mongo { _b.appendNum(static_cast(dt) * 1000); return *this; } - /** Append a date. - @param dt a Java-style 64 bit date value, that is + /** Append a date. + @param dt a Java-style 64 bit date value, that is the number of milliseconds since January 1, 1970, 00:00:00 GMT */ BSONObjBuilder& appendDate(const StringData& fieldName, Date_t dt) { /* easy to pass a time_t to this and get a bad result. thus this warning. */ #if defined(_DEBUG) && defined(MONGO_EXPOSE_MACROS) - if( dt > 0 && dt <= 0xffffffff ) { + if( dt > 0 && dt <= 0xffffffff ) { static int n; if( n++ == 0 ) log() << "DEV WARNING appendDate() called with a tiny (but nonzero) date" << endl; @@ -335,27 +350,22 @@ namespace mongo { @param regex the regular expression pattern @param regex options such as "i" or "g" */ - BSONObjBuilder& appendRegex(const StringData& fieldName, const char *regex, const char *options = "") { + BSONObjBuilder& appendRegex(const StringData& fieldName, const StringData& regex, const StringData& options = "") { _b.appendNum((char) RegEx); _b.appendStr(fieldName); _b.appendStr(regex); _b.appendStr(options); return *this; } - /** Append a regular expression value - @param regex the regular expression pattern - @param regex options such as "i" or "g" - */ - BSONObjBuilder& appendRegex(const StringData& fieldName, string regex, string options = "") { - return appendRegex(fieldName, regex.c_str(), options.c_str()); - } - BSONObjBuilder& appendCode(const StringData& fieldName, const char *code) { + + BSONObjBuilder& appendCode(const StringData& fieldName, const StringData& code) { _b.appendNum((char) Code); _b.appendStr(fieldName); - _b.appendNum((int) strlen(code)+1); + _b.appendNum((int) code.size()+1); _b.appendStr(code); return *this; } + /** Append a string element. len DOES include terminating nul */ BSONObjBuilder& append(const StringData& fieldName, const char *str, int len) { _b.appendNum((char) String); @@ -369,48 +379,51 @@ namespace mongo { return append(fieldName, str, (int) strlen(str)+1); } /** Append a string element */ - BSONObjBuilder& append(const StringData& fieldName, string str) { + BSONObjBuilder& append(const StringData& fieldName, const string& str) { return append(fieldName, str.c_str(), (int) str.size()+1); } - BSONObjBuilder& appendSymbol(const StringData& fieldName, const char *symbol) { + + BSONObjBuilder& appendSymbol(const StringData& fieldName, const StringData& symbol) { _b.appendNum((char) Symbol); _b.appendStr(fieldName); - _b.appendNum((int) strlen(symbol)+1); + _b.appendNum((int) symbol.size()+1); _b.appendStr(symbol); - return *this; } + return *this; + } /** Append a Null element to the object */ BSONObjBuilder& appendNull( const StringData& fieldName ) { _b.appendNum( (char) jstNULL ); _b.appendStr( fieldName ); - return *this; } + return *this; + } // Append an element that is less than all other keys. BSONObjBuilder& appendMinKey( const StringData& fieldName ) { _b.appendNum( (char) MinKey ); _b.appendStr( fieldName ); - return *this; + return *this; } // Append an element that is greater than all other keys. 
BSONObjBuilder& appendMaxKey( const StringData& fieldName ) { _b.appendNum( (char) MaxKey ); _b.appendStr( fieldName ); - return *this; + return *this; } - + // Append a Timestamp field -- will be updated to next OpTime on db insert. BSONObjBuilder& appendTimestamp( const StringData& fieldName ) { _b.appendNum( (char) Timestamp ); _b.appendStr( fieldName ); _b.appendNum( (unsigned long long) 0 ); - return *this; + return *this; } BSONObjBuilder& appendTimestamp( const StringData& fieldName , unsigned long long val ) { _b.appendNum( (char) Timestamp ); _b.appendStr( fieldName ); _b.appendNum( val ); - return *this; + return *this; } /** @@ -419,24 +432,24 @@ namespace mongo { @param time - in millis (but stored in seconds) */ BSONObjBuilder& appendTimestamp( const StringData& fieldName , unsigned long long time , unsigned int inc ); - + /* Append an element of the deprecated DBRef type. - @deprecated + @deprecated */ - BSONObjBuilder& appendDBRef( const StringData& fieldName, const char *ns, const OID &oid ) { + BSONObjBuilder& appendDBRef( const StringData& fieldName, const StringData& ns, const OID &oid ) { _b.appendNum( (char) DBRef ); _b.appendStr( fieldName ); - _b.appendNum( (int) strlen( ns ) + 1 ); + _b.appendNum( (int) ns.size() + 1 ); _b.appendStr( ns ); _b.appendBuf( (void *) &oid, 12 ); - return *this; + return *this; } - /** Append a binary data element + /** Append a binary data element @param fieldName name of the field @param len length of the binary data in bytes - @param subtype subtype information for the data. @see enum BinDataType in bsontypes.h. + @param subtype subtype information for the data. @see enum BinDataType in bsontypes.h. Use BinDataGeneral if you don't care about the type. @param data the byte array */ @@ -446,36 +459,36 @@ namespace mongo { _b.appendNum( len ); _b.appendNum( (char) type ); _b.appendBuf( (void *) data, len ); - return *this; + return *this; } BSONObjBuilder& appendBinData( const StringData& fieldName, int len, BinDataType type, const unsigned char *data ) { return appendBinData(fieldName, len, type, (const char *) data); } - + /** Subtype 2 is deprecated. Append a BSON bindata bytearray element. @param data a byte array @param len the length of data */ - BSONObjBuilder& appendBinDataArrayDeprecated( const char * fieldName , const char * data , int len ){ + BSONObjBuilder& appendBinDataArrayDeprecated( const char * fieldName , const char * data , int len ) { _b.appendNum( (char) BinData ); _b.appendStr( fieldName ); _b.appendNum( len + 4 ); _b.appendNum( (char)0x2 ); _b.appendNum( len ); - _b.appendBuf( (void *) data, len ); - return *this; + _b.appendBuf( (void *) data, len ); + return *this; } - /** Append to the BSON object a field of type CodeWScope. This is a javascript code + /** Append to the BSON object a field of type CodeWScope. This is a javascript code fragment accompanied by some scope that goes with it. 
*/ - BSONObjBuilder& appendCodeWScope( const StringData& fieldName, const char *code, const BSONObj &scope ) { + BSONObjBuilder& appendCodeWScope( const StringData& fieldName, const StringData& code, const BSONObj &scope ) { _b.appendNum( (char) CodeWScope ); _b.appendStr( fieldName ); - _b.appendNum( ( int )( 4 + 4 + strlen( code ) + 1 + scope.objsize() ) ); - _b.appendNum( ( int ) strlen( code ) + 1 ); + _b.appendNum( ( int )( 4 + 4 + code.size() + 1 + scope.objsize() ) ); + _b.appendNum( ( int ) code.size() + 1 ); _b.appendStr( code ); _b.appendBuf( ( void * )scope.objdata(), scope.objsize() ); return *this; @@ -485,15 +498,12 @@ namespace mongo { _b.appendNum( (char) Undefined ); _b.appendStr( fieldName ); } - + /* helper function -- see Query::where() for primary way to do this. */ - void appendWhere( const char *code, const BSONObj &scope ){ + void appendWhere( const StringData& code, const BSONObj &scope ) { appendCodeWScope( "$where" , code , scope ); } - void appendWhere( const string &code, const BSONObj &scope ){ - appendWhere( code.c_str(), scope ); - } - + /** these are the min/max when comparing, not strict min/max elements for a given type */ @@ -507,7 +517,11 @@ namespace mongo { template < class T > BSONObjBuilder& append( const StringData& fieldName, const list< T >& vals ); - /** The returned BSONObj will free the buffer when it is finished. */ + /** + * destructive + * The returned BSONObj will free the buffer when it is finished. + * @return owned BSONObj + */ BSONObj obj() { bool own = owned(); massert( 10335 , "builder does not own memory", own ); @@ -516,12 +530,12 @@ namespace mongo { } /** Fetch the object we have built. - BSONObjBuilder still frees the object when the builder goes out of - scope -- very important to keep in mind. Use obj() if you - would like the BSONObj to last longer than the builder. + BSONObjBuilder still frees the object when the builder goes out of + scope -- very important to keep in mind. Use obj() if you + would like the BSONObj to last longer than the builder. */ BSONObj done() { - return BSONObj(_done()); + return BSONObj(_done(), /*ifree*/false); } // Like 'done' above, but does not construct a BSONObj to return to the caller. 
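Since obj() is destructive and done() leaves the buffer owned by the builder, a minimal sketch of the lifetime rule those comments describe (illustrative only, assuming the standalone bson headers):

    #include <iostream>
    #include "bson/bson.h"   // assumed include path

    using namespace mongo;

    int main() {
        BSONObj longLived;
        {
            BSONObjBuilder b;
            b.append( "n", 42 );

            BSONObj peek = b.done();         // NOT owned: the buffer still belongs to 'b'
            std::cout << peek << std::endl;  // fine while 'b' is alive

            longLived = peek.getOwned();     // take a private copy before 'b' is destroyed
            // (alternatively, call b.obj() instead of done() to move ownership out)
        }
        std::cout << longLived << std::endl; // still valid: owns its own buffer
        return 0;
    }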
@@ -591,25 +605,29 @@ namespace mongo { BSONObjBuilderValueStream& operator<<( const BSONField& f ) { _s.endField( f.name().c_str() ); return _s; - } + } template BSONObjBuilder& operator<<( const BSONFieldValue& v ) { append( v.name().c_str() , v.value() ); return *this; - } - + } + /** @return true if we are using our own bufbuilder, and not an alternate that was given to us in our constructor */ bool owned() const { return &_b == &_buf; } BSONObjIterator iterator() const ; - + + bool hasField( const StringData& name ) const ; + + int len() const { return _b.len(); } + private: char* _done() { if ( _doneCalled ) return _b.buf() + _offset; - + _doneCalled = true; _s.endField(); _b.appendNum((char) EOO); @@ -635,82 +653,89 @@ namespace mongo { public: BSONArrayBuilder() : _i(0), _b() {} BSONArrayBuilder( BufBuilder &_b ) : _i(0), _b(_b) {} + BSONArrayBuilder( int initialSize ) : _i(0), _b(initialSize) {} template - BSONArrayBuilder& append(const T& x){ - _b.append(num().c_str(), x); + BSONArrayBuilder& append(const T& x) { + _b.append(num(), x); return *this; } - BSONArrayBuilder& append(const BSONElement& e){ + BSONArrayBuilder& append(const BSONElement& e) { _b.appendAs(e, num()); return *this; } - + template - BSONArrayBuilder& operator<<(const T& x){ + BSONArrayBuilder& operator<<(const T& x) { return append(x); } - + void appendNull() { - _b.appendNull(num().c_str()); + _b.appendNull(num()); } - BSONArray arr(){ return BSONArray(_b.obj()); } - + /** + * destructive - ownership moves to returned BSONArray + * @return owned BSONArray + */ + BSONArray arr() { return BSONArray(_b.obj()); } + BSONObj done() { return _b.done(); } - + void doneFast() { _b.doneFast(); } - + template - BSONArrayBuilder& append(const StringData& name, const T& x){ + BSONArrayBuilder& append(const StringData& name, const T& x) { fill( name ); append( x ); return *this; } - - BufBuilder &subobjStart( const char *name = "0" ) { + + BufBuilder &subobjStart( const StringData& name = "0" ) { fill( name ); - return _b.subobjStart( num().c_str() ); + return _b.subobjStart( num() ); } BufBuilder &subarrayStart( const char *name ) { fill( name ); - return _b.subarrayStart( num().c_str() ); + return _b.subarrayStart( num() ); } - + void appendArray( const StringData& name, BSONObj subObj ) { fill( name ); - _b.appendArray( num().c_str(), subObj ); + _b.appendArray( num(), subObj ); } - - void appendAs( const BSONElement &e, const char *name ) { + + void appendAs( const BSONElement &e, const char *name) { fill( name ); append( e ); } - + + int len() const { return _b.len(); } + private: void fill( const StringData& name ) { char *r; - int n = strtol( name.data(), &r, 10 ); + long int n = strtol( name.data(), &r, 10 ); if ( *r ) uasserted( 13048, (string)"can't append to array using string field name [" + name.data() + "]" ); while( _i < n ) append( nullElt() ); } - + static BSONElement nullElt() { static BSONObj n = nullObj(); return n.firstElement(); } - + static BSONObj nullObj() { BSONObjBuilder _b; _b.appendNull( "" ); return _b.obj(); } - - string num(){ return _b.numStr(_i++); } + + string num() { return _b.numStr(_i++); } int _i; BSONObjBuilder _b; }; @@ -736,14 +761,14 @@ namespace mongo { // $or helper: OR(BSON("x" << GT << 7), BSON("y" << LT 6)); inline BSONObj OR(const BSONObj& a, const BSONObj& b) - { return BSON( "$or" << BSON_ARRAY(a << b) ); } + { return BSON( "$or" << BSON_ARRAY(a << b) ); } inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c) - { return BSON( "$or" << BSON_ARRAY(a << b 
<< c) ); } + { return BSON( "$or" << BSON_ARRAY(a << b << c) ); } inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d) - { return BSON( "$or" << BSON_ARRAY(a << b << c << d) ); } + { return BSON( "$or" << BSON_ARRAY(a << b << c << d) ); } inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e) - { return BSON( "$or" << BSON_ARRAY(a << b << c << d << e) ); } + { return BSON( "$or" << BSON_ARRAY(a << b << c << d << e) ); } inline BSONObj OR(const BSONObj& a, const BSONObj& b, const BSONObj& c, const BSONObj& d, const BSONObj& e, const BSONObj& f) - { return BSON( "$or" << BSON_ARRAY(a << b << c << d << e << f) ); } - + { return BSON( "$or" << BSON_ARRAY(a << b << c << d << e << f) ); } + } diff --git a/bson/bsonobjiterator.h b/bson/bsonobjiterator.h index c8224d2..6e6a69e 100644 --- a/bson/bsonobjiterator.h +++ b/bson/bsonobjiterator.h @@ -20,6 +20,7 @@ #include // like the ## operator but works with __LINE__ namespace mongo { + /** iterator for a BSONObj Note each BSONObj ends with an EOO element: so you will get more() on an empty @@ -30,7 +31,7 @@ namespace mongo { */ class BSONObjIterator { public: - /** Create an iterator for a BSON object. + /** Create an iterator for a BSON object. */ BSONObjIterator(const BSONObj& jso) { int sz = jso.objsize(); @@ -42,18 +43,17 @@ namespace mongo { _theend = jso.objdata() + sz; } - BSONObjIterator( const char * start , const char * end ){ + BSONObjIterator( const char * start , const char * end ) { _pos = start + 4; _theend = end; } - + /** @return true if more elements exist to be enumerated. */ - bool moreWithEOO() { - return _pos < _theend; - } - bool more(){ - return _pos < _theend && _pos[0]; - } + bool more() { return _pos < _theend && _pos[0]; } + + /** @return true if more elements exist to be enumerated INCLUDING the EOO element which is always at the end. */ + bool moreWithEOO() { return _pos < _theend; } + /** @return the next element in the object. For the final element, element.eoo() will be true. */ BSONElement next( bool checkEnd = false ) { assert( _pos < _theend ); @@ -78,18 +78,18 @@ namespace mongo { class BSONObjIteratorSorted { public: BSONObjIteratorSorted( const BSONObj& o ); - - ~BSONObjIteratorSorted(){ + + ~BSONObjIteratorSorted() { assert( _fields ); delete[] _fields; _fields = 0; } - bool more(){ + bool more() { return _cur < _nfields; } - - BSONElement next(){ + + BSONElement next() { assert( _fields ); if ( _cur < _nfields ) return BSONElement( _fields[_cur++] ); @@ -102,30 +102,30 @@ namespace mongo { int _cur; }; -/** Similar to BOOST_FOREACH - * - * because the iterator is defined outside of the for, you must use {} around - * the surrounding scope. Don't do this: - * - * if (foo) - * BSONForEach(e, obj) - * doSomething(e); - * - * but this is OK: - * - * if (foo) { - * BSONForEach(e, obj) - * doSomething(e); - * } - * - */ + /** Similar to BOOST_FOREACH + * + * because the iterator is defined outside of the for, you must use {} around + * the surrounding scope. Don't do this: + * + * if (foo) + * BSONForEach(e, obj) + * doSomething(e); + * + * but this is OK: + * + * if (foo) { + * BSONForEach(e, obj) + * doSomething(e); + * } + * + */ #define BSONForEach(e, obj) \ BSONObjIterator BOOST_PP_CAT(it_,__LINE__)(obj); \ for ( BSONElement e; \ - (BOOST_PP_CAT(it_,__LINE__).more() ? \ - (e = BOOST_PP_CAT(it_,__LINE__).next(), true) : \ - false) ; \ - /*nothing*/ ) + (BOOST_PP_CAT(it_,__LINE__).more() ? 
\ + (e = BOOST_PP_CAT(it_,__LINE__).next(), true) : \ + false) ; \ + /*nothing*/ ) } diff --git a/bson/bsontypes.h b/bson/bsontypes.h index 27f2aaf..9d95e8e 100644 --- a/bson/bsontypes.h +++ b/bson/bsontypes.h @@ -39,69 +39,69 @@ namespace mongo { extern BSONObj maxKey; extern BSONObj minKey; -/** - the complete list of valid BSON types - see also bsonspec.org -*/ -enum BSONType { - /** smaller than all other types */ - MinKey=-1, - /** end of object */ - EOO=0, - /** double precision floating point value */ - NumberDouble=1, - /** character string, stored in utf8 */ - String=2, - /** an embedded object */ - Object=3, - /** an embedded array */ - Array=4, - /** binary data */ - BinData=5, - /** Undefined type */ - Undefined=6, - /** ObjectId */ - jstOID=7, - /** boolean type */ - Bool=8, - /** date type */ - Date=9, - /** null type */ - jstNULL=10, - /** regular expression, a pattern with options */ - RegEx=11, - /** deprecated / will be redesigned */ - DBRef=12, - /** deprecated / use CodeWScope */ - Code=13, - /** a programming language (e.g., Python) symbol */ - Symbol=14, - /** javascript code that can execute on the database server, with SavedContext */ - CodeWScope=15, - /** 32 bit signed integer */ - NumberInt = 16, - /** Updated to a Date with value next OpTime on insert */ - Timestamp = 17, - /** 64 bit integer */ - NumberLong = 18, - /** max type that is not MaxKey */ - JSTypeMax=18, - /** larger than all other types */ - MaxKey=127 -}; + /** + the complete list of valid BSON types + see also bsonspec.org + */ + enum BSONType { + /** smaller than all other types */ + MinKey=-1, + /** end of object */ + EOO=0, + /** double precision floating point value */ + NumberDouble=1, + /** character string, stored in utf8 */ + String=2, + /** an embedded object */ + Object=3, + /** an embedded array */ + Array=4, + /** binary data */ + BinData=5, + /** Undefined type */ + Undefined=6, + /** ObjectId */ + jstOID=7, + /** boolean type */ + Bool=8, + /** date type */ + Date=9, + /** null type */ + jstNULL=10, + /** regular expression, a pattern with options */ + RegEx=11, + /** deprecated / will be redesigned */ + DBRef=12, + /** deprecated / use CodeWScope */ + Code=13, + /** a programming language (e.g., Python) symbol */ + Symbol=14, + /** javascript code that can execute on the database server, with SavedContext */ + CodeWScope=15, + /** 32 bit signed integer */ + NumberInt = 16, + /** Updated to a Date with value next OpTime on insert */ + Timestamp = 17, + /** 64 bit integer */ + NumberLong = 18, + /** max type that is not MaxKey */ + JSTypeMax=18, + /** larger than all other types */ + MaxKey=127 + }; /* subtypes of BinData. bdtCustom and above are ones that the JS compiler understands, but are opaque to the database. */ - enum BinDataType { + enum BinDataType { BinDataGeneral=0, - Function=1, + Function=1, ByteArrayDeprecated=2, /* use BinGeneral instead */ - bdtUUID = 3, - MD5Type=5, - bdtCustom=128 + bdtUUID = 3, + MD5Type=5, + bdtCustom=128 }; - + } diff --git a/bson/inline_decls.h b/bson/inline_decls.h index aab9810..1605611 100644 --- a/bson/inline_decls.h +++ b/bson/inline_decls.h @@ -26,7 +26,7 @@ #define NOINLINE_DECL __declspec(noinline) -#else +#else #define NOINLINE_DECL diff --git a/bson/oid.cpp b/bson/oid.cpp new file mode 100644 index 0000000..6aa0730 --- /dev/null +++ b/bson/oid.cpp @@ -0,0 +1,154 @@ +// @file oid.cpp + +/* Copyright 2009 10gen Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pch.h" +#include "oid.h" +#include "util/atomic_int.h" +#include "../db/nonce.h" + +BOOST_STATIC_ASSERT( sizeof(mongo::OID) == 12 ); + +namespace mongo { + + // machine # before folding in the process id + OID::MachineAndPid OID::ourMachine; + + unsigned OID::ourPid() { + unsigned pid; +#if defined(_WIN32) + pid = (unsigned short) GetCurrentProcessId(); +#elif defined(__linux__) || defined(__APPLE__) || defined(__sunos__) + pid = (unsigned short) getpid(); +#else + pid = (unsigned short) security.getNonce(); +#endif + return pid; + } + + void OID::foldInPid(OID::MachineAndPid& x) { + unsigned p = ourPid(); + x._pid ^= (unsigned short) p; + // when the pid is greater than 16 bits, let the high bits modulate the machine id field. + unsigned short& rest = (unsigned short &) x._machineNumber[1]; + rest ^= p >> 16; + } + + OID::MachineAndPid OID::genMachineAndPid() { + BOOST_STATIC_ASSERT( sizeof(mongo::OID::MachineAndPid) == 5 ); + + // this is not called often, so the following is not expensive, and gives us some + // testing that nonce generation is working right and that our OIDs are (perhaps) ok. + { + nonce a = security.getNonce(); + nonce b = security.getNonce(); + nonce c = security.getNonce(); + assert( !(a==b && b==c) ); + } + + unsigned long long n = security.getNonce(); + OID::MachineAndPid x = ourMachine = (OID::MachineAndPid&) n; + foldInPid(x); + return x; + } + + // after folding in the process id + OID::MachineAndPid OID::ourMachineAndPid = OID::genMachineAndPid(); + + void OID::regenMachineId() { + ourMachineAndPid = genMachineAndPid(); + } + + inline bool OID::MachineAndPid::operator!=(const OID::MachineAndPid& rhs) const { + return _pid != rhs._pid || _machineNumber != rhs._machineNumber; + } + + unsigned OID::getMachineId() { + unsigned char x[4]; + x[0] = ourMachineAndPid._machineNumber[0]; + x[1] = ourMachineAndPid._machineNumber[1]; + x[2] = ourMachineAndPid._machineNumber[2]; + x[3] = 0; + return (unsigned&) x[0]; + } + + void OID::justForked() { + MachineAndPid x = ourMachine; + // we let the random # for machine go into all 5 bytes of MachineAndPid, and then + // xor in the pid into _pid. this reduces the probability of collisions. 
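The pid enters the id only through this xor fold: the low 16 bits of the process id are xor-ed into the _pid field, and any bits above 16 are xor-ed into the trailing machine-number bytes. A minimal standalone sketch of that step, with the struct, function name and sample values chosen purely for illustration (the real MachineAndPid is packed to 5 bytes):

    #include <cstdio>

    struct MachineAndPidSketch {        // mirrors OID::MachineAndPid (3 machine bytes + 16-bit pid)
        unsigned char _machineNumber[3];
        unsigned short _pid;
    };

    // same operation as OID::foldInPid() above
    static void foldInPidSketch(MachineAndPidSketch& x, unsigned pid) {
        x._pid ^= (unsigned short) pid;                          // low 16 bits of the pid
        unsigned short& rest = (unsigned short&) x._machineNumber[1];
        rest ^= pid >> 16;                                       // high bits perturb the machine id
    }

    int main() {
        MachineAndPidSketch x = { { 0x12, 0x34, 0x56 }, 0 };
        foldInPidSketch(x, 0x1ABCDu);                            // a pid wider than 16 bits
        std::printf("pid field: %04x\n", (unsigned) x._pid);     // prints abcd
        return 0;
    }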
+ foldInPid(x); + ourMachineAndPid = genMachineAndPid(); + assert( x != ourMachineAndPid ); + ourMachineAndPid = x; + } + + void OID::init() { + static AtomicUInt inc = (unsigned) security.getNonce(); + + { + unsigned t = (unsigned) time(0); + unsigned char *T = (unsigned char *) &t; + _time[0] = T[3]; // big endian order because we use memcmp() to compare OID's + _time[1] = T[2]; + _time[2] = T[1]; + _time[3] = T[0]; + } + + _machineAndPid = ourMachineAndPid; + + { + int new_inc = inc++; + unsigned char *T = (unsigned char *) &new_inc; + _inc[0] = T[2]; + _inc[1] = T[1]; + _inc[2] = T[0]; + } + } + + void OID::init( string s ) { + assert( s.size() == 24 ); + const char *p = s.c_str(); + for( int i = 0; i < 12; i++ ) { + data[i] = fromHex(p); + p += 2; + } + } + + void OID::init(Date_t date, bool max) { + int time = (int) (date / 1000); + char* T = (char *) &time; + data[0] = T[3]; + data[1] = T[2]; + data[2] = T[1]; + data[3] = T[0]; + + if (max) + *(long long*)(data + 4) = 0xFFFFFFFFFFFFFFFFll; + else + *(long long*)(data + 4) = 0x0000000000000000ll; + } + + time_t OID::asTimeT() { + int time; + char* T = (char *) &time; + T[0] = data[3]; + T[1] = data[2]; + T[2] = data[1]; + T[3] = data[0]; + return time; + } + +} diff --git a/bson/oid.h b/bson/oid.h index c1bf34d..bf06ee1 100644 --- a/bson/oid.h +++ b/bson/oid.h @@ -22,56 +22,48 @@ namespace mongo { #pragma pack(1) - /** Object ID type. - BSON objects typically have an _id field for the object id. This field should be the first - member of the object when present. class OID is a special type that is a 12 byte id which + /** Object ID type. + BSON objects typically have an _id field for the object id. This field should be the first + member of the object when present. class OID is a special type that is a 12 byte id which is likely to be unique to the system. You may also use other types for _id's. - When _id field is missing from a BSON object, on an insert the database may insert one + When _id field is missing from a BSON object, on an insert the database may insert one automatically in certain circumstances. Warning: You must call OID::newState() after a fork(). + + Typical contents of the BSON ObjectID is a 12-byte value consisting of a 4-byte timestamp (seconds since epoch), + a 3-byte machine id, a 2-byte process id, and a 3-byte counter. Note that the timestamp and counter fields must + be stored big endian unlike the rest of BSON. This is because they are compared byte-by-byte and we want to ensure + a mostly increasing order. */ class OID { - union { - struct{ - long long a; - unsigned b; - }; - unsigned char data[12]; - }; - static unsigned _machine; public: - /** call this after a fork */ - static void newState(); + OID() : a(0), b(0) { } - /** initialize to 'null' */ - void clear() { a = 0; b = 0; } + /** init from a 24 char hex string */ + explicit OID(const string &s) { init(s); } - const unsigned char *getData() const { return data; } + /** initialize to 'null' */ + void clear() { a = 0; b = 0; } - bool operator==(const OID& r) { - return a==r.a&&b==r.b; - } - bool operator!=(const OID& r) { - return a!=r.a||b!=r.b; - } + const unsigned char *getData() const { return data; } - /** The object ID output as 24 hex digits. 
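Because the four leading bytes hold the creation time most-significant-byte first, the timestamp can be read back from any OID by reversing those bytes, exactly as asTimeT() does above. A small sketch of that decode on a raw 12-byte id (the helper name and sample bytes are illustrative; like the code above, it assumes a little-endian host):

    #include <cstdio>
    #include <ctime>

    // Recover the creation time from the first four (big-endian) bytes of an OID.
    static time_t oidTimeSketch(const unsigned char data[12]) {
        int t;
        char* T = (char*) &t;
        T[0] = data[3];
        T[1] = data[2];
        T[2] = data[1];
        T[3] = data[0];
        return t;
    }

    int main() {
        unsigned char oid[12] = { 0x4d, 0x81, 0xb2, 0x00,        // timestamp (big endian)
                                  0xaa, 0xbb, 0xcc,              // machine
                                  0x01, 0x02,                    // pid
                                  0x00, 0x00, 0x01 };            // counter (big endian)
        std::printf("created at %u\n", (unsigned) oidTimeSketch(oid));  // 1300345344 = 2011-03-17 07:02:24 UTC
        return 0;
    }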
*/ - string str() const { - return toHexLower(data, 12); - } + bool operator==(const OID& r) const { return a==r.a && b==r.b; } + bool operator!=(const OID& r) const { return a!=r.a || b!=r.b; } + int compare( const OID& other ) const { return memcmp( data , other.data , 12 ); } + bool operator<( const OID& other ) const { return compare( other ) < 0; } + bool operator<=( const OID& other ) const { return compare( other ) <= 0; } + /** @return the object ID output as 24 hex digits */ + string str() const { return toHexLower(data, 12); } string toString() const { return str(); } static OID gen() { OID o; o.init(); return o; } - - static unsigned staticMachine(){ return _machine; } - /** - sets the contents to a new oid / randomized value - */ + + /** sets the contents to a new oid / randomized value */ void init(); - /** Set to the hex string value specified. */ + /** init from a 24 char hex string */ void init( string s ); /** Set to the min/max OID that could be generated at given timestamp. */ @@ -79,12 +71,39 @@ namespace mongo { time_t asTimeT(); Date_t asDateT() { return asTimeT() * (long long)1000; } - + bool isSet() const { return a || b; } - - int compare( const OID& other ) const { return memcmp( data , other.data , 12 ); } - - bool operator<( const OID& other ) const { return compare( other ) < 0; } + + /** call this after a fork to update the process id */ + static void justForked(); + + static unsigned getMachineId(); // features command uses + static void regenMachineId(); // used by unit tests + + private: + struct MachineAndPid { + unsigned char _machineNumber[3]; + unsigned short _pid; + bool operator!=(const OID::MachineAndPid& rhs) const; + }; + static MachineAndPid ourMachine, ourMachineAndPid; + union { + struct { + // 12 bytes total + unsigned char _time[4]; + MachineAndPid _machineAndPid; + unsigned char _inc[3]; + }; + struct { + long long a; + unsigned b; + }; + unsigned char data[12]; + }; + + static unsigned ourPid(); + static void foldInPid(MachineAndPid& x); + static MachineAndPid genMachineAndPid(); }; #pragma pack() diff --git a/bson/ordering.h b/bson/ordering.h index fbbfbec..749e20d 100644 --- a/bson/ordering.h +++ b/bson/ordering.h @@ -23,7 +23,7 @@ namespace mongo { The constructor is private to make conversion more explicit so we notice where we call make(). Over time we should push this up higher and higher. */ - class Ordering { + class Ordering { const unsigned bits; const unsigned nkeys; Ordering(unsigned b,unsigned n) : bits(b),nkeys(n) { } @@ -32,13 +32,13 @@ namespace mongo { get(0) == 1 get(1) == -1 */ - int get(int i) const { + int get(int i) const { return ((1 << i) & bits) ? -1 : 1; } // for woCompare... unsigned descending(unsigned mask) const { return bits & mask; } - + operator string() const { StringBuilder buf(32); for ( unsigned i=0; i StringData( const char (&val)[N], LiteralTag ) : _data(&val[0]), _size(N-1) {} - // Construct a StringData explicitly, for the case where the - // length of the string is already known. 'c' must be a - // pointer to a null-terminated string, and strlenOfc must be - // the length that std::strlen(c) would return, a.k.a the - // index of the terminator in c. - StringData( const char* c, size_t strlenOfc ) - : _data(c), _size((unsigned) strlenOfc) {} + // accessors const char* const data() const { return _data; } const unsigned size() const { return _size; } private: - // TODO - Hook this class up in the BSON machinery - // There are two assumptions here that we may want to review then. 
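Both assumptions are easy to state as code: the wrapped pointer stays null-terminated, and the stored size reports the length without that terminator (so the literal-tag constructor above records N-1 for an N-byte string literal). A self-contained mimic of those invariants, with the struct name chosen for illustration only:

    #include <cassert>
    #include <cstring>

    struct StringDataSketch {                     // stand-in for mongo::StringData
        const char* data;
        unsigned    size;                         // excludes the null terminator
        explicit StringDataSketch(const char* c)
            : data(c), size((unsigned) std::strlen(c)) {}
    };

    int main() {
        StringDataSketch s("mongo");
        assert(s.size == 5);                      // terminator not counted ...
        assert(s.data[s.size] == '\0');           // ... but still present in the buffer
        return 0;
    }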
+ // There are two assumptions we use bellow. // '_data' *always* finishes with a null terminator // 'size' does *not* account for the null terminator - // These assumptions may make it easier to minimize changes to existing code + // These assumptions may make it easier to minimize changes to existing code. const char* const _data; const unsigned _size; }; diff --git a/bson/util/atomic_int.h b/bson/util/atomic_int.h index f4d2749..1573552 100644 --- a/bson/util/atomic_int.h +++ b/bson/util/atomic_int.h @@ -24,51 +24,55 @@ namespace mongo { - struct AtomicUInt{ + struct AtomicUInt { AtomicUInt() : x(0) {} AtomicUInt(unsigned z) : x(z) { } - volatile unsigned x; - operator unsigned() const { - return x; - } + + operator unsigned() const { return x; } + unsigned get() const { return x; } + inline AtomicUInt operator++(); // ++prefix inline AtomicUInt operator++(int);// postfix++ inline AtomicUInt operator--(); // --prefix inline AtomicUInt operator--(int); // postfix-- + + inline void zero() { x = 0; } // TODO: this isn't thread safe + + volatile unsigned x; }; #if defined(_WIN32) - AtomicUInt AtomicUInt::operator++(){ + AtomicUInt AtomicUInt::operator++() { // InterlockedIncrement returns the new value return InterlockedIncrement((volatile long*)&x); //long is 32bits in Win64 } - AtomicUInt AtomicUInt::operator++(int){ + AtomicUInt AtomicUInt::operator++(int) { return InterlockedIncrement((volatile long*)&x)-1; } - AtomicUInt AtomicUInt::operator--(){ + AtomicUInt AtomicUInt::operator--() { return InterlockedDecrement((volatile long*)&x); } - AtomicUInt AtomicUInt::operator--(int){ + AtomicUInt AtomicUInt::operator--(int) { return InterlockedDecrement((volatile long*)&x)+1; } #elif defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) // this is in GCC >= 4.1 - AtomicUInt AtomicUInt::operator++(){ + AtomicUInt AtomicUInt::operator++() { return __sync_add_and_fetch(&x, 1); } - AtomicUInt AtomicUInt::operator++(int){ + AtomicUInt AtomicUInt::operator++(int) { return __sync_fetch_and_add(&x, 1); } - AtomicUInt AtomicUInt::operator--(){ + AtomicUInt AtomicUInt::operator--() { return __sync_add_and_fetch(&x, -1); } - AtomicUInt AtomicUInt::operator--(int){ + AtomicUInt AtomicUInt::operator--(int) { return __sync_fetch_and_add(&x, -1); } #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) // from boost 1.39 interprocess/detail/atomic.hpp - inline unsigned atomic_int_helper(volatile unsigned *x, int val){ + inline unsigned atomic_int_helper(volatile unsigned *x, int val) { int r; asm volatile ( @@ -80,16 +84,16 @@ namespace mongo { ); return r; } - AtomicUInt AtomicUInt::operator++(){ + AtomicUInt AtomicUInt::operator++() { return atomic_int_helper(&x, 1)+1; } - AtomicUInt AtomicUInt::operator++(int){ + AtomicUInt AtomicUInt::operator++(int) { return atomic_int_helper(&x, 1); } - AtomicUInt AtomicUInt::operator--(){ + AtomicUInt AtomicUInt::operator--() { return atomic_int_helper(&x, -1)-1; } - AtomicUInt AtomicUInt::operator--(int){ + AtomicUInt AtomicUInt::operator--(int) { return atomic_int_helper(&x, -1); } #else diff --git a/bson/util/builder.h b/bson/util/builder.h index 9d9eda2..6f4ff9e 100644 --- a/bson/util/builder.h +++ b/bson/util/builder.h @@ -27,6 +27,24 @@ namespace mongo { + /* Note the limit here is rather arbitrary and is simply a standard. generally the code works + with any object that fits in ram. 
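In all four platform branches above, the increment and decrement operators follow the usual convention: the prefix forms return the value after the change, the postfix forms return the value before it. A short single-threaded sketch of that observable behaviour (the include path is an assumption, and the header's platform intrinsics must be available when it is compiled):

    #include <cassert>
    #include "bson/util/atomic_int.h"     // assumed include path for the header above

    int main() {
        mongo::AtomicUInt c;              // starts at 0
        unsigned a = ++c;                 // prefix: returns the new value  -> 1
        unsigned b = c++;                 // postfix: returns the old value -> 1
        assert(a == 1 && b == 1);
        assert(c.get() == 2);
        --c;                              // back down to 1
        assert((unsigned) c == 1);
        return 0;
    }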
+ + Also note that the server has some basic checks to enforce this limit but those checks are not exhaustive + for example need to check for size too big after + update $push (append) operation + various db.eval() type operations + */ + const int BSONObjMaxUserSize = 16 * 1024 * 1024; + + /* + Sometimeswe we need objects slightly larger - an object in the replication local.oplog + is slightly larger than a user object for example. + */ + const int BSONObjMaxInternalSize = BSONObjMaxUserSize + ( 16 * 1024 ); + + const int BufferMaxSize = 64 * 1024 * 1024; + class StringBuilder; void msgasserted(int msgid, const char *msg); @@ -38,7 +56,8 @@ namespace mongo { data = (char *) malloc(size); if( data == 0 ) msgasserted(10000, "out of memory BufBuilder"); - } else { + } + else { data = 0; } l = 0; @@ -54,16 +73,18 @@ namespace mongo { } } - void reset( int maxSize = 0 ){ + void reset( int maxSize = 0 ) { l = 0; - if ( maxSize && size > maxSize ){ + if ( maxSize && size > maxSize ) { free(data); data = (char*)malloc(maxSize); size = maxSize; - } + } } - /* leave room for some stuff later */ + /** leave room for some stuff later + @return point to region that was skipped. pointer may change later (on realloc), so for immediate use only + */ char* skip(int n) { return grow(n); } /* note this may be deallocated (realloced) if you keep writing. */ @@ -73,10 +94,10 @@ namespace mongo { /* assume ownership of the buffer - you must then free() it */ void decouple() { data = 0; } - void appendChar(char j){ + void appendChar(char j) { *((char*)grow(sizeof(char))) = j; } - void appendNum(char j){ + void appendNum(char j) { *((char*)grow(sizeof(char))) = j; } void appendNum(short j) { @@ -105,18 +126,19 @@ namespace mongo { memcpy(grow((int) len), src, len); } + template + void appendStruct(const T& s) { + appendBuf(&s, sizeof(T)); + } + void appendStr(const StringData &str , bool includeEOO = true ) { const int len = str.size() + ( includeEOO ? 1 : 0 ); memcpy(grow(len), str.data(), len); } - int len() const { - return l; - } - - void setlen( int newLen ){ - l = newLen; - } + int len() const { return l; } + void setlen( int newLen ) { l = newLen; } + int getSize() const { return size; } /* returns the pre-grow write position */ inline char* grow(int by) { @@ -128,18 +150,16 @@ namespace mongo { return data + oldlen; } - int getSize() const { return size; } - private: /* "slow" portion of 'grow()' */ - void NOINLINE_DECL grow_reallocate(){ + void NOINLINE_DECL grow_reallocate() { int a = size * 2; if ( a == 0 ) a = 512; if ( l > a ) a = l + 16 * 1024; - if( a > 64 * 1024 * 1024 ) - msgasserted(10000, "BufBuilder grow() > 64MB"); + if ( a > BufferMaxSize ) + msgasserted(13548, "BufBuilder grow() > 64MB"); data = (char *) realloc(data, a); size= a; } @@ -152,87 +172,90 @@ namespace mongo { }; #if defined(_WIN32) +#pragma warning( push ) +// warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. 
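BufBuilder is the growable byte buffer the BSON builders write into: grow(n) makes room for n more bytes (reallocating as needed and asserting once the 64MB BufferMaxSize cap is exceeded), the append* helpers copy values at the current end, and skip(n) reserves space to be filled in later. A small usage sketch, assuming the constructor's optional initial-size default and StringData's implicit construction from a C string:

    #include <cassert>
    #include "bson/util/builder.h"        // assumed include path for the header above

    int main() {
        mongo::BufBuilder b;              // assumed default initial size
        b.appendNum((char) 7);            // 1 byte
        b.appendNum((short) 300);         // 2 bytes
        b.appendStr("abc");               // 3 chars + trailing NUL = 4 bytes
        assert(b.len() == 7);

        b.skip(5);                        // reserve 5 bytes to patch in later
        assert(b.len() == 12);
        return 0;
    }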
#pragma warning( disable : 4996 ) #endif + /** stringstream deals with locale so this is a lot faster than std::stringstream for UTF8 */ class StringBuilder { public: StringBuilder( int initsize=256 ) - : _buf( initsize ){ + : _buf( initsize ) { } -#define SBNUM(val,maxSize,macro) \ - int prev = _buf.l; \ - int z = sprintf( _buf.grow(maxSize) , macro , (val) ); \ - assert( z >= 0 ); \ - _buf.l = prev + z; \ - return *this; - - StringBuilder& operator<<( double x ){ - SBNUM( x , 25 , "%g" ); + StringBuilder& operator<<( double x ) { + return SBNUM( x , 25 , "%g" ); } - StringBuilder& operator<<( int x ){ - SBNUM( x , 11 , "%d" ); + StringBuilder& operator<<( int x ) { + return SBNUM( x , 11 , "%d" ); } - StringBuilder& operator<<( unsigned x ){ - SBNUM( x , 11 , "%u" ); + StringBuilder& operator<<( unsigned x ) { + return SBNUM( x , 11 , "%u" ); } - StringBuilder& operator<<( long x ){ - SBNUM( x , 22 , "%ld" ); + StringBuilder& operator<<( long x ) { + return SBNUM( x , 22 , "%ld" ); } - StringBuilder& operator<<( unsigned long x ){ - SBNUM( x , 22 , "%lu" ); + StringBuilder& operator<<( unsigned long x ) { + return SBNUM( x , 22 , "%lu" ); } - StringBuilder& operator<<( long long x ){ - SBNUM( x , 22 , "%lld" ); + StringBuilder& operator<<( long long x ) { + return SBNUM( x , 22 , "%lld" ); } - StringBuilder& operator<<( unsigned long long x ){ - SBNUM( x , 22 , "%llu" ); + StringBuilder& operator<<( unsigned long long x ) { + return SBNUM( x , 22 , "%llu" ); } - StringBuilder& operator<<( short x ){ - SBNUM( x , 8 , "%hd" ); + StringBuilder& operator<<( short x ) { + return SBNUM( x , 8 , "%hd" ); } - StringBuilder& operator<<( char c ){ + StringBuilder& operator<<( char c ) { _buf.grow( 1 )[0] = c; return *this; } -#undef SBNUM - void appendDoubleNice( double x ){ + void appendDoubleNice( double x ) { int prev = _buf.l; char * start = _buf.grow( 32 ); int z = sprintf( start , "%.16g" , x ); assert( z >= 0 ); _buf.l = prev + z; - if( strchr(start, '.') == 0 && strchr(start, 'E') == 0 && strchr(start, 'N') == 0 ){ + if( strchr(start, '.') == 0 && strchr(start, 'E') == 0 && strchr(start, 'N') == 0 ) { write( ".0" , 2 ); } } - void write( const char* buf, int len){ - memcpy( _buf.grow( len ) , buf , len ); - } + void write( const char* buf, int len) { memcpy( _buf.grow( len ) , buf , len ); } - void append( const StringData& str ){ - memcpy( _buf.grow( str.size() ) , str.data() , str.size() ); - } - StringBuilder& operator<<( const StringData& str ){ + void append( const StringData& str ) { memcpy( _buf.grow( str.size() ) , str.data() , str.size() ); } + + StringBuilder& operator<<( const StringData& str ) { append( str ); return *this; } - - // access - void reset( int maxSize = 0 ){ - _buf.reset( maxSize ); - } - - std::string str(){ - return std::string(_buf.data, _buf.l); - } + void reset( int maxSize = 0 ) { _buf.reset( maxSize ); } + + std::string str() const { return std::string(_buf.data, _buf.l); } private: BufBuilder _buf; + + // non-copyable, non-assignable + StringBuilder( const StringBuilder& ); + StringBuilder& operator=( const StringBuilder& ); + + template + StringBuilder& SBNUM(T val,int maxSize,const char *macro) { + int prev = _buf.l; + int z = sprintf( _buf.grow(maxSize) , macro , (val) ); + assert( z >= 0 ); + _buf.l = prev + z; + return *this; + } }; +#if defined(_WIN32) +#pragma warning( pop ) +#endif + } // namespace mongo diff --git a/bson/util/misc.h b/bson/util/misc.h index cad9a28..b31f36f 100644 --- a/bson/util/misc.h +++ b/bson/util/misc.h @@ -34,7 +34,7 @@ 
namespace mongo { buf[24] = 0; // don't want the \n } - inline string time_t_to_String(time_t t = time(0) ){ + inline string time_t_to_String(time_t t = time(0) ) { char buf[64]; #if defined(_WIN32) ctime_s(buf, sizeof(buf), &t); @@ -76,7 +76,7 @@ namespace mongo { Date_t(unsigned long long m): millis(m) {} operator unsigned long long&() { return millis; } operator const unsigned long long&() const { return millis; } - string toString() const { + string toString() const { char buf[64]; time_t_to_String(millis/1000, buf); return buf; diff --git a/buildscripts/distmirror.py b/buildscripts/distmirror.py index 1902e2a..7af1a89 100644 --- a/buildscripts/distmirror.py +++ b/buildscripts/distmirror.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # Download mongodb stuff (at present builds, sources, docs, but not # drivers). diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py index d87b7ad..a105647 100755 --- a/buildscripts/errorcodes.py +++ b/buildscripts/errorcodes.py @@ -1,32 +1,17 @@ -#!/usr/bin/python +#!/usr/bin/env python import os import sys import re import utils -def getAllSourceFiles( arr=None , prefix="." ): - if arr is None: - arr = [] - - for x in os.listdir( prefix ): - if x.startswith( "." ) or x.startswith( "pcre-" ) or x.startswith( "32bit" ) or x.startswith( "mongodb-" ) or x.startswith("debian") or x.startswith( "mongo-cxx-driver" ): - continue - full = prefix + "/" + x - if os.path.isdir( full ) and not os.path.islink( full ): - getAllSourceFiles( arr , full ) - else: - if full.endswith( ".cpp" ) or full.endswith( ".h" ) or full.endswith( ".c" ): - arr.append( full ) - - return arr assertNames = [ "uassert" , "massert" ] def assignErrorCodes(): cur = 10000 for root in assertNames: - for x in getAllSourceFiles(): + for x in utils.getAllSourceFiles(): print( x ) didAnything = False fixed = "" @@ -50,7 +35,7 @@ def readErrorCodes( callback ): ps = [ re.compile( "([um]asser(t|ted)) *\( *(\d+)" ) , re.compile( "(User|Msg)Exceptio(n)\( *(\d+)" ) ] - for x in getAllSourceFiles(): + for x in utils.getAllSourceFiles(): lineNum = 1 for line in open( x ): for p in ps: diff --git a/buildscripts/frob_version.py b/buildscripts/frob_version.py index 7b89e0b..560a8ed 100644 --- a/buildscripts/frob_version.py +++ b/buildscripts/frob_version.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import with_statement import tempfile diff --git a/buildscripts/hacks_ubuntu.py b/buildscripts/hacks_ubuntu.py index 81deddd..977d2df 100644 --- a/buildscripts/hacks_ubuntu.py +++ b/buildscripts/hacks_ubuntu.py @@ -21,7 +21,7 @@ def foundxulrunner( env , options ): if best is None: - print( "warning: using ubuntu without xulrunner-dev. we reccomend installing it" ) + print( "warning: using ubuntu without xulrunner-dev. we recommend installing it" ) return False incroot = "/usr/include/" + best + "/" diff --git a/buildscripts/makealldists.py b/buildscripts/makealldists.py index 762700e..6b6f365 100644 --- a/buildscripts/makealldists.py +++ b/buildscripts/makealldists.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import with_statement import subprocess @@ -184,14 +184,16 @@ def __main__(): print "makedist output under: %s\ncombined repo: %s\n" % (outputroot, repodir) sys.stdout.flush() # Add more dist/version/architecture tuples as they're supported. 
- dists = (("ubuntu", "10.4"), + dists = (("ubuntu", "10.10"), + ("ubuntu", "10.4"), ("ubuntu", "9.10"), ("ubuntu", "9.4"), - ("ubuntu", "8.10"), + #("ubuntu", "8.10"), ("debian", "5.0"), ("centos", "5.4"), - ("fedora", "11"), - ("fedora", "12")) + #("fedora", "12"), + ("fedora", "13"), + ("fedora", "14")) arches = ("x86", "x86_64") # mongos = branches.split(',') # Run a makedist for each distro/version/architecture tuple above. @@ -202,7 +204,7 @@ def __main__(): procs = [] count = 0 for ((distro, distro_version), arch, spec) in gen([dists, arches, [branches]]): - # FIXME: now x86 fedoras on RackSpace circa 04/10. + # FIXME: no x86 fedoras on RackSpace circa 04/10. if distro == "fedora" and arch == "x86": continue count+=1 @@ -264,9 +266,9 @@ def __main__(): if r != 0: raise Exception("mergerepositories.py exited %d" % r) print repodir - pushrepo(repodir) - shutil.rmtree(outputroot) - shutil.rmtree(repodir) + #pushrepo(repodir) + #shutil.rmtree(outputroot) + #shutil.rmtree(repodir) return 0 diff --git a/buildscripts/makedist.py b/buildscripts/makedist.py index 1928b76..b5387c2 100644 --- a/buildscripts/makedist.py +++ b/buildscripts/makedist.py @@ -123,7 +123,9 @@ class EC2InstanceConfigurator(BaseConfigurator): def __init__(self, **kwargs): super(EC2InstanceConfigurator, self).__init__(**kwargs) self.configuration += [("ec2_ami", - ((("ubuntu", "10.4", "x86_64"), "ami-bf07ead6"), + ((("ubuntu", "10.10", "x86_64"), "ami-688c7801"), + (("ubuntu", "10.10", "x86"), "ami-1a837773"), + (("ubuntu", "10.4", "x86_64"), "ami-bf07ead6"), (("ubuntu", "10.4", "x86"), "ami-f707ea9e"), (("ubuntu", "9.10", "x86_64"), "ami-55739e3c"), (("ubuntu", "9.10", "x86"), "ami-bb709dd2"), @@ -140,9 +142,9 @@ class EC2InstanceConfigurator(BaseConfigurator): (("fedora", "8", "x86_64"), "ami-2547a34c"), (("fedora", "8", "x86"), "ami-5647a33f"))), ("rackspace_imgname", - ((("fedora", "11", "x86_64"), "Fedora 11"), - (("fedora", "12", "x86_64"), "Fedora 12"), - (("fedora", "13", "x86_64"), "Fedora 13"))), + ((("fedora", "12", "x86_64"), "Fedora 12"), + (("fedora", "13", "x86_64"), "Fedora 13"), + (("fedora", "14", "x86_64"), "Fedora 14"))), ("ec2_mtype", ((("*", "*", "x86"), "m1.small"), (("*", "*", "x86_64"), "m1.large"))), @@ -266,6 +268,7 @@ class SshConnectionConfigurator (BaseConfigurator): # FLAW: this actually depends more on the AMI # than the triple. 
((("debian", "*", "*"), "root"), + (("ubuntu", "10.10", "*"), "ubuntu"), (("ubuntu", "10.4", "*"), "ubuntu"), (("ubuntu", "9.10", "*"), "ubuntu"), (("ubuntu", "9.4", "*"), "root"), @@ -420,8 +423,12 @@ cp {pkg_name}{pkg_name_suffix}*.tar.gz "{pkg_product_dir}/{distro_version}/10gen dpkg-scanpackages "{pkg_product_dir}/{distro_version}/10gen/binary-{distro_arch}" /dev/null | gzip -9c > "{pkg_product_dir}/{distro_version}/10gen/binary-{distro_arch}/Packages.gz" dpkg-scansources "{pkg_product_dir}/{distro_version}/10gen/source" /dev/null | gzip -9c > "{pkg_product_dir}/{distro_version}/10gen/source/Sources.gz" """ - rpm_prereq_commands = """ -rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/{distro_arch}/epel-release-5-3.noarch.rpm + centos_prereq_commands = """ +rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/{distro_arch}/epel-release-5-4.noarch.rpm +yum -y install {pkg_prereq_str} +""" + fedora_prereq_commands = """ +#rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/{distro_arch}/epel-release-5-4.noarch.rpm yum -y install {pkg_prereq_str} """ rpm_build_commands=""" @@ -462,6 +469,7 @@ rpm -ivh /usr/src/redhat/RPMS/{distro_arch}/boost-devel-1.38.0-1.{distro_arch}.r # 1.34, but 1.35 packages are available, so we want those. versioned_deb_boost_prereqs = ["libboost-thread1.35-dev", "libboost-filesystem1.35-dev", "libboost-program-options1.35-dev", "libboost-date-time1.35-dev", "libboost1.35-dev"] + new_versioned_deb_boost_prereqs = ["libboost-thread1.42-dev", "libboost-filesystem1.42-dev", "libboost-program-options1.42-dev", "libboost-date-time1.42-dev", "libboost1.42-dev"] unversioned_deb_xulrunner_prereqs = ["xulrunner-dev"] old_versioned_deb_xulrunner_prereqs = ["xulrunner-1.9-dev"] @@ -511,6 +519,8 @@ git clone git://github.com/mongodb/mongo.git self.versioned_deb_boost_prereqs + self.unversioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "9.10", "*"), self.unversioned_deb_boost_prereqs + self.unversioned_deb_xulrunner_prereqs + self.common_deb_prereqs), + (("ubuntu", "10.10", "*"), + self.new_versioned_deb_boost_prereqs + self.new_versioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "10.4", "*"), self.unversioned_deb_boost_prereqs + self.new_versioned_deb_xulrunner_prereqs + self.common_deb_prereqs), (("ubuntu", "8.10", "*"), @@ -532,22 +542,24 @@ git clone git://github.com/mongodb/mongo.git (("ubuntu", "*", "*"), self.preamble_commands + self.deb_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.deb_build_commands), (("centos", "*", "*"), - self.preamble_commands + self.old_rpm_precommands + self.rpm_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.mangle_files_for_ancient_redhat_commands + self.rpm_build_commands), + self.preamble_commands + self.old_rpm_precommands + self.centos_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.mangle_files_for_ancient_redhat_commands + self.rpm_build_commands), (("fedora", "*", "*"), - self.preamble_commands + self.old_rpm_precommands + self.rpm_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.rpm_build_commands))), + self.preamble_commands + self.old_rpm_precommands + self.fedora_prereq_commands + self.get_mongo_commands + self.mangle_files_commands + self.rpm_build_commands))), ("preamble_commands", ((("*", "*", "*"), self.preamble_commands), )), ("install_prereqs", ((("debian", "*", "*"), self.deb_prereq_commands), (("ubuntu", "*", "*"), self.deb_prereq_commands), - (("centos", "*", 
"*"), self.rpm_prereq_commands), - (("fedora", "*", "*"), self.rpm_prereq_commands))), + (("centos", "*", "*"), self.centos_prereq_commands), + (("fedora", "*", "*"), self.fedora_prereq_commands))), ("get_mongo", ((("*", "*", "*"), self.get_mongo_commands), )), ("mangle_mongo", ((("debian", "*", "*"), self.mangle_files_commands), + (("ubuntu", "10.10", "*"), + self.mangle_files_commands + self.mangle_files_for_new_deb_xulrunner_commands), (("ubuntu", "10.4", "*"), self.mangle_files_commands + self.mangle_files_for_new_deb_xulrunner_commands), (("ubuntu", "*", "*"), self.mangle_files_commands), diff --git a/buildscripts/mergerepositories.py b/buildscripts/mergerepositories.py index bc50d08..028b6e2 100644 --- a/buildscripts/mergerepositories.py +++ b/buildscripts/mergerepositories.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python from __future__ import with_statement from libcloud.types import Provider diff --git a/buildscripts/s3del.py b/buildscripts/s3del.py new file mode 100644 index 0000000..7967de6 --- /dev/null +++ b/buildscripts/s3del.py @@ -0,0 +1,36 @@ + +import os +import sys +import time + +sys.path.append( "." ) +sys.path.append( ".." ) +sys.path.append( "../../" ) +sys.path.append( "../../../" ) + +import simples3 +import settings +import subprocess + +# check s3 for md5 hashes + +def check_dir( bucket , prefix , todel ): + + for ( key , modify , etag , size ) in bucket.listdir( prefix=prefix ): + if key.find( todel ) < 0: + continue + print( key ) + time.sleep( 2 ) + bucket.delete( key ) + +def clean( todel ): + + + bucket = simples3.S3Bucket( settings.bucket , settings.id , settings.key ) + + for x in [ "osx" , "linux" , "win32" , "sunos5" , "src" ]: + check_dir( bucket , x , todel ) + + +if __name__ == "__main__": + clean( sys.argv[1] ) diff --git a/buildscripts/smoke.py b/buildscripts/smoke.py index 0023226..5fdd26f 100755 --- a/buildscripts/smoke.py +++ b/buildscripts/smoke.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/env python # smoke.py: run some mongo tests. -# Bugs, TODOs: +# Bugs, TODOs: # 0 Some tests hard-code pathnames relative to the mongo repository, # so the smoke.py process and all its children must be run with the @@ -34,49 +34,48 @@ # jobs on the same host at once. So something's gotta change. from __future__ import with_statement -from subprocess import Popen, PIPE, call + +import glob +from optparse import OptionParser import os +import parser +import re +import shutil +import socket +from subprocess import (Popen, + PIPE, + call) import sys -import utils import time -import socket -from optparse import OptionParser -import atexit -import glob -import shutil -import re -import parser -mongoRepo = os.getcwd() #'./' -testPath = None +from pymongo import Connection + +import utils -mongodExecutable = "./mongod" -mongodPort = "32000" -shellExecutable = "./mongo" -continueOnFailure = False -oneMongodPerTest = False +# TODO clean this up so we don't need globals... +mongo_repo = os.getcwd() #'./' +test_path = None +mongod_executable = None +mongod_port = None +shell_executable = None +continue_on_failure = None tests = [] winners = [] losers = {} -# Finally, atexit functions seem to be a little oblivious to whether -# Python is exiting because of an error, so we'll use this to -# communicate with the report() function. 
-exit_bad = True - # For replication hash checking -replicated_dbs = [] +replicated_collections = [] lost_in_slave = [] lost_in_master = [] screwy_in_slave = {} -smokeDbPrefix = '' -smallOplog = False +smoke_db_prefix = '' +small_oplog = False # This class just implements the with statement API, for a sneaky # purpose below. -class nothing(object): +class Nothing(object): def __enter__(self): return self def __exit__(self, type, value, traceback): @@ -99,23 +98,23 @@ class mongod(object): print >> sys.stderr, e return not isinstance(value, Exception) - def ensureTestDirs(self): - utils.ensureDir( smokeDbPrefix + "/tmp/unittest/" ) - utils.ensureDir( smokeDbPrefix + "/data/" ) - utils.ensureDir( smokeDbPrefix + "/data/db/" ) + def ensure_test_dirs(self): + utils.ensureDir(smoke_db_prefix + "/tmp/unittest/") + utils.ensureDir(smoke_db_prefix + "/data/") + utils.ensureDir(smoke_db_prefix + "/data/db/") - def checkMongoPort( self, port=27017 ): + def check_mongo_port(self, port=27017): sock = socket.socket() sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) sock.settimeout(1) sock.connect(("localhost", int(port))) sock.close() - - def didMongodStart( self, port=mongodPort, timeout=20 ): + + def did_mongod_start(self, port=mongod_port, timeout=20): while timeout > 0: - time.sleep( 1 ) + time.sleep(1) try: - self.checkMongoPort( int(port) ) + self.check_mongo_port(int(port)) return True except Exception,e: print >> sys.stderr, e @@ -123,47 +122,45 @@ class mongod(object): return False def start(self): - global mongodPort + global mongod_port global mongod if self.proc: print >> sys.stderr, "probable bug: self.proc already set in start()" return - self.ensureTestDirs() - dirName = smokeDbPrefix + "/data/db/sconsTests/" - self.port = int(mongodPort) + self.ensure_test_dirs() + dir_name = smoke_db_prefix + "/data/db/sconsTests/" + self.port = int(mongod_port) self.slave = False if 'slave' in self.kwargs: - dirName = smokeDbPrefix + '/data/db/sconsTestsSlave/' - srcport = mongodPort + dir_name = smoke_db_prefix + '/data/db/sconsTestsSlave/' + srcport = mongod_port self.port += 1 self.slave = True - if os.path.exists ( dirName ): + if os.path.exists(dir_name): if 'slave' in self.kwargs: - argv = ["python", "buildscripts/cleanbb.py", '--nokill', dirName] - + argv = ["python", "buildscripts/cleanbb.py", '--nokill', dir_name] else: - argv = ["python", "buildscripts/cleanbb.py", dirName] - call( argv ) - utils.ensureDir( dirName ) - argv = [mongodExecutable, "--port", str(self.port), "--dbpath", dirName] - if self.kwargs.get('smallOplog'): - argv += ["--master", "--oplogSize", "10"] + argv = ["python", "buildscripts/cleanbb.py", dir_name] + call(argv) + utils.ensureDir(dir_name) + argv = [mongod_executable, "--port", str(self.port), "--dbpath", dir_name] + if self.kwargs.get('small_oplog'): + argv += ["--master", "--oplogSize", "128"] if self.slave: - argv += ['--slave', '--source', 'localhost:'+str(srcport)] + argv += ['--slave', '--source', 'localhost:' + str(srcport)] print "running " + " ".join(argv) self.proc = Popen(argv) - if not self.didMongodStart( self.port ): - raise Exception( "Failed to start mongod" ) - + if not self.did_mongod_start(self.port): + raise Exception("Failed to start mongod") + if self.slave: - while True: - argv = [shellExecutable, "--port", str(self.port), "--quiet", "--eval", 'db.printSlaveReplicationInfo()'] - res = Popen(argv, stdout=PIPE).communicate()[0] - if res.find('initial sync') < 0: - break - - - + local = Connection(port=self.port, slave_okay=True).local 
+ synced = False + while not synced: + synced = True + for source in local.sources.find(fields=["syncedTo"]): + synced = synced and "syncedTo" in source and source["syncedTo"] + def stop(self): if not self.proc: print >> sys.stderr, "probable bug: self.proc unset in stop()" @@ -177,11 +174,14 @@ class mongod(object): win32process.TerminateProcess(self.proc._handle, -1) else: from os import kill - kill( self.proc.pid, 15 ) + kill(self.proc.pid, 15) self.proc.wait() sys.stderr.flush() sys.stdout.flush() - + + def wait_for_repl(self): + Connection(port=self.port).test.smokeWait.insert({}, w=2, wtimeout=5*60*1000) + class Bug(Exception): def __str__(self): return 'bug in smoke.py: ' + super(Bug, self).__str__() @@ -192,6 +192,7 @@ class TestFailure(Exception): class TestExitFailure(TestFailure): def __init__(self, *args): self.path = args[0] + self.status=args[1] def __str__(self): return "test %s exited with status %d" % (self.path, self.status) @@ -204,48 +205,41 @@ class TestServerFailure(TestFailure): def __str__(self): return 'mongod not running after executing test %s' % self.path -def checkDbHashes(master, slave): +def check_db_hashes(master, slave): # Need to pause a bit so a slave might catch up... if not slave.slave: raise(Bug("slave instance doesn't have slave attribute set")) - print "waiting for slave to catch up..." - ARB=10 # ARBITRARY - time.sleep(ARB) - while True: - # FIXME: it's probably better to do an empty insert and a - # getLastError() to force a sync. - argv = [shellExecutable, "--port", str(slave.port), "--quiet", "--eval", 'db.printSlaveReplicationInfo()'] - res = Popen(argv, stdout=PIPE).communicate()[0] - m = re.search('(\d+)secs ', res) - if int(m.group(1)) > ARB: #res.find('initial sync') < 0: - break - time.sleep(3) + print "waiting for slave to catch up" + master.wait_for_repl() + print "caught up!" # FIXME: maybe make this run dbhash on all databases? for mongod in [master, slave]: - argv = [shellExecutable, "--port", str(mongod.port), "--quiet", "--eval", "x=db.runCommand('dbhash'); printjson(x.collections)"] - hashstr = Popen(argv, stdout=PIPE).communicate()[0] - # WARNING FIXME KLUDGE et al.: this is sleazy and unsafe. - mongod.dict = eval(hashstr) + mongod.dbhash = Connection(port=mongod.port, slave_okay=True).test.command("dbhash") + mongod.dict = mongod.dbhash["collections"] + + global lost_in_slave, lost_in_master, screwy_in_slave, replicated_collections - global lost_in_slave, lost_in_master, screwy_in_slave, replicated_dbs + replicated_collections += master.dict.keys() - for db in replicated_dbs: + for db in replicated_collections: if db not in slave.dict: lost_in_slave.append(db) mhash = master.dict[db] shash = slave.dict[db] if mhash != shash: screwy_in_slave[db] = mhash + "/" + shash + for db in slave.dict.keys(): if db not in master.dict: lost_in_master.append(db) - replicated_dbs += master.dict.keys() + + # Blech. def skipTest(path): - if smallOplog: + if small_oplog: if os.path.basename(path) in ["cursor8.js", "indexh.js"]: return True return False @@ -254,78 +248,79 @@ def runTest(test): (path, usedb) = test (ignore, ext) = os.path.splitext(path) if skipTest(path): - print "skippping " + path + print "skipping " + path return if ext == ".js": - argv=[shellExecutable, "--port", mongodPort] + argv = [shell_executable, "--port", mongod_port] if not usedb: - argv += ["--nodb"] - if smallOplog: + argv += ["--nodb"] + if small_oplog: argv += ["--eval", 'testingReplication = true;'] argv += [path] elif ext in ["", ".exe"]: # Blech. 
if os.path.basename(path) in ["test", "test.exe", "perftest", "perftest.exe"]: - argv=[path] + argv = [path] # more blech elif os.path.basename(path) == 'mongos': - argv=[path, "--test"] + argv = [path, "--test"] else: - argv=[testPath and os.path.abspath(os.path.join(testPath, path)) or path, - "--port", mongodPort] + argv = [test_path and os.path.abspath(os.path.join(test_path, path)) or path, + "--port", mongod_port] else: raise Bug("fell off in extenstion case: %s" % path) print " *******************************************" print " Test : " + os.path.basename(path) + " ..." - t1=time.time() + t1 = time.time() # FIXME: we don't handle the case where the subprocess # hangs... that's bad. - r = call(argv, cwd=testPath) - t2=time.time() - print " " + str((t2-t1)*1000) + "ms" + r = call(argv, cwd=test_path) + t2 = time.time() + print " " + str((t2 - t1) * 1000) + "ms" if r != 0: raise TestExitFailure(path, r) - if Popen( [ mongodExecutable, "msg", "ping", mongodPort ], stdout=PIPE ).communicate()[0].count( "****ok" ) == 0: - raise TestServerFailure(path) - if call( [ mongodExecutable, "msg", "ping", mongodPort ] ) != 0: + + try: + c = Connection( "127.0.0.1" , int(mongod_port) ) + except Exception,e: raise TestServerFailure(path) - print "" -def runTests(tests): - # If we're in one-mongo-per-test mode, we instantiate a nothing - # around the loop, and a mongod inside the loop. + print "" +def run_tests(tests): # FIXME: some suites of tests start their own mongod, so don't # need this. (So long as there are no conflicts with port, # dbpath, etc., and so long as we shut ours down properly, # starting this mongod shouldn't break anything, though.) - with nothing() if oneMongodPerTest else mongod(smallOplog=smallOplog) as master1: - with nothing() if oneMongodPerTest else (mongod(slave=True) if smallOplog else nothing()) as slave1: + + # The reason we use with is so that we get __exit__ semantics + + with mongod(small_oplog=small_oplog) as master: + with mongod(slave=True) if small_oplog else Nothing() as slave: + if small_oplog: + master.wait_for_repl() + for test in tests: try: - with mongod(smallOplog=smallOplog) if oneMongodPerTest else nothing() as master2: - with mongod(slave=True) if oneMongodPerTest and smallOplog else nothing() as slave2: - runTest(test) + runTest(test) winners.append(test) - if isinstance(slave2, mongod): - checkDbHashes(master2, slave2) except TestFailure, f: try: print f # Record the failing test and re-raise. 
losers[f.path] = f.status raise f - except TestServerFailure, f: - if not oneMongodPerTest: - return 2 + except TestServerFailure, f: + return 2 except TestFailure, f: - if not continueOnFailure: + if not continue_on_failure: return 1 - if isinstance(slave1, mongod): - checkDbHashes(master1, slave1) + if isinstance(slave, mongod): + check_db_hashes(master, slave) return 0 + def report(): print "%d test%s succeeded" % (len(winners), '' if len(winners) == 1 else 's') num_missed = len(tests) - (len(winners) + len(losers.keys())) @@ -335,7 +330,7 @@ def report(): print "The following tests failed (with exit code):" for loser in losers: print "%s\t%d" % (loser, losers[loser]) - + def missing(lst, src, dst): if lst: print """The following collections were present in the %s but not the %s @@ -349,149 +344,124 @@ at the end of testing:""" % (src, dst) at the end of testing:""" for db in screwy_in_slave.keys(): print "%s\t %s" % (db, screwy_in_slave[db]) - if smallOplog and not (lost_in_master or lost_in_slave or screwy_in_slave): - print "replication ok for %d collections" % (len(replicated_dbs)) - if (exit_bad or losers or lost_in_slave or lost_in_master or screwy_in_slave): - status = 1 - else: - status = 0 - exit (status) + if small_oplog and not (lost_in_master or lost_in_slave or screwy_in_slave): + print "replication ok for %d collections" % (len(replicated_collections)) + if losers or lost_in_slave or lost_in_master or screwy_in_slave: + raise Exception("Test failures") + -def expandSuites(suites): +def expand_suites(suites): globstr = None - global mongoRepo, tests + tests = [] for suite in suites: - if suite == 'smokeAll': - tests = [] - expandSuites(['smoke', 'smokePerf', 'smokeClient', 'smokeJs', 'smokeJsPerf', 'smokeJsSlowNightly', 'smokeJsSlowWeekly', 'smokeParallel', 'smokeClone', 'smokeParallel', 'smokeRepl', 'smokeAuth', 'smokeSharding', 'smokeTool']) - break - if suite == 'smoke': + if suite == 'all': + return expand_suites(['test', 'perf', 'client', 'js', 'jsPerf', 'jsSlowNightly', 'jsSlowWeekly', 'parallel', 'clone', 'parallel', 'repl', 'auth', 'sharding', 'tool']) + if suite == 'test': if os.sys.platform == "win32": program = 'test.exe' else: program = 'test' (globstr, usedb) = (program, False) - elif suite == 'smokePerf': + elif suite == 'perf': if os.sys.platform == "win32": program = 'perftest.exe' else: program = 'perftest' (globstr, usedb) = (program, False) - elif suite == 'smokeJs': - # FIXME: _runner.js seems equivalent to "[!_]*.js". - #(globstr, usedb) = ('_runner.js', True) - (globstr, usedb) = ('[!_]*.js', True) - elif suite == 'smokeQuota': - (globstr, usedb) = ('quota/*.js', True) - elif suite == 'smokeJsPerf': - (globstr, usedb) = ('perf/*.js', True) - elif suite == 'smokeDisk': - (globstr, usedb) = ('disk/*.js', True) - elif suite == 'smokeJsSlowNightly': - (globstr, usedb) = ('slowNightly/*.js', True) - elif suite == 'smokeJsSlowWeekly': - (globstr, usedb) = ('slowWeekly/*.js', True) - elif suite == 'smokeParallel': - (globstr, usedb) = ('parallel/*.js', True) - elif suite == 'smokeClone': - (globstr, usedb) = ('clone/*.js', False) - elif suite == 'smokeRepl': - (globstr, usedb) = ('repl/*.js', False) - elif suite == 'smokeReplSets': - (globstr, usedb) = ('replsets/*.js', False) - elif suite == 'smokeAuth': - (globstr, usedb) = ('auth/*.js', False) - elif suite == 'smokeSharding': - (globstr, usedb) = ('sharding/*.js', False) - elif suite == 'smokeTool': - (globstr, usedb) = ('tool/*.js', False) - # well, the above almost works for everything... 
- elif suite == 'smokeClient': + elif suite == 'client': paths = ["firstExample", "secondExample", "whereExample", "authTest", "clientTest", "httpClientTest"] if os.sys.platform == "win32": - paths = [path+'.exe' for path in paths] + paths = [path + '.exe' for path in paths] # hack - tests += [(testPath and path or os.path.join(mongoRepo, path), False) for path in paths] + tests += [(test_path and path or os.path.join(mongo_repo, path), False) for path in paths] elif suite == 'mongosTest': if os.sys.platform == "win32": program = 'mongos.exe' else: program = 'mongos' - tests += [(os.path.join(mongoRepo, program), False)] + tests += [(os.path.join(mongo_repo, program), False)] + elif os.path.exists( suite ): + tests += [ ( os.path.join( mongo_repo , suite ) , True ) ] else: - raise Exception('unknown test suite %s' % suite) + try: + globstr, usedb = {"js": ("[!_]*.js", True), + "quota": ("quota/*.js", True), + "jsPerf": ("perf/*.js", True), + "disk": ("disk/*.js", True), + "jsSlowNightly": ("slowNightly/*.js", True), + "jsSlowWeekly": ("slowWeekly/*.js", True), + "parallel": ("parallel/*.js", True), + "clone": ("clone/*.js", False), + "repl": ("repl/*.js", False), + "replSets": ("replsets/*.js", False), + "dur": ("dur/*.js", False), + "auth": ("auth/*.js", False), + "sharding": ("sharding/*.js", False), + "tool": ("tool/*.js", False)}[suite] + except KeyError: + raise Exception('unknown test suite %s' % suite) if globstr: - globstr = os.path.join(mongoRepo, (os.path.join(('jstests/' if globstr.endswith('.js') else ''), globstr))) + globstr = os.path.join(mongo_repo, (os.path.join(('jstests/' if globstr.endswith('.js') else ''), globstr))) paths = glob.glob(globstr) paths.sort() tests += [(path, usedb) for path in paths] - if not tests: - raise Exception( "no tests found" ) + return tests +def add_exe(e): + if os.sys.platform.startswith( "win" ) and not e.endswith( ".exe" ): + e += ".exe" + return e + def main(): + global mongod_executable, mongod_port, shell_executable, continue_on_failure, small_oplog, smoke_db_prefix, test_path parser = OptionParser(usage="usage: smoke.py [OPTIONS] ARGS*") parser.add_option('--mode', dest='mode', default='suite', - help='If "files", ARGS are filenames; if "suite", ARGS are sets of tests. (default "suite")') + help='If "files", ARGS are filenames; if "suite", ARGS are sets of tests (%default)') # Some of our tests hard-code pathnames e.g., to execute, so until - # th we don't have the freedom to run from anyplace. -# parser.add_option('--mongo-repo', dest='mongoRepo', default=None, -# help='Top-level directory of mongo checkout to use. (default: script will make a guess)') - parser.add_option('--test-path', dest='testPath', default=None, - help="Path to the test executables to run " - "(currently only used for smokeClient)") - parser.add_option('--mongod', dest='mongodExecutable', #default='./mongod', - help='Path to mongod to run (default "./mongod")') - parser.add_option('--port', dest='mongodPort', default="32000", - help='Port the mongod will bind to (default 32000)') - parser.add_option('--mongo', dest='shellExecutable', #default="./mongo", - help='Path to mongo, for .js test files (default "./mongo")') - parser.add_option('--continue-on-failure', dest='continueOnFailure', + # that changes we don't have the freedom to run from anyplace. 
+ # parser.add_option('--mongo-repo', dest='mongo_repo', default=None, + parser.add_option('--test-path', dest='test_path', default=None, + help="Path to the test executables to run, " + "currently only used for 'client' (%default)") + parser.add_option('--mongod', dest='mongod_executable', default=os.path.join(mongo_repo, 'mongod'), + help='Path to mongod to run (%default)') + parser.add_option('--port', dest='mongod_port', default="32000", + help='Port the mongod will bind to (%default)') + parser.add_option('--mongo', dest='shell_executable', default=os.path.join(mongo_repo, 'mongo'), + help='Path to mongo, for .js test files (%default)') + parser.add_option('--continue-on-failure', dest='continue_on_failure', action="store_true", default=False, help='If supplied, continue testing even after a test fails') - parser.add_option('--one-mongod-per-test', dest='oneMongodPerTest', - action="store_true", default=False, - help='If supplied, run each test in a fresh mongod') parser.add_option('--from-file', dest='File', help="Run tests/suites named in FILE, one test per line, '-' means stdin") - parser.add_option('--smoke-db-prefix', dest='smokeDbPrefix', default='', - help="Prefix to use for the mongods' dbpaths.") - parser.add_option('--small-oplog', dest='smallOplog', default=False, + parser.add_option('--smoke-db-prefix', dest='smoke_db_prefix', default=smoke_db_prefix, + help="Prefix to use for the mongods' dbpaths ('%default')") + parser.add_option('--small-oplog', dest='small_oplog', default=False, action="store_true", help='Run tests with master/slave replication & use a small oplog') global tests (options, tests) = parser.parse_args() -# global mongoRepo -# if options.mongoRepo: -# pass -# mongoRepo = options.mongoRepo -# else: -# prefix = '' -# while True: -# if os.path.exists(prefix+'buildscripts'): -# mongoRepo = os.path.normpath(prefix) -# break -# else: -# prefix += '../' -# # FIXME: will this be a device's root directory on -# # Windows? 
-# if os.path.samefile('/', prefix): -# raise Exception("couldn't guess the mongo repository path") - print tests - global mongoRepo, mongodExecutable, mongodPort, shellExecutable, continueOnFailure, oneMongodPerTest, smallOplog, smokeDbPrefix, testPath - testPath = options.testPath - mongodExecutable = options.mongodExecutable if options.mongodExecutable else os.path.join(mongoRepo, 'mongod') - mongodPort = options.mongodPort if options.mongodPort else mongodPort - shellExecutable = options.shellExecutable if options.shellExecutable else os.path.join(mongoRepo, 'mongo') - continueOnFailure = options.continueOnFailure if options.continueOnFailure else continueOnFailure - oneMongodPerTest = options.oneMongodPerTest if options.oneMongodPerTest else oneMongodPerTest - smokeDbPrefix = options.smokeDbPrefix - smallOplog = options.smallOplog - + test_path = options.test_path + + mongod_executable = add_exe(options.mongod_executable) + if not os.path.exists(mongod_executable): + raise Exception("no mongod found in this directory.") + + mongod_port = options.mongod_port + + shell_executable = add_exe( options.shell_executable ) + if not os.path.exists(shell_executable): + raise Exception("no mongo shell found in this directory.") + + continue_on_failure = options.continue_on_failure + smoke_db_prefix = options.smoke_db_prefix + small_oplog = options.small_oplog + if options.File: if options.File == '-': tests = sys.stdin.readlines() @@ -500,23 +470,20 @@ def main(): tests = f.readlines() tests = [t.rstrip('\n') for t in tests] - if not tests: - raise Exception( "no tests specified" ) # If we're in suite mode, tests is a list of names of sets of tests. if options.mode == 'suite': - # Suites: smoke, smokePerf, smokeJs, smokeQuota, smokeJsPerf, - # smokeJsSlow, smokeParalell, smokeClone, smokeRepl, smokeDisk - suites = tests - tests = [] - expandSuites(suites) + tests = expand_suites(tests) elif options.mode == 'files': tests = [(os.path.abspath(test), True) for test in tests] - runTests(tests) - global exit_bad - exit_bad = False + if not tests: + raise Exception( "no tests specified" ) + + try: + run_tests(tests) + finally: + report() -atexit.register(report) if __name__ == "__main__": main() diff --git a/buildscripts/utils.py b/buildscripts/utils.py index 1ca2fdd..8021d87 100644 --- a/buildscripts/utils.py +++ b/buildscripts/utils.py @@ -5,10 +5,27 @@ import time import os # various utilities that are handy +def getAllSourceFiles( arr=None , prefix="." ): + if arr is None: + arr = [] + + for x in os.listdir( prefix ): + if x.startswith( "." 
) or x.startswith( "pcre-" ) or x.startswith( "32bit" ) or x.startswith( "mongodb-" ) or x.startswith("debian") or x.startswith( "mongo-cxx-driver" ): + continue + full = prefix + "/" + x + if os.path.isdir( full ) and not os.path.islink( full ): + getAllSourceFiles( arr , full ) + else: + if full.endswith( ".cpp" ) or full.endswith( ".h" ) or full.endswith( ".c" ): + arr.append( full ) + + return arr + + def getGitBranch(): if not os.path.exists( ".git" ): return None - + version = open( ".git/HEAD" ,'r' ).read().strip() if not version.startswith( "ref: " ): return version @@ -45,7 +62,6 @@ def getGitVersion(): return version return open( f , 'r' ).read().strip() - def execsys( args ): import subprocess if isinstance( args , str ): @@ -65,7 +81,6 @@ def getprocesslist(): r = re.compile( "[\r\n]+" ) return r.split( raw ) - def removeIfInList( lst , thing ): if thing in lst: lst.remove( thing ) diff --git a/client/clientOnly.cpp b/client/clientOnly.cpp index 6178257..726c3a9 100644 --- a/client/clientOnly.cpp +++ b/client/clientOnly.cpp @@ -29,7 +29,7 @@ namespace mongo { bool dbexitCalled = false; - void dbexit( ExitCode returnCode, const char *whyMsg ) { + void dbexit( ExitCode returnCode, const char *whyMsg , bool tryToGetLock ) { dbexitCalled = true; out() << "dbexit called" << endl; if ( whyMsg ) @@ -37,12 +37,12 @@ namespace mongo { out() << "exiting" << endl; ::exit( returnCode ); } - - bool inShutdown(){ + + bool inShutdown() { return dbexitCalled; } - void setupSignals(){ + void setupSignals() { // maybe should do SIGPIPE here, not sure } @@ -50,20 +50,20 @@ namespace mongo { return "in client only mode"; } - bool haveLocalShardingInfo( const string& ns ){ + bool haveLocalShardingInfo( const string& ns ) { return false; } - DBClientBase * createDirectClient(){ + DBClientBase * createDirectClient() { uassert( 10256 , "no createDirectClient in clientOnly" , 0 ); return 0; } - void Shard::getAllShards( vector& all ){ + void Shard::getAllShards( vector& all ) { assert(0); } - bool Shard::isAShard( const string& ident ){ + bool Shard::isAShardNode( const string& ident ) { assert(0); return false; } diff --git a/client/connpool.cpp b/client/connpool.cpp index dae13f6..a521699 100644 --- a/client/connpool.cpp +++ b/client/connpool.cpp @@ -26,162 +26,240 @@ namespace mongo { + // ------ PoolForHost ------ + + PoolForHost::~PoolForHost() { + while ( ! _pool.empty() ) { + StoredConnection sc = _pool.top(); + delete sc.conn; + _pool.pop(); + } + } + + void PoolForHost::done( DBClientBase * c ) { + if ( _pool.size() >= _maxPerHost ) { + delete c; + } + else { + _pool.push(c); + } + } + + DBClientBase * PoolForHost::get() { + + time_t now = time(0); + + while ( ! _pool.empty() ) { + StoredConnection sc = _pool.top(); + _pool.pop(); + if ( sc.ok( now ) ) + return sc.conn; + delete sc.conn; + } + + return NULL; + } + + void PoolForHost::flush() { + vector all; + while ( ! 
_pool.empty() ) { + StoredConnection c = _pool.top(); + _pool.pop(); + all.push_back( c ); + bool res; + c.conn->isMaster( res ); + } + + for ( vector::iterator i=all.begin(); i != all.end(); ++i ) { + _pool.push( *i ); + } + } + + PoolForHost::StoredConnection::StoredConnection( DBClientBase * c ) { + conn = c; + when = time(0); + } + + bool PoolForHost::StoredConnection::ok( time_t now ) { + // if connection has been idle for an hour, kill it + return ( now - when ) < 3600; + } + + void PoolForHost::createdOne( DBClientBase * base) { + if ( _created == 0 ) + _type = base->type(); + _created++; + } + + unsigned PoolForHost::_maxPerHost = 50; + + // ------ DBConnectionPool ------ + DBConnectionPool pool; - + DBClientBase* DBConnectionPool::_get(const string& ident) { scoped_lock L(_mutex); - PoolForHost& p = _pools[ident]; - if ( p.pool.empty() ) - return 0; - - DBClientBase *c = p.pool.top(); - p.pool.pop(); - return c; + return p.get(); } - DBClientBase* DBConnectionPool::_finishCreate( const string& host , DBClientBase* conn ){ + DBClientBase* DBConnectionPool::_finishCreate( const string& host , DBClientBase* conn ) { { scoped_lock L(_mutex); PoolForHost& p = _pools[host]; - p.created++; + p.createdOne( conn ); } onCreate( conn ); onHandedOut( conn ); - + return conn; } DBClientBase* DBConnectionPool::get(const ConnectionString& url) { DBClientBase * c = _get( url.toString() ); - if ( c ){ + if ( c ) { onHandedOut( c ); return c; } - + string errmsg; c = url.connect( errmsg ); - uassert( 13328 , (string)"dbconnectionpool: connect failed " + url.toString() + " : " + errmsg , c ); - + uassert( 13328 , _name + ": connect failed " + url.toString() + " : " + errmsg , c ); + return _finishCreate( url.toString() , c ); } - + DBClientBase* DBConnectionPool::get(const string& host) { DBClientBase * c = _get( host ); - if ( c ){ + if ( c ) { onHandedOut( c ); return c; } - + string errmsg; ConnectionString cs = ConnectionString::parse( host , errmsg ); uassert( 13071 , (string)"invalid hostname [" + host + "]" + errmsg , cs.isValid() ); - + c = cs.connect( errmsg ); - uassert( 11002 , (string)"dbconnectionpool: connect failed " + host + " : " + errmsg , c ); + if ( ! c ) + throw SocketException( SocketException::CONNECT_ERROR , host , 11002 , str::stream() << _name << " error: " << errmsg ); return _finishCreate( host , c ); } - DBConnectionPool::~DBConnectionPool(){ - for ( map::iterator i = _pools.begin(); i != _pools.end(); i++ ){ - PoolForHost& p = i->second; - - while ( ! p.pool.empty() ){ - DBClientBase * c = p.pool.top(); - delete c; - p.pool.pop(); - } - } + DBConnectionPool::~DBConnectionPool() { + // connection closing is handled by ~PoolForHost } - void DBConnectionPool::flush(){ + void DBConnectionPool::flush() { scoped_lock L(_mutex); - for ( map::iterator i = _pools.begin(); i != _pools.end(); i++ ){ + for ( PoolMap::iterator i = _pools.begin(); i != _pools.end(); i++ ) { PoolForHost& p = i->second; - - vector all; - while ( ! 
p.pool.empty() ){ - DBClientBase * c = p.pool.top(); - p.pool.pop(); - all.push_back( c ); - bool res; - c->isMaster( res ); - } - - for ( vector::iterator i=all.begin(); i != all.end(); i++ ){ - p.pool.push( *i ); - } + p.flush(); } } - void DBConnectionPool::addHook( DBConnectionHook * hook ){ + void DBConnectionPool::addHook( DBConnectionHook * hook ) { _hooks.push_back( hook ); } - void DBConnectionPool::onCreate( DBClientBase * conn ){ + void DBConnectionPool::onCreate( DBClientBase * conn ) { if ( _hooks.size() == 0 ) return; - - for ( list::iterator i = _hooks.begin(); i != _hooks.end(); i++ ){ + + for ( list::iterator i = _hooks.begin(); i != _hooks.end(); i++ ) { (*i)->onCreate( conn ); } } - void DBConnectionPool::onHandedOut( DBClientBase * conn ){ + void DBConnectionPool::onHandedOut( DBClientBase * conn ) { if ( _hooks.size() == 0 ) return; - - for ( list::iterator i = _hooks.begin(); i != _hooks.end(); i++ ){ + + for ( list::iterator i = _hooks.begin(); i != _hooks.end(); i++ ) { (*i)->onHandedOut( conn ); } } - void DBConnectionPool::appendInfo( BSONObjBuilder& b ){ - scoped_lock lk( _mutex ); + void DBConnectionPool::appendInfo( BSONObjBuilder& b ) { BSONObjBuilder bb( b.subobjStart( "hosts" ) ); - for ( map::iterator i=_pools.begin(); i!=_pools.end(); ++i ){ - string s = i->first; - BSONObjBuilder temp( bb.subobjStart( s.c_str() ) ); - temp.append( "available" , (int)(i->second.pool.size()) ); - temp.appendNumber( "created" , i->second.created ); - temp.done(); + int avail = 0; + long long created = 0; + + + map createdByType; + + { + scoped_lock lk( _mutex ); + for ( PoolMap::iterator i=_pools.begin(); i!=_pools.end(); ++i ) { + string s = i->first; + BSONObjBuilder temp( bb.subobjStart( s ) ); + temp.append( "available" , i->second.numAvailable() ); + temp.appendNumber( "created" , i->second.numCreated() ); + temp.done(); + + avail += i->second.numAvailable(); + created += i->second.numCreated(); + + long long& x = createdByType[i->second.type()]; + x += i->second.numCreated(); + } } bb.done(); + + { + BSONObjBuilder temp( bb.subobjStart( "createdByType" ) ); + for ( map::iterator i=createdByType.begin(); i!=createdByType.end(); ++i ) { + temp.appendNumber( ConnectionString::typeToString( i->first ) , i->second ); + } + temp.done(); + } + + b.append( "totalAvailable" , avail ); + b.appendNumber( "totalCreated" , created ); } - ScopedDbConnection * ScopedDbConnection::steal(){ + bool DBConnectionPool::serverNameCompare::operator()( const string& a , const string& b ) const{ + string ap = str::before( a , "/" ); + string bp = str::before( b , "/" ); + + return ap < bp; + } + + // ------ ScopedDbConnection ------ + + ScopedDbConnection * ScopedDbConnection::steal() { assert( _conn ); ScopedDbConnection * n = new ScopedDbConnection( _host , _conn ); _conn = 0; return n; } - + ScopedDbConnection::~ScopedDbConnection() { - if ( _conn ){ + if ( _conn ) { if ( ! 
_conn->isFailed() ) { /* see done() comments above for why we log this line */ - log() << "~ScopedDBConnection: _conn != null" << endl; + log() << "~ScopedDbConnection: _conn != null" << endl; } kill(); } } ScopedDbConnection::ScopedDbConnection(const Shard& shard ) - : _host( shard.getConnString() ) , _conn( pool.get(_host) ){ + : _host( shard.getConnString() ) , _conn( pool.get(_host) ) { } - + ScopedDbConnection::ScopedDbConnection(const Shard* shard ) - : _host( shard->getConnString() ) , _conn( pool.get(_host) ){ + : _host( shard->getConnString() ) , _conn( pool.get(_host) ) { } class PoolFlushCmd : public Command { public: - PoolFlushCmd() : Command( "connPoolSync" , false , "connpoolsync" ){} + PoolFlushCmd() : Command( "connPoolSync" , false , "connpoolsync" ) {} virtual void help( stringstream &help ) const { help<<"internal"; } virtual LockType locktype() const { return NONE; } - virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool){ + virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool) { pool.flush(); return true; } @@ -193,11 +271,13 @@ namespace mongo { class PoolStats : public Command { public: - PoolStats() : Command( "connPoolStats" ){} + PoolStats() : Command( "connPoolStats" ) {} virtual void help( stringstream &help ) const { help<<"stats about connection pool"; } virtual LockType locktype() const { return NONE; } - virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool){ + virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool) { pool.appendInfo( result ); + result.append( "numDBClientConnection" , DBClientConnection::getNumConnections() ); + result.append( "numAScopedConnection" , AScopedConnection::getNumConnections() ); return true; } virtual bool slaveOk() const { @@ -206,5 +286,6 @@ namespace mongo { } poolStatsCmd; + AtomicUInt AScopedConnection::_numConnections; } // namespace mongo diff --git a/client/connpool.h b/client/connpool.h index 00570c5..e7f59d6 100644 --- a/client/connpool.h +++ b/client/connpool.h @@ -24,55 +24,109 @@ namespace mongo { class Shard; - - struct PoolForHost { + + /** + * not thread safe + * thread safety is handled by DBConnectionPool + */ + class PoolForHost { + public: PoolForHost() - : created(0){} - PoolForHost( const PoolForHost& other ){ - assert(other.pool.size() == 0); - created = other.created; - assert( created == 0 ); + : _created(0) {} + + PoolForHost( const PoolForHost& other ) { + assert(other._pool.size() == 0); + _created = other._created; + assert( _created == 0 ); } - - std::stack pool; - long long created; + + ~PoolForHost(); + + int numAvailable() const { return (int)_pool.size(); } + + void createdOne( DBClientBase * base); + long long numCreated() const { return _created; } + + ConnectionString::ConnectionType type() const { assert(_created); return _type; } + + /** + * gets a connection or return NULL + */ + DBClientBase * get(); + + void done( DBClientBase * c ); + + void flush(); + + static void setMaxPerHost( unsigned max ) { _maxPerHost = max; } + static unsigned getMaxPerHost() { return _maxPerHost; } + private: + + struct StoredConnection { + StoredConnection( DBClientBase * c ); + + bool ok( time_t now ); + + DBClientBase* conn; + time_t when; + }; + + std::stack _pool; + long long _created; + ConnectionString::ConnectionType _type; + + static unsigned _maxPerHost; }; - + class DBConnectionHook { public: - virtual 
~DBConnectionHook(){} - virtual void onCreate( DBClientBase * conn ){} - virtual void onHandedOut( DBClientBase * conn ){} + virtual ~DBConnectionHook() {} + virtual void onCreate( DBClientBase * conn ) {} + virtual void onHandedOut( DBClientBase * conn ) {} }; /** Database connection pool. Generally, use ScopedDbConnection and do not call these directly. - This class, so far, is suitable for use with unauthenticated connections. - Support for authenticated connections requires some adjustements: please + This class, so far, is suitable for use with unauthenticated connections. + Support for authenticated connections requires some adjustements: please request... Usage: - + { ScopedDbConnection c("myserver"); c.conn()... } */ class DBConnectionPool { + + public: + + /** compares server namees, but is smart about replica set names */ + struct serverNameCompare { + bool operator()( const string& a , const string& b ) const; + }; + + private: + mongo::mutex _mutex; - map _pools; // servername -> pool + typedef map PoolMap; // servername -> pool + PoolMap _pools; list _hooks; + string _name; DBClientBase* _get( const string& ident ); - + DBClientBase* _finishCreate( const string& ident , DBClientBase* conn ); - public: - DBConnectionPool() : _mutex("DBConnectionPool") { } + public: + DBConnectionPool() : _mutex("DBConnectionPool") , _name( "dbconnectionpool" ) { } ~DBConnectionPool(); + /** right now just controls some asserts. defaults to "dbconnectionpool" */ + void setName( const string& name ) { _name = name; } void onCreate( DBClientBase * conn ); void onHandedOut( DBClientBase * conn ); @@ -83,72 +137,78 @@ namespace mongo { DBClientBase *get(const ConnectionString& host); void release(const string& host, DBClientBase *c) { - if ( c->isFailed() ){ + if ( c->isFailed() ) { delete c; return; } scoped_lock L(_mutex); - _pools[host].pool.push(c); + _pools[host].done(c); } void addHook( DBConnectionHook * hook ); void appendInfo( BSONObjBuilder& b ); }; - + extern DBConnectionPool pool; class AScopedConnection : boost::noncopyable { public: - virtual ~AScopedConnection(){} + AScopedConnection() { _numConnections++; } + virtual ~AScopedConnection() { _numConnections--; } virtual DBClientBase* get() = 0; virtual void done() = 0; virtual string getHost() const = 0; + + /** + * @return total number of current instances of AScopedConnection + */ + static int getNumConnections() { return _numConnections; } + + private: + static AtomicUInt _numConnections; }; /** Use to get a connection from the pool. On exceptions things - clean up nicely. + clean up nicely (i.e. the socket gets closed automatically when the + scopeddbconnection goes out of scope). 
*/ class ScopedDbConnection : public AScopedConnection { - const string _host; - DBClientBase *_conn; public: + /** the main constructor you want to use + throws UserException if can't connect + */ + explicit ScopedDbConnection(const string& host) : _host(host), _conn( pool.get(host) ) {} + + ScopedDbConnection() : _host( "" ) , _conn(0) {} + + /* @param conn - bind to an existing connection */ + ScopedDbConnection(const string& host, DBClientBase* conn ) : _host( host ) , _conn( conn ) {} + + /** throws UserException if can't connect */ + explicit ScopedDbConnection(const ConnectionString& url ) : _host(url.toString()), _conn( pool.get(url) ) {} + + /** throws UserException if can't connect */ + explicit ScopedDbConnection(const Shard& shard ); + explicit ScopedDbConnection(const Shard* shard ); + + ~ScopedDbConnection(); + /** get the associated connection object */ - DBClientBase* operator->(){ - uassert( 11004 , "did you call done already" , _conn ); - return _conn; + DBClientBase* operator->() { + uassert( 11004 , "connection was returned to the pool already" , _conn ); + return _conn; } - + /** get the associated connection object */ DBClientBase& conn() { - uassert( 11005 , "did you call done already" , _conn ); + uassert( 11005 , "connection was returned to the pool already" , _conn ); return *_conn; } /** get the associated connection object */ DBClientBase* get() { - uassert( 13102 , "did you call done already" , _conn ); + uassert( 13102 , "connection was returned to the pool already" , _conn ); return _conn; } - - ScopedDbConnection() - : _host( "" ) , _conn(0) { - } - - /** throws UserException if can't connect */ - ScopedDbConnection(const string& host) - : _host(host), _conn( pool.get(host) ) { - } - - ScopedDbConnection(const string& host, DBClientBase* conn ) - : _host( host ) , _conn( conn ){ - } - - ScopedDbConnection(const Shard& shard ); - ScopedDbConnection(const Shard* shard ); - - ScopedDbConnection(const ConnectionString& url ) - : _host(url.toString()), _conn( pool.get(url) ) { - } - string getHost() const { return _host; } @@ -161,8 +221,8 @@ namespace mongo { } /** Call this when you are done with the connection. - - If you do not call done() before this object goes out of scope, + + If you do not call done() before this object goes out of scope, we can't be sure we fully read all expected data of a reply on the socket. so we don't try to reuse the connection in that situation. */ @@ -170,7 +230,7 @@ namespace mongo { if ( ! _conn ) return; - /* we could do this, but instead of assume one is using autoreconnect mode on the connection + /* we could do this, but instead of assume one is using autoreconnect mode on the connection if ( _conn->isFailed() ) kill(); else @@ -178,10 +238,12 @@ namespace mongo { pool.release(_host, _conn); _conn = 0; } - + ScopedDbConnection * steal(); - ~ScopedDbConnection(); + private: + const string _host; + DBClientBase *_conn; }; diff --git a/client/constants.h b/client/constants.h index 66aa9b1..54f3fd2 100644 --- a/client/constants.h +++ b/client/constants.h @@ -2,22 +2,22 @@ #pragma once -namespace mongo { +namespace mongo { /* query results include a 32 result flag word consisting of these bits */ enum ResultFlagType { - /* returned, with zero results, when getMore is called but the cursor id + /* returned, with zero results, when getMore is called but the cursor id is not valid at the server. */ - ResultFlag_CursorNotFound = 1, - + ResultFlag_CursorNotFound = 1, + /* { $err : ... 
} is being returned */ - ResultFlag_ErrSet = 2, - + ResultFlag_ErrSet = 2, + /* Have to update config from the server, usually $err is also set */ - ResultFlag_ShardConfigStale = 4, - - /* for backward compatability: this let's us know the server supports - the QueryOption_AwaitData option. if it doesn't, a repl slave client should sleep + ResultFlag_ShardConfigStale = 4, + + /* for backward compatability: this let's us know the server supports + the QueryOption_AwaitData option. if it doesn't, a repl slave client should sleep a little between getMore's. */ ResultFlag_AwaitCapable = 8 diff --git a/client/dbclient.cpp b/client/dbclient.cpp index aa9b7ae..b4214ab 100644 --- a/client/dbclient.cpp +++ b/client/dbclient.cpp @@ -31,8 +31,41 @@ namespace mongo { + void ConnectionString::_fillServers( string s ) { + + { + string::size_type idx = s.find( '/' ); + if ( idx != string::npos ) { + _setName = s.substr( 0 , idx ); + s = s.substr( idx + 1 ); + _type = SET; + } + } + + string::size_type idx; + while ( ( idx = s.find( ',' ) ) != string::npos ) { + _servers.push_back( s.substr( 0 , idx ) ); + s = s.substr( idx + 1 ); + } + _servers.push_back( s ); + + } + + void ConnectionString::_finishInit() { + stringstream ss; + if ( _type == SET ) + ss << _setName << "/"; + for ( unsigned i=0; i<_servers.size(); i++ ) { + if ( i > 0 ) + ss << ","; + ss << _servers[i].toString(); + } + _string = ss.str(); + } + + DBClientBase* ConnectionString::connect( string& errmsg ) const { - switch ( _type ){ + switch ( _type ) { case MASTER: { DBClientConnection * c = new DBClientConnection(true); log(1) << "creating new connection to:" << _servers[0] << endl; @@ -42,11 +75,11 @@ namespace mongo { } return c; } - - case PAIR: + + case PAIR: case SET: { DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers ); - if( ! set->connect() ){ + if( ! 
set->connect() ) { delete set; errmsg = "connect failed to set "; errmsg += toString(); @@ -54,7 +87,7 @@ namespace mongo { } return set; } - + case SYNC: { // TODO , don't copy list l; @@ -62,40 +95,58 @@ namespace mongo { l.push_back( _servers[i] ); return new SyncClusterConnection( l ); } - + case INVALID: throw UserException( 13421 , "trying to connect to invalid ConnectionString" ); break; } - + assert( 0 ); return 0; } - ConnectionString ConnectionString::parse( const string& host , string& errmsg ){ - + ConnectionString ConnectionString::parse( const string& host , string& errmsg ) { + string::size_type i = host.find( '/' ); - if ( i != string::npos ){ + if ( i != string::npos && i != 0) { // replica set return ConnectionString( SET , host.substr( i + 1 ) , host.substr( 0 , i ) ); } - int numCommas = DBClientBase::countCommas( host ); - - if( numCommas == 0 ) + int numCommas = str::count( host , ',' ); + + if( numCommas == 0 ) return ConnectionString( HostAndPort( host ) ); - - if ( numCommas == 1 ) + + if ( numCommas == 1 ) return ConnectionString( PAIR , host ); if ( numCommas == 2 ) return ConnectionString( SYNC , host ); - + errmsg = (string)"invalid hostname [" + host + "]"; return ConnectionString(); // INVALID } - Query& Query::where(const string &jscode, BSONObj scope) { + string ConnectionString::typeToString( ConnectionType type ) { + switch ( type ) { + case INVALID: + return "invalid"; + case MASTER: + return "master"; + case PAIR: + return "pair"; + case SET: + return "set"; + case SYNC: + return "sync"; + } + assert(0); + return ""; + } + + + Query& Query::where(const string &jscode, BSONObj scope) { /* use where() before sort() and hint() and explain(), else this will assert. */ assert( ! isComplex() ); BSONObjBuilder b; @@ -113,44 +164,44 @@ namespace mongo { obj = b.obj(); } - Query& Query::sort(const BSONObj& s) { + Query& Query::sort(const BSONObj& s) { appendComplex( "orderby", s ); - return *this; + return *this; } Query& Query::hint(BSONObj keyPattern) { appendComplex( "$hint", keyPattern ); - return *this; + return *this; } Query& Query::explain() { appendComplex( "$explain", true ); - return *this; + return *this; } - + Query& Query::snapshot() { appendComplex( "$snapshot", true ); - return *this; + return *this; } - + Query& Query::minKey( const BSONObj &val ) { appendComplex( "$min", val ); - return *this; + return *this; } Query& Query::maxKey( const BSONObj &val ) { appendComplex( "$max", val ); - return *this; + return *this; } - bool Query::isComplex( bool * hasDollar ) const{ - if ( obj.hasElement( "query" ) ){ + bool Query::isComplex( bool * hasDollar ) const { + if ( obj.hasElement( "query" ) ) { if ( hasDollar ) hasDollar[0] = false; return true; } - if ( obj.hasElement( "$query" ) ){ + if ( obj.hasElement( "$query" ) ) { if ( hasDollar ) hasDollar[0] = true; return true; @@ -158,12 +209,12 @@ namespace mongo { return false; } - + BSONObj Query::getFilter() const { bool hasDollar; if ( ! isComplex( &hasDollar ) ) return obj; - + return obj.getObjectField( hasDollar ? 
"$query" : "query" ); } BSONObj Query::getSort() const { @@ -182,8 +233,8 @@ namespace mongo { bool Query::isExplain() const { return isComplex() && obj.getBoolField( "$explain" ); } - - string Query::toString() const{ + + string Query::toString() const { return obj.toString(); } @@ -203,7 +254,7 @@ namespace mongo { } return _cachedAvailableOptions; } - + inline bool DBClientWithCommands::runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options) { string ns = dbname + ".$cmd"; info = findOne(ns, cmd, 0 , options); @@ -222,38 +273,50 @@ namespace mongo { return runCommand(dbname, b.done(), *info); } - unsigned long long DBClientWithCommands::count(const string &_ns, const BSONObj& query, int options) { - NamespaceString ns(_ns); - BSONObj cmd = BSON( "count" << ns.coll << "query" << query ); + unsigned long long DBClientWithCommands::count(const string &myns, const BSONObj& query, int options, int limit, int skip ) { + NamespaceString ns(myns); + BSONObj cmd = _countCmd( myns , query , options , limit , skip ); BSONObj res; if( !runCommand(ns.db.c_str(), cmd, res, options) ) uasserted(11010,string("count fails:") + res.toString()); return res["n"].numberLong(); } + BSONObj DBClientWithCommands::_countCmd(const string &myns, const BSONObj& query, int options, int limit, int skip ) { + NamespaceString ns(myns); + BSONObjBuilder b; + b.append( "count" , ns.coll ); + b.append( "query" , query ); + if ( limit ) + b.append( "limit" , limit ); + if ( skip ) + b.append( "skip" , skip ); + return b.obj(); + } + BSONObj getlasterrorcmdobj = fromjson("{getlasterror:1}"); - BSONObj DBClientWithCommands::getLastErrorDetailed() { + BSONObj DBClientWithCommands::getLastErrorDetailed() { BSONObj info; runCommand("admin", getlasterrorcmdobj, info); - return info; + return info; } - string DBClientWithCommands::getLastError() { + string DBClientWithCommands::getLastError() { BSONObj info = getLastErrorDetailed(); return getLastErrorString( info ); } - - string DBClientWithCommands::getLastErrorString( const BSONObj& info ){ + + string DBClientWithCommands::getLastErrorString( const BSONObj& info ) { BSONElement e = info["err"]; if( e.eoo() ) return ""; if( e.type() == Object ) return e.toString(); - return e.str(); + return e.str(); } BSONObj getpreverrorcmdobj = fromjson("{getpreverror:1}"); - BSONObj DBClientWithCommands::getPrevError() { + BSONObj DBClientWithCommands::getPrevError() { BSONObj info; runCommand("admin", getpreverrorcmdobj, info); return info; @@ -261,7 +324,7 @@ namespace mongo { BSONObj getnoncecmdobj = fromjson("{getnonce:1}"); - string DBClientWithCommands::createPasswordDigest( const string & username , const string & clearTextPassword ){ + string DBClientWithCommands::createPasswordDigest( const string & username , const string & clearTextPassword ) { md5digest d; { md5_state_t st; @@ -275,11 +338,9 @@ namespace mongo { } bool DBClientWithCommands::auth(const string &dbname, const string &username, const string &password_text, string& errmsg, bool digestPassword) { - //cout << "TEMP AUTH " << toString() << dbname << ' ' << username << ' ' << password_text << ' ' << digestPassword << endl; - - string password = password_text; - if( digestPassword ) - password = createPasswordDigest( username , password_text ); + string password = password_text; + if( digestPassword ) + password = createPasswordDigest( username , password_text ); BSONObj info; string nonce; @@ -310,8 +371,8 @@ namespace mongo { b << "key" << digestToString( d ); authCmd = b.done(); } - - if( 
runCommand(dbname, authCmd, info) ) + + if( runCommand(dbname, authCmd, info) ) return true; errmsg = info.toString(); @@ -322,7 +383,7 @@ namespace mongo { bool DBClientWithCommands::isMaster(bool& isMaster, BSONObj *info) { BSONObj o; - if ( info == 0 ) + if ( info == 0 ) info = &o; bool ok = runCommand("admin", ismastercmdobj, *info); isMaster = info->getField("ismaster").trueValue(); @@ -331,7 +392,7 @@ namespace mongo { bool DBClientWithCommands::createCollection(const string &ns, long long size, bool capped, int max, BSONObj *info) { BSONObj o; - if ( info == 0 ) info = &o; + if ( info == 0 ) info = &o; BSONObjBuilder b; string db = nsToDatabase(ns.c_str()); b.append("create", ns.c_str() + db.length() + 1); @@ -381,11 +442,11 @@ namespace mongo { return false; } - BSONObj DBClientWithCommands::mapreduce(const string &ns, const string &jsmapf, const string &jsreducef, BSONObj query, const string& outputcolname) { + BSONObj DBClientWithCommands::mapreduce(const string &ns, const string &jsmapf, const string &jsreducef, BSONObj query, const string& outputcolname) { BSONObjBuilder b; b.append("mapreduce", nsGetCollection(ns)); - b.appendCode("map", jsmapf.c_str()); - b.appendCode("reduce", jsreducef.c_str()); + b.appendCode("map", jsmapf); + b.appendCode("reduce", jsreducef); if( !query.isEmpty() ) b.append("query", query); if( !outputcolname.empty() ) @@ -397,7 +458,7 @@ namespace mongo { bool DBClientWithCommands::eval(const string &dbname, const string &jscode, BSONObj& info, BSONElement& retValue, BSONObj *args) { BSONObjBuilder b; - b.appendCode("$eval", jscode.c_str()); + b.appendCode("$eval", jscode); if ( args ) b.appendArray("args", *args); bool ok = runCommand(dbname, b.done(), info); @@ -412,27 +473,27 @@ namespace mongo { return eval(dbname, jscode, info, retValue); } - list DBClientWithCommands::getDatabaseNames(){ + list DBClientWithCommands::getDatabaseNames() { BSONObj info; uassert( 10005 , "listdatabases failed" , runCommand( "admin" , BSON( "listDatabases" << 1 ) , info ) ); uassert( 10006 , "listDatabases.databases not array" , info["databases"].type() == Array ); - + list names; - + BSONObjIterator i( info["databases"].embeddedObjectUserCheck() ); - while ( i.more() ){ + while ( i.more() ) { names.push_back( i.next().embeddedObjectUserCheck()["name"].valuestr() ); } return names; } - list DBClientWithCommands::getCollectionNames( const string& db ){ + list DBClientWithCommands::getCollectionNames( const string& db ) { list names; - + string ns = db + ".system.namespaces"; auto_ptr c = query( ns.c_str() , BSONObj() ); - while ( c->more() ){ + while ( c->more() ) { string name = c->next()["name"].valuestr(); if ( name.find( "$" ) != string::npos ) continue; @@ -441,37 +502,37 @@ namespace mongo { return names; } - bool DBClientWithCommands::exists( const string& ns ){ + bool DBClientWithCommands::exists( const string& ns ) { list names; - + string db = nsGetDB( ns ) + ".system.namespaces"; BSONObj q = BSON( "name" << ns ); - return count( db.c_str() , q ) != 0; + return count( db.c_str() , q, QueryOption_SlaveOk ) != 0; } /* --- dbclientconnection --- */ - bool DBClientConnection::auth(const string &dbname, const string &username, const string &password_text, string& errmsg, bool digestPassword) { - string password = password_text; - if( digestPassword ) - password = createPasswordDigest( username , password_text ); + bool DBClientConnection::auth(const string &dbname, const string &username, const string &password_text, string& errmsg, bool digestPassword) { + string 
password = password_text; + if( digestPassword ) + password = createPasswordDigest( username , password_text ); - if( autoReconnect ) { - /* note we remember the auth info before we attempt to auth -- if the connection is broken, we will - then have it for the next autoreconnect attempt. - */ - pair p = pair(username, password); - authCache[dbname] = p; - } + if( autoReconnect ) { + /* note we remember the auth info before we attempt to auth -- if the connection is broken, we will + then have it for the next autoreconnect attempt. + */ + pair p = pair(username, password); + authCache[dbname] = p; + } - return DBClientBase::auth(dbname, username, password.c_str(), errmsg, false); - } + return DBClientBase::auth(dbname, username, password.c_str(), errmsg, false); + } BSONObj DBClientInterface::findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn, int queryOptions) { auto_ptr c = this->query(ns, query, 1, 0, fieldsToReturn, queryOptions); - uassert( 10276 , "DBClientBase::findOne: transport error", c.get() ); + uassert( 10276 , str::stream() << "DBClientBase::findOne: transport error: " << getServerAddress() << " query: " << query.toString(), c.get() ); if ( c->hasResultFlag( ResultFlag_ShardConfigStale ) ) throw StaleConfigException( ns , "findOne has stale config" ); @@ -482,20 +543,20 @@ namespace mongo { return c->nextSafe().copy(); } - bool DBClientConnection::connect(const HostAndPort& server, string& errmsg){ + bool DBClientConnection::connect(const HostAndPort& server, string& errmsg) { _server = server; _serverString = _server.toString(); return _connect( errmsg ); } - bool DBClientConnection::_connect( string& errmsg ){ + bool DBClientConnection::_connect( string& errmsg ) { _serverString = _server.toString(); // we keep around SockAddr for connection life -- maybe MessagingPort // requires that? server.reset(new SockAddr(_server.host().c_str(), _server.port())); - p.reset(new MessagingPort( _timeout, _logLevel )); + p.reset(new MessagingPort( _so_timeout, _logLevel )); - if (server->getAddr() == "0.0.0.0"){ + if (server->getAddr() == "0.0.0.0") { failed = true; return false; } @@ -513,35 +574,39 @@ namespace mongo { void DBClientConnection::_checkConnection() { if ( !failed ) return; - if ( lastReconnectTry && time(0)-lastReconnectTry < 2 ) - return; + if ( lastReconnectTry && time(0)-lastReconnectTry < 2 ) { + // we wait a little before reconnect attempt to avoid constant hammering. + // but we throw we don't want to try to use a connection in a bad state + throw SocketException(SocketException::FAILED_STATE); + } if ( !autoReconnect ) - return; + throw SocketException(SocketException::FAILED_STATE); lastReconnectTry = time(0); log(_logLevel) << "trying reconnect to " << _serverString << endl; string errmsg; failed = false; - if ( ! _connect(errmsg) ) { + if ( ! 
_connect(errmsg) ) { + failed = true; log(_logLevel) << "reconnect " << _serverString << " failed " << errmsg << endl; - return; - } + throw SocketException(SocketException::CONNECT_ERROR); + } - log(_logLevel) << "reconnect " << _serverString << " ok" << endl; - for( map< string, pair >::iterator i = authCache.begin(); i != authCache.end(); i++ ) { - const char *dbname = i->first.c_str(); - const char *username = i->second.first.c_str(); - const char *password = i->second.second.c_str(); - if( !DBClientBase::auth(dbname, username, password, errmsg, false) ) - log(_logLevel) << "reconnect: auth failed db:" << dbname << " user:" << username << ' ' << errmsg << '\n'; - } + log(_logLevel) << "reconnect " << _serverString << " ok" << endl; + for( map< string, pair >::iterator i = authCache.begin(); i != authCache.end(); i++ ) { + const char *dbname = i->first.c_str(); + const char *username = i->second.first.c_str(); + const char *password = i->second.second.c_str(); + if( !DBClientBase::auth(dbname, username, password, errmsg, false) ) + log(_logLevel) << "reconnect: auth failed db:" << dbname << " user:" << username << ' ' << errmsg << '\n'; + } } auto_ptr DBClientBase::query(const string &ns, Query query, int nToReturn, - int nToSkip, const BSONObj *fieldsToReturn, int queryOptions , int batchSize ) { + int nToSkip, const BSONObj *fieldsToReturn, int queryOptions , int batchSize ) { auto_ptr c( new DBClientCursor( this, - ns, query.obj, nToReturn, nToSkip, - fieldsToReturn, queryOptions , batchSize ) ); + ns, query.obj, nToReturn, nToSkip, + fieldsToReturn, queryOptions , batchSize ) ); if ( c->init() ) return c; return auto_ptr< DBClientCursor >( 0 ); @@ -562,14 +627,14 @@ namespace mongo { } boost::function _f; }; - + unsigned long long DBClientConnection::query( boost::function f, const string& ns, Query query, const BSONObj *fieldsToReturn, int queryOptions ) { DBClientFunConvertor fun; fun._f = f; boost::function ptr( fun ); return DBClientConnection::query( ptr, ns, query, fieldsToReturn, queryOptions ); } - + unsigned long long DBClientConnection::query( boost::function f, const string& ns, Query query, const BSONObj *fieldsToReturn, int queryOptions ) { // mask options queryOptions &= (int)( QueryOption_NoCursorTimeout | QueryOption_SlaveOk ); @@ -577,11 +642,11 @@ namespace mongo { bool doExhaust = ( availableOptions() & QueryOption_Exhaust ); if ( doExhaust ) { - queryOptions |= (int)QueryOption_Exhaust; + queryOptions |= (int)QueryOption_Exhaust; } auto_ptr c( this->query(ns, query, 0, 0, fieldsToReturn, queryOptions) ); - massert( 13386, "socket error for mapping query", c.get() ); - + uassert( 13386, "socket error for mapping query", c.get() ); + if ( !doExhaust ) { while( c->more() ) { DBClientCursorBatchIterator i( *c ); @@ -591,21 +656,21 @@ namespace mongo { return n; } - try { - while( 1 ) { - while( c->moreInCurrentBatch() ) { + try { + while( 1 ) { + while( c->moreInCurrentBatch() ) { DBClientCursorBatchIterator i( *c ); f( i ); n += i.n(); } - if( c->getCursorId() == 0 ) + if( c->getCursorId() == 0 ) break; c->exhaustReceiveMore(); } } - catch(std::exception&) { + catch(std::exception&) { /* connection CANNOT be used anymore as more data may be on the way from the server. we have to reconnect. 
*/ @@ -633,16 +698,16 @@ namespace mongo { void DBClientBase::insert( const string & ns , const vector< BSONObj > &v ) { Message toSend; - + BufBuilder b; int opts = 0; b.appendNum( opts ); b.appendStr( ns ); for( vector< BSONObj >::const_iterator i = v.begin(); i != v.end(); ++i ) i->appendSelfToBufBuilder( b ); - + toSend.setData( dbInsert, b.buf(), b.len() ); - + say( toSend ); } @@ -686,63 +751,63 @@ namespace mongo { say( toSend ); } - auto_ptr DBClientWithCommands::getIndexes( const string &ns ){ + auto_ptr DBClientWithCommands::getIndexes( const string &ns ) { return query( Namespace( ns.c_str() ).getSisterNS( "system.indexes" ).c_str() , BSON( "ns" << ns ) ); } - - void DBClientWithCommands::dropIndex( const string& ns , BSONObj keys ){ + + void DBClientWithCommands::dropIndex( const string& ns , BSONObj keys ) { dropIndex( ns , genIndexName( keys ) ); } - void DBClientWithCommands::dropIndex( const string& ns , const string& indexName ){ + void DBClientWithCommands::dropIndex( const string& ns , const string& indexName ) { BSONObj info; - if ( ! runCommand( nsToDatabase( ns.c_str() ) , - BSON( "deleteIndexes" << NamespaceString( ns ).coll << "index" << indexName ) , - info ) ){ + if ( ! runCommand( nsToDatabase( ns.c_str() ) , + BSON( "deleteIndexes" << NamespaceString( ns ).coll << "index" << indexName ) , + info ) ) { log(_logLevel) << "dropIndex failed: " << info << endl; uassert( 10007 , "dropIndex failed" , 0 ); } resetIndexCache(); } - - void DBClientWithCommands::dropIndexes( const string& ns ){ + + void DBClientWithCommands::dropIndexes( const string& ns ) { BSONObj info; - uassert( 10008 , "dropIndexes failed" , runCommand( nsToDatabase( ns.c_str() ) , - BSON( "deleteIndexes" << NamespaceString( ns ).coll << "index" << "*") , - info ) ); + uassert( 10008 , "dropIndexes failed" , runCommand( nsToDatabase( ns.c_str() ) , + BSON( "deleteIndexes" << NamespaceString( ns ).coll << "index" << "*") , + info ) ); resetIndexCache(); } - void DBClientWithCommands::reIndex( const string& ns ){ + void DBClientWithCommands::reIndex( const string& ns ) { list all; auto_ptr i = getIndexes( ns ); - while ( i->more() ){ + while ( i->more() ) { all.push_back( i->next().getOwned() ); } - + dropIndexes( ns ); - - for ( list::iterator i=all.begin(); i!=all.end(); i++ ){ + + for ( list::iterator i=all.begin(); i!=all.end(); i++ ) { BSONObj o = *i; insert( Namespace( ns.c_str() ).getSisterNS( "system.indexes" ).c_str() , o ); } - + } - - string DBClientWithCommands::genIndexName( const BSONObj& keys ){ + + string DBClientWithCommands::genIndexName( const BSONObj& keys ) { stringstream ss; - + bool first = 1; for ( BSONObjIterator i(keys); i.more(); ) { BSONElement f = i.next(); - + if ( first ) first = 0; else ss << "_"; - + ss << f.fieldName() << "_"; if( f.isNumber() ) ss << f.numberInt(); @@ -750,7 +815,7 @@ namespace mongo { return ss.str(); } - bool DBClientWithCommands::ensureIndex( const string &ns , BSONObj keys , bool unique, const string & name ) { + bool DBClientWithCommands::ensureIndex( const string &ns , BSONObj keys , bool unique, const string & name , bool cache ) { BSONObjBuilder toSave; toSave.append( "ns" , ns ); toSave.append( "key" , keys ); @@ -767,13 +832,15 @@ namespace mongo { toSave.append( "name" , nn ); cacheKey += nn; } - + if ( unique ) toSave.appendBool( "unique", unique ); if ( _seenIndexes.count( cacheKey ) ) return 0; - _seenIndexes.insert( cacheKey ); + + if ( cache ) + _seenIndexes.insert( cacheKey ); insert( Namespace( ns.c_str() ).getSisterNS( 
"system.indexes" ).c_str() , toSave.obj() ); return 1; @@ -808,9 +875,10 @@ namespace mongo { void DBClientConnection::say( Message &toSend ) { checkConnection(); - try { + try { port().say( toSend ); - } catch( SocketException & ) { + } + catch( SocketException & ) { failed = true; throw; } @@ -820,24 +888,25 @@ namespace mongo { port().piggyBack( toSend ); } - void DBClientConnection::recv( Message &m ) { + void DBClientConnection::recv( Message &m ) { port().recv(m); } - bool DBClientConnection::call( Message &toSend, Message &response, bool assertOk ) { - /* todo: this is very ugly messagingport::call returns an error code AND can throw - an exception. we should make it return void and just throw an exception anytime + bool DBClientConnection::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) { + /* todo: this is very ugly messagingport::call returns an error code AND can throw + an exception. we should make it return void and just throw an exception anytime it fails */ - try { + try { if ( !port().call(toSend, response) ) { failed = true; if ( assertOk ) - uassert( 10278 , "dbclient error communicating with server", false); + uasserted( 10278 , str::stream() << "dbclient error communicating with server: " << getServerAddress() ); + return false; } } - catch( SocketException & ) { + catch( SocketException & ) { failed = true; throw; } @@ -858,222 +927,24 @@ namespace mongo { } } - void DBClientConnection::killCursor( long long cursorId ){ + void DBClientConnection::killCursor( long long cursorId ) { BufBuilder b; b.appendNum( (int)0 ); // reserved b.appendNum( (int)1 ); // number b.appendNum( cursorId ); - + Message m; m.setData( dbKillCursors , b.buf() , b.len() ); - sayPiggyBack( m ); + if ( _lazyKillCursor ) + sayPiggyBack( m ); + else + say(m); } - /* --- class dbclientpaired --- */ + AtomicUInt DBClientConnection::_numConnections; + bool DBClientConnection::_lazyKillCursor = true; - string DBClientReplicaSet::toString() { - return getServerAddress(); - } - - DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector& servers ) - : _name( name ) , _currentMaster( 0 ), _servers( servers ){ - - for ( unsigned i=0; i<_servers.size(); i++ ) - _conns.push_back( new DBClientConnection( true , this ) ); - } - - DBClientReplicaSet::~DBClientReplicaSet(){ - for ( unsigned i=0; i<_conns.size(); i++ ) - delete _conns[i]; - _conns.clear(); - } - - string DBClientReplicaSet::getServerAddress() const { - StringBuilder ss; - if ( _name.size() ) - ss << _name << "/"; - - for ( unsigned i=0; i<_servers.size(); i++ ){ - if ( i > 0 ) - ss << ","; - ss << _servers[i].toString(); - } - return ss.str(); - } - - /* find which server, the left or right, is currently master mode */ - void DBClientReplicaSet::_checkMaster() { - - bool triedQuickCheck = false; - - log( _logLevel + 1) << "_checkMaster on: " << toString() << endl; - for ( int retry = 0; retry < 2; retry++ ) { - for ( unsigned i=0; i<_conns.size(); i++ ){ - DBClientConnection * c = _conns[i]; - try { - bool im; - BSONObj o; - c->isMaster(im, &o); - - if ( retry ) - log(_logLevel) << "checkmaster: " << c->toString() << ' ' << o << '\n'; - - string maybePrimary; - if ( o["hosts"].type() == Array ){ - if ( o["primary"].type() == String ) - maybePrimary = o["primary"].String(); - - BSONObjIterator hi(o["hosts"].Obj()); - while ( hi.more() ){ - string toCheck = hi.next().String(); - int found = -1; - for ( unsigned x=0; x<_servers.size(); x++ ){ - if ( toCheck == _servers[x].toString() ){ - found = x; - 
break; - } - } - - if ( found == -1 ){ - HostAndPort h( toCheck ); - _servers.push_back( h ); - _conns.push_back( new DBClientConnection( true, this ) ); - string temp; - _conns[ _conns.size() - 1 ]->connect( h , temp ); - log( _logLevel ) << "updated set to: " << toString() << endl; - } - - } - } - - if ( im ) { - _currentMaster = c; - return; - } - - if ( maybePrimary.size() && ! triedQuickCheck ){ - for ( unsigned x=0; x<_servers.size(); x++ ){ - if ( _servers[i].toString() != maybePrimary ) - continue; - triedQuickCheck = true; - _conns[x]->isMaster( im , &o ); - if ( im ){ - _currentMaster = _conns[x]; - return; - } - } - } - } - catch ( std::exception& e ) { - if ( retry ) - log(_logLevel) << "checkmaster: caught exception " << c->toString() << ' ' << e.what() << endl; - } - } - sleepsecs(1); - } - - uassert( 10009 , "checkmaster: no master found", false); - } - - DBClientConnection * DBClientReplicaSet::checkMaster() { - if ( _currentMaster ){ - // a master is selected. let's just make sure connection didn't die - if ( ! _currentMaster->isFailed() ) - return _currentMaster; - _currentMaster = 0; - } - - _checkMaster(); - assert( _currentMaster ); - return _currentMaster; - } - - DBClientConnection& DBClientReplicaSet::masterConn(){ - return *checkMaster(); - } - - DBClientConnection& DBClientReplicaSet::slaveConn(){ - DBClientConnection * m = checkMaster(); - assert( ! m->isFailed() ); - - DBClientConnection * failedSlave = 0; - - for ( unsigned i=0; i<_conns.size(); i++ ){ - if ( m == _conns[i] ) - continue; - failedSlave = _conns[i]; - if ( _conns[i]->isFailed() ) - continue; - return *_conns[i]; - } - - assert(failedSlave); - return *failedSlave; - } - - bool DBClientReplicaSet::connect(){ - string errmsg; - - bool anyGood = false; - for ( unsigned i=0; i<_conns.size(); i++ ){ - if ( _conns[i]->connect( _servers[i] , errmsg ) ) - anyGood = true; - } - - if ( ! anyGood ) - return false; - - try { - checkMaster(); - } - catch (AssertionException&) { - return false; - } - return true; - } - - bool DBClientReplicaSet::auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword ) { - DBClientConnection * m = checkMaster(); - if( !m->auth(dbname, username, pwd, errmsg, digestPassword ) ) - return false; - - /* we try to authentiate with the other half of the pair -- even if down, that way the authInfo is cached. 
*/ - for ( unsigned i=0; i<_conns.size(); i++ ){ - if ( _conns[i] == m ) - continue; - try { - string e; - _conns[i]->auth( dbname , username , pwd , e , digestPassword ); - } - catch ( AssertionException& ){ - } - } - - return true; - } - - auto_ptr DBClientReplicaSet::query(const string &a, Query b, int c, int d, - const BSONObj *e, int f, int g){ - // TODO: if slave ok is set go to a slave - return checkMaster()->query(a,b,c,d,e,f,g); - } - - BSONObj DBClientReplicaSet::findOne(const string &a, const Query& b, const BSONObj *c, int d) { - return checkMaster()->findOne(a,b,c,d); - } - - bool DBClientReplicaSet::isMember( const DBConnector * conn ) const { - if ( conn == this ) - return true; - - for ( unsigned i=0; i<_conns.size(); i++ ) - if ( _conns[i]->isMember( conn ) ) - return true; - - return false; - } - bool serverAlive( const string &uri ) { DBClientConnection c( false, 0, 20 ); // potentially the connection to server could fail while we're checking if it's alive - so use timeouts @@ -1084,5 +955,5 @@ namespace mongo { return false; return true; } - + } // namespace mongo diff --git a/client/dbclient.h b/client/dbclient.h index 9448055..9cb6571 100644 --- a/client/dbclient.h +++ b/client/dbclient.h @@ -40,7 +40,7 @@ namespace mongo { /** allow query of replica slave. normally these return an error except for namespace "local". */ QueryOption_SlaveOk = 1 << 2, - + // findingStart mode is used to find the first operation of interest when // we are scanning through a repl log. For efficiency in the common case, // where the first operation of interest is closer to the tail than the head, @@ -52,25 +52,31 @@ namespace mongo { QueryOption_OplogReplay = 1 << 3, /** The server normally times out idle cursors after an inactivy period to prevent excess memory uses - Set this option to prevent that. + Set this option to prevent that. */ QueryOption_NoCursorTimeout = 1 << 4, - /** Use with QueryOption_CursorTailable. If we are at the end of the data, block for a while rather + /** Use with QueryOption_CursorTailable. If we are at the end of the data, block for a while rather than returning no data. After a timeout period, we do return as normal. */ QueryOption_AwaitData = 1 << 5, - /** Stream the data down full blast in multiple "more" packages, on the assumption that the client - will fully read all data queried. Faster when you are pulling a lot of data and know you want to + /** Stream the data down full blast in multiple "more" packages, on the assumption that the client + will fully read all data queried. Faster when you are pulling a lot of data and know you want to pull it all down. Note: it is not allowed to not read all the data unless you close the connection. - Use the query( boost::function f, ... ) version of the connection's query() + Use the query( boost::function f, ... ) version of the connection's query() method, and it will take care of all the details for you. 
*/ QueryOption_Exhaust = 1 << 6, - - QueryOption_AllSupported = QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | QueryOption_NoCursorTimeout | QueryOption_AwaitData | QueryOption_Exhaust + + /** When sharded, this means its ok to return partial results + Usually we will fail a query if all required shards aren't up + If this is set, it'll be a partial result set + */ + QueryOption_PartialResults = 1 << 7 , + + QueryOption_AllSupported = QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | QueryOption_NoCursorTimeout | QueryOption_AwaitData | QueryOption_Exhaust | QueryOption_PartialResults }; @@ -78,7 +84,7 @@ namespace mongo { /** Upsert - that is, insert the item if no matching item is found. */ UpdateOption_Upsert = 1 << 0, - /** Update multiple documents (if multiple documents match query expression). + /** Update multiple documents (if multiple documents match query expression). (Default is update a single document and stop.) */ UpdateOption_Multi = 1 << 1, @@ -96,28 +102,40 @@ namespace mongo { class DBClientBase; + /** + * ConnectionString handles parsing different ways to connect to mongo and determining method + * samples: + * server + * server:port + * foo/server:port,server:port SET + * server,server,server SYNC + * + * tyipcal use + * string errmsg, + * ConnectionString cs = ConnectionString::parse( url , errmsg ); + * if ( ! cs.isValid() ) throw "bad: " + errmsg; + * DBClientBase * conn = cs.connect( errmsg ); + */ class ConnectionString { public: enum ConnectionType { INVALID , MASTER , PAIR , SET , SYNC }; - - ConnectionString( const HostAndPort& server ){ + + ConnectionString() { + _type = INVALID; + } + + ConnectionString( const HostAndPort& server ) { _type = MASTER; _servers.push_back( server ); _finishInit(); } - // TODO Delete if nobody is using - //ConnectionString( ConnectionType type , const vector& servers ) - // : _type( type ) , _servers( servers ){ - // _finishInit(); - //} - - ConnectionString( ConnectionType type , const string& s , const string& setName = "" ){ + ConnectionString( ConnectionType type , const string& s , const string& setName = "" ) { _type = type; _setName = setName; _fillServers( s ); - - switch ( _type ){ + + switch ( _type ) { case MASTER: assert( _servers.size() == 1 ); break; @@ -131,73 +149,54 @@ namespace mongo { default: assert( _servers.size() > 0 ); } - + _finishInit(); } - ConnectionString( const string& s , ConnectionType favoredMultipleType ){ + ConnectionString( const string& s , ConnectionType favoredMultipleType ) { + _type = INVALID; + _fillServers( s ); - if ( _servers.size() == 1 ){ + if ( _type != INVALID ) { + // set already + } + else if ( _servers.size() == 1 ) { _type = MASTER; } else { _type = favoredMultipleType; - assert( _type != MASTER ); + assert( _type == SET || _type == SYNC ); } _finishInit(); } bool isValid() const { return _type != INVALID; } - - string toString() const { - return _string; - } + + string toString() const { return _string; } DBClientBase* connect( string& errmsg ) const; - static ConnectionString parse( const string& url , string& errmsg ); - - string getSetName() const{ - return _setName; - } + string getSetName() const { return _setName; } - vector getServers() const { - return _servers; - } + vector getServers() const { return _servers; } + ConnectionType type() const { return _type; } + + static ConnectionString parse( const string& url , string& errmsg ); + + static string typeToString( ConnectionType type ); + private: - 
ConnectionString(){ - _type = INVALID; - } - - void _fillServers( string s ){ - string::size_type idx; - while ( ( idx = s.find( ',' ) ) != string::npos ){ - _servers.push_back( s.substr( 0 , idx ) ); - s = s.substr( idx + 1 ); - } - _servers.push_back( s ); - } - - void _finishInit(){ - stringstream ss; - if ( _type == SET ) - ss << _setName << "/"; - for ( unsigned i=0; i<_servers.size(); i++ ){ - if ( i > 0 ) - ss << ","; - ss << _servers[i].toString(); - } - _string = ss.str(); - } + void _fillServers( string s ); + void _finishInit(); ConnectionType _type; vector _servers; string _string; string _setName; }; - + /** * controls how much a clients cares about writes * default is NORMAL @@ -213,7 +212,7 @@ namespace mongo { class DBClientCursor; class DBClientCursorBatchIterator; - /** Represents a Mongo query expression. Typically one uses the QUERY(...) macro to construct a Query object. + /** Represents a Mongo query expression. Typically one uses the QUERY(...) macro to construct a Query object. Examples: QUERY( "age" << 33 << "school" << "UCLA" ).sort("name") QUERY( "age" << GT << 30 << LT << 50 ) @@ -223,22 +222,22 @@ namespace mongo { BSONObj obj; Query() : obj(BSONObj()) { } Query(const BSONObj& b) : obj(b) { } - Query(const string &json) : + Query(const string &json) : obj(fromjson(json)) { } - Query(const char * json) : + Query(const char * json) : obj(fromjson(json)) { } - /** Add a sort (ORDER BY) criteria to the query expression. + /** Add a sort (ORDER BY) criteria to the query expression. @param sortPattern the sort order template. For example to order by name ascending, time descending: { name : 1, ts : -1 } i.e. BSON( "name" << 1 << "ts" << -1 ) - or + or fromjson(" name : 1, ts : -1 ") */ Query& sort(const BSONObj& sortPattern); - /** Add a sort (ORDER BY) criteria to the query expression. + /** Add a sort (ORDER BY) criteria to the query expression. This version of sort() assumes you want to sort on a single field. @param asc = 1 for ascending order asc = -1 for descending order @@ -267,8 +266,8 @@ namespace mongo { */ Query& explain(); - /** Use snapshot mode for the query. Snapshot mode assures no duplicates are returned, or objects missed, which were - present at both the start and end of the query's execution (if an object is new during the query, or deleted during + /** Use snapshot mode for the query. Snapshot mode assures no duplicates are returned, or objects missed, which were + present at both the start and end of the query's execution (if an object is new during the query, or deleted during the query, it may or may not be returned, even with snapshot mode). Note that short query responses (less than 1MB) are always effectively snapshotted. @@ -277,16 +276,16 @@ namespace mongo { */ Query& snapshot(); - /** Queries to the Mongo database support a $where parameter option which contains - a javascript function that is evaluated to see whether objects being queried match - its criteria. Use this helper to append such a function to a query object. + /** Queries to the Mongo database support a $where parameter option which contains + a javascript function that is evaluated to see whether objects being queried match + its criteria. Use this helper to append such a function to a query object. Your query may also contain other traditional Mongo query terms. - @param jscode The javascript function to evaluate against each potential object - match. The function must return true for matched objects. 
Use the this + @param jscode The javascript function to evaluate against each potential object + match. The function must return true for matched objects. Use the this variable to inspect the current object. - @param scope SavedContext for the javascript object. List in a BSON object any - variables you would like defined when the jscode executes. One can think + @param scope SavedContext for the javascript object. List in a BSON object any + variables you would like defined when the jscode executes. One can think of these as "bind variables". Examples: @@ -300,12 +299,12 @@ namespace mongo { * if this query has an orderby, hint, or some other field */ bool isComplex( bool * hasDollar = 0 ) const; - + BSONObj getFilter() const; BSONObj getSort() const; BSONObj getHint() const; bool isExplain() const; - + string toString() const; operator string() const { return toString(); } private: @@ -316,13 +315,13 @@ namespace mongo { BSONObjBuilder b; b.appendElements(obj); b.append(fieldName, val); - obj = b.obj(); + obj = b.obj(); } }; - -/** Typically one uses the QUERY(...) macro to construct a Query object. - Example: QUERY( "age" << 33 << "school" << "UCLA" ) -*/ + + /** Typically one uses the QUERY(...) macro to construct a Query object. + Example: QUERY( "age" << 33 << "school" << "UCLA" ) + */ #define QUERY(x) mongo::Query( BSON(x) ) /** @@ -331,15 +330,14 @@ namespace mongo { class DBConnector { public: virtual ~DBConnector() {} - virtual bool call( Message &toSend, Message &response, bool assertOk=true ) = 0; + /** actualServer is set to the actual server where they call went if there was a choice (SlaveOk) */ + virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 ) = 0; virtual void say( Message &toSend ) = 0; virtual void sayPiggyBack( Message &toSend ) = 0; virtual void checkResponse( const char* data, int nReturned ) {} /* used by QueryOption_Exhaust. To use that your subclass must implement this. */ virtual void recv( Message& m ) { assert(false); } - - virtual string getServerAddress() const = 0; }; /** @@ -352,9 +350,9 @@ namespace mongo { /** don't use this - called automatically by DBClientCursor for you */ virtual auto_ptr getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ) = 0; - + virtual void insert( const string &ns, BSONObj obj ) = 0; - + virtual void insert( const string &ns, const vector< BSONObj >& v ) = 0; virtual void remove( const string &ns , Query query, bool justOne = 0 ) = 0; @@ -369,6 +367,7 @@ namespace mongo { */ virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + virtual string getServerAddress() const = 0; }; @@ -397,18 +396,18 @@ namespace mongo { directly call runCommand. @param dbname database name. Use "admin" for global administrative commands. - @param cmd the command object to execute. For example, { ismaster : 1 } - @param info the result object the database returns. Typically has { ok : ..., errmsg : ... } fields - set. + @param cmd the command object to execute. For example, { ismaster : 1 } + @param info the result object the database returns. Typically has { ok : ..., errmsg : ... } fields + set. @param options see enum QueryOptions - normally not needed to run a command @return true if the command returned "ok". */ virtual bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); /** Authorize access to a particular database. 
- Authentication is separate for each database on the server -- you may authenticate for any + Authentication is separate for each database on the server -- you may authenticate for any number of databases on a single connection. - The "admin" database is special and once authenticated provides access to all databases on the + The "admin" database is special and once authenticated provides access to all databases on the server. @param digestPassword if password is plain text, set this to true. otherwise assumed to be pre-digested @return true if successful @@ -418,7 +417,7 @@ namespace mongo { /** count number of objects in collection ns that match the query criteria specified throws UserAssertion if database returns an error */ - unsigned long long count(const string &ns, const BSONObj& query = BSONObj(), int options=0 ); + virtual unsigned long long count(const string &ns, const BSONObj& query = BSONObj(), int options=0, int limit=0, int skip=0 ); string createPasswordDigest( const string &username , const string &clearTextPassword ); @@ -450,14 +449,14 @@ namespace mongo { */ bool createCollection(const string &ns, long long size = 0, bool capped = false, int max = 0, BSONObj *info = 0); - /** Get error result from the last operation on this connection. + /** Get error result from the last operation on this connection. @return error message text, or empty string if no error. */ string getLastError(); - /** Get error result from the last operation on this connection. - @return full error object. - */ - virtual BSONObj getLastErrorDetailed(); + /** Get error result from the last operation on this connection. + @return full error object. + */ + virtual BSONObj getLastErrorDetailed(); static string getLastErrorString( const BSONObj& res ); @@ -466,23 +465,23 @@ namespace mongo { @return { err : , nPrev : , ok : 1 } result.err will be null if no error has occurred. - */ + */ BSONObj getPrevError(); - /** Reset the previous error state for this connection (accessed via getLastError and - getPrevError). Useful when performing several operations at once and then checking + /** Reset the previous error state for this connection (accessed via getLastError and + getPrevError). Useful when performing several operations at once and then checking for an error after attempting all operations. */ bool resetError() { return simpleCommand("admin", 0, "reseterror"); } - /** Delete the specified collection. */ - virtual bool dropCollection( const string &ns ){ + /** Delete the specified collection. */ + virtual bool dropCollection( const string &ns ) { string db = nsGetDB( ns ); string coll = nsGetCollection( ns ); uassert( 10011 , "no collection name", coll.size() ); BSONObj info; - + bool res = runCommand( db.c_str() , BSON( "drop" << coll ) , info ); resetIndexCache(); return res; @@ -494,7 +493,7 @@ namespace mongo { bool repairDatabase(const string &dbname, BSONObj *info = 0) { return simpleCommand(dbname, info, "repairDatabase"); } - + /** Copy database from one server or name to another server or name. Generally, you should dropDatabase() first as otherwise the copied information will MERGE @@ -524,23 +523,23 @@ namespace mongo { ProfileOff = 0, ProfileSlow = 1, // log very slow (>100ms) operations ProfileAll = 2 - + }; bool setDbProfilingLevel(const string &dbname, ProfilingLevel level, BSONObj *info = 0); bool getDbProfilingLevel(const string &dbname, ProfilingLevel& level, BSONObj *info = 0); - /** Run a map/reduce job on the server. + /** Run a map/reduce job on the server. 
See http://www.mongodb.org/display/DOCS/MapReduce ns namespace (db+collection name) of input data - jsmapf javascript map function code - jsreducef javascript reduce function code. + jsmapf javascript map function code + jsreducef javascript reduce function code. query optional query filter for the input - output optional permanent output collection name. if not specified server will + output optional permanent output collection name. if not specified server will generate a temporary collection and return its name. - returns a result object which contains: + returns a result object which contains: { result : , numObjects : , timeMillis : , @@ -548,8 +547,8 @@ namespace mongo { [, err : ] } - For example one might call: - result.getField("ok").trueValue() + For example one might call: + result.getField("ok").trueValue() on the result to check if ok. */ BSONObj mapreduce(const string &ns, const string &jsmapf, const string &jsreducef, BSONObj query = BSONObj(), const string& output = ""); @@ -560,7 +559,7 @@ namespace mongo { jscode source code for a javascript function. info the command object which contains any information on the invocation result including the return value and other information. If an error occurs running the jscode, error - information will be in info. (try "out() << info.toString()") + information will be in info. (try "out() << info.toString()") retValue return value from the jscode function. args args to pass to the jscode function. when invoked, the 'args' variable will be defined for use by the jscode. @@ -571,10 +570,10 @@ namespace mongo { */ bool eval(const string &dbname, const string &jscode, BSONObj& info, BSONElement& retValue, BSONObj *args = 0); - /** - + /** validate a collection, checking for errors and reporting back statistics. + this operation is slow and blocking. */ - bool validate( const string &ns , bool scandata=true ){ + bool validate( const string &ns , bool scandata=true ) { BSONObj cmd = BSON( "validate" << nsGetCollection( ns ) << "scandata" << scandata ); BSONObj info; return runCommand( nsGetDB( ns ).c_str() , cmd , info ); @@ -607,7 +606,7 @@ namespace mongo { ret = (NumType) retValue.number(); return true; } - + /** get a list of all the current databases uses the { listDatabases : 1 } command. @@ -623,16 +622,18 @@ namespace mongo { bool exists( const string& ns ); /** Create an index if it does not already exist. - ensureIndex calls are remembered so it is safe/fast to call this function many + ensureIndex calls are remembered so it is safe/fast to call this function many times in your code. @param ns collection to be indexed @param keys the "key pattern" for the index. e.g., { name : 1 } @param unique if true, indicates that key uniqueness should be enforced for this index @param name if not isn't specified, it will be created from the keys (recommended) + @param cache if set to false, the index cache for the connection won't remember this call @return whether or not sent message to db. 
should be true on first call, false on subsequent unless resetIndexCache was called */ - virtual bool ensureIndex( const string &ns , BSONObj keys , bool unique = false, const string &name = "" ); + virtual bool ensureIndex( const string &ns , BSONObj keys , bool unique = false, const string &name = "", + bool cache = true ); /** clears the index cache, so the subsequent call to ensureIndex for any index will go to the server @@ -640,17 +641,17 @@ namespace mongo { virtual void resetIndexCache(); virtual auto_ptr getIndexes( const string &ns ); - + virtual void dropIndex( const string& ns , BSONObj keys ); virtual void dropIndex( const string& ns , const string& indexName ); - + /** drops all indexes for the collection */ virtual void dropIndexes( const string& ns ); virtual void reIndex( const string& ns ); - + string genIndexName( const BSONObj& keys ); /** Erase / drop an entire database */ @@ -663,33 +664,35 @@ namespace mongo { virtual string toString() = 0; /** @return the database name portion of an ns string */ - string nsGetDB( const string &ns ){ + string nsGetDB( const string &ns ) { string::size_type pos = ns.find( "." ); if ( pos == string::npos ) return ns; - + return ns.substr( 0 , pos ); } - + /** @return the collection name portion of an ns string */ - string nsGetCollection( const string &ns ){ + string nsGetCollection( const string &ns ) { string::size_type pos = ns.find( "." ); if ( pos == string::npos ) return ""; - return ns.substr( pos + 1 ); + return ns.substr( pos + 1 ); } protected: bool isOk(const BSONObj&); - + + BSONObj _countCmd(const string &ns, const BSONObj& query, int options, int limit, int skip ); + enum QueryOptions availableOptions(); - + private: enum QueryOptions _cachedAvailableOptions; bool _haveCachedAvailableOptions; }; - + /** abstract class that implements the core db operations */ @@ -698,20 +701,20 @@ namespace mongo { WriteConcern _writeConcern; public: - DBClientBase(){ + DBClientBase() { _writeConcern = W_NORMAL; } - + WriteConcern getWriteConcern() const { return _writeConcern; } - void setWriteConcern( WriteConcern w ){ _writeConcern = w; } - + void setWriteConcern( WriteConcern w ) { _writeConcern = w; } + /** send a query to the database. @param ns namespace to query, format is .[.]* @param query query to perform on the collection. this is a BSONObj (binary JSON) You may format as { query: { ... }, orderby: { ... } } to specify a sort order. - @param nToReturn n to return. 0 = unlimited + @param nToReturn n to return (i.e., limit). 0 = unlimited @param nToSkip start with the nth item @param fieldsToReturn optional template of which fields to select. 
if unspecified, returns all fields @param queryOptions see options enum at top of this file @@ -744,23 +747,15 @@ namespace mongo { @param justOne if this true, then once a single match is found will stop */ virtual void remove( const string &ns , Query q , bool justOne = 0 ); - + /** updates objects matching query */ virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = false , bool multi = false ); - + virtual bool isFailed() const = 0; - - virtual void killCursor( long long cursorID ) = 0; - static int countCommas( const string& s ){ - int n = 0; - for ( unsigned i=0; i p; - boost::scoped_ptr server; - bool failed; // true if some sort of fatal error has ever happened - bool autoReconnect; - time_t lastReconnectTry; - HostAndPort _server; // remember for reconnects - string _serverString; - int _port; - void _checkConnection(); - void checkConnection() { if( failed ) _checkConnection(); } - map< string, pair > authCache; - double _timeout; - - bool _connect( string& errmsg ); public: - /** @param _autoReconnect if true, automatically reconnect on a connection failure @param cp used by DBClientReplicaSet. You do not need to specify this parameter - @param timeout tcp timeout in seconds - this is for read/write, not connect. + @param timeout tcp timeout in seconds - this is for read/write, not connect. Connect timeout is fixed, but short, at 5 seconds. */ - DBClientConnection(bool _autoReconnect=false, DBClientReplicaSet* cp=0, double timeout=0) : - clientSet(cp), failed(false), autoReconnect(_autoReconnect), lastReconnectTry(0), _timeout(timeout) { } + DBClientConnection(bool _autoReconnect=false, DBClientReplicaSet* cp=0, double so_timeout=0) : + clientSet(cp), failed(false), autoReconnect(_autoReconnect), lastReconnectTry(0), _so_timeout(so_timeout) { + _numConnections++; + } + + virtual ~DBClientConnection() { + _numConnections--; + } /** Connect to a Mongo database server. @@ -821,14 +804,14 @@ namespace mongo { @deprecated please use HostAndPort @return false if fails to connect. */ - virtual bool connect(const char * hostname, string& errmsg){ + virtual bool connect(const char * hostname, string& errmsg) { // TODO: remove this method HostAndPort t( hostname ); return connect( t , errmsg ); } /** Connect to a Mongo database server. - + If autoReconnect is true, you can try to use the DBClientConnection even when false was returned -- it will try to connect again. @@ -846,9 +829,9 @@ namespace mongo { @param serverHostname host to connect to. can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) */ - void connect(const string& serverHostname) { + void connect(const string& serverHostname) { string errmsg; - if( !connect(HostAndPort(serverHostname), errmsg) ) + if( !connect(HostAndPort(serverHostname), errmsg) ) throw ConnectException(string("can't connect ") + errmsg); } @@ -860,23 +843,22 @@ namespace mongo { return DBClientBase::query( ns, query, nToReturn, nToSkip, fieldsToReturn, queryOptions , batchSize ); } - /** uses QueryOption_Exhaust - use DBClientCursorBatchIterator if you want to do items in large blocks, perhpas to avoid granular locking and such. + /** Uses QueryOption_Exhaust + Exhaust mode sends back all data queries as fast as possible, with no back-and-for for OP_GETMORE. If you are certain + you will exhaust the query, it could be useful. + + Use DBClientCursorBatchIterator version if you want to do items in large blocks, perhaps to avoid granular locking and such. 
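A rough sketch of the callback form, assuming c is a connected DBClientConnection, test.coll is a placeholder namespace, and printDoc is a free function supplied by the caller:

    void printDoc( const BSONObj& doc ) {
        cout << doc.toString() << endl;
    }

    // ... then, to stream every matching document through the callback:
    boost::function< void(const BSONObj&) > f = printDoc;
    unsigned long long nDocs = c.query( f , "test.coll" , Query() );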
*/ unsigned long long query( boost::function f, const string& ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); unsigned long long query( boost::function f, const string& ns, Query query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); /** - @return true if this connection is currently in a failed state. When autoreconnect is on, + @return true if this connection is currently in a failed state. When autoreconnect is on, a connection will transition back to an ok state after reconnecting. */ - bool isFailed() const { - return failed; - } + bool isFailed() const { return failed; } - MessagingPort& port() { - return *p; - } + MessagingPort& port() { return *p; } string toStringLong() const { stringstream ss; @@ -886,143 +868,59 @@ namespace mongo { } /** Returns the address of the server */ - string toString() { - return _serverString; - } - - string getServerAddress() const { - return _serverString; - } - - virtual void killCursor( long long cursorID ); + string toString() { return _serverString; } - virtual bool callRead( Message& toSend , Message& response ){ - return call( toSend , response ); - } + string getServerAddress() const { return _serverString; } + virtual void killCursor( long long cursorID ); + virtual bool callRead( Message& toSend , Message& response ) { return call( toSend , response ); } virtual void say( Message &toSend ); - virtual bool call( Message &toSend, Message &response, bool assertOk = true ); - - virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } - - virtual bool isMember( const DBConnector * conn ) const { return this == conn; }; - + virtual bool call( Message &toSend, Message &response, bool assertOk = true , string * actualServer = 0 ); + virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } virtual void checkResponse( const char *data, int nReturned ); + void setSoTimeout(double to) { _so_timeout = to; } + + static int getNumConnections() { + return _numConnections; + } + + static void setLazyKillCursor( bool lazy ) { _lazyKillCursor = lazy; } + static bool getLazyKillCursor() { return _lazyKillCursor; } protected: friend class SyncClusterConnection; virtual void recv( Message& m ); virtual void sayPiggyBack( Message &toSend ); - }; - - /** Use this class to connect to a replica set of servers. The class will manage - checking for which server in a replica set is master, and do failover automatically. - - This can also be used to connect to replica pairs since pairs are a subset of sets - - On a failover situation, expect at least one operation to return an error (throw - an exception) before the failover is complete. Operations are not retried. - */ - class DBClientReplicaSet : public DBClientBase { - string _name; - DBClientConnection * _currentMaster; - vector _servers; - vector _conns; - - - void _checkMaster(); - DBClientConnection * checkMaster(); - - public: - /** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet connections. */ - DBClientReplicaSet( const string& name , const vector& servers ); - virtual ~DBClientReplicaSet(); - - /** Returns false if nomember of the set were reachable, or neither is - master, although, - when false returned, you can still try to use this connection object, it will - try reconnects. - */ - bool connect(); - - /** Authorize. 
Authorizes all nodes as needed - */ - virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true ); - - /** throws userassertion "no master found" */ - virtual - auto_ptr query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, - const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ); - - /** throws userassertion "no master found" */ - virtual - BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); - - /** insert */ - virtual void insert( const string &ns , BSONObj obj ) { - checkMaster()->insert(ns, obj); - } - - /** insert multiple objects. Note that single object insert is asynchronous, so this version - is only nominally faster and not worth a special effort to try to use. */ - virtual void insert( const string &ns, const vector< BSONObj >& v ) { - checkMaster()->insert(ns, v); - } - - /** remove */ - virtual void remove( const string &ns , Query obj , bool justOne = 0 ) { - checkMaster()->remove(ns, obj, justOne); - } - - /** update */ - virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 ) { - return checkMaster()->update(ns, query, obj, upsert,multi); - } - - virtual void killCursor( long long cursorID ){ - checkMaster()->killCursor( cursorID ); - } - - string toString(); - - /* this is the callback from our underlying connections to notify us that we got a "not master" error. - */ - void isntMaster() { - _currentMaster = 0; - } - - string getServerAddress() const; - - DBClientConnection& masterConn(); - DBClientConnection& slaveConn(); - - - virtual bool call( Message &toSend, Message &response, bool assertOk=true ) { return checkMaster()->call( toSend , response , assertOk ); } - virtual void say( Message &toSend ) { checkMaster()->say( toSend ); } - virtual bool callRead( Message& toSend , Message& response ){ return checkMaster()->callRead( toSend , response ); } - - virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; } + DBClientReplicaSet *clientSet; + boost::scoped_ptr p; + boost::scoped_ptr server; + bool failed; + const bool autoReconnect; + time_t lastReconnectTry; + HostAndPort _server; // remember for reconnects + string _serverString; + void _checkConnection(); - virtual bool isMember( const DBConnector * conn ) const; + // throws SocketException if in failed state and not reconnecting or if waiting to reconnect + void checkConnection() { if( failed ) _checkConnection(); } - virtual void checkResponse( const char *data, int nReturned ) { checkMaster()->checkResponse( data , nReturned ); } + map< string, pair > authCache; + double _so_timeout; + bool _connect( string& errmsg ); - protected: - virtual void sayPiggyBack( Message &toSend ) { checkMaster()->say( toSend ); } - - bool isFailed() const { - return _currentMaster == 0 || _currentMaster->isFailed(); - } + static AtomicUInt _numConnections; + static bool _lazyKillCursor; // lazy means we piggy back kill cursors on next op }; - + /** pings server to check if it's up */ bool serverAlive( const string &uri ); DBClientBase * createDirectClient(); - + } // namespace mongo #include "dbclientcursor.h" +#include "dbclient_rs.h" #include "undef_macros.h" diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp new file mode 100644 index 0000000..fd8ecec --- /dev/null +++ b/client/dbclient_rs.cpp @@ -0,0 +1,594 @@ +// dbclient.cpp - connect to a Mongo database as a database, 
from C++ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pch.h" +#include "dbclient.h" +#include "../bson/util/builder.h" +#include "../db/jsobj.h" +#include "../db/json.h" +#include "../db/dbmessage.h" +#include "connpool.h" +#include "dbclient_rs.h" +#include "../util/background.h" + +namespace mongo { + + // -------------------------------- + // ----- ReplicaSetMonitor --------- + // -------------------------------- + + // global background job responsible for checking every X amount of time + class ReplicaSetMonitorWatcher : public BackgroundJob { + public: + ReplicaSetMonitorWatcher() : _safego("ReplicaSetMonitorWatcher::_safego") , _started(false) {} + + virtual string name() const { return "ReplicaSetMonitorWatcher"; } + + void safeGo() { + // check outside of lock for speed + if ( _started ) + return; + + scoped_lock lk( _safego ); + if ( _started ) + return; + _started = true; + + go(); + } + protected: + void run() { + while ( ! inShutdown() ) { + sleepsecs( 20 ); + try { + ReplicaSetMonitor::checkAll(); + } + catch ( std::exception& e ) { + error() << "ReplicaSetMonitorWatcher: check failed: " << e.what() << endl; + } + } + } + + mongo::mutex _safego; + bool _started; + + } replicaSetMonitorWatcher; + + + ReplicaSetMonitor::ReplicaSetMonitor( const string& name , const vector& servers ) + : _lock( "ReplicaSetMonitor instance" ) , _checkConnectionLock( "ReplicaSetMonitor check connection lock" ), _name( name ) , _master(-1) { + + uassert( 13642 , "need at least 1 node for a replica set" , servers.size() > 0 ); + + if ( _name.size() == 0 ) { + warning() << "replica set name empty, first node: " << servers[0] << endl; + } + + string errmsg; + + for ( unsigned i=0; i conn( new DBClientConnection( true , 0, 5.0 ) ); + if (!conn->connect( servers[i] , errmsg ) ) { + log(1) << "error connecting to seed " << servers[i] << ": " << errmsg << endl; + // skip seeds that don't work + continue; + } + + _nodes.push_back( Node( servers[i] , conn.release() ) ); + + string maybePrimary; + if (_checkConnection( _nodes[_nodes.size()-1].conn , maybePrimary, false)) { + break; + } + } + } + + ReplicaSetMonitor::~ReplicaSetMonitor() { + for ( unsigned i=0; i<_nodes.size(); i++ ) + delete _nodes[i].conn; + _nodes.clear(); + _master = -1; + } + + ReplicaSetMonitorPtr ReplicaSetMonitor::get( const string& name , const vector& servers ) { + scoped_lock lk( _setsLock ); + ReplicaSetMonitorPtr& m = _sets[name]; + if ( ! m ) + m.reset( new ReplicaSetMonitor( name , servers ) ); + + replicaSetMonitorWatcher.safeGo(); + + return m; + } + + void ReplicaSetMonitor::checkAll() { + set seen; + + while ( true ) { + ReplicaSetMonitorPtr m; + { + for ( map::iterator i=_sets.begin(); i!=_sets.end(); ++i ) { + string name = i->first; + if ( seen.count( name ) ) + continue; + LOG(1) << "checking replica set: " << name << endl; + seen.insert( name ); + m = i->second; + break; + } + } + + if ( ! 
m ) + break; + + m->check(); + } + + + } + + void ReplicaSetMonitor::setConfigChangeHook( ConfigChangeHook hook ) { + massert( 13610 , "ConfigChangeHook already specified" , _hook == 0 ); + _hook = hook; + } + + string ReplicaSetMonitor::getServerAddress() const { + StringBuilder ss; + if ( _name.size() ) + ss << _name << "/"; + + { + scoped_lock lk( _lock ); + for ( unsigned i=0; i<_nodes.size(); i++ ) { + if ( i > 0 ) + ss << ","; + ss << _nodes[i].addr.toString(); + } + } + return ss.str(); + } + + bool ReplicaSetMonitor::contains( const string& server ) const { + scoped_lock lk( _lock ); + for ( unsigned i=0; i<_nodes.size(); i++ ) { + if ( _nodes[i].addr == server ) + return true; + } + return false; + } + + + void ReplicaSetMonitor::notifyFailure( const HostAndPort& server ) { + scoped_lock lk( _lock ); + if ( _master >= 0 && _master < (int)_nodes.size() ) { + if ( server == _nodes[_master].addr ) + _master = -1; + } + } + + + + HostAndPort ReplicaSetMonitor::getMaster() { + { + scoped_lock lk( _lock ); + if ( _master >= 0 && _nodes[_master].ok ) + return _nodes[_master].addr; + } + + _check(); + + scoped_lock lk( _lock ); + uassert( 10009 , str::stream() << "ReplicaSetMonitor no master found for set: " << _name , _master >= 0 ); + return _nodes[_master].addr; + } + + HostAndPort ReplicaSetMonitor::getSlave( const HostAndPort& prev ) { + // make sure its valid + if ( prev.port() > 0 ) { + scoped_lock lk( _lock ); + for ( unsigned i=0; i<_nodes.size(); i++ ) { + if ( prev != _nodes[i].addr ) + continue; + + if ( _nodes[i].ok ) + return prev; + break; + } + } + + return getSlave(); + } + + HostAndPort ReplicaSetMonitor::getSlave() { + int x = rand() % _nodes.size(); + { + scoped_lock lk( _lock ); + for ( unsigned i=0; i<_nodes.size(); i++ ) { + int p = ( i + x ) % _nodes.size(); + if ( p == _master ) + continue; + if ( _nodes[p].ok ) + return _nodes[p].addr; + } + } + + return _nodes[0].addr; + } + + /** + * notify the monitor that server has faild + */ + void ReplicaSetMonitor::notifySlaveFailure( const HostAndPort& server ) { + int x = _find( server ); + if ( x >= 0 ) { + scoped_lock lk( _lock ); + _nodes[x].ok = false; + } + } + + void ReplicaSetMonitor::_checkStatus(DBClientConnection *conn) { + BSONObj status; + + if (!conn->runCommand("admin", BSON("replSetGetStatus" << 1), status) || + !status.hasField("members") || + status["members"].type() != Array) { + return; + } + + BSONObjIterator hi(status["members"].Obj()); + while (hi.more()) { + BSONObj member = hi.next().Obj(); + string host = member["name"].String(); + + int m = -1; + if ((m = _find(host)) <= 0) { + continue; + } + + double state = member["state"].Number(); + if (member["health"].Number() == 1 && (state == 1 || state == 2)) { + scoped_lock lk( _lock ); + _nodes[m].ok = true; + } + else { + scoped_lock lk( _lock ); + _nodes[m].ok = false; + } + } + } + + void ReplicaSetMonitor::_checkHosts( const BSONObj& hostList, bool& changed ) { + BSONObjIterator hi(hostList); + while ( hi.more() ) { + string toCheck = hi.next().String(); + + if ( _find( toCheck ) >= 0 ) + continue; + + HostAndPort h( toCheck ); + DBClientConnection * newConn = new DBClientConnection( true, 0, 5.0 ); + string temp; + newConn->connect( h , temp ); + { + scoped_lock lk( _lock ); + _nodes.push_back( Node( h , newConn ) ); + } + log() << "updated set (" << _name << ") to: " << getServerAddress() << endl; + changed = true; + } + } + + + + bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose ) { + 
scoped_lock lk( _checkConnectionLock ); + bool isMaster = false; + bool changed = false; + try { + BSONObj o; + c->isMaster(isMaster, &o); + + log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << '\n'; + + // add other nodes + string maybePrimary; + if ( o["hosts"].type() == Array ) { + if ( o["primary"].type() == String ) + maybePrimary = o["primary"].String(); + + _checkHosts(o["hosts"].Obj(), changed); + } + if (o.hasField("passives") && o["passives"].type() == Array) { + _checkHosts(o["passives"].Obj(), changed); + } + + _checkStatus(c); + } + catch ( std::exception& e ) { + log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: caught exception " << c->toString() << ' ' << e.what() << endl; + } + + if ( changed && _hook ) + _hook( this ); + + return isMaster; + } + + void ReplicaSetMonitor::_check() { + + bool triedQuickCheck = false; + + LOG(1) << "_check : " << getServerAddress() << endl; + + for ( int retry = 0; retry < 2; retry++ ) { + for ( unsigned i=0; i<_nodes.size(); i++ ) { + DBClientConnection * c; + { + scoped_lock lk( _lock ); + c = _nodes[i].conn; + } + + string maybePrimary; + if ( _checkConnection( c , maybePrimary , retry ) ) { + _master = i; + return; + } + + if ( ! triedQuickCheck && maybePrimary.size() ) { + int x = _find( maybePrimary ); + if ( x >= 0 ) { + triedQuickCheck = true; + string dummy; + DBClientConnection * testConn; + { + scoped_lock lk( _lock ); + testConn = _nodes[x].conn; + } + if ( _checkConnection( testConn , dummy , false ) ) { + _master = x; + return; + } + } + } + + } + sleepsecs(1); + } + + } + + void ReplicaSetMonitor::check() { + // first see if the current master is fine + if ( _master >= 0 ) { + string temp; + if ( _checkConnection( _nodes[_master].conn , temp , false ) ) { + // current master is fine, so we're done + return; + } + } + + // we either have no master, or the current is dead + _check(); + } + + int ReplicaSetMonitor::_find( const string& server ) const { + scoped_lock lk( _lock ); + for ( unsigned i=0; i<_nodes.size(); i++ ) + if ( _nodes[i].addr == server ) + return i; + return -1; + } + + int ReplicaSetMonitor::_find( const HostAndPort& server ) const { + scoped_lock lk( _lock ); + for ( unsigned i=0; i<_nodes.size(); i++ ) + if ( _nodes[i].addr == server ) + return i; + return -1; + } + + + mongo::mutex ReplicaSetMonitor::_setsLock( "ReplicaSetMonitor" ); + map ReplicaSetMonitor::_sets; + ReplicaSetMonitor::ConfigChangeHook ReplicaSetMonitor::_hook; + // -------------------------------- + // ----- DBClientReplicaSet --------- + // -------------------------------- + + DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector& servers ) + : _monitor( ReplicaSetMonitor::get( name , servers ) ) { + } + + DBClientReplicaSet::~DBClientReplicaSet() { + } + + DBClientConnection * DBClientReplicaSet::checkMaster() { + HostAndPort h = _monitor->getMaster(); + + if ( h == _masterHost ) { + // a master is selected. let's just make sure connection didn't die + if ( ! _master->isFailed() ) + return _master.get(); + _monitor->notifyFailure( _masterHost ); + } + + _masterHost = _monitor->getMaster(); + _master.reset( new DBClientConnection( true ) ); + string errmsg; + if ( ! 
_master->connect( _masterHost , errmsg ) ) { + _monitor->notifyFailure( _masterHost ); + uasserted( 13639 , str::stream() << "can't connect to new replica set master [" << _masterHost.toString() << "] err: " << errmsg ); + } + _auth( _master.get() ); + return _master.get(); + } + + DBClientConnection * DBClientReplicaSet::checkSlave() { + HostAndPort h = _monitor->getSlave( _slaveHost ); + + if ( h == _slaveHost ) { + if ( ! _slave->isFailed() ) + return _slave.get(); + _monitor->notifySlaveFailure( _slaveHost ); + } + + _slaveHost = _monitor->getSlave(); + _slave.reset( new DBClientConnection( true ) ); + _slave->connect( _slaveHost ); + _auth( _slave.get() ); + return _slave.get(); + } + + + void DBClientReplicaSet::_auth( DBClientConnection * conn ) { + for ( list::iterator i=_auths.begin(); i!=_auths.end(); ++i ) { + const AuthInfo& a = *i; + string errmsg; + if ( ! conn->auth( a.dbname , a.username , a.pwd , errmsg, a.digestPassword ) ) + warning() << "cached auth failed for set: " << _monitor->getName() << " db: " << a.dbname << " user: " << a.username << endl; + + } + + } + + DBClientConnection& DBClientReplicaSet::masterConn() { + return *checkMaster(); + } + + DBClientConnection& DBClientReplicaSet::slaveConn() { + return *checkSlave(); + } + + bool DBClientReplicaSet::connect() { + try { + checkMaster(); + } + catch (AssertionException&) { + if (_master && _monitor) { + _monitor->notifyFailure(_masterHost); + } + return false; + } + return true; + } + + bool DBClientReplicaSet::auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword ) { + DBClientConnection * m = checkMaster(); + + // first make sure it actually works + if( ! m->auth(dbname, username, pwd, errmsg, digestPassword ) ) + return false; + + // now that it does, we should save so that for a new node we can auth + _auths.push_back( AuthInfo( dbname , username , pwd , digestPassword ) ); + return true; + } + + // ------------- simple functions ----------------- + + void DBClientReplicaSet::insert( const string &ns , BSONObj obj ) { + checkMaster()->insert(ns, obj); + } + + void DBClientReplicaSet::insert( const string &ns, const vector< BSONObj >& v ) { + checkMaster()->insert(ns, v); + } + + void DBClientReplicaSet::remove( const string &ns , Query obj , bool justOne ) { + checkMaster()->remove(ns, obj, justOne); + } + + void DBClientReplicaSet::update( const string &ns , Query query , BSONObj obj , bool upsert , bool multi ) { + return checkMaster()->update(ns, query, obj, upsert,multi); + } + + auto_ptr DBClientReplicaSet::query(const string &ns, Query query, int nToReturn, int nToSkip, + const BSONObj *fieldsToReturn, int queryOptions, int batchSize) { + + if ( queryOptions & QueryOption_SlaveOk ) { + // we're ok sending to a slave + // we'll try 2 slaves before just using master + // checkSlave will try a different slave automatically after a failure + for ( int i=0; i<2; i++ ) { + try { + return checkSlave()->query(ns,query,nToReturn,nToSkip,fieldsToReturn,queryOptions,batchSize); + } + catch ( DBException & ) { + LOG(1) << "can't query replica set slave: " << _slaveHost << endl; + } + } + } + + return checkMaster()->query(ns,query,nToReturn,nToSkip,fieldsToReturn,queryOptions,batchSize); + } + + BSONObj DBClientReplicaSet::findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn, int queryOptions) { + if ( queryOptions & QueryOption_SlaveOk ) { + // we're ok sending to a slave + // we'll try 2 slaves before just using master + // checkSlave will 
try a different slave automatically after a failure + for ( int i=0; i<2; i++ ) { + try { + return checkSlave()->findOne(ns,query,fieldsToReturn,queryOptions); + } + catch ( DBException & ) { + LOG(1) << "can't query replica set slave: " << _slaveHost << endl; + } + } + } + + return checkMaster()->findOne(ns,query,fieldsToReturn,queryOptions); + } + + void DBClientReplicaSet::killCursor( long long cursorID ) { + // we should neve call killCursor on a replica set conncetion + // since we don't know which server it belongs to + // can't assume master because of slave ok + // and can have a cursor survive a master change + assert(0); + } + + + bool DBClientReplicaSet::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) { + if ( toSend.operation() == dbQuery ) { + // TODO: might be possible to do this faster by changing api + DbMessage dm( toSend ); + QueryMessage qm( dm ); + if ( qm.queryOptions & QueryOption_SlaveOk ) { + for ( int i=0; i<2; i++ ) { + try { + DBClientConnection* s = checkSlave(); + if ( actualServer ) + *actualServer = s->getServerAddress(); + return s->call( toSend , response , assertOk ); + } + catch ( DBException & ) { + log(1) << "can't query replica set slave: " << _slaveHost << endl; + if ( actualServer ) + *actualServer = ""; + } + } + } + } + + DBClientConnection* m = checkMaster(); + if ( actualServer ) + *actualServer = m->getServerAddress(); + return m->call( toSend , response , assertOk ); + } + +} diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h new file mode 100644 index 0000000..43bf561 --- /dev/null +++ b/client/dbclient_rs.h @@ -0,0 +1,276 @@ +/** @file dbclient_rs.h - connect to a Replica Set, from C++ */ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "../pch.h" +#include "dbclient.h" + +namespace mongo { + + class ReplicaSetMonitor; + typedef shared_ptr ReplicaSetMonitorPtr; + + /** + * manages state about a replica set for client + * keeps tabs on whose master and what slaves are up + * can hand a slave to someone for SLAVE_OK + * one instace per process per replica set + * TODO: we might be able to use a regular Node * to avoid _lock + */ + class ReplicaSetMonitor { + public: + + typedef boost::function1 ConfigChangeHook; + + /** + * gets a cached Monitor per name or will create if doesn't exist + */ + static ReplicaSetMonitorPtr get( const string& name , const vector& servers ); + + /** + * checks all sets for current master and new secondaries + * usually only called from a BackgroundJob + */ + static void checkAll(); + + /** + * this is called whenever the config of any repclia set changes + * currently only 1 globally + * asserts if one already exists + * ownership passes to ReplicaSetMonitor and the hook will actually never be deleted + */ + static void setConfigChangeHook( ConfigChangeHook hook ); + + ~ReplicaSetMonitor(); + + /** @return HostAndPort or throws an exception */ + HostAndPort getMaster(); + + /** + * notify the monitor that server has faild + */ + void notifyFailure( const HostAndPort& server ); + + /** @return prev if its still ok, and if not returns a random slave that is ok for reads */ + HostAndPort getSlave( const HostAndPort& prev ); + + /** @return a random slave that is ok for reads */ + HostAndPort getSlave(); + + + /** + * notify the monitor that server has faild + */ + void notifySlaveFailure( const HostAndPort& server ); + + /** + * checks for current master and new secondaries + */ + void check(); + + string getName() const { return _name; } + + string getServerAddress() const; + + bool contains( const string& server ) const; + + private: + /** + * This populates a list of hosts from the list of seeds (discarding the + * seed list). + * @param name set name + * @param servers seeds + */ + ReplicaSetMonitor( const string& name , const vector& servers ); + + void _check(); + + /** + * Use replSetGetStatus command to make sure hosts in host list are up + * and readable. Sets Node::ok appropriately. + */ + void _checkStatus(DBClientConnection *conn); + + /** + * Add array of hosts to host list. Doesn't do anything if hosts are + * already in host list. + * @param hostList the list of hosts to add + * @param changed if new hosts were added + */ + void _checkHosts(const BSONObj& hostList, bool& changed); + + /** + * Updates host list. + * @param c the connection to check + * @param maybePrimary OUT + * @param verbose + * @return if the connection is good + */ + bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose ); + + int _find( const string& server ) const ; + int _find( const HostAndPort& server ) const ; + + mutable mongo::mutex _lock; // protects _nodes + mutable mongo::mutex _checkConnectionLock; + + string _name; + struct Node { + Node( const HostAndPort& a , DBClientConnection* c ) : addr( a ) , conn(c) , ok(true) {} + HostAndPort addr; + DBClientConnection* conn; + + // if this node is in a failure state + // used for slave routing + // this is too simple, should make it better + bool ok; + }; + + /** + * Host list. + */ + vector _nodes; + + int _master; // which node is the current master. 
-1 means no master is known + + + static mongo::mutex _setsLock; // protects _sets + static map _sets; // set name to Monitor + + static ConfigChangeHook _hook; + }; + + /** Use this class to connect to a replica set of servers. The class will manage + checking for which server in a replica set is master, and do failover automatically. + + This can also be used to connect to replica pairs since pairs are a subset of sets + + On a failover situation, expect at least one operation to return an error (throw + an exception) before the failover is complete. Operations are not retried. + */ + class DBClientReplicaSet : public DBClientBase { + + public: + /** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet connections. */ + DBClientReplicaSet( const string& name , const vector& servers ); + virtual ~DBClientReplicaSet(); + + /** Returns false if nomember of the set were reachable, or neither is + * master, although, + * when false returned, you can still try to use this connection object, it will + * try reconnects. + */ + bool connect(); + + /** Authorize. Authorizes all nodes as needed + */ + virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true ); + + // ----------- simple functions -------------- + + /** throws userassertion "no master found" */ + virtual auto_ptr query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, + const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ); + + /** throws userassertion "no master found" */ + virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + + virtual void insert( const string &ns , BSONObj obj ); + + /** insert multiple objects. Note that single object insert is asynchronous, so this version + is only nominally faster and not worth a special effort to try to use. */ + virtual void insert( const string &ns, const vector< BSONObj >& v ); + + virtual void remove( const string &ns , Query obj , bool justOne = 0 ); + + virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 ); + + virtual void killCursor( long long cursorID ); + + // ---- access raw connections ---- + + DBClientConnection& masterConn(); + DBClientConnection& slaveConn(); + + // ---- callback pieces ------- + + virtual void checkResponse( const char *data, int nReturned ) { checkMaster()->checkResponse( data , nReturned ); } + + /* this is the callback from our underlying connections to notify us that we got a "not master" error. + */ + void isntMaster() { _master.reset(); } + + // ----- status ------ + + virtual bool isFailed() const { return ! 
_master || _master->isFailed(); } + + // ----- informational ---- + + string toString() { return getServerAddress(); } + + string getServerAddress() const { return _monitor->getServerAddress(); } + + virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; } + + // ---- low level ------ + + virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 ); + virtual void say( Message &toSend ) { checkMaster()->say( toSend ); } + virtual bool callRead( Message& toSend , Message& response ) { return checkMaster()->callRead( toSend , response ); } + + + protected: + virtual void sayPiggyBack( Message &toSend ) { checkMaster()->say( toSend ); } + + private: + + DBClientConnection * checkMaster(); + DBClientConnection * checkSlave(); + + void _auth( DBClientConnection * conn ); + + ReplicaSetMonitorPtr _monitor; + + HostAndPort _masterHost; + scoped_ptr _master; + + HostAndPort _slaveHost; + scoped_ptr _slave; + + /** + * for storing authentication info + * fields are exactly for DBClientConnection::auth + */ + struct AuthInfo { + AuthInfo( string d , string u , string p , bool di ) + : dbname( d ) , username( u ) , pwd( p ) , digestPassword( di ) {} + string dbname; + string username; + string pwd; + bool digestPassword; + }; + + // we need to store so that when we connect to a new node on failure + // we can re-auth + // this could be a security issue, as the password is stored in memory + // not sure if/how we should handle + list _auths; + }; + + +} diff --git a/client/dbclientcursor.cpp b/client/dbclientcursor.cpp index 5f9db43..6c6afc0 100644 --- a/client/dbclientcursor.cpp +++ b/client/dbclientcursor.cpp @@ -26,14 +26,14 @@ namespace mongo { void assembleRequest( const string &ns, BSONObj query, int nToReturn, int nToSkip, const BSONObj *fieldsToReturn, int queryOptions, Message &toSend ); - int DBClientCursor::nextBatchSize(){ + int DBClientCursor::nextBatchSize() { if ( nToReturn == 0 ) return batchSize; if ( batchSize == 0 ) return nToReturn; - + return batchSize < nToReturn ? batchSize : nToReturn; } @@ -41,7 +41,8 @@ namespace mongo { Message toSend; if ( !cursorId ) { assembleRequest( ns, query, nextBatchSize() , nToSkip, fieldsToReturn, opts, toSend ); - } else { + } + else { BufBuilder b; b.appendNum( opts ); b.appendStr( ns ); @@ -49,10 +50,16 @@ namespace mongo { b.appendNum( cursorId ); toSend.setData( dbGetMore, b.buf(), b.len() ); } - if ( !connector->call( toSend, *m, false ) ) + if ( !_client->call( toSend, *m, false ) ) { + // log msg temp? + log() << "DBClientCursor::init call() failed" << endl; return false; - if ( m->empty() ) + } + if ( m->empty() ) { + // log msg temp? 
+ log() << "DBClientCursor::init message from call() was empty" << endl; return false; + } dataReceived(); return true; } @@ -60,7 +67,7 @@ namespace mongo { void DBClientCursor::requestMore() { assert( cursorId && pos == nReturned ); - if (haveLimit){ + if (haveLimit) { nToReturn -= nReturned; assert(nToReturn > 0); } @@ -69,13 +76,13 @@ namespace mongo { b.appendStr(ns); b.appendNum(nextBatchSize()); b.appendNum(cursorId); - + Message toSend; toSend.setData(dbGetMore, b.buf(), b.len()); auto_ptr response(new Message()); - - if ( connector ){ - connector->call( toSend, *response ); + + if ( _client ) { + _client->call( toSend, *response ); m = response; dataReceived(); } @@ -83,10 +90,10 @@ namespace mongo { assert( _scopedHost.size() ); ScopedDbConnection conn( _scopedHost ); conn->call( toSend , *response ); - connector = conn.get(); + _client = conn.get(); m = response; dataReceived(); - connector = 0; + _client = 0; conn.done(); } } @@ -96,8 +103,8 @@ namespace mongo { assert( cursorId && pos == nReturned ); assert( !haveLimit ); auto_ptr response(new Message()); - assert( connector ); - connector->recv(*response); + assert( _client ); + _client->recv(*response); m = response; dataReceived(); } @@ -105,7 +112,7 @@ namespace mongo { void DBClientCursor::dataReceived() { QueryResult *qr = (QueryResult *) m->singleData(); resultFlags = qr->resultFlags(); - + if ( qr->resultFlags() & ResultFlag_CursorNotFound ) { // cursor id no longer valid at the server. assert( qr->cursorId == 0 ); @@ -113,7 +120,7 @@ namespace mongo { if ( ! ( opts & QueryOption_CursorTailable ) ) throw UserException( 13127 , "getMore: cursor didn't exist on server, possible restart or timeout?" ); } - + if ( cursorId == 0 || ! ( opts & QueryOption_CursorTailable ) ) { // only set initially: we don't want to kill it on end of data // if it's a tailable cursor @@ -124,7 +131,7 @@ namespace mongo { pos = 0; data = qr->data(); - connector->checkResponse( data, nReturned ); + _client->checkResponse( data, nReturned ); /* this assert would fire the way we currently work: assert( nReturned || cursorId == 0 ); */ @@ -136,7 +143,7 @@ namespace mongo { if ( !_putBack.empty() ) return true; - + if (haveLimit && pos >= nToReturn) return false; @@ -171,7 +178,7 @@ namespace mongo { int m = atMost; /* - for( stack::iterator i = _putBack.begin(); i != _putBack.end(); i++ ) { + for( stack::iterator i = _putBack.begin(); i != _putBack.end(); i++ ) { if( m == 0 ) return; v.push_back(*i); @@ -190,13 +197,22 @@ namespace mongo { v.push_back(o); } } - - void DBClientCursor::attach( AScopedConnection * conn ){ + + void DBClientCursor::attach( AScopedConnection * conn ) { assert( _scopedHost.size() == 0 ); - assert( conn->get()->isMember( connector ) ); - _scopedHost = conn->getHost(); + assert( conn ); + assert( conn->get() ); + + if ( conn->get()->type() == ConnectionString::SET || + conn->get()->type() == ConnectionString::SYNC ) { + _scopedHost = _client->getServerAddress(); + } + else { + _scopedHost = conn->getHost(); + } + conn->done(); - connector = 0; + _client = 0; } DBClientCursor::~DBClientCursor() { @@ -205,28 +221,28 @@ namespace mongo { DESTRUCTOR_GUARD ( - if ( cursorId && _ownCursor ) { - BufBuilder b; - b.appendNum( (int)0 ); // reserved - b.appendNum( (int)1 ); // number - b.appendNum( cursorId ); - - Message m; - m.setData( dbKillCursors , b.buf() , b.len() ); - - if ( connector ){ - connector->sayPiggyBack( m ); - } - else { - assert( _scopedHost.size() ); - ScopedDbConnection conn( _scopedHost ); - conn->sayPiggyBack( 
m ); - conn.done(); - } + if ( cursorId && _ownCursor ) { + BufBuilder b; + b.appendNum( (int)0 ); // reserved + b.appendNum( (int)1 ); // number + b.appendNum( cursorId ); + + Message m; + m.setData( dbKillCursors , b.buf() , b.len() ); + + if ( _client ) { + _client->sayPiggyBack( m ); + } + else { + assert( _scopedHost.size() ); + ScopedDbConnection conn( _scopedHost ); + conn->sayPiggyBack( m ); + conn.done(); } + } ); } - + } // namespace mongo diff --git a/client/dbclientcursor.h b/client/dbclientcursor.h index 51cdc13..5d795f4 100644 --- a/client/dbclientcursor.h +++ b/client/dbclientcursor.h @@ -1,4 +1,4 @@ -// file dbclientcursor.h +// file dbclientcursor.h /* Copyright 2009 10gen Inc. * @@ -24,41 +24,55 @@ #include namespace mongo { - + class AScopedConnection; - - /** Queries return a cursor object */ - class DBClientCursor : boost::noncopyable { + + /** for mock purposes only -- do not create variants of DBClientCursor, nor hang code here */ + class DBClientCursorInterface { public: - /** If true, safe to call next(). Requests more from server if necessary. */ + virtual ~DBClientCursorInterface() {} + + virtual bool more() = 0; + virtual BSONObj next() = 0; + + // TODO bring more of the DBClientCursor interface to here + + protected: + DBClientCursorInterface() {} + }; + + /** Queries return a cursor object */ + class DBClientCursor : public DBClientCursorInterface { + public: + /** If true, safe to call next(). Requests more from server if necessary. */ bool more(); - /** If true, there is more in our local buffers to be fetched via next(). Returns - false when a getMore request back to server would be required. You can use this - if you want to exhaust whatever data has been fetched to the client already but + /** If true, there is more in our local buffers to be fetched via next(). Returns + false when a getMore request back to server would be required. You can use this + if you want to exhaust whatever data has been fetched to the client already but then perhaps stop. */ int objsLeftInBatch() const { _assertIfNull(); return _putBack.size() + nReturned - pos; } bool moreInCurrentBatch() { return objsLeftInBatch() > 0; } /** next - @return next object in the result cursor. + @return next object in the result cursor. on an error at the remote server, you will get back: { $err: } if you do not want to handle that yourself, call nextSafe(). */ BSONObj next(); - - /** + + /** restore an object previously returned by next() to the cursor */ void putBack( const BSONObj &o ) { _putBack.push( o.getOwned() ); } - /** throws AssertionException if get back { $err : ... } */ + /** throws AssertionException if get back { $err : ... } */ BSONObj nextSafe() { BSONObj o = next(); BSONElement e = o.firstElement(); - if( strcmp(e.fieldName(), "$err") == 0 ) { + if( strcmp(e.fieldName(), "$err") == 0 ) { if( logLevel >= 5 ) log() << "nextSafe() error " << o.toString() << endl; uassert(13106, "nextSafe(): " + o.toString(), false); @@ -67,7 +81,7 @@ namespace mongo { } /** peek ahead at items buffered for future next() calls. - never requests new data from the server. so peek only effective + never requests new data from the server. so peek only effective with what is already buffered. WARNING: no support for _putBack yet! 
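For orientation, typical iteration over a query result with this class looks roughly like the following, assuming c is a connected client and test.people is a made-up namespace:

    auto_ptr<DBClientCursor> cur = c.query( "test.people" , QUERY( "age" << GT << 21 ) );
    while ( cur->more() ) {
        BSONObj p = cur->nextSafe();   // throws if the server returned { $err : ... }
        cout << p.toString() << endl;
    }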
*/ @@ -76,9 +90,9 @@ namespace mongo { /** iterate the rest of the cursor and return the number if items */ - int itcount(){ + int itcount() { int c = 0; - while ( more() ){ + while ( more() ) { next(); c++; } @@ -97,48 +111,48 @@ namespace mongo { bool tailable() const { return (opts & QueryOption_CursorTailable) != 0; } - - /** see ResultFlagType (constants.h) for flag values - mostly these flags are for internal purposes - + + /** see ResultFlagType (constants.h) for flag values + mostly these flags are for internal purposes - ResultFlag_ErrSet is the possible exception to that */ - bool hasResultFlag( int flag ){ + bool hasResultFlag( int flag ) { _assertIfNull(); return (resultFlags & flag) != 0; } - DBClientCursor( DBConnector *_connector, const string &_ns, BSONObj _query, int _nToReturn, + DBClientCursor( DBClientBase* client, const string &_ns, BSONObj _query, int _nToReturn, int _nToSkip, const BSONObj *_fieldsToReturn, int queryOptions , int bs ) : - connector(_connector), - ns(_ns), - query(_query), - nToReturn(_nToReturn), - haveLimit( _nToReturn > 0 && !(queryOptions & QueryOption_CursorTailable)), - nToSkip(_nToSkip), - fieldsToReturn(_fieldsToReturn), - opts(queryOptions), - batchSize(bs==1?2:bs), - m(new Message()), - cursorId(), - nReturned(), - pos(), - data(), - _ownCursor( true ){ + _client(client), + ns(_ns), + query(_query), + nToReturn(_nToReturn), + haveLimit( _nToReturn > 0 && !(queryOptions & QueryOption_CursorTailable)), + nToSkip(_nToSkip), + fieldsToReturn(_fieldsToReturn), + opts(queryOptions), + batchSize(bs==1?2:bs), + m(new Message()), + cursorId(), + nReturned(), + pos(), + data(), + _ownCursor( true ) { + } + + DBClientCursor( DBClientBase* client, const string &_ns, long long _cursorId, int _nToReturn, int options ) : + _client(client), + ns(_ns), + nToReturn( _nToReturn ), + haveLimit( _nToReturn > 0 && !(options & QueryOption_CursorTailable)), + opts( options ), + m(new Message()), + cursorId( _cursorId ), + nReturned(), + pos(), + data(), + _ownCursor( true ) { } - - DBClientCursor( DBConnector *_connector, const string &_ns, long long _cursorId, int _nToReturn, int options ) : - connector(_connector), - ns(_ns), - nToReturn( _nToReturn ), - haveLimit( _nToReturn > 0 && !(options & QueryOption_CursorTailable)), - opts( options ), - m(new Message()), - cursorId( _cursorId ), - nReturned(), - pos(), - data(), - _ownCursor( true ){ - } virtual ~DBClientCursor(); @@ -148,15 +162,15 @@ namespace mongo { message when ~DBClientCursor() is called. This function overrides that. 
*/ void decouple() { _ownCursor = false; } - + void attach( AScopedConnection * conn ); - + private: friend class DBClientBase; friend class DBClientConnection; - bool init(); + bool init(); int nextBatchSize(); - DBConnector *connector; + DBClientBase* _client; string ns; BSONObj query; int nToReturn; @@ -180,8 +194,12 @@ namespace mongo { // Don't call from a virtual function void _assertIfNull() const { uassert(13348, "connection died", this); } + + // non-copyable , non-assignable + DBClientCursor( const DBClientCursor& ); + DBClientCursor& operator=( const DBClientCursor& ); }; - + /** iterate over objects in current batch only - will not cause a network call */ class DBClientCursorBatchIterator { @@ -198,7 +216,7 @@ namespace mongo { DBClientCursor &_c; int _n; }; - + } // namespace mongo #include "undef_macros.h" diff --git a/client/dbclientmockcursor.h b/client/dbclientmockcursor.h new file mode 100644 index 0000000..8d85ff5 --- /dev/null +++ b/client/dbclientmockcursor.h @@ -0,0 +1,40 @@ +//@file dbclientmockcursor.h + +/* Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "dbclientcursor.h" + +namespace mongo { + + class DBClientMockCursor : public DBClientCursorInterface { + public: + DBClientMockCursor( const BSONArray& mockCollection ) : _iter( mockCollection ) {} + virtual ~DBClientMockCursor() {} + + bool more() { return _iter.more(); } + BSONObj next() { return _iter.next().Obj(); } + + private: + BSONObjIterator _iter; + + // non-copyable , non-assignable + DBClientMockCursor( const DBClientMockCursor& ); + DBClientMockCursor& operator=( const DBClientMockCursor& ); + }; + +} // namespace mongo diff --git a/client/distlock.cpp b/client/distlock.cpp index 05e54c0..9ec98ea 100644 --- a/client/distlock.cpp +++ b/client/distlock.cpp @@ -21,23 +21,36 @@ namespace mongo { - string lockPingNS = "config.lockpings"; + static string lockPingNS = "config.lockpings"; + static string locksNS = "config.locks"; ThreadLocalValue distLockIds(""); - - string getDistLockProcess(){ - static string s; - if ( s.empty() ){ - stringstream ss; - ss << getHostNameCached() << ":" << time(0) << ":" << rand(); - s = ss.str(); - } - return s; + + /* ================== + * Module initialization + */ + + boost::once_flag _init = BOOST_ONCE_INIT; + static string* _cachedProcessString = NULL; + + static void initModule() { + // cache process string + stringstream ss; + ss << getHostName() << ":" << time(0) << ":" << rand(); + _cachedProcessString = new string( ss.str() ); } - string getDistLockId(){ + /* =================== */ + + string getDistLockProcess() { + boost::call_once( initModule, _init ); + assert( _cachedProcessString ); + return *_cachedProcessString; + } + + string getDistLockId() { string s = distLockIds.get(); - if ( s.empty() ){ + if ( s.empty() ) { stringstream ss; ss << getDistLockProcess() << ":" << getThreadName() << ":" << rand(); s = ss.str(); @@ -45,50 +58,95 @@ namespace mongo { } return s; } - - void 
distLockPingThread( ConnectionString addr ){ + + void _distLockPingThread( ConnectionString addr ) { setThreadName( "LockPinger" ); + + log() << "creating dist lock ping thread for: " << addr << endl; static int loops = 0; - while( ! inShutdown() ){ + while( ! inShutdown() ) { + + string process = getDistLockProcess(); + log(4) << "dist_lock about to ping for: " << process << endl; + try { ScopedDbConnection conn( addr ); - - // do ping - conn->update( lockPingNS , - BSON( "_id" << getDistLockProcess() ) , + + // refresh the entry corresponding to this process in the lockpings collection + conn->update( lockPingNS , + BSON( "_id" << process ) , BSON( "$set" << BSON( "ping" << DATENOW ) ) , true ); - - - // remove really old entries - BSONObjBuilder f; - f.appendDate( "$lt" , jsTime() - ( 4 * 86400 * 1000 ) ); - BSONObj r = BSON( "ping" << f.obj() ); - conn->remove( lockPingNS , r ); - + string err = conn->getLastError(); + if ( ! err.empty() ) { + warning() << "dist_lock process: " << process << " pinging: " << addr << " failed: " + << err << endl; + conn.done(); + sleepsecs(30); + continue; + } + + // remove really old entries from the lockpings collection if they're not holding a lock + // (this may happen if an instance of a process was taken down and no new instance came up to + // replace it for a quite a while) + // if the lock is taken, the take-over mechanism should handle the situation + auto_ptr c = conn->query( locksNS , BSONObj() ); + vector pids; + while ( c->more() ) { + BSONObj lock = c->next(); + if ( ! lock["process"].eoo() ) { + pids.push_back( lock["process"].valuestrsafe() ); + } + } + + Date_t fourDays = jsTime() - ( 4 * 86400 * 1000 ); // 4 days + conn->remove( lockPingNS , BSON( "_id" << BSON( "$nin" << pids ) << "ping" << LT << fourDays ) ); + err = conn->getLastError(); + if ( ! err.empty() ) { + warning() << "dist_lock cleanup request from process: " << process << " to: " << addr + << " failed: " << err << endl; + conn.done(); + sleepsecs(30); + continue; + } + // create index so remove is fast even with a lot of servers - if ( loops++ == 0 ){ + if ( loops++ == 0 ) { conn->ensureIndex( lockPingNS , BSON( "ping" << 1 ) ); } - + conn.done(); } - catch ( std::exception& e ){ - log( LL_WARNING ) << "couldn't ping: " << e.what() << endl; + catch ( std::exception& e ) { + warning() << "dist_lock exception during ping: " << e.what() << endl; } + + log( loops % 10 == 0 ? 0 : 1) << "dist_lock pinged successfully for: " << process << endl; sleepsecs(30); } } - - + + void distLockPingThread( ConnectionString addr ) { + try { + _distLockPingThread( addr ); + } + catch ( std::exception& e ) { + error() << "unexpected error in distLockPingThread: " << e.what() << endl; + } + catch ( ... 
) { + error() << "unexpected unknown error in distLockPingThread" << endl; + } + } + + class DistributedLockPinger { public: DistributedLockPinger() - : _mutex( "DistributedLockPinger" ){ + : _mutex( "DistributedLockPinger" ) { } - - void got( const ConnectionString& conn ){ + + void got( const ConnectionString& conn ) { string s = conn.toString(); scoped_lock lk( _mutex ); if ( _seen.count( s ) > 0 ) @@ -96,80 +154,121 @@ namespace mongo { boost::thread t( boost::bind( &distLockPingThread , conn ) ); _seen.insert( s ); } - + set _seen; mongo::mutex _mutex; - + } distLockPinger; - + DistributedLock::DistributedLock( const ConnectionString& conn , const string& name , unsigned takeoverMinutes ) - : _conn(conn),_name(name),_takeoverMinutes(takeoverMinutes){ + : _conn(conn),_name(name),_takeoverMinutes(takeoverMinutes) { _id = BSON( "_id" << name ); _ns = "config.locks"; distLockPinger.got( conn ); } - - bool DistributedLock::lock_try( string why , BSONObj * other ){ + + bool DistributedLock::lock_try( string why , BSONObj * other ) { + // write to dummy if 'other' is null + BSONObj dummyOther; + if ( other == NULL ) + other = &dummyOther; + ScopedDbConnection conn( _conn ); - + BSONObjBuilder queryBuilder; queryBuilder.appendElements( _id ); - queryBuilder.append( "state" , 0 ); + queryBuilder.append( "state" , 0 ); - { // make sure its there so we can use simple update logic below - BSONObj o = conn->findOne( _ns , _id ); - if ( o.isEmpty() ){ + { + // make sure its there so we can use simple update logic below + BSONObj o = conn->findOne( _ns , _id ).getOwned(); + if ( o.isEmpty() ) { try { + log(4) << "dist_lock inserting initial doc in " << _ns << " for lock " << _name << endl; conn->insert( _ns , BSON( "_id" << _name << "state" << 0 << "who" << "" ) ); } - catch ( UserException& ){ + catch ( UserException& e ) { + log() << "dist_lock could not insert initial doc: " << e << endl; } } - else if ( o["state"].numberInt() > 0 ){ + + else if ( o["state"].numberInt() > 0 ) { BSONObj lastPing = conn->findOne( lockPingNS , o["process"].wrap( "_id" ) ); - if ( lastPing.isEmpty() ){ - // TODO: maybe this should clear, not sure yet - log() << "lastPing is empty! this could be bad: " << o << endl; + if ( lastPing.isEmpty() ) { + // if a lock is taken but there's no ping for it, we're in an inconsistent situation + // if the lock holder (mongos or d) does not exist anymore, the lock could safely be removed + // but we'd require analysis of the situation before a manual intervention + error() << "config.locks: " << _name << " lock is taken by old process? 
" + << "remove the following lock if the process is not active anymore: " << o << endl; + *other = o; conn.done(); return false; } - unsigned long long elapsed = jsTime() - lastPing["ping"].Date(); // in ms - elapsed = elapsed / ( 1000 * 60 ); // convert to minutes - - if ( elapsed <= _takeoverMinutes ){ - log(1) << "dist_lock lock failed because taken by: " << o << endl; + unsigned long long now = jsTime(); + unsigned long long pingTime = lastPing["ping"].Date(); + + if ( now < pingTime ) { + // clock skew + warning() << "dist_lock has detected clock skew of " << ( pingTime - now ) << "ms" << endl; + *other = o; conn.done(); return false; } + unsigned long long elapsed = now - pingTime; + elapsed = elapsed / ( 1000 * 60 ); // convert to minutes + + if ( elapsed > ( 60 * 24 * 365 * 100 ) /* 100 years */ ) { + warning() << "distlock elapsed time seems impossible: " << lastPing << endl; + } + + if ( elapsed <= _takeoverMinutes ) { + log(1) << "dist_lock lock failed because taken by: " << o << " elapsed minutes: " << elapsed << endl; + *other = o; + conn.done(); + return false; + } + log() << "dist_lock forcefully taking over from: " << o << " elapsed minutes: " << elapsed << endl; conn->update( _ns , _id , BSON( "$set" << BSON( "state" << 0 ) ) ); + string err = conn->getLastError(); + if ( ! err.empty() ) { + warning() << "dist_lock take over from: " << o << " failed: " << err << endl; + *other = o.getOwned(); + other->getOwned(); + conn.done(); + return false; + } + } - else if ( o["ts"].type() ){ + else if ( o["ts"].type() ) { queryBuilder.append( o["ts"] ); } } - + OID ts; ts.init(); bool gotLock = false; BSONObj now; - - BSONObj whatIWant = BSON( "$set" << BSON( "state" << 1 << - "who" << getDistLockId() << "process" << getDistLockProcess() << - "when" << DATENOW << "why" << why << "ts" << ts ) ); + + BSONObj lockDetails = BSON( "state" << 1 << "who" << getDistLockId() << "process" << getDistLockProcess() << + "when" << DATENOW << "why" << why << "ts" << ts ); + BSONObj whatIWant = BSON( "$set" << lockDetails ); try { + log(4) << "dist_lock about to aquire lock: " << lockDetails << endl; + conn->update( _ns , queryBuilder.obj() , whatIWant ); - + BSONObj o = conn->getLastErrorDetailed(); now = conn->findOne( _ns , _id ); - - if ( o["n"].numberInt() == 0 ){ - if ( other ) - *other = now; + + if ( o["n"].numberInt() == 0 ) { + *other = now; + other->getOwned(); + log() << "dist_lock error trying to aquire lock: " << lockDetails << " error: " << o << endl; gotLock = false; } else { @@ -177,40 +276,40 @@ namespace mongo { } } - catch ( UpdateNotTheSame& up ){ + catch ( UpdateNotTheSame& up ) { // this means our update got through on some, but not others + log(4) << "dist_lock lock did not propagate properly" << endl; - for ( unsigned i=0; ifindOne( _ns , _id ); - if ( now.isEmpty() || now["ts"] < temp2["ts"] ){ + if ( now.isEmpty() || now["ts"] < temp2["ts"] ) { now = temp2.getOwned(); } temp.done(); } - if ( now["ts"].OID() == ts ){ + if ( now["ts"].OID() == ts ) { + log(4) << "dist_lock completed lock propagation" << endl; gotLock = true; conn->update( _ns , _id , whatIWant ); } else { + log() << "dist_lock error trying to complete propagation" << endl; gotLock = false; } } - + conn.done(); - - log(1) << "dist_lock lock gotLock: " << gotLock << " now: " << now << endl; - if ( ! 
gotLock ) - return false; - - return true; + log(2) << "dist_lock lock gotLock: " << gotLock << " now: " << now << endl; + + return gotLock; } - void DistributedLock::unlock(){ + void DistributedLock::unlock() { const int maxAttempts = 3; int attempted = 0; while ( ++attempted <= maxAttempts ) { @@ -218,22 +317,23 @@ namespace mongo { try { ScopedDbConnection conn( _conn ); conn->update( _ns , _id, BSON( "$set" << BSON( "state" << 0 ) ) ); - log(1) << "dist_lock unlock: " << conn->findOne( _ns , _id ) << endl; + log(2) << "dist_lock unlock: " << conn->findOne( _ns , _id ) << endl; conn.done(); return; - - } catch ( std::exception& e) { - log( LL_WARNING ) << "dist_lock " << _name << " failed to contact config server in unlock attempt " + + } + catch ( std::exception& e) { + log( LL_WARNING ) << "dist_lock " << _name << " failed to contact config server in unlock attempt " << attempted << ": " << e.what() << endl; sleepsecs(1 << attempted); } } - log( LL_WARNING ) << "dist_lock couldn't consumate unlock request. " << "Lock " << _name - << " will be taken over after " << _takeoverMinutes << " minutes timeout" << endl; + log( LL_WARNING ) << "dist_lock couldn't consumate unlock request. " << "Lock " << _name + << " will be taken over after " << _takeoverMinutes << " minutes timeout" << endl; } } diff --git a/client/distlock.h b/client/distlock.h index 8a77338..753a241 100644 --- a/client/distlock.h +++ b/client/distlock.h @@ -15,10 +15,7 @@ * limitations under the License. */ - -/** - * distributed locking mechanism - */ +#pragma once #include "../pch.h" #include "dbclient.h" @@ -28,53 +25,71 @@ namespace mongo { + /** + * The distributed lock is a configdb backed way of synchronizing system-wide tasks. A task must be identified by a + * unique name across the system (e.g., "balancer"). A lock is taken by writing a document in the configdb's locks + * collection with that name. + * + * To be maintained, each taken lock needs to be revalidaded ("pinged") within a pre-established amount of time. This + * class does this maintenance automatically once a DistributedLock object was constructed. + */ class DistributedLock { public: /** - * @param takeoverMinutes how long before we steal lock in minutes + * The constructor does not connect to the configdb yet and constructing does not mean the lock was acquired. + * Construction does trigger a lock "pinging" mechanism, though. + * + * @param conn address of config(s) server(s) + * @param name identifier for the lock + * @param takeoverMinutes how long can the log go "unpinged" before a new attempt to lock steals it (in minutes) */ - DistributedLock( const ConnectionString& conn , const string& name , unsigned takeoverMinutes = 10 ); + DistributedLock( const ConnectionString& conn , const string& name , unsigned takeoverMinutes = 15 ); + /** + * Attempts to aquire 'this' lock, checking if it could or should be stolen from the previous holder. Please + * consider using the dist_lock_try construct to acquire this lock in an exception safe way. + * + * @param why human readable description of why the lock is being taken (used to log) + * @param other configdb's lock document that is currently holding the lock, if lock is taken + * @return true if it managed to grab the lock + */ bool lock_try( string why , BSONObj * other = 0 ); + + /** + * Releases a previously taken lock. 
+ */ void unlock(); private: ConnectionString _conn; string _name; unsigned _takeoverMinutes; - + string _ns; BSONObj _id; }; - + class dist_lock_try { public: - dist_lock_try( DistributedLock * lock , string why ) - : _lock(lock){ + : _lock(lock) { _got = _lock->lock_try( why , &_other ); } - ~dist_lock_try(){ - if ( _got ){ + ~dist_lock_try() { + if ( _got ) { _lock->unlock(); } } - bool got() const { - return _got; - } + bool got() const { return _got; } + BSONObj other() const { return _other; } - BSONObj other() const { - return _other; - } - private: DistributedLock * _lock; bool _got; BSONObj _other; - }; } diff --git a/client/distlock_test.cpp b/client/distlock_test.cpp index 0879b6e..83d143f 100644 --- a/client/distlock_test.cpp +++ b/client/distlock_test.cpp @@ -21,60 +21,84 @@ #include "../db/commands.h" namespace mongo { - + class TestDistLockWithSync : public Command { public: - TestDistLockWithSync() : Command( "_testDistLockWithSyncCluster" ){} + TestDistLockWithSync() : Command( "_testDistLockWithSyncCluster" ) {} virtual void help( stringstream& help ) const { help << "should not be calling this directly" << endl; } - + virtual bool slaveOk() const { return false; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } - static void runThread(){ - for ( int i=0; i<1000; i++ ){ - if ( current->lock_try( "test" ) ){ - gotit++; - for ( int j=0; j<2000; j++ ){ - count++; + static void runThread() { + while ( keepGoing ) { + if ( current->lock_try( "test" ) ) { + count++; + int before = count; + sleepmillis( 3 ); + int after = count; + + if ( after != before ) { + error() << " before: " << before << " after: " << after << endl; } + current->unlock(); } } } - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + Timer t; DistributedLock lk( ConnectionString( cmdObj["host"].String() , ConnectionString::SYNC ), "testdistlockwithsync" ); current = &lk; count = 0; gotit = 0; + errors = 0; + keepGoing = true; vector > l; - for ( int i=0; i<4; i++ ){ + for ( int i=0; i<4; i++ ) { l.push_back( shared_ptr( new boost::thread( runThread ) ) ); } + int secs = 10; + if ( cmdObj["secs"].isNumber() ) + secs = cmdObj["secs"].numberInt(); + sleepsecs( secs ); + keepGoing = false; + for ( unsigned i=0; ijoin(); + current = 0; + result.append( "count" , count ); result.append( "gotit" , gotit ); - current = 0; - return count == gotit * 2000; + result.append( "errors" , errors ); + result.append( "timeMS" , t.millis() ); + + return errors == 0; } + // variables for test static DistributedLock * current; - static int count; static int gotit; + static int errors; + static AtomicUInt count; + + static bool keepGoing; } testDistLockWithSyncCmd; DistributedLock * TestDistLockWithSync::current; - int TestDistLockWithSync::count; + AtomicUInt TestDistLockWithSync::count; int TestDistLockWithSync::gotit; + int TestDistLockWithSync::errors; + bool TestDistLockWithSync::keepGoing; } diff --git a/client/examples/authTest.cpp b/client/examples/authTest.cpp index 77ce12d..71cdd39 100644 --- a/client/examples/authTest.cpp +++ b/client/examples/authTest.cpp @@ -22,7 +22,7 @@ using namespace mongo; int main( int argc, const char **argv ) { - + const char *port = "27017"; if ( argc != 1 ) { if ( argc != 3 ) @@ -37,17 +37,18 @@ int main( int argc, const char **argv ) { throw -11; } - { 
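As the header above suggests, callers normally wrap the lock in dist_lock_try instead of pairing lock_try()/unlock() by hand, so the lock is released even if the guarded code throws. A minimal usage sketch; the config server hosts and the "balancer" lock name are illustrative:

    DistributedLock balancerLock( ConnectionString( "cfg1,cfg2,cfg3" , ConnectionString::SYNC ) ,
                                  "balancer" );

    {
        dist_lock_try lk( &balancerLock , "doing a balancing round" );
        if ( lk.got() ) {
            // ... protected work; ~dist_lock_try() unlocks even on exception ...
        }
        else {
            log() << "balancer lock busy, held by: " << lk.other() << endl;
        }
    }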
// clean up old data from any previous tests + { + // clean up old data from any previous tests conn.remove( "test.system.users" , BSONObj() ); } conn.insert( "test.system.users" , BSON( "user" << "eliot" << "pwd" << conn.createPasswordDigest( "eliot" , "bar" ) ) ); - + errmsg.clear(); bool ok = conn.auth( "test" , "eliot" , "bar" , errmsg ); if ( ! ok ) cout << errmsg << endl; - assert( ok ); + MONGO_assert( ok ); - assert( ! conn.auth( "test" , "eliot" , "bars" , errmsg ) ); + MONGO_assert( ! conn.auth( "test" , "eliot" , "bars" , errmsg ) ); } diff --git a/client/examples/clientTest.cpp b/client/examples/clientTest.cpp index 83a556a..bd4432e 100644 --- a/client/examples/clientTest.cpp +++ b/client/examples/clientTest.cpp @@ -19,9 +19,14 @@ * a simple test for the c++ driver */ +// this header should be first to ensure that it includes cleanly in any context +#include "client/dbclient.h" + #include -#include "client/dbclient.h" +#ifndef assert +# define assert(x) MONGO_assert(x) +#endif using namespace std; using namespace mongo; @@ -125,12 +130,14 @@ int main( int argc, const char **argv ) { } - { // ensure index + { + // ensure index assert( conn.ensureIndex( ns , BSON( "name" << 1 ) ) ); assert( ! conn.ensureIndex( ns , BSON( "name" << 1 ) ) ); } - { // hint related tests + { + // hint related tests assert( conn.findOne(ns, "{}")["name"].str() == "sara" ); assert( conn.findOne(ns, "{ name : 'eliot' }")["name"].str() == "eliot" ); @@ -141,7 +148,7 @@ int main( int argc, const char **argv ) { try { conn.findOne(ns, Query("{name:\"eliot\"}").hint("{foo:1}")); } - catch ( ... ){ + catch ( ... ) { asserted = true; } assert( asserted ); @@ -153,7 +160,8 @@ int main( int argc, const char **argv ) { assert( conn.validate( ns ) ); } - { // timestamp test + { + // timestamp test const char * tsns = "test.tstest1"; conn.dropCollection( tsns ); @@ -185,32 +193,33 @@ int main( int argc, const char **argv ) { ( oldTime == found["ts"].timestampTime() && oldInc < found["ts"].timestampInc() ) ); } - - { // check that killcursors doesn't affect last error + + { + // check that killcursors doesn't affect last error assert( conn.getLastError().empty() ); - + BufBuilder b; b.appendNum( (int)0 ); // reserved b.appendNum( (int)-1 ); // invalid # of cursors triggers exception b.appendNum( (int)-1 ); // bogus cursor id - + Message m; m.setData( dbKillCursors, b.buf(), b.len() ); - + // say() is protected in DBClientConnection, so get superclass static_cast< DBConnector* >( &conn )->say( m ); - + assert( conn.getLastError().empty() ); } { list l = conn.getDatabaseNames(); - for ( list::iterator i = l.begin(); i != l.end(); i++ ){ + for ( list::iterator i = l.begin(); i != l.end(); i++ ) { cout << "db name : " << *i << endl; } l = conn.getCollectionNames( "test" ); - for ( list::iterator i = l.begin(); i != l.end(); i++ ){ + for ( list::iterator i = l.begin(); i != l.end(); i++ ) { cout << "coll name : " << *i << endl; } } diff --git a/client/examples/first.cpp b/client/examples/first.cpp index f3b654f..ab5efb3 100644 --- a/client/examples/first.cpp +++ b/client/examples/first.cpp @@ -40,7 +40,7 @@ int main( int argc, const char **argv ) { throw -12; port = argv[ 2 ]; } - + mongo::DBClientConnection conn; string errmsg; if ( ! 
conn.connect( string( "127.0.0.1:" ) + port , errmsg ) ) { @@ -48,14 +48,15 @@ int main( int argc, const char **argv ) { throw -11; } - { // clean up old data from any previous tests + { + // clean up old data from any previous tests mongo::BSONObjBuilder query; conn.remove( "test.people" , query.obj() ); } insert( conn , "eliot" , 15 ); insert( conn , "sara" , 23 ); - + { mongo::BSONObjBuilder query; auto_ptr cursor = conn.query( "test.people" , query.obj() ); @@ -66,14 +67,14 @@ int main( int argc, const char **argv ) { } } - + { mongo::BSONObjBuilder query; query.append( "name" , "eliot" ); mongo::BSONObj res = conn.findOne( "test.people" , query.obj() ); cout << res.isEmpty() << "\t" << res.jsonString() << endl; } - + { mongo::BSONObjBuilder query; query.append( "name" , "asd" ); diff --git a/client/examples/httpClientTest.cpp b/client/examples/httpClientTest.cpp index 5d6c429..4fa5fd8 100644 --- a/client/examples/httpClientTest.cpp +++ b/client/examples/httpClientTest.cpp @@ -23,7 +23,7 @@ using namespace mongo; int main( int argc, const char **argv ) { - + int port = 27017; if ( argc != 1 ) { if ( argc != 3 ) @@ -31,13 +31,13 @@ int main( int argc, const char **argv ) { port = atoi( argv[ 2 ] ); } port += 1000; - + stringstream ss; ss << "http://localhost:" << port << "/"; string url = ss.str(); - + cout << "[" << url << "]" << endl; HttpClient c; - assert( c.get( url ) == 200 ); + MONGO_assert( c.get( url ) == 200 ); } diff --git a/client/examples/rs.cpp b/client/examples/rs.cpp new file mode 100644 index 0000000..7813ec6 --- /dev/null +++ b/client/examples/rs.cpp @@ -0,0 +1,58 @@ +// rs.cpp + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * example of using replica sets from c++ + */ + +#include "client/dbclient.h" +#include + +using namespace mongo; +using namespace std; + +int main( int argc , const char ** argv ) { + string errmsg; + ConnectionString cs = ConnectionString::parse( "foo/127.0.0.1" , errmsg ); + if ( ! cs.isValid() ) { + cout << "error parsing url: " << errmsg << endl; + return 1; + } + + DBClientReplicaSet * conn = (DBClientReplicaSet*)cs.connect( errmsg ); + if ( ! 
conn ) { + cout << "error connecting: " << errmsg << endl; + return 2; + } + + string collName = "test.rs1"; + + conn->dropCollection( collName ); + while ( true ) { + try { + conn->update( collName , BSONObj() , BSON( "$inc" << BSON( "x" << 1 ) ) , true ); + cout << conn->findOne( collName , BSONObj() ) << endl; + cout << "\t A" << conn->slaveConn().findOne( collName , BSONObj() , 0 , QueryOption_SlaveOk ) << endl; + cout << "\t B " << conn->findOne( collName , BSONObj() , 0 , QueryOption_SlaveOk ) << endl; + } + catch ( std::exception& e ) { + cout << "ERROR: " << e.what() << endl; + } + sleepsecs( 1 ); + } + +} diff --git a/client/examples/second.cpp b/client/examples/second.cpp index 68eafaa..6cc2111 100644 --- a/client/examples/second.cpp +++ b/client/examples/second.cpp @@ -23,7 +23,7 @@ using namespace std; using namespace mongo; int main( int argc, const char **argv ) { - + const char *port = "27017"; if ( argc != 1 ) { if ( argc != 3 ) diff --git a/client/examples/tail.cpp b/client/examples/tail.cpp index 3738b4f..90e62d2 100644 --- a/client/examples/tail.cpp +++ b/client/examples/tail.cpp @@ -23,24 +23,24 @@ using namespace mongo; void tail(DBClientBase& conn, const char *ns) { - BSONElement lastId = minKey.firstElement(); - Query query = Query(); - - auto_ptr c = - conn.query(ns, query, 0, 0, 0, QueryOption_CursorTailable); - - while( 1 ) { - if( !c->more() ) { - if( c->isDead() ) { - break; // we need to requery - } - - // all data (so far) exhausted, wait for more - sleepsecs(1); - continue; - } - BSONObj o = c->next(); - lastId = o["_id"]; - cout << o.toString() << endl; - } + BSONElement lastId = minKey.firstElement(); + Query query = Query(); + + auto_ptr c = + conn.query(ns, query, 0, 0, 0, QueryOption_CursorTailable); + + while( 1 ) { + if( !c->more() ) { + if( c->isDead() ) { + break; // we need to requery + } + + // all data (so far) exhausted, wait for more + sleepsecs(1); + continue; + } + BSONObj o = c->next(); + lastId = o["_id"]; + cout << o.toString() << endl; + } } diff --git a/client/examples/tutorial.cpp b/client/examples/tutorial.cpp index 28e1b27..3cdf359 100644 --- a/client/examples/tutorial.cpp +++ b/client/examples/tutorial.cpp @@ -23,45 +23,45 @@ using namespace mongo; void printIfAge(DBClientConnection& c, int age) { - auto_ptr cursor = c.query("tutorial.persons", QUERY( "age" << age ).sort("name") ); - while( cursor->more() ) { - BSONObj p = cursor->next(); - cout << p.getStringField("name") << endl; - } + auto_ptr cursor = c.query("tutorial.persons", QUERY( "age" << age ).sort("name") ); + while( cursor->more() ) { + BSONObj p = cursor->next(); + cout << p.getStringField("name") << endl; + } } void run() { - DBClientConnection c; - c.connect("localhost"); //"192.168.58.1"); - cout << "connected ok" << endl; - BSONObj p = BSON( "name" << "Joe" << "age" << 33 ); - c.insert("tutorial.persons", p); - p = BSON( "name" << "Jane" << "age" << 40 ); - c.insert("tutorial.persons", p); - p = BSON( "name" << "Abe" << "age" << 33 ); - c.insert("tutorial.persons", p); - p = BSON( "name" << "Samantha" << "age" << 21 << "city" << "Los Angeles" << "state" << "CA" ); - c.insert("tutorial.persons", p); + DBClientConnection c; + c.connect("localhost"); //"192.168.58.1"); + cout << "connected ok" << endl; + BSONObj p = BSON( "name" << "Joe" << "age" << 33 ); + c.insert("tutorial.persons", p); + p = BSON( "name" << "Jane" << "age" << 40 ); + c.insert("tutorial.persons", p); + p = BSON( "name" << "Abe" << "age" << 33 ); + c.insert("tutorial.persons", p); + p = BSON( "name" 
<< "Samantha" << "age" << 21 << "city" << "Los Angeles" << "state" << "CA" ); + c.insert("tutorial.persons", p); - c.ensureIndex("tutorial.persons", fromjson("{age:1}")); + c.ensureIndex("tutorial.persons", fromjson("{age:1}")); - cout << "count:" << c.count("tutorial.persons") << endl; + cout << "count:" << c.count("tutorial.persons") << endl; - auto_ptr cursor = c.query("tutorial.persons", BSONObj()); - while( cursor->more() ) { - cout << cursor->next().toString() << endl; - } + auto_ptr cursor = c.query("tutorial.persons", BSONObj()); + while( cursor->more() ) { + cout << cursor->next().toString() << endl; + } - cout << "\nprintifage:\n"; - printIfAge(c, 33); + cout << "\nprintifage:\n"; + printIfAge(c, 33); } -int main() { - try { - run(); - } - catch( DBException &e ) { - cout << "caught " << e.what() << endl; - } - return 0; +int main() { + try { + run(); + } + catch( DBException &e ) { + cout << "caught " << e.what() << endl; + } + return 0; } diff --git a/client/examples/whereExample.cpp b/client/examples/whereExample.cpp index a26d921..ce4174b 100644 --- a/client/examples/whereExample.cpp +++ b/client/examples/whereExample.cpp @@ -23,7 +23,7 @@ using namespace std; using namespace mongo; int main( int argc, const char **argv ) { - + const char *port = "27017"; if ( argc != 1 ) { if ( argc != 3 ) @@ -36,7 +36,7 @@ int main( int argc, const char **argv ) { if ( ! conn.connect( string( "127.0.0.1:" ) + port , errmsg ) ) { cout << "couldn't connect : " << errmsg << endl; throw -11; - } + } const char * ns = "test.where"; @@ -44,9 +44,9 @@ int main( int argc, const char **argv ) { conn.insert( ns , BSON( "name" << "eliot" << "num" << 17 ) ); conn.insert( ns , BSON( "name" << "sara" << "num" << 24 ) ); - + auto_ptr cursor = conn.query( ns , BSONObj() ); - + while ( cursor->more() ) { BSONObj obj = cursor->next(); cout << "\t" << obj.jsonString() << endl; @@ -64,5 +64,5 @@ int main( int argc, const char **argv ) { cout << "\t" << obj.jsonString() << endl; num++; } - assert( num == 1 ); + MONGO_assert( num == 1 ); } diff --git a/client/gridfs.cpp b/client/gridfs.cpp index d740c76..233724a 100644 --- a/client/gridfs.cpp +++ b/client/gridfs.cpp @@ -34,11 +34,11 @@ namespace mongo { const unsigned DEFAULT_CHUNK_SIZE = 256 * 1024; - GridFSChunk::GridFSChunk( BSONObj o ){ + GridFSChunk::GridFSChunk( BSONObj o ) { _data = o; } - GridFSChunk::GridFSChunk( BSONObj fileObject , int chunkNumber , const char * data , int len ){ + GridFSChunk::GridFSChunk( BSONObj fileObject , int chunkNumber , const char * data , int len ) { BSONObjBuilder b; b.appendAs( fileObject["_id"] , "files_id" ); b.append( "n" , chunkNumber ); @@ -47,7 +47,7 @@ namespace mongo { } - GridFS::GridFS( DBClientBase& client , const string& dbName , const string& prefix ) : _client( client ) , _dbName( dbName ) , _prefix( prefix ){ + GridFS::GridFS( DBClientBase& client , const string& dbName , const string& prefix ) : _client( client ) , _dbName( dbName ) , _prefix( prefix ) { _filesNS = dbName + "." + prefix + ".files"; _chunksNS = dbName + "." 
+ prefix + ".chunks"; _chunkSize = DEFAULT_CHUNK_SIZE; @@ -56,7 +56,7 @@ namespace mongo { client.ensureIndex( _chunksNS , BSON( "files_id" << 1 << "n" << 1 ) ); } - GridFS::~GridFS(){ + GridFS::~GridFS() { } @@ -65,7 +65,7 @@ namespace mongo { _chunkSize = size; } - BSONObj GridFS::storeFile( const char* data , size_t length , const string& remoteName , const string& contentType){ + BSONObj GridFS::storeFile( const char* data , size_t length , const string& remoteName , const string& contentType) { char const * const end = data + length; OID id; @@ -73,7 +73,7 @@ namespace mongo { BSONObj idObj = BSON("_id" << id); int chunkNumber = 0; - while (data < end){ + while (data < end) { int chunkLen = MIN(_chunkSize, (unsigned)(end-data)); GridFSChunk c(idObj, chunkNumber, data, chunkLen); _client.insert( _chunksNS.c_str() , c._data ); @@ -86,7 +86,7 @@ namespace mongo { } - BSONObj GridFS::storeFile( const string& fileName , const string& remoteName , const string& contentType){ + BSONObj GridFS::storeFile( const string& fileName , const string& remoteName , const string& contentType) { uassert( 10012 , "file doesn't exist" , fileName == "-" || boost::filesystem::exists( fileName ) ); FILE* fd; @@ -102,12 +102,12 @@ namespace mongo { int chunkNumber = 0; gridfs_offset length = 0; - while (!feof(fd)){ + while (!feof(fd)) { //boost::scoped_arraybuf (new char[_chunkSize+1]); char * buf = new char[_chunkSize+1]; char* bufPos = buf;//.get(); unsigned int chunkLen = 0; // how much in the chunk now - while(chunkLen != _chunkSize && !feof(fd)){ + while(chunkLen != _chunkSize && !feof(fd)) { int readLen = fread(bufPos, 1, _chunkSize - chunkLen, fd); chunkLen += readLen; bufPos += readLen; @@ -125,11 +125,11 @@ namespace mongo { if (fd != stdin) fclose( fd ); - + return insertFile((remoteName.empty() ? fileName : remoteName), id, length, contentType); } - BSONObj GridFS::insertFile(const string& name, const OID& id, gridfs_offset length, const string& contentType){ + BSONObj GridFS::insertFile(const string& name, const OID& id, gridfs_offset length, const string& contentType) { BSONObj res; if ( ! 
_client.runCommand( _dbName.c_str() , BSON( "filemd5" << id << "root" << _prefix ) , res ) ) @@ -143,9 +143,10 @@ namespace mongo { << "md5" << res["md5"] ; - if (length < 1024*1024*1024){ // 2^30 + if (length < 1024*1024*1024) { // 2^30 file << "length" << (int) length; - }else{ + } + else { file << "length" << (long long) length; } @@ -158,9 +159,9 @@ namespace mongo { return ret; } - void GridFS::removeFile( const string& fileName ){ + void GridFS::removeFile( const string& fileName ) { auto_ptr files = _client.query( _filesNS , BSON( "filename" << fileName ) ); - while (files->more()){ + while (files->more()) { BSONObj file = files->next(); BSONElement id = file["_id"]; _client.remove( _filesNS.c_str() , BSON( "_id" << id ) ); @@ -168,38 +169,38 @@ namespace mongo { } } - GridFile::GridFile( GridFS * grid , BSONObj obj ){ + GridFile::GridFile( GridFS * grid , BSONObj obj ) { _grid = grid; _obj = obj; } - GridFile GridFS::findFile( const string& fileName ){ + GridFile GridFS::findFile( const string& fileName ) { return findFile( BSON( "filename" << fileName ) ); }; - GridFile GridFS::findFile( BSONObj query ){ + GridFile GridFS::findFile( BSONObj query ) { query = BSON("query" << query << "orderby" << BSON("uploadDate" << -1)); return GridFile( this , _client.findOne( _filesNS.c_str() , query ) ); } - auto_ptr GridFS::list(){ + auto_ptr GridFS::list() { return _client.query( _filesNS.c_str() , BSONObj() ); } - auto_ptr GridFS::list( BSONObj o ){ + auto_ptr GridFS::list( BSONObj o ) { return _client.query( _filesNS.c_str() , o ); } - BSONObj GridFile::getMetadata(){ + BSONObj GridFile::getMetadata() { BSONElement meta_element = _obj["metadata"]; - if( meta_element.eoo() ){ + if( meta_element.eoo() ) { return BSONObj(); } return meta_element.embeddedObject(); } - GridFSChunk GridFile::getChunk( int n ){ + GridFSChunk GridFile::getChunk( int n ) { _exists(); BSONObjBuilder b; b.appendAs( _obj["_id"] , "files_id" ); @@ -210,12 +211,12 @@ namespace mongo { return GridFSChunk(o); } - gridfs_offset GridFile::write( ostream & out ){ + gridfs_offset GridFile::write( ostream & out ) { _exists(); const int num = getNumChunks(); - for ( int i=0; ifindOne(getNS(), query); conn.done(); - + if ( b.isEmpty() ) return false; - + unserialize(b); _id = b["_id"].wrap().getOwned(); return true; } - void Model::remove( bool safe ){ + void Model::remove( bool safe ) { uassert( 10016 , "_id isn't set - needed for remove()" , _id["_id"].type() ); - + ScopedDbConnection conn( modelServer() ); conn->remove( getNS() , _id ); @@ -46,34 +46,34 @@ namespace mongo { errmsg = conn->getLastError(); conn.done(); - + if ( safe && errmsg.size() ) throw UserException( 9002 , (string)"error on Model::remove: " + errmsg ); } - void Model::save( bool safe ){ + void Model::save( bool safe ) { ScopedDbConnection conn( modelServer() ); BSONObjBuilder b; serialize( b ); - + BSONElement myId; { BSONObjIterator i = b.iterator(); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); - if ( strcmp( e.fieldName() , "_id" ) == 0 ){ + if ( strcmp( e.fieldName() , "_id" ) == 0 ) { myId = e; break; } } } - if ( myId.type() ){ - if ( _id.isEmpty() ){ + if ( myId.type() ) { + if ( _id.isEmpty() ) { _id = myId.wrap(); } - else if ( myId.woCompare( _id.firstElement() ) ){ + else if ( myId.woCompare( _id.firstElement() ) ) { stringstream ss; ss << "_id from serialize and stored differ: "; ss << '[' << myId << "] != "; @@ -82,11 +82,11 @@ namespace mongo { } } - if ( _id.isEmpty() ){ + if ( _id.isEmpty() ) { OID oid; oid.init(); 
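Taken together, the GridFS hunks above give a simple round trip: storeFile() splits the input into chunks in the .chunks collection plus one document in the .files collection, findFile() returns the newest file document for a name, and GridFile::write() streams the chunks back out in order. A usage sketch; the host, file names and the conventional "fs" prefix are illustrative:

    DBClientConnection c;
    c.connect( "localhost" );

    GridFS gfs( c , "test" , "fs" );            // test.fs.files / test.fs.chunks
    gfs.storeFile( "/tmp/report.pdf" , "report.pdf" , "application/pdf" );

    GridFile gf = gfs.findFile( "report.pdf" ); // newest upload for that name
    ofstream out( "/tmp/report.copy.pdf" , ios::binary );
    gf.write( out );                            // writes every chunk to the stream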
b.appendOID( "_id" , &oid ); - + BSONObj o = b.obj(); conn->insert( getNS() , o ); _id = o["_id"].wrap().getOwned(); @@ -94,25 +94,25 @@ namespace mongo { log(4) << "inserted new model " << getNS() << " " << o << endl; } else { - if ( myId.eoo() ){ + if ( myId.eoo() ) { myId = _id["_id"]; b.append( myId ); } - + assert( ! myId.eoo() ); BSONObjBuilder qb; qb.append( myId ); - + BSONObj q = qb.obj(); BSONObj o = b.obj(); log(4) << "updated model" << getNS() << " " << q << " " << o << endl; conn->update( getNS() , q , o , true ); - + } - + string errmsg = ""; if ( safe ) errmsg = conn->getLastError(); @@ -123,13 +123,13 @@ namespace mongo { throw UserException( 9003 , (string)"error on Model::save: " + errmsg ); } - BSONObj Model::toObject(){ + BSONObj Model::toObject() { BSONObjBuilder b; serialize( b ); return b.obj(); } - void Model::append( const char * name , BSONObjBuilder& b ){ + void Model::append( const char * name , BSONObjBuilder& b ) { BSONObjBuilder bb( b.subobjStart( name ) ); serialize( bb ); bb.done(); diff --git a/client/model.h b/client/model.h index 108efc0..7dd3143 100644 --- a/client/model.h +++ b/client/model.h @@ -43,16 +43,16 @@ namespace mongo { virtual void unserialize(const BSONObj& from) = 0; virtual BSONObj toObject(); virtual void append( const char * name , BSONObjBuilder& b ); - + virtual string modelServer() = 0; - - /** Load a single object. + + /** Load a single object. @return true if successful. */ virtual bool load(BSONObj& query); virtual void save( bool safe=false ); virtual void remove( bool safe=false ); - + protected: BSONObj _id; }; diff --git a/client/mongo_client_lib.cpp b/client/mongo_client_lib.cpp new file mode 100644 index 0000000..69f801a --- /dev/null +++ b/client/mongo_client_lib.cpp @@ -0,0 +1,66 @@ +/* @file client_lib.cpp + + MongoDB C++ Driver + + Normally one includes dbclient.h, and links against libmongoclient.a, when connecting to MongoDB + from C++. However, if you have a situation where the pre-built library does not work, you can use + this file instead to build all the necessary symbols. To do so, include client_lib.cpp in your + project. + + For example, to build and run simple_client_demo.cpp with GCC and run it: + + g++ -I .. simple_client_demo.cpp mongo_client_lib.cpp -lboost_thread-mt -lboost_filesystem + ./a.out +*/ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../util/md5main.cpp" + +#define MONGO_EXPOSE_MACROS +#include "../pch.h" + +#include "../util/assert_util.cpp" +#include "../util/message.cpp" +#include "../util/util.cpp" +#include "../util/background.cpp" +#include "../util/base64.cpp" +#include "../util/sock.cpp" +#include "../util/log.cpp" +#include "../util/password.cpp" + +#include "../util/concurrency/thread_pool.cpp" +#include "../util/concurrency/vars.cpp" +#include "../util/concurrency/task.cpp" + +#include "connpool.cpp" +#include "syncclusterconnection.cpp" +#include "dbclient.cpp" +#include "clientOnly.cpp" +#include "gridfs.cpp" +#include "dbclientcursor.cpp" + +#include "../db/lasterror.cpp" +#include "../db/json.cpp" +#include "../db/jsobj.cpp" +#include "../db/common.cpp" +#include "../db/nonce.cpp" +#include "../db/commands.cpp" + +extern "C" { +#include "../util/md5.c" +} + diff --git a/client/parallel.cpp b/client/parallel.cpp index 92d1b04..c4905e3 100644 --- a/client/parallel.cpp +++ b/client/parallel.cpp @@ -25,10 +25,10 @@ #include "../s/shard.h" namespace mongo { - + // -------- ClusteredCursor ----------- - - ClusteredCursor::ClusteredCursor( QueryMessage& q ){ + + ClusteredCursor::ClusteredCursor( QueryMessage& q ) { _ns = q.ns; _query = q.query.copy(); _options = q.queryOptions; @@ -41,7 +41,7 @@ namespace mongo { _didInit = false; } - ClusteredCursor::ClusteredCursor( const string& ns , const BSONObj& q , int options , const BSONObj& fields ){ + ClusteredCursor::ClusteredCursor( const string& ns , const BSONObj& q , int options , const BSONObj& fields ) { _ns = ns; _query = q.getOwned(); _options = options; @@ -52,94 +52,112 @@ namespace mongo { _didInit = false; } - ClusteredCursor::~ClusteredCursor(){ + ClusteredCursor::~ClusteredCursor() { _done = true; // just in case } - void ClusteredCursor::init(){ + void ClusteredCursor::init() { if ( _didInit ) return; _didInit = true; _init(); } - - auto_ptr ClusteredCursor::query( const string& server , int num , BSONObj extra , int skipLeft ){ + + auto_ptr ClusteredCursor::query( const string& server , int num , BSONObj extra , int skipLeft ) { uassert( 10017 , "cursor already done" , ! _done ); assert( _didInit ); - + BSONObj q = _query; - if ( ! extra.isEmpty() ){ + if ( ! extra.isEmpty() ) { q = concatQuery( q , extra ); } - ShardConnection conn( server , _ns ); - - if ( conn.setVersion() ){ - conn.done(); - throw StaleConfigException( _ns , "ClusteredCursor::query ShardConnection had to change" , true ); - } - - if ( logLevel >= 5 ){ - log(5) << "ClusteredCursor::query (" << type() << ") server:" << server - << " ns:" << _ns << " query:" << q << " num:" << num - << " _fields:" << _fields << " options: " << _options << endl; - } - - auto_ptr cursor = - conn->query( _ns , q , num , 0 , ( _fields.isEmpty() ? 0 : &_fields ) , _options , _batchSize == 0 ? 0 : _batchSize + skipLeft ); - - assert( cursor.get() ); - - if ( cursor->hasResultFlag( ResultFlag_ShardConfigStale ) ){ + try { + ShardConnection conn( server , _ns ); + + if ( conn.setVersion() ) { + conn.done(); + throw StaleConfigException( _ns , "ClusteredCursor::query ShardConnection had to change" , true ); + } + + if ( logLevel >= 5 ) { + log(5) << "ClusteredCursor::query (" << type() << ") server:" << server + << " ns:" << _ns << " query:" << q << " num:" << num + << " _fields:" << _fields << " options: " << _options << endl; + } + + auto_ptr cursor = + conn->query( _ns , q , num , 0 , ( _fields.isEmpty() ? 0 : &_fields ) , _options , _batchSize == 0 ? 
0 : _batchSize + skipLeft ); + + if ( ! cursor.get() && _options & QueryOption_PartialResults ) { + _done = true; + conn.done(); + return cursor; + } + + massert( 13633 , str::stream() << "error querying server: " << server , cursor.get() ); + + if ( cursor->hasResultFlag( ResultFlag_ShardConfigStale ) ) { + conn.done(); + throw StaleConfigException( _ns , "ClusteredCursor::query" ); + } + + if ( cursor->hasResultFlag( ResultFlag_ErrSet ) ) { + conn.done(); + BSONObj o = cursor->next(); + throw UserException( o["code"].numberInt() , o["$err"].String() ); + } + + + cursor->attach( &conn ); + conn.done(); - throw StaleConfigException( _ns , "ClusteredCursor::query" ); + return cursor; } - - if ( cursor->hasResultFlag( ResultFlag_ErrSet ) ){ - conn.done(); - BSONObj o = cursor->next(); - throw UserException( o["code"].numberInt() , o["$err"].String() ); + catch ( SocketException& e ) { + if ( ! ( _options & QueryOption_PartialResults ) ) + throw e; + _done = true; + return auto_ptr(); } - - - cursor->attach( &conn ); - - conn.done(); - return cursor; } - BSONObj ClusteredCursor::explain( const string& server , BSONObj extra ){ + BSONObj ClusteredCursor::explain( const string& server , BSONObj extra ) { BSONObj q = _query; - if ( ! extra.isEmpty() ){ + if ( ! extra.isEmpty() ) { q = concatQuery( q , extra ); } + BSONObj o; + ShardConnection conn( server , _ns ); - BSONObj o = conn->findOne( _ns , Query( q ).explain() ); + auto_ptr cursor = conn->query( _ns , Query( q ).explain() , abs( _batchSize ) * -1 , 0 , _fields.isEmpty() ? 0 : &_fields ); + if ( cursor.get() && cursor->more() ) + o = cursor->next().getOwned(); conn.done(); return o; } - BSONObj ClusteredCursor::concatQuery( const BSONObj& query , const BSONObj& extraFilter ){ + BSONObj ClusteredCursor::concatQuery( const BSONObj& query , const BSONObj& extraFilter ) { if ( ! query.hasField( "query" ) ) return _concatFilter( query , extraFilter ); BSONObjBuilder b; BSONObjIterator i( query ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); - if ( strcmp( e.fieldName() , "query" ) ){ + if ( strcmp( e.fieldName() , "query" ) ) { b.append( e ); continue; } - + b.append( "query" , _concatFilter( e.embeddedObjectUserCheck() , extraFilter ) ); } return b.obj(); } - - BSONObj ClusteredCursor::_concatFilter( const BSONObj& filter , const BSONObj& extra ){ + + BSONObj ClusteredCursor::_concatFilter( const BSONObj& filter , const BSONObj& extra ) { BSONObjBuilder b; b.appendElements( filter ); b.appendElements( extra ); @@ -147,32 +165,41 @@ namespace mongo { // TODO: should do some simplification here if possibl ideally } - BSONObj ClusteredCursor::explain(){ + BSONObj ClusteredCursor::explain() { + // Note: by default we filter out allPlans and oldPlan in the shell's + // explain() function. If you add any recursive structures, make sure to + // edit the JS to make sure everything gets filtered. 
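concatQuery() above has to handle both shapes a query can arrive in: a bare filter, and the wrapped form where the filter sits under a "query" field next to orderby/explain. What the merge produces, with made-up field names:

    // bare filter: the extra shard filter is simply appended
    BSONObj a = ClusteredCursor::concatQuery( BSON( "age" << 30 ) ,
                                              BSON( "x" << LT << 100 ) );
    // -> { age: 30, x: { $lt: 100 } }

    // wrapped form: only the inner "query" element is merged, orderby is kept
    BSONObj b = ClusteredCursor::concatQuery( BSON( "query" << BSON( "age" << 30 ) <<
                                                    "orderby" << BSON( "name" << 1 ) ) ,
                                              BSON( "x" << LT << 100 ) );
    // -> { query: { age: 30, x: { $lt: 100 } }, orderby: { name: 1 } }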
+ BSONObjBuilder b; b.append( "clusteredType" , type() ); - long long nscanned = 0; - long long nscannedObjects = 0; - long long n = 0; long long millis = 0; double numExplains = 0; - + + map counters; + map > out; { _explain( out ); - + BSONObjBuilder x( b.subobjStart( "shards" ) ); - for ( map >::iterator i=out.begin(); i!=out.end(); ++i ){ + for ( map >::iterator i=out.begin(); i!=out.end(); ++i ) { string shard = i->first; list l = i->second; - BSONArrayBuilder y( x.subarrayStart( shard.c_str() ) ); - for ( list::iterator j=l.begin(); j!=l.end(); ++j ){ + BSONArrayBuilder y( x.subarrayStart( shard ) ); + for ( list::iterator j=l.begin(); j!=l.end(); ++j ) { BSONObj temp = *j; y.append( temp ); - nscanned += temp["nscanned"].numberLong(); - nscannedObjects += temp["nscannedObjects"].numberLong(); - n += temp["n"].numberLong(); + BSONObjIterator k( temp ); + while ( k.more() ) { + BSONElement z = k.next(); + if ( z.fieldName()[0] != 'n' ) + continue; + long long& c = counters[z.fieldName()]; + c += z.numberLong(); + } + millis += temp["millis"].numberLong(); numExplains++; } @@ -181,9 +208,9 @@ namespace mongo { x.done(); } - b.appendNumber( "nscanned" , nscanned ); - b.appendNumber( "nscannedObjects" , nscannedObjects ); - b.appendNumber( "n" , n ); + for ( map::iterator i=counters.begin(); i!=counters.end(); ++i ) + b.appendNumber( i->first , i->second ); + b.appendNumber( "millisTotal" , millis ); b.append( "millisAvg" , (int)((double)millis / numExplains ) ); b.append( "numQueries" , (int)numExplains ); @@ -191,37 +218,37 @@ namespace mongo { return b.obj(); } - + // -------- FilteringClientCursor ----------- FilteringClientCursor::FilteringClientCursor( const BSONObj filter ) - : _matcher( filter ) , _done( true ){ + : _matcher( filter ) , _done( true ) { } FilteringClientCursor::FilteringClientCursor( auto_ptr cursor , const BSONObj filter ) - : _matcher( filter ) , _cursor( cursor ) , _done( cursor.get() == 0 ){ + : _matcher( filter ) , _cursor( cursor ) , _done( cursor.get() == 0 ) { } - - FilteringClientCursor::~FilteringClientCursor(){ + + FilteringClientCursor::~FilteringClientCursor() { } - - void FilteringClientCursor::reset( auto_ptr cursor ){ + + void FilteringClientCursor::reset( auto_ptr cursor ) { _cursor = cursor; _next = BSONObj(); _done = _cursor.get() == 0; } - bool FilteringClientCursor::more(){ + bool FilteringClientCursor::more() { if ( ! _next.isEmpty() ) return true; - + if ( _done ) return false; - + _advance(); return ! _next.isEmpty(); } - - BSONObj FilteringClientCursor::next(){ + + BSONObj FilteringClientCursor::next() { assert( ! _next.isEmpty() ); assert( ! _done ); @@ -231,20 +258,20 @@ namespace mongo { return ret; } - BSONObj FilteringClientCursor::peek(){ + BSONObj FilteringClientCursor::peek() { if ( _next.isEmpty() ) _advance(); return _next; } - - void FilteringClientCursor::_advance(){ + + void FilteringClientCursor::_advance() { assert( _next.isEmpty() ); if ( ! _cursor.get() || _done ) return; - - while ( _cursor->more() ){ + + while ( _cursor->more() ) { _next = _cursor->next(); - if ( _matcher.matches( _next ) ){ + if ( _matcher.matches( _next ) ) { if ( ! 
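FilteringClientCursor above wraps a raw DBClientCursor and only hands back documents matching a Matcher, taking an owned copy when a match is the last document of the current batch. A small usage sketch; the namespace and filter are illustrative:

    DBClientConnection c;
    c.connect( "localhost" );

    // server-side cursor over everything, client-side filter layered on top
    FilteringClientCursor filtered( c.query( "test.users" , BSONObj() ) ,
                                    BSON( "age" << LT << 30 ) );
    while ( filtered.more() )
        cout << filtered.next() << endl;   // only documents with age < 30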
_cursor->moreInCurrentBatch() ) _next = _next.getOwned(); return; @@ -253,53 +280,53 @@ namespace mongo { } _done = true; } - + // -------- SerialServerClusteredCursor ----------- - - SerialServerClusteredCursor::SerialServerClusteredCursor( const set& servers , QueryMessage& q , int sortOrder) : ClusteredCursor( q ){ + + SerialServerClusteredCursor::SerialServerClusteredCursor( const set& servers , QueryMessage& q , int sortOrder) : ClusteredCursor( q ) { for ( set::const_iterator i = servers.begin(); i!=servers.end(); i++ ) _servers.push_back( *i ); - + if ( sortOrder > 0 ) sort( _servers.begin() , _servers.end() ); else if ( sortOrder < 0 ) sort( _servers.rbegin() , _servers.rend() ); - + _serverIndex = 0; _needToSkip = q.ntoskip; } - - bool SerialServerClusteredCursor::more(){ - + + bool SerialServerClusteredCursor::more() { + // TODO: optimize this by sending on first query and then back counting // tricky in case where 1st server doesn't have any after // need it to send n skipped - while ( _needToSkip > 0 && _current.more() ){ + while ( _needToSkip > 0 && _current.more() ) { _current.next(); _needToSkip--; } - + if ( _current.more() ) return true; - - if ( _serverIndex >= _servers.size() ){ + + if ( _serverIndex >= _servers.size() ) { return false; } - + ServerAndQuery& sq = _servers[_serverIndex++]; _current.reset( query( sq._server , 0 , sq._extra ) ); return more(); } - - BSONObj SerialServerClusteredCursor::next(){ + + BSONObj SerialServerClusteredCursor::next() { uassert( 10018 , "no more items" , more() ); return _current.next(); } - void SerialServerClusteredCursor::_explain( map< string,list >& out ){ - for ( unsigned i=0; i<_servers.size(); i++ ){ + void SerialServerClusteredCursor::_explain( map< string,list >& out ) { + for ( unsigned i=0; i<_servers.size(); i++ ) { ServerAndQuery& sq = _servers[i]; list & l = out[sq._server]; l.push_back( explain( sq._server , sq._extra ) ); @@ -307,132 +334,142 @@ namespace mongo { } // -------- ParallelSortClusteredCursor ----------- - - ParallelSortClusteredCursor::ParallelSortClusteredCursor( const set& servers , QueryMessage& q , - const BSONObj& sortKey ) - : ClusteredCursor( q ) , _servers( servers ){ + + ParallelSortClusteredCursor::ParallelSortClusteredCursor( const set& servers , QueryMessage& q , + const BSONObj& sortKey ) + : ClusteredCursor( q ) , _servers( servers ) { _sortKey = sortKey.getOwned(); _needToSkip = q.ntoskip; _finishCons(); } - ParallelSortClusteredCursor::ParallelSortClusteredCursor( const set& servers , const string& ns , - const Query& q , - int options , const BSONObj& fields ) - : ClusteredCursor( ns , q.obj , options , fields ) , _servers( servers ){ + ParallelSortClusteredCursor::ParallelSortClusteredCursor( const set& servers , const string& ns , + const Query& q , + int options , const BSONObj& fields ) + : ClusteredCursor( ns , q.obj , options , fields ) , _servers( servers ) { _sortKey = q.getSort().copy(); _needToSkip = 0; _finishCons(); } - void ParallelSortClusteredCursor::_finishCons(){ + void ParallelSortClusteredCursor::_finishCons() { _numServers = _servers.size(); _cursors = 0; - if ( ! _sortKey.isEmpty() && ! _fields.isEmpty() ){ - // we need to make sure the sort key is in the project - bool isNegative = false; + if ( ! _sortKey.isEmpty() && ! 
_fields.isEmpty() ) { + // we need to make sure the sort key is in the projection + + set sortKeyFields; + _sortKey.getFieldNames(sortKeyFields); + BSONObjBuilder b; + bool isNegative = false; { BSONObjIterator i( _fields ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); b.append( e ); - if ( ! e.trueValue() ) - isNegative = true; - } - } - - { - BSONObjIterator i( _sortKey ); - while ( i.more() ){ - BSONElement e = i.next(); - BSONElement f = _fields.getField( e.fieldName() ); - if ( isNegative ){ - uassert( 13431 , "have to have sort key in projection and removing it" , f.eoo() ); + + string fieldName = e.fieldName(); + + // exact field + bool found = sortKeyFields.erase(fieldName); + + // subfields + set::const_iterator begin = sortKeyFields.lower_bound(fieldName + ".\x00"); + set::const_iterator end = sortKeyFields.lower_bound(fieldName + ".\xFF"); + sortKeyFields.erase(begin, end); + + if ( ! e.trueValue() ) { + uassert( 13431 , "have to have sort key in projection and removing it" , !found && begin == end ); } - else if ( f.eoo() ){ - // add to projection - b.append( e ); + else if (!e.isABSONObj()) { + isNegative = true; } } } - + + if (isNegative) { + for (set::const_iterator it(sortKeyFields.begin()), end(sortKeyFields.end()); it != end; ++it) { + b.append(*it, 1); + } + } + _fields = b.obj(); } } - - void ParallelSortClusteredCursor::_init(){ + + void ParallelSortClusteredCursor::_init() { assert( ! _cursors ); _cursors = new FilteringClientCursor[_numServers]; - + // TODO: parellize int num = 0; - for ( set::iterator i = _servers.begin(); i!=_servers.end(); ++i ){ + for ( set::iterator i = _servers.begin(); i!=_servers.end(); ++i ) { const ServerAndQuery& sq = *i; _cursors[num++].reset( query( sq._server , 0 , sq._extra , _needToSkip ) ); } - + } - - ParallelSortClusteredCursor::~ParallelSortClusteredCursor(){ + + ParallelSortClusteredCursor::~ParallelSortClusteredCursor() { delete [] _cursors; _cursors = 0; } - bool ParallelSortClusteredCursor::more(){ + bool ParallelSortClusteredCursor::more() { - if ( _needToSkip > 0 ){ + if ( _needToSkip > 0 ) { int n = _needToSkip; _needToSkip = 0; - while ( n > 0 && more() ){ + while ( n > 0 && more() ) { BSONObj x = next(); n--; } _needToSkip = n; } - - for ( int i=0; i<_numServers; i++ ){ + + for ( int i=0; i<_numServers; i++ ) { if ( _cursors[i].more() ) return true; } return false; } - - BSONObj ParallelSortClusteredCursor::next(){ + + BSONObj ParallelSortClusteredCursor::next() { BSONObj best = BSONObj(); int bestFrom = -1; - - for ( int i=0; i<_numServers; i++){ + + for ( int i=0; i<_numServers; i++) { if ( ! _cursors[i].more() ) continue; - + BSONObj me = _cursors[i].peek(); - if ( best.isEmpty() ){ + if ( best.isEmpty() ) { best = me; bestFrom = i; continue; } - + int comp = best.woSortOrder( me , _sortKey , true ); if ( comp < 0 ) continue; - + best = me; bestFrom = i; } - + uassert( 10019 , "no more elements" , ! 
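ParallelSortClusteredCursor above sends the query to every server and then merge-sorts the per-server streams on the sort key (its _finishCons() also makes sure the sort fields survive an inclusive projection). A minimal usage sketch; the shard hosts and namespace are illustrative:

    set<ServerAndQuery> servers;
    servers.insert( ServerAndQuery( "shard1.example.net:27018" ) );
    servers.insert( ServerAndQuery( "shard2.example.net:27018" ) );

    ParallelSortClusteredCursor cursor( servers , "test.users" ,
                                        Query( BSONObj() ).sort( BSON( "name" << 1 ) ) );
    cursor.init();                       // required before more()/next()
    while ( cursor.more() )
        cout << cursor.next() << endl;   // globally ordered by name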
best.isEmpty() ); _cursors[bestFrom].next(); - + return best; } - void ParallelSortClusteredCursor::_explain( map< string,list >& out ){ - for ( set::iterator i=_servers.begin(); i!=_servers.end(); ++i ){ + void ParallelSortClusteredCursor::_explain( map< string,list >& out ) { + for ( set::iterator i=_servers.begin(); i!=_servers.end(); ++i ) { const ServerAndQuery& sq = *i; list & l = out[sq._server]; l.push_back( explain( sq._server , sq._extra ) ); @@ -444,39 +481,50 @@ namespace mongo { // ---- Future ----- // ----------------- - Future::CommandResult::CommandResult( const string& server , const string& db , const BSONObj& cmd ){ + Future::CommandResult::CommandResult( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn ) { _server = server; _db = db; _cmd = cmd; + _conn = conn; _done = false; } - bool Future::CommandResult::join(){ + bool Future::CommandResult::join() { _thr->join(); assert( _done ); return _ok; } - void Future::commandThread( shared_ptr res ){ + void Future::commandThread(shared_ptr res) { setThreadName( "future" ); try { - ScopedDbConnection conn( res->_server ); + DBClientBase * conn = res->_conn; + + scoped_ptr myconn; + if ( ! conn ){ + myconn.reset( new ScopedDbConnection( res->_server ) ); + conn = myconn->get(); + } + res->_ok = conn->runCommand( res->_db , res->_cmd , res->_res ); - conn.done(); + + if ( myconn ) + myconn->done(); + } - catch ( std::exception& e ){ + catch ( std::exception& e ) { error() << "Future::commandThread exception: " << e.what() << endl; res->_ok = false; } res->_done = true; } - shared_ptr Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd ){ - shared_ptr res( new Future::CommandResult( server , db , cmd ) ); - res->_thr.reset( new boost::thread( boost::bind( Future::commandThread , res ) ) ); + shared_ptr Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn ) { + shared_ptr res (new Future::CommandResult( server , db , cmd , conn )); + res->_thr.reset( new boost::thread( boost::bind(Future::commandThread, res) ) ); + return res; } - - + } diff --git a/client/parallel.h b/client/parallel.h index 603cfe7..0809376 100644 --- a/client/parallel.h +++ b/client/parallel.h @@ -24,6 +24,7 @@ #include "redef_macros.h" #include "../db/dbmessage.h" #include "../db/matcher.h" +#include "../util/concurrency/mvar.h" namespace mongo { @@ -32,14 +33,14 @@ namespace mongo { */ class ServerAndQuery { public: - ServerAndQuery( const string& server , BSONObj extra = BSONObj() , BSONObj orderObject = BSONObj() ) : - _server( server ) , _extra( extra.getOwned() ) , _orderObject( orderObject.getOwned() ){ + ServerAndQuery( const string& server , BSONObj extra = BSONObj() , BSONObj orderObject = BSONObj() ) : + _server( server ) , _extra( extra.getOwned() ) , _orderObject( orderObject.getOwned() ) { } - bool operator<( const ServerAndQuery& other ) const{ + bool operator<( const ServerAndQuery& other ) const { if ( ! 
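Future::spawnCommand() above runs a command on its own thread so several servers can be queried concurrently; join() waits for the thread and reports whether the command succeeded. A sketch firing the same illustrative command at three config servers and collecting the outcomes:

    vector< shared_ptr<Future::CommandResult> > futures;
    futures.push_back( Future::spawnCommand( "cfg1:27019" , "admin" , BSON( "dbstats" << 1 ) ) );
    futures.push_back( Future::spawnCommand( "cfg2:27019" , "admin" , BSON( "dbstats" << 1 ) ) );
    futures.push_back( Future::spawnCommand( "cfg3:27019" , "admin" , BSON( "dbstats" << 1 ) ) );

    for ( unsigned i = 0; i < futures.size(); i++ ) {
        if ( ! futures[i]->join() )      // blocks until the background thread finishes
            warning() << "command failed on " << futures[i]->getServer() << endl;
    }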
_orderObject.isEmpty() ) return _orderObject.woCompare( other._orderObject ) < 0; - + if ( _server < other._server ) return true; if ( other._server > _server ) @@ -71,28 +72,28 @@ namespace mongo { ClusteredCursor( QueryMessage& q ); ClusteredCursor( const string& ns , const BSONObj& q , int options=0 , const BSONObj& fields=BSONObj() ); virtual ~ClusteredCursor(); - + /** call before using */ void init(); - + virtual bool more() = 0; virtual BSONObj next() = 0; - + static BSONObj concatQuery( const BSONObj& query , const BSONObj& extraFilter ); - + virtual string type() const = 0; virtual BSONObj explain(); protected: - + virtual void _init() = 0; auto_ptr query( const string& server , int num = 0 , BSONObj extraFilter = BSONObj() , int skipLeft = 0 ); BSONObj explain( const string& server , BSONObj extraFilter = BSONObj() ); - + static BSONObj _concatFilter( const BSONObj& filter , const BSONObj& extraFilter ); - + virtual void _explain( map< string,list >& out ) = 0; string _ns; @@ -112,19 +113,19 @@ namespace mongo { FilteringClientCursor( const BSONObj filter = BSONObj() ); FilteringClientCursor( auto_ptr cursor , const BSONObj filter = BSONObj() ); ~FilteringClientCursor(); - + void reset( auto_ptr cursor ); - + bool more(); BSONObj next(); - + BSONObj peek(); private: void _advance(); - + Matcher _matcher; auto_ptr _cursor; - + BSONObj _next; bool _done; }; @@ -132,22 +133,22 @@ namespace mongo { class Servers { public: - Servers(){ + Servers() { } - - void add( const ServerAndQuery& s ){ + + void add( const ServerAndQuery& s ) { add( s._server , s._extra ); } - - void add( const string& server , const BSONObj& filter ){ + + void add( const string& server , const BSONObj& filter ) { vector& mine = _filters[server]; mine.push_back( filter.getOwned() ); } - + // TOOO: pick a less horrible name class View { - View( const Servers* s ){ - for ( map >::const_iterator i=s->_filters.begin(); i!=s->_filters.end(); ++i ){ + View( const Servers* s ) { + for ( map >::const_iterator i=s->_filters.begin(); i!=s->_filters.end(); ++i ) { _servers.push_back( i->first ); _filters.push_back( i->second ); } @@ -164,7 +165,7 @@ namespace mongo { vector getFilter( int n ) const { return _filters[ n ]; } - + private: vector _servers; vector< vector > _filters; @@ -175,7 +176,7 @@ namespace mongo { View view() const { return View( this ); } - + private: map > _filters; @@ -198,13 +199,13 @@ namespace mongo { protected: virtual void _explain( map< string,list >& out ); - void _init(){} + void _init() {} vector _servers; unsigned _serverIndex; - + FilteringClientCursor _current; - + int _needToSkip; }; @@ -212,11 +213,11 @@ namespace mongo { /** * runs a query in parellel across N servers * sots - */ + */ class ParallelSortClusteredCursor : public ClusteredCursor { public: ParallelSortClusteredCursor( const set& servers , QueryMessage& q , const BSONObj& sortKey ); - ParallelSortClusteredCursor( const set& servers , const string& ns , + ParallelSortClusteredCursor( const set& servers , const string& ns , const Query& q , int options=0, const BSONObj& fields=BSONObj() ); virtual ~ParallelSortClusteredCursor(); virtual bool more(); @@ -231,7 +232,7 @@ namespace mongo { int _numServers; set _servers; BSONObj _sortKey; - + FilteringClientCursor * _cursors; int _needToSkip; }; @@ -245,11 +246,11 @@ namespace mongo { public: class CommandResult { public: - + string getServer() const { return _server; } bool isDone() const { return _done; } - + bool ok() const { assert( _done ); return _ok; @@ -265,30 +266,37 @@ 
namespace mongo { returns ok() */ bool join(); - + private: - - CommandResult( const string& server , const string& db , const BSONObj& cmd ); - + + CommandResult( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn ); + string _server; string _db; BSONObj _cmd; + DBClientBase * _conn; scoped_ptr _thr; - + BSONObj _res; - bool _done; bool _ok; - + bool _done; + friend class Future; }; + + static void commandThread(shared_ptr res); - static void commandThread( shared_ptr res ); - - static shared_ptr spawnCommand( const string& server , const string& db , const BSONObj& cmd ); + /** + * @param server server name + * @param db db name + * @param cmd cmd to exec + * @param conn optional connection to use. will use standard pooled if non-specified + */ + static shared_ptr spawnCommand( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn = 0 ); }; - + } #include "undef_macros.h" diff --git a/client/redef_macros.h b/client/redef_macros.h index dd2e66f..a4cb1c9 100644 --- a/client/redef_macros.h +++ b/client/redef_macros.h @@ -50,6 +50,9 @@ #define RARELY MONGO_RARELY #define ONCE MONGO_ONCE +// util/log.h +#define LOG MONGO_LOG + #undef MONGO_MACROS_CLEANED #endif diff --git a/client/simple_client_demo.cpp b/client/simple_client_demo.cpp new file mode 100644 index 0000000..fa2f4a8 --- /dev/null +++ b/client/simple_client_demo.cpp @@ -0,0 +1,36 @@ +/* simple_client_demo.cpp + + See also : http://www.mongodb.org/pages/viewpage.action?pageId=133415 + + How to build and run: + + (1) Using the mongoclient: + g++ simple_client_demo.cpp -lmongoclient -lboost_thread-mt -lboost_filesystem -lboost_program_options + ./a.out + + (2) using client_lib.cpp: + g++ -I .. simple_client_demo.cpp mongo_client_lib.cpp -lboost_thread-mt -lboost_filesystem + ./a.out +*/ + +#include +#include "dbclient.h" // the mongo c++ driver + +using namespace std; +using namespace mongo; +using namespace bson; + +int main() { + cout << "connecting to localhost..." 
<< endl; + DBClientConnection c; + c.connect("localhost"); + cout << "connected ok" << endl; + unsigned long long count = c.count("test.foo"); + cout << "count of exiting documents in collection test.foo : " << count << endl; + + bo o = BSON( "hello" << "world" ); + c.insert("test.foo", o); + + return 0; +} + diff --git a/client/syncclusterconnection.cpp b/client/syncclusterconnection.cpp index 99f6067..4fafdc1 100644 --- a/client/syncclusterconnection.cpp +++ b/client/syncclusterconnection.cpp @@ -37,11 +37,11 @@ namespace mongo { for( list::const_iterator i = L.begin(); i != L.end(); i++ ) _connect( i->toString() ); } - + SyncClusterConnection::SyncClusterConnection( string commaSeperated ) : _mutex("SyncClusterConnection") { _address = commaSeperated; string::size_type idx; - while ( ( idx = commaSeperated.find( ',' ) ) != string::npos ){ + while ( ( idx = commaSeperated.find( ',' ) ) != string::npos ) { string h = commaSeperated.substr( 0 , idx ); commaSeperated = commaSeperated.substr( idx + 1 ); _connect( h ); @@ -50,7 +50,7 @@ namespace mongo { uassert( 8004 , "SyncClusterConnection needs 3 servers" , _conns.size() == 3 ); } - SyncClusterConnection::SyncClusterConnection( string a , string b , string c ) : _mutex("SyncClusterConnection") { + SyncClusterConnection::SyncClusterConnection( string a , string b , string c ) : _mutex("SyncClusterConnection") { _address = a + "," + b + "," + c; // connect to all even if not working _connect( a ); @@ -62,52 +62,55 @@ namespace mongo { assert(0); } - SyncClusterConnection::~SyncClusterConnection(){ + SyncClusterConnection::~SyncClusterConnection() { for ( size_t i=0; i<_conns.size(); i++ ) delete _conns[i]; _conns.clear(); } - bool SyncClusterConnection::prepare( string& errmsg ){ + bool SyncClusterConnection::prepare( string& errmsg ) { _lastErrors.clear(); return fsync( errmsg ); } - - bool SyncClusterConnection::fsync( string& errmsg ){ + + bool SyncClusterConnection::fsync( string& errmsg ) { bool ok = true; errmsg = ""; - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { BSONObj res; try { if ( _conns[i]->simpleCommand( "admin" , 0 , "fsync" ) ) continue; } - catch ( std::exception& e ){ + catch ( DBException& e ) { + errmsg += e.toString(); + } + catch ( std::exception& e ) { errmsg += e.what(); } - catch ( ... ){ + catch ( ... ) { } ok = false; - errmsg += _conns[i]->toString() + ":" + res.toString(); + errmsg += " " + _conns[i]->toString() + ":" + res.toString(); } return ok; } - void SyncClusterConnection::_checkLast(){ + void SyncClusterConnection::_checkLast() { _lastErrors.clear(); vector errors; - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { BSONObj res; string err; try { if ( ! _conns[i]->runCommand( "admin" , BSON( "getlasterror" << 1 << "fsync" << 1 ) , res ) ) err = "cmd failed: "; } - catch ( std::exception& e ){ + catch ( std::exception& e ) { err += e.what(); } - catch ( ... ){ + catch ( ... 
) { err += "unknown failure"; } _lastErrors.push_back( res.getOwned() ); @@ -115,13 +118,13 @@ namespace mongo { } assert( _lastErrors.size() == errors.size() && _lastErrors.size() == _conns.size() ); - + stringstream err; bool ok = true; - - for ( size_t i = 0; i<_conns.size(); i++ ){ + + for ( size_t i = 0; i<_conns.size(); i++ ) { BSONObj res = _lastErrors[i]; - if ( res["ok"].trueValue() && res["fsyncFiles"].numberInt() > 0 ) + if ( res["ok"].trueValue() && (res["fsyncFiles"].numberInt() > 0 || res.hasElement("waited"))) continue; ok = false; err << _conns[i]->toString() << ": " << res << " " << errors[i]; @@ -132,13 +135,13 @@ namespace mongo { throw UserException( 8001 , (string)"SyncClusterConnection write op failed: " + err.str() ); } - BSONObj SyncClusterConnection::getLastErrorDetailed(){ + BSONObj SyncClusterConnection::getLastErrorDetailed() { if ( _lastErrors.size() ) return _lastErrors[0]; return DBClientBase::getLastErrorDetailed(); } - void SyncClusterConnection::_connect( string host ){ + void SyncClusterConnection::_connect( string host ) { log() << "SyncClusterConnection connecting to [" << host << "]" << endl; DBClientConnection * c = new DBClientConnection( true ); string errmsg; @@ -148,40 +151,42 @@ namespace mongo { _conns.push_back( c ); } - bool SyncClusterConnection::callRead( Message& toSend , Message& response ){ + bool SyncClusterConnection::callRead( Message& toSend , Message& response ) { // TODO: need to save state of which one to go back to somehow... return _conns[0]->callRead( toSend , response ); } BSONObj SyncClusterConnection::findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn, int queryOptions) { - - if ( ns.find( ".$cmd" ) != string::npos ){ + + if ( ns.find( ".$cmd" ) != string::npos ) { string cmdName = query.obj.firstElement().fieldName(); int lockType = _lockType( cmdName ); - if ( lockType > 0 ){ // write $cmd + if ( lockType > 0 ) { // write $cmd string errmsg; if ( ! 
prepare( errmsg ) ) throw UserException( 13104 , (string)"SyncClusterConnection::findOne prepare failed: " + errmsg ); - + vector all; - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { all.push_back( _conns[i]->findOne( ns , query , 0 , queryOptions ).getOwned() ); } - + _checkLast(); - for ( size_t i=0; itoString(); + ss << " ns: " << ns; + ss << " cmd: " << query.toString(); throw UserException( 13105 , ss.str() ); } - + return all[0]; } } @@ -191,9 +196,9 @@ namespace mongo { auto_ptr SyncClusterConnection::query(const string &ns, Query query, int nToReturn, int nToSkip, - const BSONObj *fieldsToReturn, int queryOptions, int batchSize ){ + const BSONObj *fieldsToReturn, int queryOptions, int batchSize ) { _lastErrors.clear(); - if ( ns.find( ".$cmd" ) != string::npos ){ + if ( ns.find( ".$cmd" ) != string::npos ) { string cmdName = query.obj.firstElement().fieldName(); int lockType = _lockType( cmdName ); uassert( 13054 , (string)"write $cmd not supported in SyncClusterConnection::query for:" + cmdName , lockType <= 0 ); @@ -202,7 +207,7 @@ namespace mongo { return _queryOnActive( ns , query , nToReturn , nToSkip , fieldsToReturn , queryOptions , batchSize ); } - bool SyncClusterConnection::_commandOnActive(const string &dbname, const BSONObj& cmd, BSONObj &info, int options ){ + bool SyncClusterConnection::_commandOnActive(const string &dbname, const BSONObj& cmd, BSONObj &info, int options ) { auto_ptr cursor = _queryOnActive( dbname + ".$cmd" , cmd , 1 , 0 , 0 , options , 0 ); if ( cursor->more() ) info = cursor->next().copy(); @@ -210,153 +215,164 @@ namespace mongo { info = BSONObj(); return isOk( info ); } - + auto_ptr SyncClusterConnection::_queryOnActive(const string &ns, Query query, int nToReturn, int nToSkip, - const BSONObj *fieldsToReturn, int queryOptions, int batchSize ){ - - for ( size_t i=0; i<_conns.size(); i++ ){ + const BSONObj *fieldsToReturn, int queryOptions, int batchSize ) { + + for ( size_t i=0; i<_conns.size(); i++ ) { try { - auto_ptr cursor = + auto_ptr cursor = _conns[i]->query( ns , query , nToReturn , nToSkip , fieldsToReturn , queryOptions , batchSize ); if ( cursor.get() ) return cursor; log() << "query failed to: " << _conns[i]->toString() << " no data" << endl; } - catch ( ... ){ + catch ( ... ) { log() << "query failed to: " << _conns[i]->toString() << " exception" << endl; } } throw UserException( 8002 , "all servers down!" ); } - - auto_ptr SyncClusterConnection::getMore( const string &ns, long long cursorId, int nToReturn, int options ){ - uassert( 10022 , "SyncClusterConnection::getMore not supported yet" , 0); + + auto_ptr SyncClusterConnection::getMore( const string &ns, long long cursorId, int nToReturn, int options ) { + uassert( 10022 , "SyncClusterConnection::getMore not supported yet" , 0); auto_ptr c; return c; } - - void SyncClusterConnection::insert( const string &ns, BSONObj obj ){ - uassert( 13119 , (string)"SyncClusterConnection::insert obj has to have an _id: " + obj.jsonString() , + void SyncClusterConnection::insert( const string &ns, BSONObj obj ) { + + uassert( 13119 , (string)"SyncClusterConnection::insert obj has to have an _id: " + obj.jsonString() , ns.find( ".system.indexes" ) != string::npos || obj["_id"].type() ); - + string errmsg; if ( ! 
prepare( errmsg ) ) throw UserException( 8003 , (string)"SyncClusterConnection::insert prepare failed: " + errmsg ); - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { _conns[i]->insert( ns , obj ); } - + _checkLast(); } - - void SyncClusterConnection::insert( const string &ns, const vector< BSONObj >& v ){ - uassert( 10023 , "SyncClusterConnection bulk insert not implemented" , 0); + + void SyncClusterConnection::insert( const string &ns, const vector< BSONObj >& v ) { + uassert( 10023 , "SyncClusterConnection bulk insert not implemented" , 0); } - void SyncClusterConnection::remove( const string &ns , Query query, bool justOne ){ + void SyncClusterConnection::remove( const string &ns , Query query, bool justOne ) { string errmsg; if ( ! prepare( errmsg ) ) throw UserException( 8020 , (string)"SyncClusterConnection::remove prepare failed: " + errmsg ); - - for ( size_t i=0; i<_conns.size(); i++ ){ + + for ( size_t i=0; i<_conns.size(); i++ ) { _conns[i]->remove( ns , query , justOne ); } - + _checkLast(); } - void SyncClusterConnection::update( const string &ns , Query query , BSONObj obj , bool upsert , bool multi ){ + void SyncClusterConnection::update( const string &ns , Query query , BSONObj obj , bool upsert , bool multi ) { - if ( upsert ){ + if ( upsert ) { uassert( 13120 , "SyncClusterConnection::update upsert query needs _id" , query.obj["_id"].type() ); } - if ( _writeConcern ){ + if ( _writeConcern ) { string errmsg; if ( ! prepare( errmsg ) ) throw UserException( 8005 , (string)"SyncClusterConnection::udpate prepare failed: " + errmsg ); } - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { try { _conns[i]->update( ns , query , obj , upsert , multi ); } - catch ( std::exception& e ){ + catch ( std::exception& e ) { if ( _writeConcern ) throw e; } } - - if ( _writeConcern ){ + + if ( _writeConcern ) { _checkLast(); assert( _lastErrors.size() > 1 ); - + int a = _lastErrors[0]["n"].numberInt(); - for ( unsigned i=1; i<_lastErrors.size(); i++ ){ + for ( unsigned i=1; i<_lastErrors.size(); i++ ) { int b = _lastErrors[i]["n"].numberInt(); if ( a == b ) continue; - - throw UpdateNotTheSame( 8017 , "update not consistent" , _connAddresses , _lastErrors ); + + throw UpdateNotTheSame( 8017 , + str::stream() + << "update not consistent " + << " ns: " << ns + << " query: " << query.toString() + << " update: " << obj + << " gle1: " << _lastErrors[0] + << " gle2: " << _lastErrors[i] , + _connAddresses , _lastErrors ); } } } - string SyncClusterConnection::_toString() const { + string SyncClusterConnection::_toString() const { stringstream ss; ss << "SyncClusterConnection [" << _address << "]"; return ss.str(); } - bool SyncClusterConnection::call( Message &toSend, Message &response, bool assertOk ){ - uassert( 8006 , "SyncClusterConnection::call can only be used directly for dbQuery" , + bool SyncClusterConnection::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) { + uassert( 8006 , "SyncClusterConnection::call can only be used directly for dbQuery" , toSend.operation() == dbQuery ); - + DbMessage d( toSend ); uassert( 8007 , "SyncClusterConnection::call can't handle $cmd" , strstr( d.getns(), "$cmd" ) == 0 ); - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { try { bool ok = _conns[i]->call( toSend , response , assertOk ); - if ( ok ) + if ( ok ) { + if ( actualServer ) + *actualServer = _connAddresses[i]; return ok; + } log() << "call failed to: 
" << _conns[i]->toString() << " no data" << endl; } - catch ( ... ){ + catch ( ... ) { log() << "call failed to: " << _conns[i]->toString() << " exception" << endl; } } throw UserException( 8008 , "all servers down!" ); } - - void SyncClusterConnection::say( Message &toSend ){ + + void SyncClusterConnection::say( Message &toSend ) { string errmsg; if ( ! prepare( errmsg ) ) throw UserException( 13397 , (string)"SyncClusterConnection::say prepare failed: " + errmsg ); - for ( size_t i=0; i<_conns.size(); i++ ){ + for ( size_t i=0; i<_conns.size(); i++ ) { _conns[i]->say( toSend ); } - + _checkLast(); } - - void SyncClusterConnection::sayPiggyBack( Message &toSend ){ + + void SyncClusterConnection::sayPiggyBack( Message &toSend ) { assert(0); } - int SyncClusterConnection::_lockType( const string& name ){ + int SyncClusterConnection::_lockType( const string& name ) { { scoped_lock lk(_mutex); map::iterator i = _lockTypes.find( name ); if ( i != _lockTypes.end() ) return i->second; } - + BSONObj info; - uassert( 13053 , "help failed" , _commandOnActive( "admin" , BSON( name << "1" << "help" << 1 ) , info ) ); + uassert( 13053 , str::stream() << "help failed: " << info , _commandOnActive( "admin" , BSON( name << "1" << "help" << 1 ) , info ) ); int lockType = info["lockType"].numberInt(); @@ -365,20 +381,9 @@ namespace mongo { return lockType; } - void SyncClusterConnection::killCursor( long long cursorID ){ + void SyncClusterConnection::killCursor( long long cursorID ) { // should never need to do this assert(0); } - bool SyncClusterConnection::isMember( const DBConnector * conn ) const { - if ( conn == this ) - return true; - - for ( unsigned i=0; i<_conns.size(); i++ ) - if ( _conns[i]->isMember( conn ) ) - return true; - - return false; - } - } diff --git a/client/syncclusterconnection.h b/client/syncclusterconnection.h index 4292e3d..c946073 100644 --- a/client/syncclusterconnection.h +++ b/client/syncclusterconnection.h @@ -16,6 +16,7 @@ * limitations under the License. */ +#pragma once #include "../pch.h" #include "dbclient.h" @@ -26,15 +27,15 @@ namespace mongo { /** * This is a connection to a cluster of servers that operate as one * for super high durability. - * + * * Write operations are two-phase. First, all nodes are asked to fsync. If successful - * everywhere, the write is sent everywhere and then followed by an fsync. There is no - * rollback if a problem occurs during the second phase. Naturally, with all these fsyncs, + * everywhere, the write is sent everywhere and then followed by an fsync. There is no + * rollback if a problem occurs during the second phase. Naturally, with all these fsyncs, * these operations will be quite slow -- use sparingly. - * + * * Read operations are sent to a single random node. - * - * The class checks if a command is read or write style, and sends to a single + * + * The class checks if a command is read or write style, and sends to a single * node if a read lock command and to all in two phases with a write style command. 
*/ class SyncClusterConnection : public DBClientBase { @@ -46,7 +47,7 @@ namespace mongo { SyncClusterConnection( string commaSeparated ); SyncClusterConnection( string a , string b , string c ); ~SyncClusterConnection(); - + /** * @return true if all servers are up and ready for writes */ @@ -65,36 +66,34 @@ namespace mongo { const BSONObj *fieldsToReturn, int queryOptions, int batchSize ); virtual auto_ptr getMore( const string &ns, long long cursorId, int nToReturn, int options ); - + virtual void insert( const string &ns, BSONObj obj ); - + virtual void insert( const string &ns, const vector< BSONObj >& v ); virtual void remove( const string &ns , Query query, bool justOne ); virtual void update( const string &ns , Query query , BSONObj obj , bool upsert , bool multi ); - virtual bool call( Message &toSend, Message &response, bool assertOk ); + virtual bool call( Message &toSend, Message &response, bool assertOk , string * actualServer ); virtual void say( Message &toSend ); virtual void sayPiggyBack( Message &toSend ); virtual void killCursor( long long cursorID ); - + virtual string getServerAddress() const { return _address; } virtual bool isFailed() const { return false; } virtual string toString() { return _toString(); } - virtual BSONObj getLastErrorDetailed(); + virtual BSONObj getLastErrorDetailed(); virtual bool callRead( Message& toSend , Message& response ); - virtual ConnectionString::ConnectionType type() const { return ConnectionString::SYNC; } - - virtual bool isMember( const DBConnector * conn ) const; + virtual ConnectionString::ConnectionType type() const { return ConnectionString::SYNC; } private: SyncClusterConnection( SyncClusterConnection& prev ); - string _toString() const; + string _toString() const; bool _commandOnActive(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); auto_ptr _queryOnActive(const string &ns, Query query, int nToReturn, int nToSkip, const BSONObj *fieldsToReturn, int queryOptions, int batchSize ); @@ -107,17 +106,17 @@ namespace mongo { vector _conns; map _lockTypes; mongo::mutex _mutex; - + vector _lastErrors; }; - + class UpdateNotTheSame : public UserException { public: UpdateNotTheSame( int code , const string& msg , const vector& addrs , const vector& lastErrors ) - : UserException( code , msg ) , _addrs( addrs ) , _lastErrors( lastErrors ){ + : UserException( code , msg ) , _addrs( addrs ) , _lastErrors( lastErrors ) { assert( _addrs.size() == _lastErrors.size() ); } - + virtual ~UpdateNotTheSame() throw() { } @@ -134,7 +133,7 @@ namespace mongo { vector _addrs; vector _lastErrors; }; - + }; #include "undef_macros.h" diff --git a/client/undef_macros.h b/client/undef_macros.h index cce8692..bc59a84 100644 --- a/client/undef_macros.h +++ b/client/undef_macros.h @@ -54,5 +54,8 @@ #undef RARELY #undef ONCE +// util/log.h +#undef LOG + #define MONGO_MACROS_CLEANED #endif diff --git a/db/background.h b/db/background.h index 24ea1cb..ea424c9 100644 --- a/db/background.h +++ b/db/background.h @@ -21,16 +21,16 @@ #pragma once -namespace mongo { +namespace mongo { - /* these are administrative operations / jobs - for a namespace running in the background, and that only one + /* these are administrative operations / jobs + for a namespace running in the background, and that only one at a time per namespace is permitted, and that if in progress, you aren't allowed to do other NamespaceDetails major manipulations - (such as dropping ns or db) even in the foreground and must - instead uassert. 
+ (such as dropping ns or db) even in the foreground and must + instead uassert. - It's assumed this is not for super-high RPS things, so we don't do + It's assumed this is not for super-high RPS things, so we don't do anything special in the implementation here to be fast. */ class BackgroundOperation : public boost::noncopyable { diff --git a/db/btree.cpp b/db/btree.cpp index d646de8..d547a1b 100644 --- a/db/btree.cpp +++ b/db/btree.cpp @@ -24,48 +24,92 @@ #include "clientcursor.h" #include "client.h" #include "dbhelpers.h" -#include "curop.h" +#include "curop-inl.h" #include "stats/counters.h" +#include "dur_commitjob.h" namespace mongo { #define VERIFYTHISLOC dassert( thisLoc.btree() == this ); + /** + * give us a writable version of the btree bucket (declares write intent). + * note it is likely more efficient to declare write intent on something smaller when you can. + */ + BtreeBucket* DiskLoc::btreemod() const { + assert( _a != -1 ); + BtreeBucket *b = const_cast< BtreeBucket * >( btree() ); + return static_cast< BtreeBucket* >( getDur().writingPtr( b, BucketSize ) ); + } + + _KeyNode& _KeyNode::writing() const { + return *getDur().writing( const_cast< _KeyNode* >( this ) ); + } + KeyNode::KeyNode(const BucketBasics& bb, const _KeyNode &k) : - prevChildBucket(k.prevChildBucket), - recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs()) + prevChildBucket(k.prevChildBucket), + recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs()) { } - const int KeyMax = BucketSize / 10; + // largest key size we allow. note we very much need to support bigger keys (somehow) in the future. + static const int KeyMax = BucketSize / 10; + + // We define this value as the maximum number of bytes such that, if we have + // fewer than this many bytes, we must be able to either merge with or receive + // keys from any neighboring node. If our utilization goes below this value we + // know we can bring up the utilization with a simple operation. Ignoring the + // 90/10 split policy which is sometimes employed and our 'unused' nodes, this + // is a lower bound on bucket utilization for non root buckets. + // + // Note that the exact value here depends on the implementation of + // rebalancedSeparatorPos(). The conditions for lowWaterMark - 1 are as + // follows: We know we cannot merge with the neighbor, so the total data size + // for us, the neighbor, and the separator must be at least + // BtreeBucket::bodySize() + 1. We must be able to accept one key of any + // allowed size, so our size plus storage for that additional key must be + // <= BtreeBucket::bodySize() / 2. This way, with the extra key we'll have a + // new bucket data size < half the total data size and by the implementation + // of rebalancedSeparatorPos() the key must be added. + static const int lowWaterMark = BtreeBucket::bodySize() / 2 - KeyMax - sizeof( _KeyNode ) + 1; + + static const int split_debug = 0; + static const int insert_debug = 0; extern int otherTraceLevel; - const int split_debug = 0; - const int insert_debug = 0; - static void alreadyInIndex() { + /** + * this error is ok/benign when doing a background indexing -- that logic in pdfile checks explicitly + * for the 10287 error code. 
+ */ + static void alreadyInIndex() { // we don't use massert() here as that does logging and this is 'benign' - see catches in _indexRecord() throw MsgAssertionException(10287, "btree: key+recloc already in index"); } /* BucketBasics --------------------------------------------------- */ - inline void BucketBasics::modified(const DiskLoc& thisLoc) { - VERIFYTHISLOC - btreeStore->modified(thisLoc); + void BucketBasics::assertWritable() { + if( cmdLine.dur ) + dur::assertAlreadyDeclared(this, sizeof(*this)); + } + + string BtreeBucket::bucketSummary() const { + stringstream ss; + ss << " Bucket info:" << endl; + ss << " n: " << n << endl; + ss << " parent: " << parent.toString() << endl; + ss << " nextChild: " << parent.toString() << endl; + ss << " flags:" << flags << endl; + ss << " emptySize: " << emptySize << " topSize: " << topSize << endl; + return ss.str(); } int BucketBasics::Size() const { assert( _wasSize == BucketSize ); return BucketSize; } - inline void BucketBasics::setNotPacked() { - flags &= ~Packed; - } - inline void BucketBasics::setPacked() { - flags |= Packed; - } - void BucketBasics::_shape(int level, stringstream& ss) { + void BucketBasics::_shape(int level, stringstream& ss) const { for ( int i = 0; i < level; i++ ) ss << ' '; ss << "*\n"; for ( int i = 0; i < n; i++ ) @@ -78,13 +122,13 @@ namespace mongo { int bt_fv=0; int bt_dmp=0; - void BucketBasics::dumpTree(DiskLoc thisLoc, const BSONObj &order) { + void BtreeBucket::dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const { bt_dmp=1; fullValidate(thisLoc, order); bt_dmp=0; } - int BucketBasics::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount) { + int BtreeBucket::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount, bool strict) const { { bool f = false; assert( f = true ); @@ -93,8 +137,6 @@ namespace mongo { killCurrentOp.checkForInterrupt(); assertValid(order, true); -// if( bt_fv==0 ) -// return; if ( bt_dmp ) { out() << thisLoc.toString() << ' '; @@ -105,26 +147,37 @@ namespace mongo { int kc = 0; for ( int i = 0; i < n; i++ ) { - _KeyNode& kn = k(i); + const _KeyNode& kn = k(i); if ( kn.isUsed() ) { kc++; - } else { + } + else { if ( unusedCount ) { ++( *unusedCount ); } } if ( !kn.prevChildBucket.isNull() ) { DiskLoc left = kn.prevChildBucket; - BtreeBucket *b = left.btree(); - wassert( b->parent == thisLoc ); - kc += b->fullValidate(kn.prevChildBucket, order, unusedCount); + const BtreeBucket *b = left.btree(); + if ( strict ) { + assert( b->parent == thisLoc ); + } + else { + wassert( b->parent == thisLoc ); + } + kc += b->fullValidate(kn.prevChildBucket, order, unusedCount, strict); } } if ( !nextChild.isNull() ) { - BtreeBucket *b = nextChild.btree(); - wassert( b->parent == thisLoc ); - kc += b->fullValidate(nextChild, order, unusedCount); + const BtreeBucket *b = nextChild.btree(); + if ( strict ) { + assert( b->parent == thisLoc ); + } + else { + wassert( b->parent == thisLoc ); + } + kc += b->fullValidate(nextChild, order, unusedCount, strict); } return kc; @@ -132,12 +185,20 @@ namespace mongo { int nDumped = 0; - void BucketBasics::assertValid(const Ordering &order, bool force) { + void BucketBasics::assertValid(const Ordering &order, bool force) const { if ( !debug && !force ) return; wassert( n >= 0 && n < Size() ); wassert( emptySize >= 0 && emptySize < BucketSize ); wassert( topSize >= n && topSize <= BucketSize ); + + // this is very slow so don't do often + { + static int _k; + if( ++_k % 128 ) + return; + } + DEV { // slow: for ( int 
i = 0; i < n-1; i++ ) { @@ -204,15 +265,16 @@ namespace mongo { reserved = 0; } - /* see _alloc */ + /** see _alloc */ inline void BucketBasics::_unalloc(int bytes) { topSize -= bytes; emptySize += bytes; } - /* we allocate space from the end of the buffer for data. - the keynodes grow from the front. - */ + /** + * we allocate space from the end of the buffer for data. + * the keynodes grow from the front. + */ inline int BucketBasics::_alloc(int bytes) { topSize += bytes; emptySize -= bytes; @@ -221,21 +283,23 @@ namespace mongo { return ofs; } - void BucketBasics::_delKeyAtPos(int keypos) { + void BucketBasics::_delKeyAtPos(int keypos, bool mayEmpty) { assert( keypos >= 0 && keypos <= n ); assert( childForPos(keypos).isNull() ); + // TODO audit cases where nextChild is null + assert( ( mayEmpty && n > 0 ) || n > 1 || nextChild.isNull() ); + emptySize += sizeof(_KeyNode); n--; - assert( n > 0 || nextChild.isNull() ); for ( int j = keypos; j < n; j++ ) k(j) = k(j+1); - emptySize += sizeof(_KeyNode); setNotPacked(); } - /* pull rightmost key from the bucket. this version requires its right child to be null so it - does not bother returning that value. - */ - void BucketBasics::popBack(DiskLoc& recLoc, BSONObj& key) { + /** + * pull rightmost key from the bucket. this version requires its right child to be null so it + * does not bother returning that value. + */ + void BucketBasics::popBack(DiskLoc& recLoc, BSONObj& key) { massert( 10282 , "n==0 in btree popBack()", n > 0 ); assert( k(n-1).isUsed() ); // no unused skipping in this function at this point - btreebuilder doesn't require that KeyNode kn = keyNode(n-1); @@ -243,18 +307,18 @@ namespace mongo { key = kn.key; int keysize = kn.key.objsize(); - massert( 10283 , "rchild not null in btree popBack()", nextChild.isNull()); + massert( 10283 , "rchild not null in btree popBack()", nextChild.isNull()); - /* weirdly, we also put the rightmost down pointer in nextchild, even when bucket isn't full. */ - nextChild = kn.prevChildBucket; + // weirdly, we also put the rightmost down pointer in nextchild, even when bucket isn't full. + nextChild = kn.prevChildBucket; n--; emptySize += sizeof(_KeyNode); _unalloc(keysize); } - /* add a key. must be > all existing. be careful to set next ptr right. */ - bool BucketBasics::_pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild) { + /** add a key. must be > all existing. be careful to set next ptr right. */ + bool BucketBasics::_pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild) { int bytesNeeded = key.objsize() + sizeof(_KeyNode); if ( bytesNeeded > emptySize ) return false; @@ -269,38 +333,96 @@ namespace mongo { memcpy(p, key.objdata(), key.objsize()); return true; } - /*void BucketBasics::pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild, DiskLoc nextChild) { - pushBack(recordLoc, key, order, prevChild); - childForPos(n) = nextChild; - }*/ - /* insert a key in a bucket with no complexity -- no splits required */ - bool BucketBasics::basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order) { - modified(thisLoc); + /* durability note + we do separate intent declarations herein. arguably one could just declare + the whole bucket given we do group commits. this is something we could investigate + later as to what is faster under what situations. 
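       As an editorial illustration only (a sketch built from the journaling calls that
       appear elsewhere in this patch, not an excerpt from the tree), the two options
       discussed above look like:

           // coarse: declare intent on the whole bucket up front
           BtreeBucket *b = thisLoc.btreemod();                       // getDur().writingPtr( b, BucketSize )

           // fine-grained, as basicInsert() below does: declare only the bytes touched
           // (shiftOfs/shiftLen are placeholders for the shifted keynode range)
           BucketBasics *bb = (BucketBasics*) getDur().writingAtOffset( (void *) this, shiftOfs, shiftLen );
           getDur().declareWriteIntent( &bb->emptySize, 12 );         // the 12 header bytes [emptySize..n] we update

       The fine-grained form writes less to the journal per insert at the cost of extra
       declarations, which is exactly the tradeoff this note leaves open.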
+ */ + /** insert a key in a bucket with no complexity -- no splits required + @return false if a split is required. + */ + bool BucketBasics::basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order) const { assert( keypos >= 0 && keypos <= n ); int bytesNeeded = key.objsize() + sizeof(_KeyNode); if ( bytesNeeded > emptySize ) { - pack( order, keypos ); + _pack(thisLoc, order, keypos); if ( bytesNeeded > emptySize ) return false; } - for ( int j = n; j > keypos; j-- ) // make room - k(j) = k(j-1); - n++; - emptySize -= sizeof(_KeyNode); - _KeyNode& kn = k(keypos); + + BucketBasics *b; + { + const char *p = (const char *) &k(keypos); + const char *q = (const char *) &k(n+1); + // declare that we will write to [k(keypos),k(n)] + // todo: this writes a medium amount to the journal. we may want to add a verb "shift" to the redo log so + // we can log a very small amount. + b = (BucketBasics*) getDur().writingAtOffset((void *) this, p-(char*)this, q-p); + + // e.g. n==3, keypos==2 + // 1 4 9 + // -> + // 1 4 _ 9 + for ( int j = n; j > keypos; j-- ) // make room + b->k(j) = b->k(j-1); + } + + getDur().declareWriteIntent(&b->emptySize, 12); // [b->emptySize..b->n] is 12 bytes and we are going to write those + b->emptySize -= sizeof(_KeyNode); + b->n++; + + _KeyNode& kn = b->k(keypos); kn.prevChildBucket.Null(); kn.recordLoc = recordLoc; - kn.setKeyDataOfs((short) _alloc(key.objsize()) ); - char *p = dataAt(kn.keyDataOfs()); + kn.setKeyDataOfs((short) b->_alloc(key.objsize()) ); + char *p = b->dataAt(kn.keyDataOfs()); + getDur().declareWriteIntent(p, key.objsize()); memcpy(p, key.objdata(), key.objsize()); return true; } - /* when we delete things we just leave empty space until the node is - full and then we repack it. - */ - void BucketBasics::pack( const Ordering &order, int &refPos ) { + /** with this implementation, refPos == 0 disregards effect of refPos */ + bool BucketBasics::mayDropKey( int index, int refPos ) const { + return index > 0 && ( index != refPos ) && k( index ).isUnused() && k( index ).prevChildBucket.isNull(); + } + + int BucketBasics::packedDataSize( int refPos ) const { + if ( flags & Packed ) { + return BucketSize - emptySize - headerSize(); + } + int size = 0; + for( int j = 0; j < n; ++j ) { + if ( mayDropKey( j, refPos ) ) { + continue; + } + size += keyNode( j ).key.objsize() + sizeof( _KeyNode ); + } + return size; + } + + /** + * when we delete things we just leave empty space until the node is + * full and then we repack it. + */ + void BucketBasics::_pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const { + if ( flags & Packed ) + return; + + VERIFYTHISLOC + + /** TODO perhaps this can be optimized. for example if packing does no write, we can skip intent decl. + an empirical approach is probably best than just adding new code : perhaps the bucket would need + declaration anyway within the group commit interval, in which case we would just be adding + code and complexity without benefit. 
+ */ + thisLoc.btreemod()->_packReadyForMod(order, refPos); + } + + /** version when write intent already declared */ + void BucketBasics::_packReadyForMod( const Ordering &order, int &refPos ) { + assertWritable(); + if ( flags & Packed ) return; @@ -310,7 +432,7 @@ namespace mongo { topSize = 0; int i = 0; for ( int j = 0; j < n; j++ ) { - if( j > 0 && ( j != refPos ) && k( j ).isUnused() && k( j ).prevChildBucket.isNull() ) { + if( mayDropKey( j, refPos ) ) { continue; // key is unused and has no children - drop it } if( i != j ) { @@ -333,26 +455,104 @@ namespace mongo { n = i; int dataUsed = tdz - ofs; memcpy(data + ofs, temp + ofs, dataUsed); + + // assertWritable(); + // TEMP TEST getDur().declareWriteIntent(this, sizeof(*this)); + emptySize = tdz - dataUsed - n * sizeof(_KeyNode); assert( emptySize >= 0 ); setPacked(); + assertValid( order ); } inline void BucketBasics::truncateTo(int N, const Ordering &order, int &refPos) { + dbMutex.assertWriteLocked(); + assertWritable(); + n = N; setNotPacked(); - pack( order, refPos ); + _packReadyForMod( order, refPos ); + } + + /** + * In the standard btree algorithm, we would split based on the + * existing keys _and_ the new key. But that's more work to + * implement, so we split the existing keys and then add the new key. + * + * There are several published heuristic algorithms for doing splits, + * but basically what you want are (1) even balancing between the two + * sides and (2) a small split key so the parent can have a larger + * branching factor. + * + * We just have a simple algorithm right now: if a key includes the + * halfway point (or 10% way point) in terms of bytes, split on that key; + * otherwise split on the key immediately to the left of the halfway + * point. + * + * This function is expected to be called on a packed bucket. + */ + int BucketBasics::splitPos( int keypos ) const { + assert( n > 2 ); + int split = 0; + int rightSize = 0; + // when splitting a btree node, if the new key is greater than all the other keys, we should not do an even split, but a 90/10 split. + // see SERVER-983 + int rightSizeLimit = ( topSize + sizeof( _KeyNode ) * n ) / ( keypos == n ? 10 : 2 ); + for( int i = n - 1; i > -1; --i ) { + rightSize += keyNode( i ).key.objsize() + sizeof( _KeyNode ); + if ( rightSize > rightSizeLimit ) { + split = i; + break; + } + } + // safeguards - we must not create an empty bucket + if ( split < 1 ) { + split = 1; + } + else if ( split > n - 2 ) { + split = n - 2; + } + + return split; + } + + void BucketBasics::reserveKeysFront( int nAdd ) { + assert( emptySize >= int( sizeof( _KeyNode ) * nAdd ) ); + emptySize -= sizeof( _KeyNode ) * nAdd; + for( int i = n - 1; i > -1; --i ) { + k( i + nAdd ) = k( i ); + } + n += nAdd; + } + + void BucketBasics::setKey( int i, const DiskLoc recordLoc, const BSONObj &key, const DiskLoc prevChildBucket ) { + _KeyNode &kn = k( i ); + kn.recordLoc = recordLoc; + kn.prevChildBucket = prevChildBucket; + short ofs = (short) _alloc( key.objsize() ); + kn.setKeyDataOfs( ofs ); + char *p = dataAt( ofs ); + memcpy( p, key.objdata(), key.objsize() ); + } + + void BucketBasics::dropFront( int nDrop, const Ordering &order, int &refpos ) { + for( int i = nDrop; i < n; ++i ) { + k( i - nDrop ) = k( i ); + } + n -= nDrop; + setNotPacked(); + _packReadyForMod( order, refpos ); } /* - BtreeBucket --------------------------------------------------- */ - /* return largest key in the subtree. */ + /** @return largest key in the subtree. 
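       (Editorial aside on splitPos() above, using made-up numbers for illustration:
       with ten equal-sized keys in a packed bucket, an insert in the middle uses a
       rightSizeLimit of about half the data, so the scan from the right crosses the
       limit near i == 4 and the bucket splits roughly evenly; an insert at the end
       (keypos == n) uses a limit of about one tenth, so the scan crosses it at i == 8,
       leaving most (8 of the 10) keys in the left bucket -- the 90/10 behavior
       referenced for SERVER-983 with monotonically increasing keys.)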
*/ void BtreeBucket::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) { DiskLoc loc = thisLoc; while ( 1 ) { - BtreeBucket *b = loc.btree(); + const BtreeBucket *b = loc.btree(); if ( !b->nextChild.isNull() ) { loc = b->nextChild; continue; @@ -365,23 +565,34 @@ namespace mongo { break; } } - - int BtreeBucket::customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, const vector< const BSONElement * > &rEnd, const Ordering &o ) { + + /** + * NOTE Currently the Ordering implementation assumes a compound index will + * not have more keys than an unsigned variable has bits. The same + * assumption is used in the implementation below with respect to the 'mask' + * variable. + */ + int BtreeBucket::customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction ) { BSONObjIterator ll( l ); BSONObjIterator rr( rBegin ); vector< const BSONElement * >::const_iterator rr2 = rEnd.begin(); + vector< bool >::const_iterator inc = rEndInclusive.begin(); unsigned mask = 1; for( int i = 0; i < rBeginLen; ++i, mask <<= 1 ) { BSONElement lll = ll.next(); BSONElement rrr = rr.next(); ++rr2; - + ++inc; + int x = lll.woCompare( rrr, false ); if ( o.descending( mask ) ) x = -x; if ( x != 0 ) return x; } + if ( rSup ) { + return -direction; + } for( ; ll.more(); mask <<= 1 ) { BSONElement lll = ll.next(); BSONElement rrr = **rr2; @@ -391,11 +602,15 @@ namespace mongo { x = -x; if ( x != 0 ) return x; + if ( !*inc ) { + return -direction; + } + ++inc; } return 0; } - bool BtreeBucket::exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, const Ordering& order) { + bool BtreeBucket::exists(const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order) const { int pos; bool found; DiskLoc b = locate(idx, thisLoc, key, order, pos, found, minDiskLoc); @@ -404,8 +619,8 @@ namespace mongo { while ( 1 ) { if( b.isNull() ) break; - BtreeBucket *bucket = b.btree(); - _KeyNode& kn = bucket->k(pos); + const BtreeBucket *bucket = b.btree(); + const _KeyNode& kn = bucket->k(pos); if ( kn.isUsed() ) return bucket->keyAt(pos).woEqual(key); b = bucket->advance(b, pos, 1, "BtreeBucket::exists"); @@ -413,22 +628,22 @@ namespace mongo { return false; } - /* @param self - don't complain about ourself already being in the index case. - @return true = there is a duplicate. - */ + /** + * @param self - don't complain about ourself already being in the index case. + * @return true = there is a duplicate. 
+ */ bool BtreeBucket::wouldCreateDup( - const IndexDetails& idx, DiskLoc thisLoc, + const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order, - DiskLoc self) - { + const DiskLoc &self) const { int pos; bool found; DiskLoc b = locate(idx, thisLoc, key, order, pos, found, minDiskLoc); while ( !b.isNull() ) { // we skip unused keys - BtreeBucket *bucket = b.btree(); - _KeyNode& kn = bucket->k(pos); + const BtreeBucket *bucket = b.btree(); + const _KeyNode& kn = bucket->k(pos); if ( kn.isUsed() ) { if( bucket->keyAt(pos).woEqual(key) ) return kn.recordLoc != self; @@ -440,7 +655,7 @@ namespace mongo { return false; } - string BtreeBucket::dupKeyError( const IndexDetails& idx , const BSONObj& key ){ + string BtreeBucket::dupKeyError( const IndexDetails& idx , const BSONObj& key ) { stringstream ss; ss << "E11000 duplicate key error "; ss << "index: " << idx.indexNamespace() << " "; @@ -448,37 +663,38 @@ namespace mongo { return ss.str(); } - /* Find a key withing this btree bucket. - - When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the - key. That assures that even when there are many duplicates (e.g., 1 million) for a key, - our performance is still good. - - assertIfDup: if the key exists (ignoring the recordLoc), uassert - - pos: for existing keys k0...kn-1. - returns # it goes BEFORE. so key[pos-1] < key < key[pos] - returns n if it goes after the last existing key. - note result might be an Unused location! - */ - char foo; - bool BtreeBucket::find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const Ordering &order, int& pos, bool assertIfDup) { + /** + * Find a key withing this btree bucket. + * + * When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the + * key. That assures that even when there are many duplicates (e.g., 1 million) for a key, + * our performance is still good. + * + * assertIfDup: if the key exists (ignoring the recordLoc), uassert + * + * pos: for existing keys k0...kn-1. + * returns # it goes BEFORE. so key[pos-1] < key < key[pos] + * returns n if it goes after the last existing key. + * note result might be an Unused location! + */ + char foo; + bool BtreeBucket::find(const IndexDetails& idx, const BSONObj& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const { #if defined(_EXPERIMENT1) - { - char *z = (char *) this; - int i = 0; - while( 1 ) { - i += 4096; - if( i >= BucketSize ) - break; - foo += z[i]; - } - } + { + char *z = (char *) this; + int i = 0; + while( 1 ) { + i += 4096; + if( i >= BucketSize ) + break; + foo += z[i]; + } + } #endif - + globalIndexCounters.btree( (char*)this ); - - /* binary search for this key */ + + // binary search for this key bool dupsChecked = false; int l=0; int h=n-1; @@ -486,13 +702,13 @@ namespace mongo { int m = (l+h)/2; KeyNode M = keyNode(m); int x = key.woCompare(M.key, order); - if ( x == 0 ) { + if ( x == 0 ) { if( assertIfDup ) { - if( k(m).isUnused() ) { - // ok that key is there if unused. but we need to check that there aren't other - // entries for the key then. as it is very rare that we get here, we don't put any + if( k(m).isUnused() ) { + // ok that key is there if unused. but we need to check that there aren't other + // entries for the key then. 
as it is very rare that we get here, we don't put any // coding effort in here to make this particularly fast - if( !dupsChecked ) { + if( !dupsChecked ) { dupsChecked = true; if( idx.head.btree()->exists(idx, idx.head, key, order) ) { if( idx.head.btree()->wouldCreateDup(idx, idx.head, key, order, recordLoc) ) @@ -503,7 +719,7 @@ namespace mongo { } } else { - if( M.recordLoc == recordLoc ) + if( M.recordLoc == recordLoc ) alreadyInIndex(); uasserted( ASSERT_ID_DUPKEY , dupKeyError( idx , key ) ); } @@ -537,86 +753,378 @@ namespace mongo { return false; } - void BtreeBucket::delBucket(const DiskLoc& thisLoc, IndexDetails& id) { + void BtreeBucket::delBucket(const DiskLoc thisLoc, const IndexDetails& id) { ClientCursor::informAboutToDeleteBucket(thisLoc); // slow... assert( !isHead() ); - BtreeBucket *p = parent.btreemod(); - if ( p->nextChild == thisLoc ) { - p->nextChild.Null(); - } - else { - for ( int i = 0; i < p->n; i++ ) { - if ( p->k(i).prevChildBucket == thisLoc ) { - p->k(i).prevChildBucket.Null(); - goto found; - } - } - out() << "ERROR: can't find ref to deleted bucket.\n"; - out() << "To delete:\n"; - dump(); - out() << "Parent:\n"; - p->dump(); - assert(false); - } -found: + const BtreeBucket *p = parent.btree(); + int parentIdx = indexInParent( thisLoc ); + p->childForPos( parentIdx ).writing().Null(); deallocBucket( thisLoc, id ); } - - void BtreeBucket::deallocBucket(const DiskLoc &thisLoc, IndexDetails &id) { + + void BtreeBucket::deallocBucket(const DiskLoc thisLoc, const IndexDetails &id) { #if 0 - /* as a temporary defensive measure, we zap the whole bucket, AND don't truly delete - it (meaning it is ineligible for reuse). - */ + // as a temporary defensive measure, we zap the whole bucket, AND don't truly delete + // it (meaning it is ineligible for reuse). memset(this, 0, Size()); - modified(thisLoc); #else - //defensive: + // defensive: n = -1; parent.Null(); string ns = id.indexNamespace(); - btreeStore->deleteRecord(ns.c_str(), thisLoc); + theDataFileMgr._deleteRecord(nsdetails(ns.c_str()), ns.c_str(), thisLoc.rec(), thisLoc); #endif } - /* note: may delete the entire bucket! this invalid upon return sometimes. */ - void BtreeBucket::delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p) { - modified(thisLoc); + /** note: may delete the entire bucket! this invalid upon return sometimes. */ + void BtreeBucket::delKeyAtPos( const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order) { assert(n>0); DiskLoc left = childForPos(p); if ( n == 1 ) { if ( left.isNull() && nextChild.isNull() ) { - if ( isHead() ) - _delKeyAtPos(p); // we don't delete the top bucket ever - else - delBucket(thisLoc, id); + _delKeyAtPos(p); + if ( isHead() ) { + // we don't delete the top bucket ever + } + else { + if ( !mayBalanceWithNeighbors( thisLoc, id, order ) ) { + // An empty bucket is only allowed as a transient state. If + // there are no neighbors to balance with, we delete ourself. + // This condition is only expected in legacy btrees. + delBucket(thisLoc, id); + } + } return; } - markUnused(p); + deleteInternalKey( thisLoc, p, id, order ); return; } - if ( left.isNull() ) + if ( left.isNull() ) { _delKeyAtPos(p); - else - markUnused(p); + mayBalanceWithNeighbors( thisLoc, id, order ); + } + else { + deleteInternalKey( thisLoc, p, id, order ); + } } - int qqq = 0; + /** + * This function replaces the specified key (k) by either the prev or next + * key in the btree (k'). We require that k have either a left or right + * child. 
If k has a left child, we set k' to the prev key of k, which must + * be a leaf present in the left child. If k does not have a left child, we + * set k' to the next key of k, which must be a leaf present in the right + * child. When we replace k with k', we copy k' over k (which may cause a + * split) and then remove k' from its original location. Because k' is + * stored in a descendent of k, replacing k by k' will not modify the + * storage location of the original k', and we can easily remove k' from + * its original location. + * + * This function is only needed in cases where k has a left or right child; + * in other cases a simpler key removal implementation is possible. + * + * NOTE on legacy btree structures: + * In legacy btrees, k' can be a nonleaf. In such a case we 'delete' k by + * marking it as an unused node rather than replacing it with k'. Also, k' + * may be a leaf but marked as an unused node. In such a case we replace + * k by k', preserving the key's unused marking. This function is only + * expected to mark a key as unused when handling a legacy btree. + */ + void BtreeBucket::deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order ) { + DiskLoc lchild = childForPos( keypos ); + DiskLoc rchild = childForPos( keypos + 1 ); + assert( !lchild.isNull() || !rchild.isNull() ); + int advanceDirection = lchild.isNull() ? 1 : -1; + int advanceKeyOfs = keypos; + DiskLoc advanceLoc = advance( thisLoc, advanceKeyOfs, advanceDirection, __FUNCTION__ ); + + if ( !advanceLoc.btree()->childForPos( advanceKeyOfs ).isNull() || + !advanceLoc.btree()->childForPos( advanceKeyOfs + 1 ).isNull() ) { + // only expected with legacy btrees, see note above + markUnused( keypos ); + return; + } - /* remove a key from the index */ - bool BtreeBucket::unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc ) { - if ( key.objsize() > KeyMax ) { - OCCASIONALLY problem() << "unindex: key too large to index, skipping " << id.indexNamespace() << /* ' ' << key.toString() << */ endl; + KeyNode kn = advanceLoc.btree()->keyNode( advanceKeyOfs ); + setInternalKey( thisLoc, keypos, kn.recordLoc, kn.key, order, childForPos( keypos ), childForPos( keypos + 1 ), id ); + advanceLoc.btreemod()->delKeyAtPos( advanceLoc, id, advanceKeyOfs, order ); + } + + void BtreeBucket::replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id ) { + assert( n == 0 && !nextChild.isNull() ); + if ( parent.isNull() ) { + assert( id.head == thisLoc ); + id.head.writing() = nextChild; + } + else { + parent.btree()->childForPos( indexInParent( thisLoc ) ).writing() = nextChild; + } + nextChild.btree()->parent.writing() = parent; + ClientCursor::informAboutToDeleteBucket( thisLoc ); + deallocBucket( thisLoc, id ); + } + + bool BtreeBucket::canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const { + assert( leftIndex >= 0 && leftIndex < n ); + DiskLoc leftNodeLoc = childForPos( leftIndex ); + DiskLoc rightNodeLoc = childForPos( leftIndex + 1 ); + if ( leftNodeLoc.isNull() || rightNodeLoc.isNull() ) { + // TODO if this situation is possible in long term implementation, maybe we should compact somehow anyway return false; } + int pos = 0; + { + const BtreeBucket *l = leftNodeLoc.btree(); + const BtreeBucket *r = rightNodeLoc.btree(); + if ( ( headerSize() + l->packedDataSize( pos ) + r->packedDataSize( pos ) + keyNode( leftIndex ).key.objsize() + sizeof(_KeyNode) > unsigned( BucketSize ) ) ) { + return false; + } + } + return true; + } + /** + * This 
implementation must respect the meaning and value of lowWaterMark. + * Also see comments in splitPos(). + */ + int BtreeBucket::rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const { + int split = -1; + int rightSize = 0; + const BtreeBucket *l = childForPos( leftIndex ).btree(); + const BtreeBucket *r = childForPos( leftIndex + 1 ).btree(); + + int KNS = sizeof( _KeyNode ); + int rightSizeLimit = ( l->topSize + l->n * KNS + keyNode( leftIndex ).key.objsize() + KNS + r->topSize + r->n * KNS ) / 2; + // This constraint should be ensured by only calling this function + // if we go below the low water mark. + assert( rightSizeLimit < BtreeBucket::bodySize() ); + for( int i = r->n - 1; i > -1; --i ) { + rightSize += r->keyNode( i ).key.objsize() + KNS; + if ( rightSize > rightSizeLimit ) { + split = l->n + 1 + i; + break; + } + } + if ( split == -1 ) { + rightSize += keyNode( leftIndex ).key.objsize() + KNS; + if ( rightSize > rightSizeLimit ) { + split = l->n; + } + } + if ( split == -1 ) { + for( int i = l->n - 1; i > -1; --i ) { + rightSize += l->keyNode( i ).key.objsize() + KNS; + if ( rightSize > rightSizeLimit ) { + split = i; + break; + } + } + } + // safeguards - we must not create an empty bucket + if ( split < 1 ) { + split = 1; + } + else if ( split > l->n + 1 + r->n - 2 ) { + split = l->n + 1 + r->n - 2; + } + + return split; + } + + void BtreeBucket::doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) { + DiskLoc leftNodeLoc = childForPos( leftIndex ); + DiskLoc rightNodeLoc = childForPos( leftIndex + 1 ); + BtreeBucket *l = leftNodeLoc.btreemod(); + BtreeBucket *r = rightNodeLoc.btreemod(); + int pos = 0; + l->_packReadyForMod( order, pos ); + r->_packReadyForMod( order, pos ); // pack r in case there are droppable keys + + int oldLNum = l->n; + { + KeyNode kn = keyNode( leftIndex ); + l->pushBack( kn.recordLoc, kn.key, order, l->nextChild ); // left child's right child becomes old parent key's left child + } + for( int i = 0; i < r->n; ++i ) { + KeyNode kn = r->keyNode( i ); + l->pushBack( kn.recordLoc, kn.key, order, kn.prevChildBucket ); + } + l->nextChild = r->nextChild; + l->fixParentPtrs( leftNodeLoc, oldLNum ); + r->delBucket( rightNodeLoc, id ); + childForPos( leftIndex + 1 ) = leftNodeLoc; + childForPos( leftIndex ) = DiskLoc(); + _delKeyAtPos( leftIndex, true ); + if ( n == 0 ) { + // will trash this and thisLoc + // TODO To ensure all leaves are of equal height, we should ensure + // this is only called on the root. + replaceWithNextChild( thisLoc, id ); + } + else { + // balance recursively - maybe we should do this even when n == 0? + mayBalanceWithNeighbors( thisLoc, id, order ); + } + } + + int BtreeBucket::indexInParent( const DiskLoc &thisLoc ) const { + assert( !parent.isNull() ); + const BtreeBucket *p = parent.btree(); + if ( p->nextChild == thisLoc ) { + return p->n; + } + else { + for( int i = 0; i < p->n; ++i ) { + if ( p->k( i ).prevChildBucket == thisLoc ) { + return i; + } + } + } + out() << "ERROR: can't find ref to child bucket.\n"; + out() << "child: " << thisLoc << "\n"; + dump(); + out() << "Parent: " << parent << "\n"; + p->dump(); + assert(false); + return -1; // just to compile + } + + bool BtreeBucket::tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const { + // If we can merge, then we must merge rather than balance to preserve + // bucket utilization constraints. 
+ if ( canMergeChildren( thisLoc, leftIndex ) ) { + return false; + } + thisLoc.btreemod()->doBalanceChildren( thisLoc, leftIndex, id, order ); + return true; + } + + void BtreeBucket::doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split, + BtreeBucket *l, const DiskLoc lchild, + BtreeBucket *r, const DiskLoc rchild, + IndexDetails &id, const Ordering &order ) { + // TODO maybe do some audits the same way pushBack() does? + int rAdd = l->n - split; + r->reserveKeysFront( rAdd ); + for( int i = split + 1, j = 0; i < l->n; ++i, ++j ) { + KeyNode kn = l->keyNode( i ); + r->setKey( j, kn.recordLoc, kn.key, kn.prevChildBucket ); + } + { + KeyNode kn = keyNode( leftIndex ); + r->setKey( rAdd - 1, kn.recordLoc, kn.key, l->nextChild ); // left child's right child becomes old parent key's left child + } + r->fixParentPtrs( rchild, 0, rAdd - 1 ); + { + KeyNode kn = l->keyNode( split ); + l->nextChild = kn.prevChildBucket; + setInternalKey( thisLoc, leftIndex, kn.recordLoc, kn.key, order, lchild, rchild, id ); + } + int zeropos = 0; + l->truncateTo( split, order, zeropos ); + } + + void BtreeBucket::doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split, + BtreeBucket *l, const DiskLoc lchild, + BtreeBucket *r, const DiskLoc rchild, + IndexDetails &id, const Ordering &order ) { + int lN = l->n; + { + KeyNode kn = keyNode( leftIndex ); + l->pushBack( kn.recordLoc, kn.key, order, l->nextChild ); // left child's right child becomes old parent key's left child + } + for( int i = 0; i < split - lN - 1; ++i ) { + KeyNode kn = r->keyNode( i ); + l->pushBack( kn.recordLoc, kn.key, order, kn.prevChildBucket ); + } + { + KeyNode kn = r->keyNode( split - lN - 1 ); + l->nextChild = kn.prevChildBucket; + l->fixParentPtrs( lchild, lN + 1, l->n ); + setInternalKey( thisLoc, leftIndex, kn.recordLoc, kn.key, order, lchild, rchild, id ); + } + int zeropos = 0; + r->dropFront( split - lN, order, zeropos ); + } + + void BtreeBucket::doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) { + DiskLoc lchild = childForPos( leftIndex ); + DiskLoc rchild = childForPos( leftIndex + 1 ); + int zeropos = 0; + BtreeBucket *l = lchild.btreemod(); + l->_packReadyForMod( order, zeropos ); + BtreeBucket *r = rchild.btreemod(); + r->_packReadyForMod( order, zeropos ); + int split = rebalancedSeparatorPos( thisLoc, leftIndex ); + + // By definition, if we are below the low water mark and cannot merge + // then we must actively balance. + assert( split != l->n ); + if ( split < l->n ) { + doBalanceLeftToRight( thisLoc, leftIndex, split, l, lchild, r, rchild, id, order ); + } + else { + doBalanceRightToLeft( thisLoc, leftIndex, split, l, lchild, r, rchild, id, order ); + } + } + + bool BtreeBucket::mayBalanceWithNeighbors( const DiskLoc thisLoc, IndexDetails &id, const Ordering &order ) const { + if ( parent.isNull() ) { // we are root, there are no neighbors + return false; + } + + if ( packedDataSize( 0 ) >= lowWaterMark ) { + return false; + } + + const BtreeBucket *p = parent.btree(); + int parentIdx = indexInParent( thisLoc ); + + // TODO will missing neighbor case be possible long term? Should we try to merge/balance somehow in that case if so? 
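        // (Editorial worked scenario, not patch code: suppose delKeyAtPos() drops this
        //  bucket's packedDataSize() below lowWaterMark. The parent is asked to shift
        //  keys from a right or left sibling first -- tryBalanceChildren() refuses only
        //  when canMergeChildren() says both children plus the separator fit in a single
        //  bucket -- and only in that case do we fall through to the heavier
        //  doMergeChildren() path, which may have to re-split later.)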
+ bool mayBalanceRight = ( ( parentIdx < p->n ) && !p->childForPos( parentIdx + 1 ).isNull() ); + bool mayBalanceLeft = ( ( parentIdx > 0 ) && !p->childForPos( parentIdx - 1 ).isNull() ); + + // Balance if possible on one side - we merge only if absolutely necessary + // to preserve btree bucket utilization constraints since that's a more + // heavy duty operation (especially if we must re-split later). + if ( mayBalanceRight && + p->tryBalanceChildren( parent, parentIdx, id, order ) ) { + return true; + } + if ( mayBalanceLeft && + p->tryBalanceChildren( parent, parentIdx - 1, id, order ) ) { + return true; + } + + BtreeBucket *pm = parent.btreemod(); + if ( mayBalanceRight ) { + pm->doMergeChildren( parent, parentIdx, id, order ); + return true; + } + else if ( mayBalanceLeft ) { + pm->doMergeChildren( parent, parentIdx - 1, id, order ); + return true; + } + + return false; + } + + /** remove a key from the index */ + bool BtreeBucket::unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc ) const { int pos; bool found; DiskLoc loc = locate(id, thisLoc, key, Ordering::make(id.keyPattern()), pos, found, recordLoc, 1); if ( found ) { - loc.btree()->delKeyAtPos(loc, id, pos); + + if ( key.objsize() > KeyMax ) { + OCCASIONALLY problem() << "unindex: key too large to index but was found for " << id.indexNamespace() << " reIndex suggested" << endl; + } + + loc.btreemod()->delKeyAtPos(loc, id, pos, Ordering::make(id.keyPattern())); + return true; } return false; @@ -628,40 +1136,68 @@ found: return b; } - inline void fix(const DiskLoc& thisLoc, const DiskLoc& child) { + inline void BtreeBucket::fix(const DiskLoc thisLoc, const DiskLoc child) { if ( !child.isNull() ) { if ( insert_debug ) out() << " " << child.toString() << ".parent=" << thisLoc.toString() << endl; - child.btreemod()->parent = thisLoc; + child.btree()->parent.writing() = thisLoc; } } - /* this sucks. maybe get rid of parent ptrs. */ - void BtreeBucket::fixParentPtrs(const DiskLoc& thisLoc) { + /** this sucks. maybe get rid of parent ptrs. */ + void BtreeBucket::fixParentPtrs(const DiskLoc thisLoc, int firstIndex, int lastIndex) const { VERIFYTHISLOC - fix(thisLoc, nextChild); - for ( int i = 0; i < n; i++ ) - fix(thisLoc, k(i).prevChildBucket); + if ( lastIndex == -1 ) { + lastIndex = n; + } + for ( int i = firstIndex; i <= lastIndex; i++ ) { + fix(thisLoc, childForPos(i)); + } } - /* insert a key in this bucket, splitting if necessary. - keypos - where to insert the key i3n range 0..n. 0=make leftmost, n=make rightmost. - NOTE this function may free some data, and as a result the value passed for keypos may - be invalid after calling insertHere() - */ - void BtreeBucket::insertHere(DiskLoc thisLoc, int keypos, - DiskLoc recordLoc, const BSONObj& key, const Ordering& order, - DiskLoc lchild, DiskLoc rchild, IndexDetails& idx) - { - modified(thisLoc); + void BtreeBucket::setInternalKey( const DiskLoc thisLoc, int keypos, + const DiskLoc recordLoc, const BSONObj &key, const Ordering &order, + const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx ) { + childForPos( keypos ).Null(); + + // This may leave the bucket empty (n == 0) which is ok only as a + // transient state. In the instant case, the implementation of + // insertHere behaves correctly when n == 0 and as a side effect + // increments n. + _delKeyAtPos( keypos, true ); + + // Ensure we do not orphan neighbor's old child. 
+ assert( childForPos( keypos ) == rchild ); + + // Just set temporarily - required to pass validation in insertHere() + childForPos( keypos ) = lchild; + + insertHere( thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx ); + } + + /** + * insert a key in this bucket, splitting if necessary. + * @keypos - where to insert the key in range 0..n. 0=make leftmost, n=make rightmost. + * NOTE this function may free some data, and as a result the value passed for keypos may + * be invalid after calling insertHere() + */ + void BtreeBucket::insertHere( const DiskLoc thisLoc, int keypos, + const DiskLoc recordLoc, const BSONObj& key, const Ordering& order, + const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx) const { if ( insert_debug ) out() << " " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' ' - << lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl; + << lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl; DiskLoc oldLoc = thisLoc; - if ( basicInsert(thisLoc, keypos, recordLoc, key, order) ) { - _KeyNode& kn = k(keypos); + if ( !basicInsert(thisLoc, keypos, recordLoc, key, order) ) { + thisLoc.btreemod()->split(thisLoc, keypos, recordLoc, key, order, lchild, rchild, idx); + return; + } + + { + const _KeyNode *_kn = &k(keypos); + _KeyNode *kn = (_KeyNode *) getDur().alreadyDeclared((_KeyNode*) _kn); // already declared intent in basicInsert() if ( keypos+1 == n ) { // last key if ( nextChild != lchild ) { out() << "ERROR nextChild != lchild" << endl; @@ -671,22 +1207,16 @@ found: out() << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl; out() << " key: " << key.toString() << endl; dump(); -#if 0 - out() << "\n\nDUMPING FULL INDEX" << endl; - bt_dmp=1; - bt_fv=1; - idx.head.btree()->fullValidate(idx.head); -#endif assert(false); } - kn.prevChildBucket = nextChild; - assert( kn.prevChildBucket == lchild ); - nextChild = rchild; + kn->prevChildBucket = nextChild; + assert( kn->prevChildBucket == lchild ); + nextChild.writing() = rchild; if ( !rchild.isNull() ) - rchild.btreemod()->parent = thisLoc; + rchild.btree()->parent.writing() = thisLoc; } else { - k(keypos).prevChildBucket = lchild; + kn->prevChildBucket = lchild; if ( k(keypos+1).prevChildBucket != lchild ) { out() << "ERROR k(keypos+1).prevChildBucket != lchild" << endl; out() << " thisLoc: " << thisLoc.toString() << ' ' << idx.indexNamespace() << endl; @@ -695,33 +1225,24 @@ found: out() << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl; out() << " key: " << key.toString() << endl; dump(); -#if 0 - out() << "\n\nDUMPING FULL INDEX" << endl; - bt_dmp=1; - bt_fv=1; - idx.head.btree()->fullValidate(idx.head); -#endif assert(false); } - k(keypos+1).prevChildBucket = rchild; + const DiskLoc *pc = &k(keypos+1).prevChildBucket; + *getDur().alreadyDeclared((DiskLoc*) pc) = rchild; // declared in basicInsert() if ( !rchild.isNull() ) - rchild.btreemod()->parent = thisLoc; + rchild.btree()->parent.writing() = thisLoc; } return; } + } - /* ---------- split ---------------- */ + void BtreeBucket::split(const DiskLoc thisLoc, int keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx) { + assertWritable(); if ( split_debug ) out() << " " << thisLoc.toString() << ".split" << endl; - int split = n / 2; - if ( keypos == n ) { // see SERVER-983 - split = (int) (0.9 * n); - if ( split > n 
- 2 ) - split = n - 2; - } - + int split = splitPos( keypos ); DiskLoc rLoc = addBucket(idx); BtreeBucket *r = rLoc.btreemod(); if ( split_debug ) @@ -753,15 +1274,14 @@ found: p->pushBack(splitkey.recordLoc, splitkey.key, order, thisLoc); p->nextChild = rLoc; p->assertValid( order ); - parent = idx.head = L; + parent = idx.head.writing() = L; if ( split_debug ) out() << " we were root, making new root:" << hex << parent.getOfs() << dec << endl; - rLoc.btreemod()->parent = parent; + rLoc.btree()->parent.writing() = parent; } else { - /* set this before calling _insert - if it splits it will do fixParent() logic and change the value. - */ - rLoc.btreemod()->parent = parent; + // set this before calling _insert - if it splits it will do fixParent() logic and change the value. + rLoc.btree()->parent.writing() = parent; if ( split_debug ) out() << " promoting splitkey key " << splitkey.key.toString() << endl; parent.btree()->_insert(parent, splitkey.recordLoc, splitkey.key, order, /*dupsallowed*/true, thisLoc, rLoc, idx); @@ -769,16 +1289,17 @@ found: } int newpos = keypos; + // note this may trash splitkey.key. thus we had to promote it before finishing up here. truncateTo(split, order, newpos); // note this may trash splitkey.key. thus we had to promote it before finishing up here. // add our new key, there is room now { - if ( keypos <= split ) { if ( split_debug ) out() << " keypos=0); rLoc.btree()->insertHere(rLoc, kp, recordLoc, key, order, lchild, rchild, idx); @@ -789,26 +1310,27 @@ found: out() << " split end " << hex << thisLoc.getOfs() << dec << endl; } - /* start a new index off, empty */ - DiskLoc BtreeBucket::addBucket(IndexDetails& id) { - DiskLoc loc = btreeStore->insert(id.indexNamespace().c_str(), 0, BucketSize, true); + /** start a new index off, empty */ + DiskLoc BtreeBucket::addBucket(const IndexDetails& id) { + string ns = id.indexNamespace(); + DiskLoc loc = theDataFileMgr.insert(ns.c_str(), 0, BucketSize, true); BtreeBucket *b = loc.btreemod(); b->init(); return loc; } void BtreeBucket::renameIndexNamespace(const char *oldNs, const char *newNs) { - btreeStore->rename( oldNs, newNs ); + renameNamespace( oldNs, newNs ); } - DiskLoc BtreeBucket::getHead(const DiskLoc& thisLoc) { + const DiskLoc BtreeBucket::getHead(const DiskLoc& thisLoc) const { DiskLoc p = thisLoc; while ( !p.btree()->isHead() ) p = p.btree()->parent; return p; } - DiskLoc BtreeBucket::advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) { + DiskLoc BtreeBucket::advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const { if ( keyOfs < 0 || keyOfs >= n ) { out() << "ASSERT failure BtreeBucket::advance, caller: " << caller << endl; out() << " thisLoc: " << thisLoc.toString() << endl; @@ -841,7 +1363,7 @@ found: while ( 1 ) { if ( ancestor.isNull() ) break; - BtreeBucket *an = ancestor.btree(); + const BtreeBucket *an = ancestor.btree(); for ( int i = 0; i < an->n; i++ ) { if ( an->childForPos(i+adj) == childLoc ) { keyOfs = i; @@ -857,7 +1379,7 @@ found: return DiskLoc(); } - DiskLoc BtreeBucket::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, DiskLoc recordLoc, int direction) { + DiskLoc BtreeBucket::locate(const IndexDetails& idx, const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, int& pos, bool& found, const DiskLoc &recordLoc, int direction) const { int p; found = find(idx, key, recordLoc, order, p, /*assertIfDup*/ false); if ( found ) { @@ -880,7 +1402,7 @@ 
found: return pos == n ? DiskLoc() /*theend*/ : thisLoc; } - bool BtreeBucket::customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) { + bool BtreeBucket::customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const { while( 1 ) { if ( l + 1 == h ) { keyOfs = ( direction > 0 ) ? h : l; @@ -889,101 +1411,123 @@ found: bestParent = make_pair( thisLoc, keyOfs ); thisLoc = next; return true; - } else { + } + else { return false; } } int m = l + ( h - l ) / 2; - int cmp = customBSONCmp( thisLoc.btree()->keyNode( m ).key, keyBegin, keyBeginLen, keyEnd, order ); + int cmp = customBSONCmp( thisLoc.btree()->keyNode( m ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ); if ( cmp < 0 ) { l = m; - } else if ( cmp > 0 ) { + } + else if ( cmp > 0 ) { h = m; - } else { + } + else { if ( direction < 0 ) { l = m; - } else { + } + else { h = m; } } - } + } } - - // find smallest/biggest value greater-equal/less-equal than specified - // starting thisLoc + keyOfs will be strictly less than/strictly greater than keyBegin/keyBeginLen/keyEnd - // All the direction checks below allowed me to refactor the code, but possibly separate forward and reverse implementations would be more efficient - void BtreeBucket::advanceTo(const IndexDetails &id, DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction ) { + + /** + * find smallest/biggest value greater-equal/less-equal than specified + * starting thisLoc + keyOfs will be strictly less than/strictly greater than keyBegin/keyBeginLen/keyEnd + * All the direction checks below allowed me to refactor the code, but possibly separate forward and reverse implementations would be more efficient + */ + void BtreeBucket::advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const { int l,h; bool dontGoUp; if ( direction > 0 ) { l = keyOfs; h = n - 1; - dontGoUp = ( customBSONCmp( keyNode( h ).key, keyBegin, keyBeginLen, keyEnd, order ) >= 0 ); - } else { + dontGoUp = ( customBSONCmp( keyNode( h ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); + } + else { l = 0; h = keyOfs; - dontGoUp = ( customBSONCmp( keyNode( l ).key, keyBegin, keyBeginLen, keyEnd, order ) <= 0 ); + dontGoUp = ( customBSONCmp( keyNode( l ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); } pair< DiskLoc, int > bestParent; if ( dontGoUp ) { // this comparison result assures h > l - if ( !customFind( l, h, keyBegin, keyBeginLen, keyEnd, order, direction, thisLoc, keyOfs, bestParent ) ) { + if ( !customFind( l, h, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction, thisLoc, keyOfs, bestParent ) ) { return; } - } else { + } + else { // go up parents until rightmost/leftmost node is >=/<= target or at top while( !thisLoc.btree()->parent.isNull() ) { thisLoc = thisLoc.btree()->parent; if ( direction > 0 ) { - if ( customBSONCmp( 
thisLoc.btree()->keyNode( thisLoc.btree()->n - 1 ).key, keyBegin, keyBeginLen, keyEnd, order ) >= 0 ) { + if ( customBSONCmp( thisLoc.btree()->keyNode( thisLoc.btree()->n - 1 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ) { break; } - } else { - if ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, keyEnd, order ) <= 0 ) { + } + else { + if ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ) { break; - } + } } } } + customLocate( thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction, bestParent ); + } + + void BtreeBucket::customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const { + if ( thisLoc.btree()->n == 0 ) { + thisLoc = DiskLoc(); + return; + } // go down until find smallest/biggest >=/<= target while( 1 ) { - l = 0; - h = thisLoc.btree()->n - 1; + int l = 0; + int h = thisLoc.btree()->n - 1; // leftmost/rightmost key may possibly be >=/<= search key bool firstCheck; if ( direction > 0 ) { - firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, keyEnd, order ) >= 0 ); - } else { - firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, keyEnd, order ) <= 0 ); + firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) >= 0 ); + } + else { + firstCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) <= 0 ); } if ( firstCheck ) { DiskLoc next; if ( direction > 0 ) { next = thisLoc.btree()->k( 0 ).prevChildBucket; keyOfs = 0; - } else { + } + else { next = thisLoc.btree()->nextChild; keyOfs = h; } if ( !next.isNull() ) { - bestParent = make_pair( thisLoc, keyOfs ); + bestParent = pair< DiskLoc, int >( thisLoc, keyOfs ); thisLoc = next; continue; - } else { + } + else { return; } } bool secondCheck; if ( direction > 0 ) { - secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, keyEnd, order ) < 0 ); - } else { - secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, keyEnd, order ) > 0 ); + secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( h ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) < 0 ); + } + else { + secondCheck = ( customBSONCmp( thisLoc.btree()->keyNode( 0 ).key, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction ) > 0 ); } if ( secondCheck ) { DiskLoc next; if ( direction > 0 ) { next = thisLoc.btree()->nextChild; - } else { + } + else { next = thisLoc.btree()->k( 0 ).prevChildBucket; } if ( next.isNull() ) { @@ -991,23 +1535,23 @@ found: thisLoc = bestParent.first; keyOfs = bestParent.second; return; - } else { + } + else { thisLoc = next; continue; } } - if ( !customFind( l, h, keyBegin, keyBeginLen, keyEnd, order, direction, thisLoc, keyOfs, bestParent ) ) { + if ( !customFind( l, h, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction, thisLoc, keyOfs, bestParent ) ) { return; } } } - - /* @thisLoc disk location of *this - */ - int BtreeBucket::_insert(DiskLoc thisLoc, DiskLoc recordLoc, + + /** 
@thisLoc disk location of *this */ + int BtreeBucket::_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, bool dupsAllowed, - DiskLoc lChild, DiskLoc rChild, IndexDetails& idx) { + const DiskLoc lChild, const DiskLoc rChild, IndexDetails& idx) const { if ( key.objsize() > KeyMax ) { problem() << "ERROR: key too large len:" << key.objsize() << " max:" << KeyMax << ' ' << key.objsize() << ' ' << idx.indexNamespace() << endl; return 2; @@ -1018,34 +1562,34 @@ found: bool found = find(idx, key, recordLoc, order, pos, !dupsAllowed); if ( insert_debug ) { out() << " " << thisLoc.toString() << '.' << "_insert " << - key.toString() << '/' << recordLoc.toString() << - " l:" << lChild.toString() << " r:" << rChild.toString() << endl; + key.toString() << '/' << recordLoc.toString() << + " l:" << lChild.toString() << " r:" << rChild.toString() << endl; out() << " found:" << found << " pos:" << pos << " n:" << n << endl; } if ( found ) { - _KeyNode& kn = k(pos); + const _KeyNode& kn = k(pos); if ( kn.isUnused() ) { log(4) << "btree _insert: reusing unused key" << endl; massert( 10285 , "_insert: reuse key but lchild is not null", lChild.isNull()); massert( 10286 , "_insert: reuse key but rchild is not null", rChild.isNull()); - kn.setUsed(); + kn.writing().setUsed(); return 0; } - DEV { - out() << "_insert(): key already exists in index (ok for background:true)\n"; - out() << " " << idx.indexNamespace().c_str() << " thisLoc:" << thisLoc.toString() << '\n'; - out() << " " << key.toString() << '\n'; - out() << " " << "recordLoc:" << recordLoc.toString() << " pos:" << pos << endl; - out() << " old l r: " << childForPos(pos).toString() << ' ' << childForPos(pos+1).toString() << endl; - out() << " new l r: " << lChild.toString() << ' ' << rChild.toString() << endl; + DEV { + log() << "_insert(): key already exists in index (ok for background:true)\n"; + log() << " " << idx.indexNamespace() << " thisLoc:" << thisLoc.toString() << '\n'; + log() << " " << key.toString() << '\n'; + log() << " " << "recordLoc:" << recordLoc.toString() << " pos:" << pos << endl; + log() << " old l r: " << childForPos(pos).toString() << ' ' << childForPos(pos+1).toString() << endl; + log() << " new l r: " << lChild.toString() << ' ' << rChild.toString() << endl; } alreadyInIndex(); } DEBUGGING out() << "TEMP: key: " << key.toString() << endl; - DiskLoc& child = childForPos(pos); + DiskLoc child = childForPos(pos); if ( insert_debug ) out() << " getChild(" << pos << "): " << child.toString() << endl; if ( child.isNull() || !rChild.isNull() /* means an 'internal' insert */ ) { @@ -1056,28 +1600,27 @@ found: return child.btree()->bt_insert(child, recordLoc, key, order, dupsAllowed, idx, /*toplevel*/false); } - void BtreeBucket::dump() { + void BtreeBucket::dump() const { out() << "DUMP btreebucket n:" << n; out() << " parent:" << hex << parent.getOfs() << dec; for ( int i = 0; i < n; i++ ) { out() << '\n'; KeyNode k = keyNode(i); out() << '\t' << i << '\t' << k.key.toString() << "\tleft:" << hex << - k.prevChildBucket.getOfs() << "\tRecLoc:" << k.recordLoc.toString() << dec; + k.prevChildBucket.getOfs() << "\tRecLoc:" << k.recordLoc.toString() << dec; if ( this->k(i).isUnused() ) out() << " UNUSED"; } out() << " right:" << hex << nextChild.getOfs() << dec << endl; } - /* todo: meaning of return code unclear clean up */ - int BtreeBucket::bt_insert(DiskLoc thisLoc, DiskLoc recordLoc, - const BSONObj& key, const Ordering &order, bool dupsAllowed, - IndexDetails& idx, bool toplevel) - { + 
/** todo: meaning of return code unclear clean up */ + int BtreeBucket::bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const BSONObj& key, const Ordering &order, bool dupsAllowed, + IndexDetails& idx, bool toplevel) const { if ( toplevel ) { if ( key.objsize() > KeyMax ) { - problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << endl; + problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() << ' ' << key.objsize() << ' ' << key.toString() << endl; return 3; } } @@ -1088,22 +1631,30 @@ found: return x; } - void BtreeBucket::shape(stringstream& ss) { + void BtreeBucket::shape(stringstream& ss) const { _shape(0, ss); } - - DiskLoc BtreeBucket::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ){ + + int BtreeBucket::getLowWaterMark() { + return lowWaterMark; + } + + int BtreeBucket::getKeyMax() { + return KeyMax; + } + + DiskLoc BtreeBucket::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const { int pos; bool found; - /* TODO: is it really ok here that the order is a default? */ + // TODO: is it really ok here that the order is a default? Ordering o = Ordering::make(BSONObj()); DiskLoc bucket = locate( indexdetails , indexdetails.head , key , o , pos , found , minDiskLoc ); if ( bucket.isNull() ) return bucket; - BtreeBucket *b = bucket.btree(); - while ( 1 ){ - _KeyNode& knraw = b->k(pos); + const BtreeBucket *b = bucket.btree(); + while ( 1 ) { + const _KeyNode& knraw = b->k(pos); if ( knraw.isUsed() ) break; bucket = b->advance( bucket , pos , 1 , "findSingle" ); @@ -1125,7 +1676,7 @@ found: namespace mongo { void BtreeBucket::a_test(IndexDetails& id) { - BtreeBucket *b = id.head.btree(); + BtreeBucket *b = id.head.btreemod(); // record locs for testing DiskLoc A(1, 20); @@ -1171,26 +1722,37 @@ namespace mongo { /* --- BtreeBuilder --- */ - BtreeBuilder::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) : - dupsAllowed(_dupsAllowed), - idx(_idx), - n(0), - order( idx.keyPattern() ), - ordering( Ordering::make(idx.keyPattern()) ) - { + BtreeBuilder::BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx) : + dupsAllowed(_dupsAllowed), + idx(_idx), + n(0), + order( idx.keyPattern() ), + ordering( Ordering::make(idx.keyPattern()) ) { first = cur = BtreeBucket::addBucket(idx); b = cur.btreemod(); committed = false; } - void BtreeBuilder::newBucket() { + void BtreeBuilder::newBucket() { DiskLoc L = BtreeBucket::addBucket(idx); b->tempNext() = L; cur = L; b = cur.btreemod(); } - void BtreeBuilder::addKey(BSONObj& key, DiskLoc loc) { + void BtreeBuilder::mayCommitProgressDurably() { + if ( getDur().commitIfNeeded() ) { + b = cur.btreemod(); + } + } + + void BtreeBuilder::addKey(BSONObj& key, DiskLoc loc) { + if ( key.objsize() > KeyMax ) { + problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace() + << ' ' << key.objsize() << ' ' << key.toString() << endl; + return; + } + if( !dupsAllowed ) { if( n > 0 ) { int cmp = keyLast.woCompare(key, order); @@ -1203,26 +1765,21 @@ namespace mongo { keyLast = key; } - if ( ! 
b->_pushBack(loc, key, ordering, DiskLoc()) ){ - // no room - if ( key.objsize() > KeyMax ) { - problem() << "Btree::insert: key too large to index, skipping " << idx.indexNamespace().c_str() << ' ' << key.objsize() << ' ' << key.toString() << endl; - } - else { - // bucket was full - newBucket(); - b->pushBack(loc, key, ordering, DiskLoc()); - } + if ( ! b->_pushBack(loc, key, ordering, DiskLoc()) ) { + // bucket was full + newBucket(); + b->pushBack(loc, key, ordering, DiskLoc()); } n++; + mayCommitProgressDurably(); } - void BtreeBuilder::buildNextLevel(DiskLoc loc) { + void BtreeBuilder::buildNextLevel(DiskLoc loc) { int levels = 1; - while( 1 ) { - if( loc.btree()->tempNext().isNull() ) { + while( 1 ) { + if( loc.btree()->tempNext().isNull() ) { // only 1 bucket at this level. we are done. - idx.head = loc; + getDur().writingDiskLoc(idx.head) = loc; break; } levels++; @@ -1232,59 +1789,70 @@ namespace mongo { BtreeBucket *up = upLoc.btreemod(); DiskLoc xloc = loc; - while( !xloc.isNull() ) { + while( !xloc.isNull() ) { + if ( getDur().commitIfNeeded() ) { + b = cur.btreemod(); + up = upLoc.btreemod(); + } + BtreeBucket *x = xloc.btreemod(); - BSONObj k; + BSONObj k; DiskLoc r; x->popBack(r,k); bool keepX = ( x->n != 0 ); DiskLoc keepLoc = keepX ? xloc : x->nextChild; - if ( ! up->_pushBack(r, k, ordering, keepLoc) ){ + if ( ! up->_pushBack(r, k, ordering, keepLoc) ) { // current bucket full DiskLoc n = BtreeBucket::addBucket(idx); up->tempNext() = n; - upLoc = n; + upLoc = n; up = upLoc.btreemod(); up->pushBack(r, k, ordering, keepLoc); } - DiskLoc nextLoc = x->tempNext(); /* get next in chain at current level */ + DiskLoc nextLoc = x->tempNext(); // get next in chain at current level if ( keepX ) { - x->parent = upLoc; - } else { + x->parent = upLoc; + } + else { if ( !x->nextChild.isNull() ) x->nextChild.btreemod()->parent = upLoc; x->deallocBucket( xloc, idx ); } xloc = nextLoc; } - + loc = upStart; + mayCommitProgressDurably(); } if( levels > 1 ) log(2) << "btree levels: " << levels << endl; } - /* when all addKeys are done, we then build the higher levels of the tree */ - void BtreeBuilder::commit() { + /** when all addKeys are done, we then build the higher levels of the tree */ + void BtreeBuilder::commit() { buildNextLevel(first); committed = true; } - BtreeBuilder::~BtreeBuilder() { - if( !committed ) { - log(2) << "Rolling back partially built index space" << endl; - DiskLoc x = first; - while( !x.isNull() ) { - DiskLoc next = x.btree()->tempNext(); - btreeStore->deleteRecord(idx.indexNamespace().c_str(), x); - x = next; + BtreeBuilder::~BtreeBuilder() { + DESTRUCTOR_GUARD( + if( !committed ) { + log(2) << "Rolling back partially built index space" << endl; + DiskLoc x = first; + while( !x.isNull() ) { + DiskLoc next = x.btree()->tempNext(); + string ns = idx.indexNamespace(); + theDataFileMgr._deleteRecord(nsdetails(ns.c_str()), ns.c_str(), x.rec(), x); + x = next; + getDur().commitIfNeeded(); + } + assert( idx.head.isNull() ); + log(2) << "done rollback" << endl; } - assert( idx.head.isNull() ); - log(2) << "done rollback" << endl; - } + ) } } diff --git a/db/btree.h b/db/btree.h index 233b4dc..bced95e 100644 --- a/db/btree.h +++ b/db/btree.h @@ -25,8 +25,12 @@ namespace mongo { + const int BucketSize = 8192; + #pragma pack(1) struct _KeyNode { + /** Signals that we are writing this _KeyNode and casts away const */ + _KeyNode& writing() const; DiskLoc prevChildBucket; // the lchild DiskLoc recordLoc; // location of the record associated with the key short keyDataOfs() 
const { @@ -41,15 +45,12 @@ namespace mongo { _kdo = s; assert(s>=0); } - void setUsed() { - recordLoc.GETOFS() &= ~1; - } + void setUsed() { recordLoc.GETOFS() &= ~1; } void setUnused() { - /* Setting ofs to odd is the sentinel for unused, as real recordLoc's are always - even numbers. - Note we need to keep its value basically the same as we use the recordLoc - as part of the key in the index (to handle duplicate keys efficiently). - */ + // Setting ofs to odd is the sentinel for unused, as real recordLoc's are always + // even numbers. + // Note we need to keep its value basically the same as we use the recordLoc + // as part of the key in the index (to handle duplicate keys efficiently). recordLoc.GETOFS() |= 1; } int isUnused() const { @@ -63,7 +64,12 @@ namespace mongo { class BucketBasics; - /* wrapper - this is our in memory representation of the key. _KeyNode is the disk representation. */ + /** + * wrapper - this is our in memory representation of the key. + * _KeyNode is the disk representation. + * + * This object and its bson key will become invalid if the key is moved. + */ class KeyNode { public: KeyNode(const BucketBasics& bb, const _KeyNode &k); @@ -73,51 +79,111 @@ namespace mongo { }; #pragma pack(1) - /* this class is all about the storage management */ - class BucketBasics { + class BtreeData { + protected: + DiskLoc parent; + DiskLoc nextChild; // child bucket off and to the right of the highest key. + unsigned short _wasSize; // can be reused, value is 8192 in current pdfile version Apr2010 + unsigned short _reserved1; // zero + int flags; + + // basicInsert() assumes these three are together and in this order: + int emptySize; // size of the empty region + int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom') + int n; // # of keys so far. + + int reserved; + char data[4]; + }; + + /** + * This class is all about the storage management + * + * Const member functions of this class are those which may be called on + * an object for which writing has not been signaled. Non const member + * functions may only be called on objects for which writing has been + * signaled. Note that currently some const functions write to the + * underlying memory representation of this bucket using optimized methods + * to signal write operations. + * + * DiskLoc parameters that may shadow references within the btree should + * be passed by value rather than by reference to non const member + * functions or const member functions which may perform writes. This way + * a callee need not worry that write operations will change or invalidate + * its arguments. + * + * The current policy for dealing with bson arguments is the opposite of + * what is described above for DiskLoc arguments. We do + * not want to want to copy bson into memory as an intermediate step for + * btree changes, so if bson is to be moved it must be copied to the new + * location before the old location is invalidated. 
+ */ + class BucketBasics : public BtreeData { friend class BtreeBuilder; friend class KeyNode; public: - void dumpTree(DiskLoc thisLoc, const BSONObj &order); - bool isHead() { return parent.isNull(); } - void assertValid(const Ordering &order, bool force = false); - void assertValid(const BSONObj &orderObj, bool force = false) { - return assertValid(Ordering::make(orderObj),force); - } - int fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount = 0); /* traverses everything */ + /** assert write intent declared for this bucket already */ + void assertWritable(); - KeyNode keyNode(int i) const { - if ( i >= n ){ + void assertValid(const Ordering &order, bool force = false) const; + void assertValid(const BSONObj &orderObj, bool force = false) const { return assertValid(Ordering::make(orderObj),force); } + + /** + * @return KeyNode for key at index i. The KeyNode will become invalid + * if the key is moved or reassigned, or if the node is packed. + */ + const KeyNode keyNode(int i) const { + if ( i >= n ) { massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << n ).jsonString() , i < n ); } return KeyNode(*this, k(i)); } - protected: + static int headerSize() { + const BucketBasics *d = 0; + return (char*)&(d->data) - (char*)&(d->parent); + } + static int bodySize() { return BucketSize - headerSize(); } - void modified(const DiskLoc& thisLoc); + // for testing + int nKeys() const { return n; } + const DiskLoc getNextChild() const { return nextChild; } - char * dataAt(short ofs) { - return data + ofs; - } + protected: + char * dataAt(short ofs) { return data + ofs; } void init(); // initialize a new node - /* returns false if node is full and must be split - keypos is where to insert -- inserted after that key #. so keypos=0 is the leftmost one. - */ - bool basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order); - /** - * @return true if works, false if not enough space + * @return false if node is full and must be split + * @keypos is where to insert -- inserted before that key #. so keypos=0 is the leftmost one. + * keypos will be updated if keys are moved as a result of pack() + * This function will modify the btree bucket memory representation even + * though it is marked const. */ - bool _pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild); - void pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild){ + bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order) const; + + /** @return true if works, false if not enough space */ + bool _pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild); + void pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild) { bool ok = _pushBack( recordLoc , key , order , prevChild ); assert(ok); } + + /** + * This is a special purpose function used by BtreeBuilder. The + * interface is quite dangerous if you're not careful. The bson key + * returned here points to bucket memory that has been invalidated but + * not yet reclaimed. + * + * TODO Maybe this could be replaced with two functions, one which + * returns the last key without deleting it and another which simply + * deletes the last key. Then the caller would have enough control to + * ensure proper memory integrity. 
+ */ void popBack(DiskLoc& recLoc, BSONObj& key); - void _delKeyAtPos(int keypos); // low level version that doesn't deal with child ptrs. + + void _delKeyAtPos(int keypos, bool mayEmpty = false); // low level version that doesn't deal with child ptrs. /* !Packed means there is deleted fragment space within the bucket. We "repack" when we run out of space before considering the node @@ -125,145 +191,257 @@ namespace mongo { */ enum Flags { Packed=1 }; - DiskLoc& childForPos(int p) { - return p == n ? nextChild : k(p).prevChildBucket; - } + const DiskLoc& childForPos(int p) const { return p == n ? nextChild : k(p).prevChildBucket; } + DiskLoc& childForPos(int p) { return p == n ? nextChild : k(p).prevChildBucket; } int totalDataSize() const; - void pack( const Ordering &order, int &refPos); - void setNotPacked(); - void setPacked(); + /** @return true if the key may be dropped by pack() */ + bool mayDropKey( int index, int refPos ) const; + + /** + * Pack the bucket to reclaim space from invalidated memory. + * @refPos is an index in the bucket which will may be updated if we + * delete keys from the bucket + * This function may cast away const and perform a write. + */ + void _pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const; + /** Pack when already writable */ + void _packReadyForMod(const Ordering &order, int &refPos); + + /** + * @return the size of non header data in this bucket if we were to + * call pack(). + */ + int packedDataSize( int refPos ) const; + void setNotPacked() { flags &= ~Packed; } + void setPacked() { flags |= Packed; } int _alloc(int bytes); void _unalloc(int bytes); void truncateTo(int N, const Ordering &order, int &refPos); + /** drop specified number of keys from beginning of key array, and pack */ + void dropFront(int nDrop, const Ordering &order, int &refPos); void markUnused(int keypos); - /* BtreeBuilder uses the parent var as a temp place to maintain a linked list chain. - we use tempNext() when we do that to be less confusing. (one might have written a union in C) - */ + /** + * BtreeBuilder uses the parent var as a temp place to maintain a linked list chain. + * we use tempNext() when we do that to be less confusing. (one might have written a union in C) + */ + const DiskLoc& tempNext() const { return parent; } DiskLoc& tempNext() { return parent; } - public: - DiskLoc parent; - - string bucketSummary() const { - stringstream ss; - ss << " Bucket info:" << endl; - ss << " n: " << n << endl; - ss << " parent: " << parent.toString() << endl; - ss << " nextChild: " << parent.toString() << endl; - ss << " flags:" << flags << endl; - ss << " emptySize: " << emptySize << " topSize: " << topSize << endl; - return ss.str(); - } - - bool isUsed( int i ) const { - return k(i).isUsed(); - } + void _shape(int level, stringstream&) const; + int Size() const; + const _KeyNode& k(int i) const { return ((const _KeyNode*)data)[i]; } + _KeyNode& k(int i) { return ((_KeyNode*)data)[i]; } - protected: - void _shape(int level, stringstream&); - DiskLoc nextChild; // child bucket off and to the right of the highest key. + /** @return the key position where a split should occur on insert */ + int splitPos( int keypos ) const; - private: - unsigned short _wasSize; // can be reused, value is 8192 in current pdfile version Apr2010 - unsigned short _reserved1; // zero + /** + * Adds new entries to beginning of key array, shifting existing + * entries to the right. After this is called, setKey() must be called + * on all the newly created entries in the key array. 
+ */ + void reserveKeysFront( int nAdd ); - protected: - int Size() const; - int flags; - int emptySize; // size of the empty region - int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom') - int n; // # of keys so far. - int reserved; - const _KeyNode& k(int i) const { - return ((_KeyNode*)data)[i]; - } - _KeyNode& k(int i) { - return ((_KeyNode*)data)[i]; - } - char data[4]; + /** + * Sets an existing key using the given parameters. + * @i index of key to set + */ + void setKey( int i, const DiskLoc recordLoc, const BSONObj &key, const DiskLoc prevChildBucket ); }; -#pragma pack() -#pragma pack(1) + /** + * This class adds functionality for manipulating buckets that are assembled + * in a tree. The requirements for const and non const functions and + * arguments are generally the same as in BtreeBucket. Because this class + * deals with tree structure, some functions that are marked const may + * trigger modification of another node in the btree or potentially of the + * current node. In such cases, the function's implementation explicitly + * casts away const when indicating an intent to write to the durability + * layer. The DiskLocs provided to such functions should be passed by + * value if they shadow pointers within the btree. + * + * To clarify enforcement of referential integrity in this implementation, + * we use the following pattern when deleting data we have a persistent + * pointer to. The pointer is cleared or removed explicitly, then the data + * it pointed to is cleaned up with a helper function. + * + * TODO It might make sense to put some of these functions in a class + * representing a full btree instead of a single btree bucket. That would + * allow us to use the const qualifier in a manner more consistent with + * standard usage. Right now the interface is for both a node and a tree, + * so assignment of const is sometimes nonideal. + * + * TODO There are several cases in which the this pointer is invalidated + * as a result of deallocation. A seperate class representing a btree would + * alleviate some fragile cases where the implementation must currently + * behave correctly if the this pointer is suddenly invalidated by a + * callee. + */ class BtreeBucket : public BucketBasics { friend class BtreeCursor; public: - void dump(); + bool isHead() const { return parent.isNull(); } + void dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const; + int fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount = 0, bool strict = false) const; /* traverses everything */ - /* @return true if key exists in index + bool isUsed( int i ) const { return k(i).isUsed(); } + string bucketSummary() const; + void dump() const; - order - indicates order of keys in the index. this is basically the index's key pattern, e.g.: - BSONObj order = ((IndexDetails&)idx).keyPattern(); - likewise below in bt_insert() etc. - */ - bool exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, const Ordering& order); + /** + * @return true if key exists in index + * + * @order - indicates order of keys in the index. this is basically the index's key pattern, e.g.: + * BSONObj order = ((IndexDetails&)idx).keyPattern(); + * likewise below in bt_insert() etc. 
+ */ + bool exists(const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order) const; bool wouldCreateDup( - const IndexDetails& idx, DiskLoc thisLoc, + const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order, - DiskLoc self); + const DiskLoc &self) const; + + static DiskLoc addBucket(const IndexDetails&); /* start a new index off, empty */ + /** invalidates 'this' and thisLoc */ + void deallocBucket(const DiskLoc thisLoc, const IndexDetails &id); - static DiskLoc addBucket(IndexDetails&); /* start a new index off, empty */ - void deallocBucket(const DiskLoc &thisLoc, IndexDetails &id); - static void renameIndexNamespace(const char *oldNs, const char *newNs); - int bt_insert(DiskLoc thisLoc, DiskLoc recordLoc, - const BSONObj& key, const Ordering &order, bool dupsAllowed, - IndexDetails& idx, bool toplevel = true); + /** This function may change the btree root */ + int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc, + const BSONObj& key, const Ordering &order, bool dupsAllowed, + IndexDetails& idx, bool toplevel = true) const; - bool unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc); + /** This function may change the btree root */ + bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const; - /* locate may return an "unused" key that is just a marker. so be careful. - looks for a key:recordloc pair. + /** + * locate may return an "unused" key that is just a marker. so be careful. + * looks for a key:recordloc pair. + * + * @found - returns true if exact match found. note you can get back a position + * result even if found is false. + */ + DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, + int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const; - found - returns true if exact match found. note you can get back a position - result even if found is false. 
- */ - DiskLoc locate(const IndexDetails& , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order, - int& pos, bool& found, DiskLoc recordLoc, int direction=1); - /** * find the first instance of the key * does not handle dups - * returned DiskLock isNull if can't find anything with that + * returned DiskLoc isNull if can't find anything with that + * @return the record location of the first match */ - DiskLoc findSingle( const IndexDetails& , const DiskLoc& thisLoc, const BSONObj& key ); + DiskLoc findSingle( const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const; + + /** advance one key position in the index: */ + DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const; - /* advance one key position in the index: */ - DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller); - - void advanceTo(const IndexDetails &id, DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction ); - - DiskLoc getHead(const DiskLoc& thisLoc); + void advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const; + void customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const; - /* get tree shape */ - void shape(stringstream&); + const DiskLoc getHead(const DiskLoc& thisLoc) const; + + /** get tree shape */ + void shape(stringstream&) const; static void a_test(IndexDetails&); - private: - void fixParentPtrs(const DiskLoc& thisLoc); - void delBucket(const DiskLoc& thisLoc, IndexDetails&); - void delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p); - BSONObj keyAt(int keyOfs) { + static int getLowWaterMark(); + static int getKeyMax(); + + protected: + /** + * Fix parent pointers for children + * @firstIndex first index to modify + * @lastIndex last index to modify (-1 means last index is n) + */ + void fixParentPtrs(const DiskLoc thisLoc, int firstIndex = 0, int lastIndex = -1) const; + + /** invalidates this and thisLoc */ + void delBucket(const DiskLoc thisLoc, const IndexDetails&); + /** may invalidate this and thisLoc */ + void delKeyAtPos(const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order); + + /** + * May balance utilization of this bucket with a neighbor, either by + * merging the buckets or shifting nodes. + * @return true iff balancing was performed. + * NOTE This function may invalidate thisLoc. 
+ */ + bool mayBalanceWithNeighbors(const DiskLoc thisLoc, IndexDetails &id, const Ordering &order) const; + + /** @return true if balance succeeded */ + bool tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const; + void doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ); + void doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split, + BtreeBucket *l, const DiskLoc lchild, + BtreeBucket *r, const DiskLoc rchild, + IndexDetails &id, const Ordering &order ); + void doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split, + BtreeBucket *l, const DiskLoc lchild, + BtreeBucket *r, const DiskLoc rchild, + IndexDetails &id, const Ordering &order ); + + /** may invalidate this and thisLoc */ + void doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order); + + /** will invalidate this and thisLoc */ + void replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id ); + + /** @return true iff left and right child can be merged into one node */ + bool canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const; + + /** + * @return index of the rebalanced separator; the index value is + * determined as if we had an array + * .push( ).concat( ) + * This is only expected to be called if the left and right child + * cannot be merged. + * This function is expected to be called on packed buckets, see also + * comments for splitPos(). + */ + int rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const; + + int indexInParent( const DiskLoc &thisLoc ) const; + BSONObj keyAt(int keyOfs) const { return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key; } static BtreeBucket* allocTemp(); /* caller must release with free() */ - void insertHere(DiskLoc thisLoc, int keypos, - DiskLoc recordLoc, const BSONObj& key, const Ordering &order, - DiskLoc lchild, DiskLoc rchild, IndexDetails&); - int _insert(DiskLoc thisLoc, DiskLoc recordLoc, + + /** split bucket */ + void split(const DiskLoc thisLoc, int keypos, + const DiskLoc recordLoc, const BSONObj& key, + const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx); + + void insertHere(const DiskLoc thisLoc, int keypos, + const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, + const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx) const; + + int _insert(const DiskLoc thisLoc, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, bool dupsAllowed, - DiskLoc lChild, DiskLoc rChild, IndexDetails&); - bool find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const Ordering &order, int& pos, bool assertIfDup); - bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ); + const DiskLoc lChild, const DiskLoc rChild, IndexDetails &idx) const; + bool find(const IndexDetails& idx, const BSONObj& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const; + bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const; static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey); - static 
int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, const vector< const BSONElement * > &rEnd, const Ordering &o ); + static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction ); + static void fix(const DiskLoc thisLoc, const DiskLoc child); + + /** Replaces an existing key with the new specified key, splitting if necessary */ + void setInternalKey( const DiskLoc thisLoc, int keypos, + const DiskLoc recordLoc, const BSONObj &key, const Ordering &order, + const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx); + + /** + * Deletes the specified key, replacing it with the key immediately + * preceding or succeeding it in the btree. Either the left or right + * child of the specified key must be non null. + */ + void deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order ); public: - // simply builds and returns a dup key error message string + /** simply builds and returns a dup key error message string */ static string dupKeyError( const IndexDetails& idx , const BSONObj& key ); }; #pragma pack() @@ -271,76 +449,59 @@ namespace mongo { class BtreeCursor : public Cursor { public: BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction ); - BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ); - ~BtreeCursor(){ - } - virtual bool ok() { - return !bucket.isNull(); - } - bool eof() { - return !ok(); - } + virtual bool ok() { return !bucket.isNull(); } virtual bool advance(); - virtual void noteLocation(); // updates keyAtKeyOfs... virtual void checkLocation(); virtual bool supportGetMore() { return true; } virtual bool supportYields() { return true; } - /* used for multikey index traversal to avoid sending back dups. see Matcher::matches(). - if a multikey index traversal: - if loc has already been sent, returns true. - otherwise, marks loc as sent. - @return true if the loc has not been seen - */ + /** + * used for multikey index traversal to avoid sending back dups. see Matcher::matches(). + * if a multikey index traversal: + * if loc has already been sent, returns true. + * otherwise, marks loc as sent. 
+ * @return true if the loc has not been seen + */ virtual bool getsetdup(DiskLoc loc) { - if( multikey ) { - pair::iterator, bool> p = dups.insert(loc); + if( _multikey ) { + pair::iterator, bool> p = _dups.insert(loc); return !p.second; } return false; } - _KeyNode& _currKeyNode() { + virtual bool modifiedKeys() const { return _multikey; } + virtual bool isMultiKey() const { return _multikey; } + + const _KeyNode& _currKeyNode() const { assert( !bucket.isNull() ); - _KeyNode& kn = bucket.btree()->k(keyOfs); + const _KeyNode& kn = bucket.btree()->k(keyOfs); assert( kn.isUsed() ); return kn; } - KeyNode currKeyNode() const { + const KeyNode currKeyNode() const { assert( !bucket.isNull() ); return bucket.btree()->keyNode(keyOfs); } - virtual BSONObj currKey() const { - return currKeyNode().key; - } - virtual BSONObj indexKeyPattern() { - return indexDetails.keyPattern(); - } + virtual BSONObj currKey() const { return currKeyNode().key; } + virtual BSONObj indexKeyPattern() { return indexDetails.keyPattern(); } virtual void aboutToDeleteBucket(const DiskLoc& b) { if ( bucket == b ) keyOfs = -1; } - virtual DiskLoc currLoc() { - return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); - } - virtual DiskLoc refLoc() { - return currLoc(); - } - virtual Record* _current() { - return currLoc().rec(); - } - virtual BSONObj current() { - return BSONObj(_current()); - } + virtual DiskLoc currLoc() { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); } + virtual DiskLoc refLoc() { return currLoc(); } + virtual Record* _current() { return currLoc().rec(); } + virtual BSONObj current() { return BSONObj(_current()); } virtual string toString() { string s = string("BtreeCursor ") + indexDetails.indexName(); - if ( direction < 0 ) s += " reverse"; - if ( bounds_.get() && bounds_->size() > 1 ) s += " multi"; + if ( _direction < 0 ) s += " reverse"; + if ( _bounds.get() && _bounds->size() > 1 ) s += " multi"; return s; } @@ -351,77 +512,81 @@ namespace mongo { virtual BSONObj prettyIndexBounds() const { if ( !_independentFieldRanges ) { return BSON( "start" << prettyKey( startKey ) << "end" << prettyKey( endKey ) ); - } else { - return bounds_->obj(); + } + else { + return _bounds->obj(); } } - + void forgetEndKey() { endKey = BSONObj(); } virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); } - - virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { - _matcher = matcher; - } - // for debugging only - DiskLoc getBucket() const { return bucket; } - + virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { _matcher = matcher; } + + virtual long long nscanned() { return _nscanned; } + + /** for debugging only */ + const DiskLoc getBucket() const { return bucket; } + private: - /* Our btrees may (rarely) have "unused" keys when items are deleted. - Skip past them. - */ + /** + * Our btrees may (rarely) have "unused" keys when items are deleted. + * Skip past them. 
+ */ bool skipUnusedKeys( bool mayJump ); bool skipOutOfRangeKeysAndCheckEnd(); void skipAndCheck(); void checkEnd(); - // selective audits on construction + /** selective audits on construction */ void audit(); - // set initial bucket + /** set initial bucket */ void init(); - void advanceTo( const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd); - + /** if afterKey is true, we want the first key with values of the keyBegin fields greater than keyBegin */ + void advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive ); + friend class BtreeBucket; - set dups; - NamespaceDetails *d; - int idxNo; - + + set _dups; + NamespaceDetails * const d; + const int idxNo; BSONObj startKey; BSONObj endKey; - bool endKeyInclusive_; - - bool multikey; // note this must be updated every getmore batch in case someone added a multikey... - + bool _endKeyInclusive; + bool _multikey; // this must be updated every getmore batch in case someone added a multikey const IndexDetails& indexDetails; - BSONObj order; - Ordering _ordering; + const BSONObj _order; + const Ordering _ordering; DiskLoc bucket; int keyOfs; - int direction; // 1=fwd,-1=reverse + const int _direction; // 1=fwd,-1=reverse BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call DiskLoc locAtKeyOfs; - shared_ptr< FieldRangeVector > bounds_; + const shared_ptr< FieldRangeVector > _bounds; auto_ptr< FieldRangeVector::Iterator > _boundsIterator; const IndexSpec& _spec; shared_ptr< CoveredIndexMatcher > _matcher; bool _independentFieldRanges; + long long _nscanned; }; - inline bool IndexDetails::hasKey(const BSONObj& key) { + inline bool IndexDetails::hasKey(const BSONObj& key) { return head.btree()->exists(*this, head, key, Ordering::make(keyPattern())); } - inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) { + inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) { return head.btree()->wouldCreateDup(*this, head, key, Ordering::make(keyPattern()), self); } - /* build btree from the bottom up */ - /* _ TODO dropDups */ + /** + * build btree from the bottom up + * _ TODO dropDups + */ class BtreeBuilder { - bool dupsAllowed; + bool dupsAllowed; IndexDetails& idx; unsigned long long n; BSONObj keyLast; @@ -434,18 +599,20 @@ namespace mongo { void newBucket(); void buildNextLevel(DiskLoc); + void mayCommitProgressDurably(); public: ~BtreeBuilder(); BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx); - /* keys must be added in order */ + /** keys must be added in order */ void addKey(BSONObj& key, DiskLoc loc); - /* commit work. if not called, destructor will clean up partially completed work - (in case exception has happened). - */ + /** + * commit work. if not called, destructor will clean up partially completed work + * (in case exception has happened). 
+ */ void commit(); unsigned long long getn() { return n; } diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp index d6d0c09..9cab95f 100644 --- a/db/btreecursor.cpp +++ b/db/btreecursor.cpp @@ -20,54 +20,56 @@ #include "btree.h" #include "pdfile.h" #include "jsobj.h" -#include "curop.h" +#include "curop-inl.h" namespace mongo { extern int otherTraceLevel; - BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails &_id, + BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails &_id, const BSONObj &_startKey, const BSONObj &_endKey, bool endKeyInclusive, int _direction ) : - d(_d), idxNo(_idxNo), - startKey( _startKey ), - endKey( _endKey ), - endKeyInclusive_( endKeyInclusive ), - multikey( d->isMultikey( idxNo ) ), - indexDetails( _id ), - order( _id.keyPattern() ), - _ordering( Ordering::make( order ) ), - direction( _direction ), - _spec( _id.getSpec() ), - _independentFieldRanges( false ) - { + d(_d), idxNo(_idxNo), + startKey( _startKey ), + endKey( _endKey ), + _endKeyInclusive( endKeyInclusive ), + _multikey( d->isMultikey( idxNo ) ), + indexDetails( _id ), + _order( _id.keyPattern() ), + _ordering( Ordering::make( _order ) ), + _direction( _direction ), + _spec( _id.getSpec() ), + _independentFieldRanges( false ), + _nscanned( 0 ) { audit(); init(); - DEV assert( dups.size() == 0 ); + dassert( _dups.size() == 0 ); } BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction ) : - d(_d), idxNo(_idxNo), - endKeyInclusive_( true ), - multikey( d->isMultikey( idxNo ) ), - indexDetails( _id ), - order( _id.keyPattern() ), - _ordering( Ordering::make( order ) ), - direction( _direction ), - bounds_( ( assert( _bounds.get() ), _bounds ) ), - _boundsIterator( new FieldRangeVector::Iterator( *bounds_ ) ), - _spec( _id.getSpec() ), - _independentFieldRanges( true ) - { + d(_d), idxNo(_idxNo), + _endKeyInclusive( true ), + _multikey( d->isMultikey( idxNo ) ), + indexDetails( _id ), + _order( _id.keyPattern() ), + _ordering( Ordering::make( _order ) ), + _direction( _direction ), + _bounds( ( assert( _bounds.get() ), _bounds ) ), + _boundsIterator( new FieldRangeVector::Iterator( *_bounds ) ), + _spec( _id.getSpec() ), + _independentFieldRanges( true ), + _nscanned( 0 ) { massert( 13384, "BtreeCursor FieldRangeVector constructor doesn't accept special indexes", !_spec.getType() ); audit(); - startKey = bounds_->startKey(); - bool found; + startKey = _bounds->startKey(); _boundsIterator->advance( startKey ); // handles initialization - bucket = indexDetails.head.btree()-> - locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, direction > 0 ? minDiskLoc : maxDiskLoc, direction); + _boundsIterator->prepDive(); + pair< DiskLoc, int > noBestParent; + bucket = indexDetails.head; + keyOfs = 0; + indexDetails.head.btree()->customLocate( bucket, keyOfs, startKey, 0, false, _boundsIterator->cmp(), _boundsIterator->inc(), _ordering, _direction, noBestParent ); skipAndCheck(); - DEV assert( dups.size() == 0 ); + dassert( _dups.size() == 0 ); } void BtreeCursor::audit() { @@ -76,7 +78,7 @@ namespace mongo { if ( otherTraceLevel >= 12 ) { if ( otherTraceLevel >= 200 ) { out() << "::BtreeCursor() qtl>200. validating entire index." << endl; - indexDetails.head.btree()->fullValidate(indexDetails.head, order); + indexDetails.head.btree()->fullValidate(indexDetails.head, _order); } else { out() << "BTreeCursor(). 
dumping head bucket" << endl; @@ -86,17 +88,20 @@ namespace mongo { } void BtreeCursor::init() { - if ( _spec.getType() ){ + if ( _spec.getType() ) { startKey = _spec.getType()->fixKey( startKey ); endKey = _spec.getType()->fixKey( endKey ); } bool found; bucket = indexDetails.head.btree()-> - locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, direction > 0 ? minDiskLoc : maxDiskLoc, direction); + locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found, _direction > 0 ? minDiskLoc : maxDiskLoc, _direction); + if ( ok() ) { + _nscanned = 1; + } skipUnusedKeys( false ); checkEnd(); } - + void BtreeCursor::skipAndCheck() { skipUnusedKeys( true ); while( 1 ) { @@ -109,7 +114,7 @@ namespace mongo { } } } - + bool BtreeCursor::skipOutOfRangeKeysAndCheckEnd() { if ( !ok() ) { return false; @@ -118,25 +123,30 @@ namespace mongo { if ( ret == -2 ) { bucket = DiskLoc(); return false; - } else if ( ret == -1 ) { + } + else if ( ret == -1 ) { + ++_nscanned; return false; } - advanceTo( currKeyNode().key, ret, _boundsIterator->cmp() ); + ++_nscanned; + advanceTo( currKeyNode().key, ret, _boundsIterator->after(), _boundsIterator->cmp(), _boundsIterator->inc() ); return true; } - + /* skip unused keys. */ bool BtreeCursor::skipUnusedKeys( bool mayJump ) { int u = 0; while ( 1 ) { if ( !ok() ) break; - BtreeBucket *b = bucket.btree(); - _KeyNode& kn = b->k(keyOfs); + const BtreeBucket *b = bucket.btree(); + const _KeyNode& kn = b->k(keyOfs); if ( kn.isUsed() ) break; - bucket = b->advance(bucket, keyOfs, direction, "skipUnusedKeys"); + bucket = b->advance(bucket, keyOfs, _direction, "skipUnusedKeys"); u++; + //don't include unused keys in nscanned + //++_nscanned; if ( mayJump && ( u % 10 == 0 ) ) { skipOutOfRangeKeysAndCheckEnd(); } @@ -158,31 +168,34 @@ namespace mongo { if ( bucket.isNull() ) return; if ( !endKey.isEmpty() ) { - int cmp = sgn( endKey.woCompare( currKey(), order ) ); - if ( ( cmp != 0 && cmp != direction ) || - ( cmp == 0 && !endKeyInclusive_ ) ) + int cmp = sgn( endKey.woCompare( currKey(), _order ) ); + if ( ( cmp != 0 && cmp != _direction ) || + ( cmp == 0 && !_endKeyInclusive ) ) bucket = DiskLoc(); } } - - void BtreeCursor::advanceTo( const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd) { - bucket.btree()->advanceTo( indexDetails, bucket, keyOfs, keyBegin, keyBeginLen, keyEnd, _ordering, direction ); + + void BtreeCursor::advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive) { + bucket.btree()->advanceTo( bucket, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, _ordering, _direction ); } - + bool BtreeCursor::advance() { killCurrentOp.checkForInterrupt(); if ( bucket.isNull() ) return false; - bucket = bucket.btree()->advance(bucket, keyOfs, direction, "BtreeCursor::advance"); + bucket = bucket.btree()->advance(bucket, keyOfs, _direction, "BtreeCursor::advance"); if ( !_independentFieldRanges ) { skipUnusedKeys( false ); checkEnd(); - return ok(); + if ( ok() ) { + ++_nscanned; + } + } + else { + skipAndCheck(); } - - skipAndCheck(); return ok(); } @@ -207,10 +220,10 @@ namespace mongo { if ( eof() ) return; - multikey = d->isMultikey(idxNo); + _multikey = d->isMultikey(idxNo); if ( keyOfs >= 0 ) { - BtreeBucket *b = bucket.btree(); + const BtreeBucket *b = bucket.btree(); assert( !keyAtKeyOfs.isEmpty() ); @@ -219,17 +232,17 @@ namespace mongo { int x = 0; while( 1 ) { if ( 
b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && - b->k(keyOfs).recordLoc == locAtKeyOfs ) { - if ( !b->k(keyOfs).isUsed() ) { - /* we were deleted but still exist as an unused - marker key. advance. - */ - skipUnusedKeys( false ); - } - return; + b->k(keyOfs).recordLoc == locAtKeyOfs ) { + if ( !b->k(keyOfs).isUsed() ) { + /* we were deleted but still exist as an unused + marker key. advance. + */ + skipUnusedKeys( false ); + } + return; } - /* we check one key earlier too, in case a key was just deleted. this is + /* we check one key earlier too, in case a key was just deleted. this is important so that multi updates are reasonably fast. */ if( keyOfs == 0 || x++ ) @@ -245,7 +258,7 @@ namespace mongo { bool found; /* TODO: Switch to keep indexdetails and do idx.head! */ - bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, _ordering, keyOfs, found, locAtKeyOfs, direction); + bucket = indexDetails.head.btree()->locate(indexDetails, indexDetails.head, keyAtKeyOfs, _ordering, keyOfs, found, locAtKeyOfs, _direction); RARELY log() << " key seems to have moved in the index, refinding. found:" << found << endl; if ( ! bucket.isNull() ) skipUnusedKeys( false ); diff --git a/db/cap.cpp b/db/cap.cpp index c676429..198bd54 100644 --- a/db/cap.cpp +++ b/db/cap.cpp @@ -1,4 +1,5 @@ -// @file cap.cpp capped collection related +// @file cap.cpp capped collection related +// the "old" version (<= v1.6) /** * Copyright (C) 2008 10gen Inc. @@ -49,7 +50,7 @@ namespace mongo { /* combine adjacent deleted records *for the current extent* of the capped collection - + this is O(n^2) but we call it for capped tables where typically n==1 or 2! (or 3...there will be a little unused sliver at the end of the extent.) */ @@ -62,7 +63,8 @@ namespace mongo { DiskLoc i = cappedFirstDeletedInCurExtent(); for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted ) drecs.push_back( i ); - cappedFirstDeletedInCurExtent() = i; + + getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i; // This is the O(n^2) part. drecs.sort(); @@ -80,7 +82,7 @@ namespace mongo { DiskLoc b = *j; while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) { // a & b are adjacent. merge. 
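As an aside on the compact() hunk above: the coalescing rule applied to a capped collection's deleted records is simply "same data file, and the first record's offset plus its lengthWithHeaders lands exactly on the second record's offset". Below is a minimal standalone sketch of that rule, assuming invented stand-in types (Drec, mergeAdjacent); it is illustration only and not code from this patch.

#include <algorithm>
#include <iostream>
#include <vector>

struct Drec {      // stand-in for a deleted record reference
    int fileNo;    // DiskLoc::a() in the real code
    int ofs;       // DiskLoc::getOfs()
    int len;       // DeletedRecord::lengthWithHeaders
};

// Sort by (file, offset), then fold together runs that touch end-to-end,
// mirroring: a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs()
std::vector<Drec> mergeAdjacent(std::vector<Drec> drecs) {
    std::sort(drecs.begin(), drecs.end(), [](const Drec& x, const Drec& y) {
        return x.fileNo != y.fileNo ? x.fileNo < y.fileNo : x.ofs < y.ofs;
    });
    std::vector<Drec> out;
    for (const Drec& d : drecs) {
        if (!out.empty() && out.back().fileNo == d.fileNo &&
                out.back().ofs + out.back().len == d.ofs)
            out.back().len += d.len;   // adjacent: merge the second record into the first
        else
            out.push_back(d);
    }
    return out;
}

int main() {
    std::vector<Drec> drecs = { {0, 5000, 200}, {0, 4800, 200}, {0, 6000, 100} };
    for (const Drec& d : mergeAdjacent(drecs))
        std::cout << d.fileNo << ':' << d.ofs << " len " << d.len << '\n';
    // prints "0:4800 len 400" then "0:6000 len 100": the two touching records merged
    return 0;
}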
- a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders; + getDur().writingInt( a.drec()->lengthWithHeaders ) += b.drec()->lengthWithHeaders; j++; if ( j == drecs.end() ) { DEBUGGING out() << "temp: compact adddelrec2\n"; @@ -106,8 +108,8 @@ namespace mongo { // migrate old NamespaceDetails format assert( capped ); if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) { - capFirstNewRecord = DiskLoc(); - capFirstNewRecord.setInvalid(); + //capFirstNewRecord = DiskLoc(); + capFirstNewRecord.writing().setInvalid(); // put all the DeletedRecords in cappedListOfAllDeletedRecords() for ( int i = 1; i < Buckets; ++i ) { DiskLoc first = deletedList[ i ]; @@ -115,14 +117,14 @@ namespace mongo { continue; DiskLoc last = first; for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted ); - last.drec()->nextDeleted = cappedListOfAllDeletedRecords(); - cappedListOfAllDeletedRecords() = first; - deletedList[ i ] = DiskLoc(); + last.drec()->nextDeleted.writing() = cappedListOfAllDeletedRecords(); + cappedListOfAllDeletedRecords().writing() = first; + deletedList[i].writing() = DiskLoc(); } // NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above // Last, in case we're killed before getting here - capExtent = firstExtent; + capExtent.writing() = firstExtent; } } @@ -144,20 +146,20 @@ namespace mongo { // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent // (or DiskLoc() if new capExtent == firstExtent) if ( capExtent == lastExtent ) - cappedLastDelRecLastExtent() = DiskLoc(); + getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc(); else { DiskLoc i = cappedFirstDeletedInCurExtent(); for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted ); - cappedLastDelRecLastExtent() = i; + getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = i; } - capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext; + getDur().writingDiskLoc( capExtent ) = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext; /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */ //dassert( theCapExtent()->ns == ns ); theCapExtent()->assertOk(); - capFirstNewRecord = DiskLoc(); + getDur().writingDiskLoc( capFirstNewRecord ) = DiskLoc(); } DiskLoc NamespaceDetails::__capAlloc( int len ) { @@ -176,25 +178,25 @@ namespace mongo { /* unlink ourself from the deleted list */ if ( !ret.isNull() ) { if ( prev.isNull() ) - cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted; + cappedListOfAllDeletedRecords().writing() = ret.drec()->nextDeleted; else - prev.drec()->nextDeleted = ret.drec()->nextDeleted; - ret.drec()->nextDeleted.setInvalid(); // defensive. + prev.drec()->nextDeleted.writing() = ret.drec()->nextDeleted; + ret.drec()->nextDeleted.writing().setInvalid(); // defensive. assert( ret.drec()->extentOfs < ret.getOfs() ); } return ret; } - DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) { + DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) { // signal done allocating new extents. 
if ( !cappedLastDelRecLastExtent().isValid() ) - cappedLastDelRecLastExtent() = DiskLoc(); - + getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc(); + assert( len < 400000000 ); int passes = 0; int maxPasses = ( len / 30 ) + 2; // 30 is about the smallest entry that could go in the oplog - if ( maxPasses < 5000 ){ + if ( maxPasses < 5000 ) { // this is for bacwards safety since 5000 was the old value maxPasses = 5000; } @@ -208,7 +210,7 @@ namespace mongo { theCapExtent()->assertOk(); DiskLoc firstEmptyExtent; while ( 1 ) { - if ( nrecords < max ) { + if ( stats.nrecords < max ) { loc = __capAlloc( len ); if ( !loc.isNull() ) break; @@ -217,8 +219,9 @@ namespace mongo { // If on first iteration through extents, don't delete anything. if ( !capFirstNewRecord.isValid() ) { advanceCapExtent( ns ); + if ( capExtent != firstExtent ) - capFirstNewRecord.setInvalid(); + capFirstNewRecord.writing().setInvalid(); // else signal done with first iteration through extents. continue; } @@ -247,14 +250,14 @@ namespace mongo { compact(); if( ++passes > maxPasses ) { log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n'; - log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl; + log() << "passes max:" << max << " nrecords:" << stats.nrecords << " datasize: " << stats.datasize << endl; massert( 10345 , "passes >= maxPasses in capped collection alloc", false ); } } // Remember first record allocated on this iteration through capExtent. if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() ) - capFirstNewRecord = loc; + getDur().writingDiskLoc(capFirstNewRecord) = loc; return loc; } @@ -269,123 +272,179 @@ namespace mongo { } } - void NamespaceDetails::cappedDumpDelInfo() { + void NamespaceDetails::cappedDumpDelInfo() { cout << "dl[0]: " << deletedList[0].toString() << endl; - for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) { - cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders << - " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl; + for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) { + cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders << + " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl; } cout << "dl[1]: " << deletedList[1].toString() << endl; } - /* everything from end on, eliminate from the capped collection. - @param inclusive if true, deletes end (i.e. closed or open range) - */ + void NamespaceDetails::cappedTruncateLastDelUpdate() { + if ( capExtent == firstExtent ) { + // Only one extent of the collection is in use, so there + // is no deleted record in a previous extent, so nullify + // cappedLastDelRecLastExtent(). + cappedLastDelRecLastExtent().writing() = DiskLoc(); + } + else { + // Scan through all deleted records in the collection + // until the last deleted record for the extent prior + // to the new capExtent is found. Then set + // cappedLastDelRecLastExtent() to that deleted record. + DiskLoc i = cappedListOfAllDeletedRecords(); + for( ; + !i.drec()->nextDeleted.isNull() && + !inCapExtent( i.drec()->nextDeleted ); + i = i.drec()->nextDeleted ); + // In our capped storage model, every extent must have at least one + // deleted record. Here we check that 'i' is not the last deleted + // record. (We expect that there will be deleted records in the new + // capExtent as well.) 
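A quick worked example of the retry bound computed in cappedAlloc() above, for readers who do not want to do the arithmetic: maxPasses is roughly one pass per 30 bytes requested (30 being about the smallest oplog entry), floored at the pre-1.8 constant of 5000. This is a standalone illustration, not code from the tree.

#include <iostream>

// Same arithmetic as NamespaceDetails::cappedAlloc() above.
int cappedAllocMaxPasses(int len) {
    int maxPasses = (len / 30) + 2;   // ~one pass per 30 bytes requested
    if (maxPasses < 5000)
        maxPasses = 5000;             // backwards-safety floor (the old fixed value)
    return maxPasses;
}

int main() {
    std::cout << cappedAllocMaxPasses(100)     << '\n';  // 5000   (floor applies)
    std::cout << cappedAllocMaxPasses(1000000) << '\n';  // 33335  (1 MB allocation)
    return 0;
}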
+ assert( !i.drec()->nextDeleted.isNull() ); + cappedLastDelRecLastExtent().writing() = i; + } + } + void NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive) { DEV assert( this == nsdetails(ns) ); assert( cappedLastDelRecLastExtent().isValid() ); - + + // We iteratively remove the newest document until the newest document + // is 'end', then we remove 'end' if requested. bool foundLast = false; while( 1 ) { if ( foundLast ) { + // 'end' has been found and removed, so break. break; } + // 'curr' will point to the newest document in the collection. DiskLoc curr = theCapExtent()->lastRecord; assert( !curr.isNull() ); if ( curr == end ) { if ( inclusive ) { + // 'end' has been found, so break next iteration. foundLast = true; - } else { + } + else { + // 'end' has been found, so break. break; } } - - uassert( 13415, "emptying the collection is not allowed", nrecords > 1 ); - + + // TODO The algorithm used in this function cannot generate an + // empty collection, but we could call emptyCappedCollection() in + // this case instead of asserting. + uassert( 13415, "emptying the collection is not allowed", stats.nrecords > 1 ); + + // Delete the newest record, and coalesce the new deleted + // record with existing deleted records. + theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); + compact(); + + // This is the case where we have not yet had to remove any + // documents to make room for other documents, and we are allocating + // documents from free space in fresh extents instead of reusing + // space from familiar extents. if ( !capLooped() ) { - theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); - compact(); + + // We just removed the last record from the 'capExtent', and + // the 'capExtent' can't be empty, so we set 'capExtent' to + // capExtent's prev extent. if ( theCapExtent()->lastRecord.isNull() ) { assert( !theCapExtent()->xprev.isNull() ); - capExtent = theCapExtent()->xprev; + // NOTE Because we didn't delete the last document, and + // capLooped() is false, capExtent is not the first extent + // so xprev will be nonnull. + capExtent.writing() = theCapExtent()->xprev; theCapExtent()->assertOk(); - if ( capExtent == firstExtent ) { - cappedLastDelRecLastExtent() = DiskLoc(); - } else { - // slow - there's no prev ptr for deleted rec - DiskLoc i = cappedListOfAllDeletedRecords(); - for( ; - !i.drec()->nextDeleted.isNull() && - !inCapExtent( i.drec()->nextDeleted ); - i = i.drec()->nextDeleted ); - assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent - cappedLastDelRecLastExtent() = i; - } + + // update cappedLastDelRecLastExtent() + cappedTruncateLastDelUpdate(); } continue; } - theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); - compact(); - if ( curr == capFirstNewRecord ) { // invalid, but can compare locations - capExtent = ( capExtent == firstExtent ) ? 
lastExtent : theCapExtent()->xprev; - theCapExtent()->assertOk(); - assert( !theCapExtent()->firstRecord.isNull() ); - capFirstNewRecord = theCapExtent()->firstRecord; - if ( capExtent == firstExtent ) { - cappedLastDelRecLastExtent() = DiskLoc(); - } else { - // slow - there's no prev ptr for deleted rec - DiskLoc i = cappedListOfAllDeletedRecords(); - for( ; - !i.drec()->nextDeleted.isNull() && - !inCapExtent( i.drec()->nextDeleted ); - i = i.drec()->nextDeleted ); - assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent - cappedLastDelRecLastExtent() = i; + // This is the case where capLooped() is true, and we just deleted + // from capExtent, and we just deleted capFirstNewRecord, which was + // the last record on the fresh side of capExtent. + // NOTE In this comparison, curr and potentially capFirstNewRecord + // may point to invalid data, but we can still compare the + // references themselves. + if ( curr == capFirstNewRecord ) { + + // Set 'capExtent' to the first nonempty extent prior to the + // initial capExtent. There must be such an extent because we + // have not deleted the last document in the collection. It is + // possible that all extents other than the capExtent are empty. + // In this case we will keep the initial capExtent and specify + // that all records contained within are on the fresh rather than + // stale side of the extent. + DiskLoc newCapExtent = capExtent; + do { + // Find the previous extent, looping if necessary. + newCapExtent = ( newCapExtent == firstExtent ) ? lastExtent : newCapExtent.ext()->xprev; + newCapExtent.ext()->assertOk(); } + while ( newCapExtent.ext()->firstRecord.isNull() ); + capExtent.writing() = newCapExtent; + + // Place all documents in the new capExtent on the fresh side + // of the capExtent by setting capFirstNewRecord to the first + // document in the new capExtent. + capFirstNewRecord.writing() = theCapExtent()->firstRecord; + + // update cappedLastDelRecLastExtent() + cappedTruncateLastDelUpdate(); } } } - + void NamespaceDetails::emptyCappedCollection( const char *ns ) { DEV assert( this == nsdetails(ns) ); massert( 13424, "collection must be capped", capped ); - massert( 13425, "background index build in progress", !backgroundIndexBuildInProgress ); + massert( 13425, "background index build in progress", !indexBuildInProgress ); massert( 13426, "indexes present", nIndexes == 0 ); + // Clear all references to this namespace. ClientCursor::invalidate( ns ); - NamespaceDetailsTransient::clearForPrefix( ns ); + NamespaceDetailsTransient::clearForPrefix( ns ); + + // Get a writeable reference to 'this' and reset all pertinent + // attributes. 
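To summarize the cappedTruncateAfter() hunk above in isolation: the function repeatedly deletes the newest record until it reaches 'end', deletes 'end' itself only when 'inclusive' is set, and refuses to empty the collection entirely. Below is a minimal standalone sketch of that control flow over a plain vector of ids; the helper name and types are invented for illustration and elide all of the extent and capFirstNewRecord bookkeeping the real code performs.

#include <cassert>
#include <iostream>
#include <vector>

// docs.back() plays the role of theCapExtent()->lastRecord (the newest document).
void cappedTruncateAfterSketch(std::vector<int>& docs, int end, bool inclusive) {
    bool foundLast = false;
    while (true) {
        if (foundLast)
            break;                        // 'end' was deleted on the previous pass
        int curr = docs.back();           // newest remaining document
        if (curr == end) {
            if (inclusive)
                foundLast = true;         // delete 'end' now, stop on the next pass
            else
                break;                    // keep 'end', stop immediately
        }
        assert(docs.size() > 1 && "emptying the collection is not allowed");
        docs.pop_back();                  // delete the newest document
    }
}

int main() {
    std::vector<int> docs = {1, 2, 3, 4, 5};
    cappedTruncateAfterSketch(docs, 3, /*inclusive=*/false);
    for (int d : docs) std::cout << d << ' ';   // prints: 1 2 3
    std::cout << '\n';
    return 0;
}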
+ NamespaceDetails *t = writingWithoutExtra(); + + t->cappedLastDelRecLastExtent() = DiskLoc(); + t->cappedListOfAllDeletedRecords() = DiskLoc(); - cappedLastDelRecLastExtent() = DiskLoc(); - cappedListOfAllDeletedRecords() = DiskLoc(); - // preserve firstExtent/lastExtent - capExtent = firstExtent; - datasize = nrecords = 0; + t->capExtent = firstExtent; + t->stats.datasize = stats.nrecords = 0; // lastExtentSize preserve // nIndexes preserve 0 // capped preserve true // max preserve - paddingFactor = 1.0; - flags = 0; - capFirstNewRecord = DiskLoc(); - capFirstNewRecord.setInvalid(); - cappedLastDelRecLastExtent().setInvalid(); + t->paddingFactor = 1.0; + t->flags = 0; + t->capFirstNewRecord = DiskLoc(); + t->capFirstNewRecord.setInvalid(); + t->cappedLastDelRecLastExtent().setInvalid(); // dataFileVersion preserve // indexFileVersion preserve - multiKeyIndexBits = 0; - reservedA = 0; - extraOffset = 0; - // backgroundIndexBuildInProgress preserve 0 - memset(reserved, 0, sizeof(reserved)); + t->multiKeyIndexBits = 0; + t->reservedA = 0; + t->extraOffset = 0; + // indexBuildInProgress preserve 0 + memset(t->reserved, 0, sizeof(t->reserved)); + // Reset all existing extents and recreate the deleted list. for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) { DiskLoc prev = ext.ext()->xprev; DiskLoc next = ext.ext()->xnext; DiskLoc empty = ext.ext()->reuse( ns ); - ext.ext()->xprev = prev; - ext.ext()->xnext = next; + ext.ext()->xprev.writing() = prev; + ext.ext()->xnext.writing() = next; addDeletedRec( empty.drec(), empty ); } } diff --git a/db/client.cpp b/db/client.cpp index f9653f5..e4fd4b9 100644 --- a/db/client.cpp +++ b/db/client.cpp @@ -16,14 +16,14 @@ * along with this program. If not, see . */ -/* Client represents a connection to the database (the server-side) and corresponds +/* Client represents a connection to the database (the server-side) and corresponds to an open socket (or logical connection if pooling on sockets) from a client. */ #include "pch.h" #include "db.h" #include "client.h" -#include "curop.h" +#include "curop-inl.h" #include "json.h" #include "security.h" #include "commands.h" @@ -40,20 +40,31 @@ namespace mongo { set Client::clients; // always be in clientsMutex when manipulating this boost::thread_specific_ptr currentClient; - Client::Client(const char *desc, MessagingPort *p) : - _context(0), - _shutdown(false), - _desc(desc), - _god(0), - _lastOp(0), - _mp(p) - { + /* each thread which does db operations has a Client object in TLS. + call this when your thread starts. + */ + Client& Client::initThread(const char *desc, MessagingPort *mp) { + assert( currentClient.get() == 0 ); + Client *c = new Client(desc, mp); + currentClient.reset(c); + mongo::lastError.initThread(); + return *c; + } + + Client::Client(const char *desc, MessagingPort *p) : + _context(0), + _shutdown(false), + _desc(desc), + _god(0), + _lastOp(0), + _mp(p) { + _connectionId = setThreadName(desc); _curOp = new CurOp( this ); scoped_lock bl(clientsMutex); clients.insert(this); } - Client::~Client() { + Client::~Client() { _god = 0; if ( _context ) @@ -62,90 +73,33 @@ namespace mongo { if ( ! _shutdown ) { error() << "Client::shutdown not called: " << _desc << endl; } - + scoped_lock bl(clientsMutex); if ( ! _shutdown ) clients.erase(this); delete _curOp; } - - void Client::_dropns( const string& ns ){ - Top::global.collectionDropped( ns ); - - dblock l; - Client::Context ctx( ns ); - if ( ! 
nsdetails( ns.c_str() ) ) - return; - - try { - string err; - BSONObjBuilder b; - dropCollection( ns , err , b ); - } - catch ( ... ){ - warning() << "error dropping temp collection: " << ns << endl; - } - - } - - void Client::_invalidateDB( const string& db ) { - assert( db.find( '.' ) == string::npos ); - - set::iterator min = _tempCollections.lower_bound( db + "." ); - set::iterator max = _tempCollections.lower_bound( db + "|" ); - - _tempCollections.erase( min , max ); - - } - - void Client::invalidateDB(const string& db) { - scoped_lock bl(clientsMutex); - for ( set::iterator i = clients.begin(); i!=clients.end(); i++ ){ - Client* cli = *i; - cli->_invalidateDB(db); - } - } - void Client::invalidateNS( const string& ns ){ - scoped_lock bl(clientsMutex); - for ( set::iterator i = clients.begin(); i!=clients.end(); i++ ){ - Client* cli = *i; - cli->_tempCollections.erase( ns ); - } - } - - - void Client::addTempCollection( const string& ns ) { - _tempCollections.insert( ns ); - } - - bool Client::shutdown(){ + bool Client::shutdown() { _shutdown = true; if ( inShutdown() ) return false; { scoped_lock bl(clientsMutex); clients.erase(this); - } - - bool didAnything = false; - - if ( _tempCollections.size() ){ - didAnything = true; - for ( set::iterator i = _tempCollections.begin(); i!=_tempCollections.end(); i++ ){ - _dropns( *i ); + if ( isSyncThread() ) { + syncThread = 0; } - _tempCollections.clear(); } - - return didAnything; + + return false; } - BSONObj CurOp::_tooBig = fromjson("{\"$msg\":\"query not recording (too large)\"}"); + BSONObj CachedBSONObj::_tooBig = fromjson("{\"$msg\":\"query not recording (too large)\"}"); AtomicUInt CurOp::_nextOpNum; - + Client::Context::Context( string ns , Database * db, bool doauth ) - : _client( currentClient.get() ) , _oldContext( _client->_context ) , + : _client( currentClient.get() ) , _oldContext( _client->_context ) , _path( dbpath ) , _lock(0) , _justCreated(false) { assert( db && db->isOk() ); _ns = ns; @@ -155,20 +109,36 @@ namespace mongo { _auth(); } - void Client::Context::_finishInit( bool doauth ){ + Client::Context::Context(const string& ns, string path , mongolock * lock , bool doauth ) + : _client( currentClient.get() ) , _oldContext( _client->_context ) , + _path( path ) , _lock( lock ) , + _ns( ns ), _db(0) { + _finishInit( doauth ); + } + + /* this version saves the context but doesn't yet set the new one: */ + + Client::Context::Context() + : _client( currentClient.get() ) , _oldContext( _client->_context ), + _path( dbpath ) , _lock(0) , _justCreated(false), _db(0) { + _client->_context = this; + clear(); + } + + void Client::Context::_finishInit( bool doauth ) { int lockState = dbMutex.getState(); assert( lockState ); - + _db = dbHolder.get( _ns , _path ); - if ( _db ){ + if ( _db ) { _justCreated = false; } - else if ( dbMutex.getState() > 0 ){ + else if ( dbMutex.getState() > 0 ) { // already in a write lock _db = dbHolder.getOrCreate( _ns , _path , _justCreated ); assert( _db ); } - else if ( dbMutex.getState() < -1 ){ + else if ( dbMutex.getState() < -1 ) { // nested read lock :( assert( _lock ); _lock->releaseAndWriteLock(); @@ -181,50 +151,52 @@ namespace mongo { // to do that, we're going to unlock, then get a write lock // this is so that if this is the first query and its long doesn't block db // we just have to check that the db wasn't closed in the interim where we unlock - for ( int x=0; x<2; x++ ){ - { + for ( int x=0; x<2; x++ ) { + { dbtemprelease unlock; writelock lk( _ns ); dbHolder.getOrCreate( _ns , 
_path , _justCreated ); } - + _db = dbHolder.get( _ns , _path ); - + if ( _db ) break; - + log() << "db was closed on us right after we opened it: " << _ns << endl; } - + uassert( 13005 , "can't create db, keeps getting closed" , _db ); } - - _client->_context = this; - _client->_curOp->enter( this ); - if ( doauth ) - _auth( lockState ); - switch ( _client->_curOp->getOp() ){ + switch ( _client->_curOp->getOp() ) { case dbGetMore: // getMore's are special and should be handled else where case dbUpdate: // update & delete check shard version in instance.cpp, so don't check here as well - case dbDelete: + case dbDelete: break; default: { string errmsg; - if ( ! shardVersionOk( _ns , lockState > 0 , errmsg ) ){ - msgasserted( StaleConfigInContextCode , (string)"[" + _ns + "] shard version not ok in Client::Context: " + errmsg ); + if ( ! shardVersionOk( _ns , lockState > 0 , errmsg ) ) { + ostringstream os; + os << "[" << _ns << "] shard version not ok in Client::Context: " << errmsg; + msgassertedNoTrace( StaleConfigInContextCode , os.str().c_str() ); } } } + + _client->_context = this; + _client->_curOp->enter( this ); + if ( doauth ) + _auth( lockState ); } - - void Client::Context::_auth( int lockState ){ + + void Client::Context::_auth( int lockState ) { if ( _client->_ai.isAuthorizedForLock( _db->name , lockState ) ) return; // before we assert, do a little cleanup _client->_context = _oldContext; // note: _oldContext may be null - + stringstream ss; ss << "unauthorized db:" << _db->name << " lock type:" << lockState << " client:" << _client->clientAddress(); uasserted( 10057 , ss.str() ); @@ -236,9 +208,35 @@ namespace mongo { _client->_context = _oldContext; // note: _oldContext may be null } - string Client::clientAddress() const { + bool Client::Context::inDB( const string& db , const string& path ) const { + if ( _path != path ) + return false; + + if ( db == _ns ) + return true; + + string::size_type idx = _ns.find( db ); + if ( idx != 0 ) + return false; + + return _ns[db.size()] == '.'; + } + + void Client::appendLastOp( BSONObjBuilder& b ) const { + if( theReplSet ) { + b.append("lastOp" , (long long) _lastOp); + } + else { + OpTime lo(_lastOp); + if ( ! 
lo.isNull() ) + b.appendTimestamp( "lastOp" , lo.asDate() ); + } + } + + + string Client::clientAddress(bool includePort) const { if( _curOp ) - return _curOp->getRemoteString(false); + return _curOp->getRemoteString(includePort); return ""; } @@ -249,63 +247,75 @@ namespace mongo { return ss.str(); } - string sayClientState(){ + string sayClientState() { Client* c = currentClient.get(); if ( !c ) return "no client"; return c->toString(); } - - void curopWaitingForLock( int type ){ + + Client* curopWaitingForLock( int type ) { Client * c = currentClient.get(); assert( c ); CurOp * co = c->curop(); - if ( co ){ + if ( co ) { co->waitingForLock( type ); } + return c; } - void curopGotLock(){ - Client * c = currentClient.get(); + void curopGotLock(Client *c) { assert(c); CurOp * co = c->curop(); - if ( co ){ + if ( co ) co->gotLock(); - } } - CurOp::~CurOp(){ - if ( _wrapped ){ - scoped_lock bl(Client::clientsMutex); - _client->_curOp = _wrapped; + void KillCurrentOp::interruptJs( AtomicUInt *op ) { + if ( !globalScriptEngine ) + return; + if ( !op ) { + globalScriptEngine->interruptAll(); } - - _client = 0; + else { + globalScriptEngine->interrupt( *op ); + } + } + + void KillCurrentOp::killAll() { + _globalKill = true; + interruptJs( 0 ); } - BSONObj CurOp::query( bool threadSafe ) { - if( querySize() == 1 ) { - return _tooBig; + void KillCurrentOp::kill(AtomicUInt i) { + bool found = false; + { + scoped_lock l( Client::clientsMutex ); + for( set< Client* >::const_iterator j = Client::clients.begin(); !found && j != Client::clients.end(); ++j ) { + for( CurOp *k = ( *j )->curop(); !found && k; k = k->parent() ) { + if ( k->opNum() == i ) { + k->kill(); + for( CurOp *l = ( *j )->curop(); l != k; l = l->parent() ) { + l->kill(); + } + found = true; + } + } + } } - - if ( ! threadSafe ){ - BSONObj o(_queryBuf); - return o; + if ( found ) { + interruptJs( &i ); } - - int size = querySize(); - int before = checksum( _queryBuf , size ); - BSONObj a(_queryBuf); - BSONObj b = a.copy(); - int after = checksum( _queryBuf , size ); - - if ( before == after ) - return b; - - return BSON( "msg" << "query changed while capturing" ); } + CurOp::~CurOp() { + if ( _wrapped ) { + scoped_lock bl(Client::clientsMutex); + _client->_curOp = _wrapped; + } + _client = 0; + } - BSONObj CurOp::infoNoauth( int attempt ) { + BSONObj CurOp::infoNoauth() { BSONObjBuilder b; b.append("opid", _opNum); bool a = _active && _start; @@ -313,40 +323,16 @@ namespace mongo { if ( _lockType ) b.append("lockType" , _lockType > 0 ? "write" : "read" ); b.append("waitingForLock" , _waitingForLock ); - - if( a ){ + + if( a ) { b.append("secs_running", elapsedSeconds() ); } - + b.append( "op" , opToString( _op ) ); - + b.append("ns", _ns); - - { - int size = querySize(); - if ( size == 0 ){ - // do nothing - } - else if ( size == 1 ){ - b.append( "query" , _tooBig ); - } - else if ( attempt > 2 ){ - b.append( "query" , BSON( "err" << "can't get a clean object" ) ); - log( LL_WARNING ) << "CurOp changing too much to get reading" << endl; - - } - else { - int before = checksum( _queryBuf , size ); - b.appendObject( "query" , _queryBuf , size ); - int after = checksum( _queryBuf , size ); - - if ( after != before ){ - // this means something changed - // going to retry - return infoNoauth( attempt + 1 ); - } - } - } + + _query.append( b , "query" ); // b.append("inLock", ?? stringstream clientStr; @@ -355,9 +341,9 @@ namespace mongo { if ( _client ) b.append( "desc" , _client->desc() ); - - if ( ! 
_message.empty() ){ - if ( _progressMeter.isActive() ){ + + if ( ! _message.empty() ) { + if ( _progressMeter.isActive() ) { StringBuilder buf(128); buf << _message.toString() << " " << _progressMeter.toString(); b.append( "msg" , buf.str() ); @@ -370,7 +356,7 @@ namespace mongo { return b.obj(); } - void Client::gotHandshake( const BSONObj& o ){ + void Client::gotHandshake( const BSONObj& o ) { BSONObjIterator i(o); { @@ -378,7 +364,7 @@ namespace mongo { assert( id.type() ); _remoteId = id.wrap( "_id" ); } - + BSONObjBuilder b; while ( i.more() ) b.append( i.next() ); @@ -388,31 +374,31 @@ namespace mongo { class HandshakeCmd : public Command { public: void help(stringstream& h) const { h << "internal"; } - HandshakeCmd() : Command( "handshake" ){} - virtual LockType locktype() const { return NONE; } + HandshakeCmd() : Command( "handshake" ) {} + virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return false; } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { Client& c = cc(); c.gotHandshake( cmdObj ); return 1; - } + } } handshakeCmd; class ClientListPlugin : public WebStatusPlugin { public: - ClientListPlugin() : WebStatusPlugin( "clients" , 20 ){} - virtual void init(){} - - virtual void run( stringstream& ss ){ + ClientListPlugin() : WebStatusPlugin( "clients" , 20 ) {} + virtual void init() {} + + virtual void run( stringstream& ss ) { using namespace mongoutils::html; ss << "\n"; ss << "" << th( a("", "Connections to the database, both internal and external.", "Client") ) << th( a("http://www.mongodb.org/display/DOCS/Viewing+and+Terminating+Current+Operation", "", "OpId") ) - << "" + << "" << "" << "" << "" @@ -426,11 +412,11 @@ namespace mongo { << "\n"; { scoped_lock bl(Client::clientsMutex); - for( set::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { + for( set::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { Client *c = *i; CurOp& co = *(c->curop()); ss << ""; - + tablecell( ss , co.opNum() ); tablecell( ss , co.active() ); { @@ -447,8 +433,9 @@ namespace mongo { tablecell( ss , "" ); tablecell( ss , co.getOp() ); tablecell( ss , co.getNS() ); - if ( co.haveQuery() ) - tablecell( ss , co.query( true ) ); + if ( co.haveQuery() ) { + tablecell( ss , co.query() ); + } else tablecell( ss , "" ); tablecell( ss , co.getRemoteString() ); @@ -463,18 +450,18 @@ namespace mongo { ss << "
ActiveActiveLockTypeWaitingSecsRunning
" << c->desc() << "
\n"; } - + } clientListPlugin; - int Client::recommendedYieldMicros( int * writers , int * readers ){ + int Client::recommendedYieldMicros( int * writers , int * readers ) { int num = 0; int w = 0; int r = 0; { scoped_lock bl(clientsMutex); - for ( set::iterator i=clients.begin(); i!=clients.end(); ++i ){ + for ( set::iterator i=clients.begin(); i!=clients.end(); ++i ) { Client* c = *i; - if ( c->curop()->isWaitingForLock() ){ + if ( c->curop()->isWaitingForLock() ) { num++; if ( c->curop()->getLockType() > 0 ) w++; @@ -483,15 +470,44 @@ namespace mongo { } } } - + if ( writers ) *writers = w; if ( readers ) *readers = r; - if ( num > 50 ) - num = 50; + int time = r * 100; + time += w * 500; + + time = min( time , 1000000 ); + + // there has been a kill request for this op - we should yield to allow the op to stop + // This function returns empty string if we aren't interrupted + if ( killCurrentOp.checkForInterruptNoAssert( false )[0] != '\0' ) { + return 100; + } + + return time; + } + + int Client::getActiveClientCount( int& writers, int& readers ) { + writers = 0; + readers = 0; + + scoped_lock bl(clientsMutex); + for ( set::iterator i=clients.begin(); i!=clients.end(); ++i ) { + Client* c = *i; + if ( ! c->curop()->active() ) + continue; + + int l = c->curop()->getLockType(); + if ( l > 0 ) + writers++; + else if ( l < 0 ) + readers++; + + } - return num * 100; + return writers + readers; } } diff --git a/db/client.h b/db/client.h index d0600e3..4e8589e 100644 --- a/db/client.h +++ b/db/client.h @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -/* Client represents a connection to the database (the server-side) and corresponds +/* Client represents a connection to the database (the server-side) and corresponds to an open socket (or logical connection if pooling on sockets) from a client. todo: switch to asio...this will fit nicely with that. @@ -26,11 +26,11 @@ #include "../pch.h" #include "security.h" -#include "namespace.h" +#include "namespace-inl.h" #include "lasterror.h" #include "stats/top.h" -namespace mongo { +namespace mongo { extern class ReplSet *theReplSet; class AuthenticationInfo; @@ -42,18 +42,83 @@ namespace mongo { extern boost::thread_specific_ptr currentClient; - class Client : boost::noncopyable { + typedef long long ConnectionId; + + class Client : boost::noncopyable { public: + class Context; + + static mongo::mutex clientsMutex; + static set clients; // always be in clientsMutex when manipulating this + static int recommendedYieldMicros( int * writers = 0 , int * readers = 0 ); + static int getActiveClientCount( int& writers , int& readers ); + static Client *syncThread; - void iAmSyncThread() { + + + /* each thread which does db operations has a Client object in TLS. + call this when your thread starts. 
+ */ + static Client& initThread(const char *desc, MessagingPort *mp = 0); + + /* + this has to be called as the client goes away, but before thread termination + @return true if anything was done + */ + bool shutdown(); + + + ~Client(); + + void iAmSyncThread() { wassert( syncThread == 0 ); - syncThread = this; + syncThread = this; } bool isSyncThread() const { return this == syncThread; } // true if this client is the replication secondary pull thread - static mongo::mutex clientsMutex; - static set clients; // always be in clientsMutex when manipulating this - static int recommendedYieldMicros( int * writers = 0 , int * readers = 0 ); + + string clientAddress(bool includePort=false) const; + AuthenticationInfo * getAuthenticationInfo() { return &_ai; } + bool isAdmin() { return _ai.isAuthorized( "admin" ); } + CurOp* curop() const { return _curOp; } + Context* getContext() const { return _context; } + Database* database() const { return _context ? _context->db() : 0; } + const char *ns() const { return _context->ns(); } + const char *desc() const { return _desc; } + void setLastOp( ReplTime op ) { _lastOp = op; } + ReplTime getLastOp() const { return _lastOp; } + + /* report what the last operation was. used by getlasterror */ + void appendLastOp( BSONObjBuilder& b ) const; + + bool isGod() const { return _god; } /* this is for map/reduce writes */ + string toString() const; + void gotHandshake( const BSONObj& o ); + BSONObj getRemoteID() const { return _remoteId; } + BSONObj getHandshake() const { return _handshake; } + + MessagingPort * port() const { return _mp; } + + ConnectionId getConnectionId() const { return _connectionId; } + + private: + ConnectionId _connectionId; // > 0 for things "conn", 0 otherwise + CurOp * _curOp; + Context * _context; + bool _shutdown; + const char *_desc; + bool _god; + AuthenticationInfo _ai; + ReplTime _lastOp; + BSONObj _handshake; + BSONObj _remoteId; + MessagingPort * const _mp; + + Client(const char *desc, MessagingPort *p = 0); + + friend class CurOp; + + public: /* set _god=true temporarily, safely */ class GodScope { @@ -63,201 +128,99 @@ namespace mongo { ~GodScope(); }; + /* Set database we want to use, then, restores when we finish (are out of scope) Note this is also helpful if an exception happens as the state if fixed up. 
*/ - class Context : boost::noncopyable{ - Client * _client; - Context * _oldContext; - - string _path; - mongolock * _lock; - bool _justCreated; - - string _ns; - Database * _db; - + class Context : boost::noncopyable { + public: /** - * at this point _client, _oldContext and _ns have to be set - * _db should not have been touched - * this will set _db and create if needed - * will also set _client->_context to this + * this is the main constructor + * use this unless there is a good reason not to */ - void _finishInit( bool doauth=true); - - void _auth( int lockState = dbMutex.getState() ); - public: - Context(const string& ns, string path=dbpath, mongolock * lock = 0 , bool doauth=true ) - : _client( currentClient.get() ) , _oldContext( _client->_context ) , - _path( path ) , _lock( lock ) , - _ns( ns ), _db(0){ - _finishInit( doauth ); - } - + Context(const string& ns, string path=dbpath, mongolock * lock = 0 , bool doauth=true ); + /* this version saves the context but doesn't yet set the new one: */ - - Context() - : _client( currentClient.get() ) , _oldContext( _client->_context ), - _path( dbpath ) , _lock(0) , _justCreated(false), _db(0){ - _client->_context = this; - clear(); - } - + Context(); + /** * if you are doing this after allowing a write there could be a race condition * if someone closes that db. this checks that the DB is still valid */ Context( string ns , Database * db, bool doauth=true ); - + ~Context(); - Client* getClient() const { return _client; } + Client* getClient() const { return _client; } Database* db() const { return _db; } - const char * ns() const { return _ns.c_str(); } + const char * ns() const { return _ns.c_str(); } + + /** @return if the db was created by this Context */ bool justCreated() const { return _justCreated; } - bool equals( const string& ns , const string& path=dbpath ) const { - return _ns == ns && _path == path; - } + bool equals( const string& ns , const string& path=dbpath ) const { return _ns == ns && _path == path; } - bool inDB( const string& db , const string& path=dbpath ) const { - if ( _path != path ) - return false; - - if ( db == _ns ) - return true; - - string::size_type idx = _ns.find( db ); - if ( idx != 0 ) - return false; - - return _ns[db.size()] == '.'; - } + /** + * @return true iff the current Context is using db/path + */ + bool inDB( const string& db , const string& path=dbpath ) const; - void clear(){ - _ns = ""; - _db = 0; - } + void clear() { _ns = ""; _db = 0; } /** * call before unlocking, so clear any non-thread safe state */ - void unlocked(){ - _db = 0; - } + void unlocked() { _db = 0; } /** * call after going back into the lock, will re-establish non-thread safe stuff */ - void relocked(){ - _finishInit(); - } + void relocked() { _finishInit(); } friend class CurOp; - }; // class Client::Context - - private: - void _dropns( const string& ns ); - - CurOp * _curOp; - Context * _context; - bool _shutdown; - set _tempCollections; - const char *_desc; - bool _god; - AuthenticationInfo _ai; - ReplTime _lastOp; - BSONObj _handshake; - BSONObj _remoteId; - - public: - MessagingPort * const _mp; - string clientAddress() const; - AuthenticationInfo * getAuthenticationInfo(){ return &_ai; } - bool isAdmin() { return _ai.isAuthorized( "admin" ); } - CurOp* curop() { return _curOp; } - Context* getContext(){ return _context; } - Database* database() { return _context ? 
_context->db() : 0; } - const char *ns() const { return _context->ns(); } - const char *desc() const { return _desc; } - - Client(const char *desc, MessagingPort *p = 0); - ~Client(); + private: + /** + * at this point _client, _oldContext and _ns have to be set + * _db should not have been touched + * this will set _db and create if needed + * will also set _client->_context to this + */ + void _finishInit( bool doauth=true); - void addTempCollection( const string& ns ); - - void _invalidateDB(const string& db); - static void invalidateDB(const string& db); - static void invalidateNS( const string& ns ); + void _auth( int lockState = dbMutex.getState() ); - void setLastOp( ReplTime op ) { _lastOp = op; } - ReplTime getLastOp() const { return _lastOp; } + Client * _client; + Context * _oldContext; - /* report what the last operation was. used by getlasterror */ - void appendLastOp( BSONObjBuilder& b ) { - if( theReplSet ) { - b.append("lastOp" , (long long) _lastOp); - } - else { - OpTime lo(_lastOp); - if ( ! lo.isNull() ) - b.appendTimestamp( "lastOp" , lo.asDate() ); - } - } + string _path; + mongolock * _lock; + bool _justCreated; - /* each thread which does db operations has a Client object in TLS. - call this when your thread starts. - */ - static Client& initThread(const char *desc, MessagingPort *mp = 0); + string _ns; + Database * _db; - /* - this has to be called as the client goes away, but before thread termination - @return true if anything was done - */ - bool shutdown(); - - /* this is for map/reduce writes */ - bool isGod() const { return _god; } + }; // class Client::Context - friend class CurOp; - string toString() const; - void gotHandshake( const BSONObj& o ); - BSONObj getRemoteID() const { return _remoteId; } - BSONObj getHandshake() const { return _handshake; } }; - + /** get the Client object for this thread. */ - inline Client& cc() { + inline Client& cc() { Client * c = currentClient.get(); assert( c ); return *c; } - /* each thread which does db operations has a Client object in TLS. - call this when your thread starts. - */ - inline Client& Client::initThread(const char *desc, MessagingPort *mp) { - setThreadName(desc); - assert( currentClient.get() == 0 ); - Client *c = new Client(desc, mp); - currentClient.reset(c); - mongo::lastError.initThread(); - return *c; - } - - inline Client::GodScope::GodScope(){ + inline Client::GodScope::GodScope() { _prev = cc()._god; cc()._god = true; } - inline Client::GodScope::~GodScope(){ - cc()._god = _prev; - } + inline Client::GodScope::~GodScope() { cc()._god = _prev; } - /* this unlocks, does NOT upgrade. that works for our current usage */ - inline void mongolock::releaseAndWriteLock() { + /* this unlocks, does NOT upgrade. 
that works for our current usage */ + inline void mongolock::releaseAndWriteLock() { if( !_writelock ) { #if BOOST_VERSION >= 103500 @@ -278,6 +241,6 @@ namespace mongo { } string sayClientState(); - + inline bool haveClient() { return currentClient.get() > 0; } }; diff --git a/db/clientcursor.cpp b/db/clientcursor.cpp index 23ef529..bc09457 100644 --- a/db/clientcursor.cpp +++ b/db/clientcursor.cpp @@ -32,18 +32,18 @@ namespace mongo { - typedef multimap CCByLoc; - CCById ClientCursor::clientCursorsById; boost::recursive_mutex ClientCursor::ccmutex; long long ClientCursor::numberTimedOut = 0; - /*static*/ void ClientCursor::assertNoCursors() { + void aboutToDeleteForSharding( const Database* db , const DiskLoc& dl ); // from s/d_logic.h + + /*static*/ void ClientCursor::assertNoCursors() { recursive_scoped_lock lock(ccmutex); - if( clientCursorsById.size() ) { + if( clientCursorsById.size() ) { log() << "ERROR clientcursors exist but should not at this point" << endl; ClientCursor *cc = clientCursorsById.begin()->second; - log() << "first one: " << cc->cursorid << ' ' << cc->ns << endl; + log() << "first one: " << cc->_cursorid << ' ' << cc->_ns << endl; clientCursorsById.clear(); assert(false); } @@ -51,18 +51,19 @@ namespace mongo { void ClientCursor::setLastLoc_inlock(DiskLoc L) { + assert( _pos != -2 ); // defensive - see ~ClientCursor + if ( L == _lastLoc ) return; CCByLoc& bl = byLoc(); + if ( !_lastLoc.isNull() ) { - CCByLoc::iterator i = kv_find(bl, _lastLoc, this); - if ( i != bl.end() ) - bl.erase(i); + bl.erase( ByLocKey( _lastLoc, _cursorid ) ); } if ( !L.isNull() ) - bl.insert( make_pair(L, this) ); + bl[ByLocKey(L,_cursorid)] = this; _lastLoc = L; } @@ -74,8 +75,8 @@ namespace mongo { /* todo: this implementation is incomplete. we use it as a prefix for dropDatabase, which works fine as the prefix will end with '.'. however, when used with drop and - dropIndexes, this could take out cursors that belong to something else -- if you - drop "foo", currently, this will kill cursors for "foobar". + dropIndexes, this could take out cursors that belong to something else -- if you + drop "foo", currently, this will kill cursors for "foobar". */ void ClientCursor::invalidate(const char *nsPrefix) { vector toDelete; @@ -84,6 +85,7 @@ namespace mongo { assert( len > 0 && strchr(nsPrefix, '.') ); { + //cout << "\nTEMP invalidate " << nsPrefix << endl; recursive_scoped_lock lock(ccmutex); Database *db = cc().database(); @@ -92,18 +94,18 @@ namespace mongo { for( CCById::iterator i = clientCursorsById.begin(); i != clientCursorsById.end(); ++i ) { ClientCursor *cc = i->second; - if( cc->_db != db ) + if( cc->_db != db ) continue; - if ( strncmp(nsPrefix, cc->ns.c_str(), len) == 0 ) { + if ( strncmp(nsPrefix, cc->_ns.c_str(), len) == 0 ) { toDelete.push_back(i->second); } } /* note : we can't iterate byloc because clientcursors may exist with a loc of null in which case - they are not in the map. perhaps they should not exist though in the future? something to + they are not in the map. perhaps they should not exist though in the future? something to change??? 
- + CCByLoc& bl = db->ccByLoc; for ( CCByLoc::iterator i = bl.begin(); i != bl.end(); ++i ) { ClientCursor *cc = i->second; @@ -115,10 +117,16 @@ namespace mongo { for ( vector::iterator i = toDelete.begin(); i != toDelete.end(); ++i ) delete (*i); + + /*cout << "TEMP after invalidate " << endl; + for( auto i = clientCursorsById.begin(); i != clientCursorsById.end(); ++i ) { + cout << " " << i->second->ns << endl; + } + cout << "TEMP after invalidate done" << endl;*/ } } - bool ClientCursor::shouldTimeout( unsigned millis ){ + bool ClientCursor::shouldTimeout( unsigned millis ) { _idleAgeMillis += millis; return _idleAgeMillis > 600000 && _pinValue == 0; } @@ -130,9 +138,9 @@ namespace mongo { for ( CCById::iterator i = clientCursorsById.begin(); i != clientCursorsById.end(); ) { CCById::iterator j = i; i++; - if( j->second->shouldTimeout( millis ) ){ + if( j->second->shouldTimeout( millis ) ) { numberTimedOut++; - log(1) << "killing old cursor " << j->second->cursorid << ' ' << j->second->ns + log(1) << "killing old cursor " << j->second->_cursorid << ' ' << j->second->_ns << " idle:" << j->second->idleTime() << "ms\n"; delete j->second; } @@ -150,10 +158,10 @@ namespace mongo { log() << "perf warning: byLoc.size=" << bl.size() << " in aboutToDeleteBucket\n"; } for ( CCByLoc::iterator i = bl.begin(); i != bl.end(); i++ ) - i->second->c->aboutToDeleteBucket(b); + i->second->_c->aboutToDeleteBucket(b); } void aboutToDeleteBucket(const DiskLoc& b) { - ClientCursor::informAboutToDeleteBucket(b); + ClientCursor::informAboutToDeleteBucket(b); } /* must call this on a delete so we clean up the cursors. */ @@ -162,9 +170,12 @@ namespace mongo { Database *db = cc().database(); assert(db); + + aboutToDeleteForSharding( db , dl ); + CCByLoc& bl = db->ccByLoc; - CCByLoc::iterator j = bl.lower_bound(dl); - CCByLoc::iterator stop = bl.upper_bound(dl); + CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl)); + CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl)); if ( j == stop ) return; @@ -172,26 +183,45 @@ namespace mongo { while ( 1 ) { toAdvance.push_back(j->second); - DEV assert( j->first == dl ); + DEV assert( j->first.loc == dl ); ++j; if ( j == stop ) break; } - wassert( toAdvance.size() < 5000 ); - - for ( vector::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ){ + if( toAdvance.size() >= 3000 ) { + log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc " + << dl.toString() + << ' ' << toAdvance[1000]->_ns + << ' ' << toAdvance[2000]->_ns + << ' ' << toAdvance[1000]->_pinValue + << ' ' << toAdvance[2000]->_pinValue + << ' ' << toAdvance[1000]->_pos + << ' ' << toAdvance[2000]->_pos + << ' ' << toAdvance[1000]->_idleAgeMillis + << ' ' << toAdvance[2000]->_idleAgeMillis + << ' ' << toAdvance[1000]->_doingDeletes + << ' ' << toAdvance[2000]->_doingDeletes + << endl; + //wassert( toAdvance.size() < 5000 ); + } + + for ( vector::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) { ClientCursor* cc = *i; wassert(cc->_db == db); - + if ( cc->_doingDeletes ) continue; - Cursor *c = cc->c.get(); - if ( c->capped() ){ + Cursor *c = cc->_c.get(); + if ( c->capped() ) { + /* note we cannot advance here. if this condition occurs, writes to the oplog + have "caught" the reader. skipping ahead, the reader would miss postentially + important data. 
+ */ delete cc; continue; } - + c->checkLocation(); DiskLoc tmp1 = c->refLoc(); if ( tmp1 != dl ) { @@ -213,53 +243,131 @@ namespace mongo { } void aboutToDelete(const DiskLoc& dl) { ClientCursor::aboutToDelete(dl); } + ClientCursor::ClientCursor(int queryOptions, const shared_ptr& c, const string& ns, BSONObj query ) : + _ns(ns), _db( cc().database() ), + _c(c), _pos(0), + _query(query), _queryOptions(queryOptions), + _idleAgeMillis(0), _pinValue(0), + _doingDeletes(false), _yieldSometimesTracker(128,10) { + assert( _db ); + assert( str::startsWith(_ns, _db->name) ); + if( queryOptions & QueryOption_NoCursorTimeout ) + noTimeout(); + recursive_scoped_lock lock(ccmutex); + _cursorid = allocCursorId_inlock(); + clientCursorsById.insert( make_pair(_cursorid, this) ); + + if ( ! _c->modifiedKeys() ) { + // store index information so we can decide if we can + // get something out of the index key rather than full object + + int x = 0; + BSONObjIterator i( _c->indexKeyPattern() ); + while ( i.more() ) { + BSONElement e = i.next(); + if ( e.isNumber() ) { + // only want basic index fields, not "2d" etc + _indexedFields[e.fieldName()] = x; + } + x++; + } + } + + } + + ClientCursor::~ClientCursor() { - assert( pos != -2 ); + assert( _pos != -2 ); { recursive_scoped_lock lock(ccmutex); setLastLoc_inlock( DiskLoc() ); // removes us from bylocation multimap - clientCursorsById.erase(cursorid); + clientCursorsById.erase(_cursorid); // defensive: - (CursorId&) cursorid = -1; - pos = -2; + (CursorId&)_cursorid = -1; + _pos = -2; + } + } + + bool ClientCursor::getFieldsDotted( const string& name, BSONElementSet &ret ) { + + map::const_iterator i = _indexedFields.find( name ); + if ( i == _indexedFields.end() ) { + current().getFieldsDotted( name , ret ); + return false; + } + + int x = i->second; + + BSONObjIterator it( currKey() ); + while ( x && it.more() ) { + it.next(); + x--; } + assert( x == 0 ); + ret.insert( it.next() ); + return true; + } + + BSONElement ClientCursor::getFieldDotted( const string& name , bool * fromKey ) { + + map::const_iterator i = _indexedFields.find( name ); + if ( i == _indexedFields.end() ) { + if ( fromKey ) + *fromKey = false; + return current().getFieldDotted( name ); + } + + int x = i->second; + + BSONObjIterator it( currKey() ); + while ( x && it.more() ) { + it.next(); + x--; + } + assert( x == 0 ); + + if ( fromKey ) + *fromKey = true; + return it.next(); } + /* call when cursor's location changes so that we can update the cursorsbylocation map. if you are locked and internally iterating, only need to call when you are ready to "unlock". */ void ClientCursor::updateLocation() { - assert( cursorid ); + assert( _cursorid ); _idleAgeMillis = 0; - DiskLoc cl = c->refLoc(); + DiskLoc cl = _c->refLoc(); if ( lastLoc() == cl ) { //log() << "info: lastloc==curloc " << ns << '\n'; - } else { + } + else { recursive_scoped_lock lock(ccmutex); setLastLoc_inlock(cl); } // may be necessary for MultiCursor even when cl hasn't changed - c->noteLocation(); + _c->noteLocation(); } - + int ClientCursor::yieldSuggest() { int writers = 0; int readers = 0; - + int micros = Client::recommendedYieldMicros( &writers , &readers ); - - if ( micros > 0 && writers == 0 && dbMutex.getState() <= 0 ){ + + if ( micros > 0 && writers == 0 && dbMutex.getState() <= 0 ) { // we have a read lock, and only reads are coming on, so why bother unlocking micros = 0; } - + return micros; } - - bool ClientCursor::yieldSometimes(){ + + bool ClientCursor::yieldSometimes() { if ( ! 
_yieldSometimesTracker.ping() ) return true; @@ -267,82 +375,83 @@ namespace mongo { return ( micros > 0 ) ? yield( micros ) : true; } - void ClientCursor::staticYield( int micros ) { + void ClientCursor::staticYield( int micros , const StringData& ns ) { + killCurrentOp.checkForInterrupt( false ); { dbtempreleasecond unlock; - if ( unlock.unlocked() ){ + if ( unlock.unlocked() ) { if ( micros == -1 ) micros = Client::recommendedYieldMicros(); if ( micros > 0 ) - sleepmicros( micros ); + sleepmicros( micros ); } else { - log( LL_WARNING ) << "ClientCursor::yield can't unlock b/c of recursive lock" << endl; + warning() << "ClientCursor::yield can't unlock b/c of recursive lock ns: " << ns << endl; } - } + } } - + bool ClientCursor::prepareToYield( YieldData &data ) { - if ( ! c->supportYields() ) + if ( ! _c->supportYields() ) return false; // need to store in case 'this' gets deleted - data._id = cursorid; - + data._id = _cursorid; + data._doingDeletes = _doingDeletes; _doingDeletes = false; - + updateLocation(); - + { - /* a quick test that our temprelease is safe. - todo: make a YieldingCursor class + /* a quick test that our temprelease is safe. + todo: make a YieldingCursor class and then make the following code part of a unit test. */ const int test = 0; static bool inEmpty = false; - if( test && !inEmpty ) { + if( test && !inEmpty ) { inEmpty = true; log() << "TEST: manipulate collection during cc:yield" << endl; - if( test == 1 ) - Helpers::emptyCollection(ns.c_str()); + if( test == 1 ) + Helpers::emptyCollection(_ns.c_str()); else if( test == 2 ) { BSONObjBuilder b; string m; - dropCollection(ns.c_str(), m, b); + dropCollection(_ns.c_str(), m, b); } - else { - dropDatabase(ns.c_str()); + else { + dropDatabase(_ns.c_str()); } } - } + } return true; } - + bool ClientCursor::recoverFromYield( const YieldData &data ) { ClientCursor *cc = ClientCursor::find( data._id , false ); - if ( cc == 0 ){ + if ( cc == 0 ) { // id was deleted return false; } - + cc->_doingDeletes = data._doingDeletes; - cc->c->checkLocation(); - return true; + cc->_c->checkLocation(); + return true; } - + bool ClientCursor::yield( int micros ) { - if ( ! c->supportYields() ) + if ( ! _c->supportYields() ) return true; - YieldData data; + YieldData data; prepareToYield( data ); - - staticYield( micros ); + + staticYield( micros , _ns ); return ClientCursor::recoverFromYield( data ); } int ctmLast = 0; // so we don't have to do find() which is a little slow very often. long long ClientCursor::allocCursorId_inlock() { - if( 0 ) { + if( 0 ) { static long long z; ++z; cout << "TEMP alloccursorid " << z << endl; @@ -362,32 +471,32 @@ namespace mongo { return x; } - void ClientCursor::storeOpForSlave( DiskLoc last ){ + void ClientCursor::storeOpForSlave( DiskLoc last ) { if ( ! 
( _queryOptions & QueryOption_OplogReplay )) return; if ( last.isNull() ) return; - + BSONElement e = last.obj()["ts"]; if ( e.type() == Date || e.type() == Timestamp ) _slaveReadTill = e._opTime(); } - - void ClientCursor::updateSlaveLocation( CurOp& curop ){ + + void ClientCursor::updateSlaveLocation( CurOp& curop ) { if ( _slaveReadTill.isNull() ) return; - mongo::updateSlaveLocation( curop , ns.c_str() , _slaveReadTill ); + mongo::updateSlaveLocation( curop , _ns.c_str() , _slaveReadTill ); } - void ClientCursor::appendStats( BSONObjBuilder& result ){ + void ClientCursor::appendStats( BSONObjBuilder& result ) { recursive_scoped_lock lock(ccmutex); - result.appendNumber("totalOpen", (int)clientCursorsById.size() ); + result.appendNumber("totalOpen", clientCursorsById.size() ); result.appendNumber("clientCursors_size", (int) numCursors()); - result.appendNumber("timedOut" , (int)numberTimedOut); + result.appendNumber("timedOut" , numberTimedOut); } - + // QUESTION: Restrict to the namespace from which this command was issued? // Alternatively, make this command admin-only? class CmdCursorInfo : public Command { @@ -398,19 +507,19 @@ namespace mongo { help << " example: { cursorInfo : 1 }"; } virtual LockType locktype() const { return NONE; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { ClientCursor::appendStats( result ); return true; } } cmdCursorInfo; - - void ClientCursorMonitor::run(){ + + void ClientCursorMonitor::run() { Client::initThread("clientcursormon"); Client& client = cc(); - + unsigned old = curTimeMillis(); - while ( ! inShutdown() ){ + while ( ! inShutdown() ) { unsigned now = curTimeMillis(); ClientCursor::idleTimeReport( now - old ); old = now; @@ -420,15 +529,28 @@ namespace mongo { client.shutdown(); } - void ClientCursor::find( const string& ns , set& all ){ + void ClientCursor::find( const string& ns , set& all ) { recursive_scoped_lock lock(ccmutex); - - for ( CCById::iterator i=clientCursorsById.begin(); i!=clientCursorsById.end(); ++i ){ - if ( i->second->ns == ns ) + + for ( CCById::iterator i=clientCursorsById.begin(); i!=clientCursorsById.end(); ++i ) { + if ( i->second->_ns == ns ) all.insert( i->first ); } } + int ClientCursor::erase(int n, long long *ids) { + int found = 0; + for ( int i = 0; i < n; i++ ) { + if ( erase(ids[i]) ) + found++; + + if ( inShutdown() ) + break; + } + return found; + + } + ClientCursorMonitor clientCursorMonitor; diff --git a/db/clientcursor.h b/db/clientcursor.h index b895c17..f1d107f 100644 --- a/db/clientcursor.h +++ b/db/clientcursor.h @@ -33,6 +33,7 @@ #include "dbhelpers.h" #include "matcher.h" #include "../client/dbclient.h" +#include "projection.h" namespace mongo { @@ -41,31 +42,35 @@ namespace mongo { class ClientCursor; class ParsedQuery; + struct ByLocKey { + + ByLocKey( const DiskLoc & l , const CursorId& i ) : loc(l), id(i) {} + + static ByLocKey min( const DiskLoc& l ) { return ByLocKey( l , numeric_limits::min() ); } + static ByLocKey max( const DiskLoc& l ) { return ByLocKey( l , numeric_limits::max() ); } + + bool operator<( const ByLocKey &other ) const { + int x = loc.compare( other.loc ); + if ( x ) + return x < 0; + return id < other.id; + } + + DiskLoc loc; + CursorId id; + + }; + /* todo: make this map be per connection. this will prevent cursor hijacking security attacks perhaps. 
+ * ERH: 9/2010 this may not work since some drivers send getMore over a different connection */ typedef map CCById; + typedef map CCByLoc; extern BSONObj id_obj; class ClientCursor { friend class CmdCursorInfo; - DiskLoc _lastLoc; // use getter and setter not this (important) - unsigned _idleAgeMillis; // how long has the cursor been around, relative to server idle time - - /* 0 = normal - 1 = no timeout allowed - 100 = in use (pinned) -- see Pointer class - */ - unsigned _pinValue; - - bool _doingDeletes; - ElapsedTracker _yieldSometimesTracker; - - static CCById clientCursorsById; - static long long numberTimedOut; - static boost::recursive_mutex ccmutex; // must use this for all statics above! - static CursorId allocCursorId_inlock(); - public: static void assertNoCursors(); @@ -75,32 +80,38 @@ namespace mongo { at the same time - which might be bad. That should never happen, but if a client driver had a bug, it could (or perhaps some sort of attack situation). */ - class Pointer : boost::noncopyable { - public: + class Pointer : boost::noncopyable { ClientCursor *_c; + public: + ClientCursor * c() { return _c; } void release() { if( _c ) { assert( _c->_pinValue >= 100 ); _c->_pinValue -= 100; + _c = 0; } + } + /** + * call this if during a yield, the cursor got deleted + * if so, we don't want to use the point address + */ + void deleted() { _c = 0; } + ~Pointer() { release(); } Pointer(long long cursorid) { recursive_scoped_lock lock(ccmutex); _c = ClientCursor::find_inlock(cursorid, true); if( _c ) { if( _c->_pinValue >= 100 ) { _c = 0; - uassert(12051, "clientcursor already in use? driver problem?", false); + uasserted(12051, "clientcursor already in use? driver problem?"); } _c->_pinValue += 100; } } - ~Pointer() { - release(); - } - }; - + }; + // This object assures safe and reliable cleanup of the ClientCursor. // The implementation assumes that there will be no duplicate ids among cursors // (which is assured if cursors must last longer than 1 second). 
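
Aside (a sketch by the editor, not part of the upstream patch): the ByLocKey comparator and the CCByLoc typedef in the clientcursor.h hunk above order open cursors by disk location first and cursor id second, with min()/max() acting as range sentinels for a given location. The following minimal, self-contained sketch shows that ordering idea only; FakeDiskLoc, the plain long long cursor id, and the string payloads are simplified stand-ins assumed for the example, not the real mongo::DiskLoc or ClientCursor types.

// Sketch only: a (location, id) composite key supporting range scans over a std::map,
// in the spirit of ByLocKey / CCByLoc above. Stand-in types, not the mongo classes.
#include <iostream>
#include <limits>
#include <map>
#include <string>

typedef long long CursorId;

struct FakeDiskLoc {                    // stand-in for mongo::DiskLoc
    int a;                              // data file number
    int ofs;                            // offset within the file
    int compare( const FakeDiskLoc& r ) const {
        if ( a != r.a )     return a < r.a ? -1 : 1;
        if ( ofs != r.ofs ) return ofs < r.ofs ? -1 : 1;
        return 0;
    }
};

struct ByLocKey {                       // same ordering rule as the patch: loc, then id
    FakeDiskLoc loc;
    CursorId id;
    ByLocKey( const FakeDiskLoc& l , CursorId i ) : loc(l), id(i) {}
    static ByLocKey min( const FakeDiskLoc& l ) { return ByLocKey( l , std::numeric_limits<CursorId>::min() ); }
    static ByLocKey max( const FakeDiskLoc& l ) { return ByLocKey( l , std::numeric_limits<CursorId>::max() ); }
    bool operator<( const ByLocKey& other ) const {
        int x = loc.compare( other.loc );
        if ( x ) return x < 0;          // order by location first
        return id < other.id;           // then by cursor id
    }
};

int main() {
    std::map<ByLocKey, std::string> byLoc;          // plays the role of CCByLoc
    FakeDiskLoc l1 = { 0, 100 };
    FakeDiskLoc l2 = { 0, 200 };
    byLoc.insert( std::make_pair( ByLocKey( l1, 7 ), "cursor 7" ) );
    byLoc.insert( std::make_pair( ByLocKey( l1, 9 ), "cursor 9" ) );
    byLoc.insert( std::make_pair( ByLocKey( l2, 3 ), "cursor 3" ) );

    // every cursor positioned on l1, whatever its id: bracket with the sentinels
    std::map<ByLocKey, std::string>::const_iterator it  = byLoc.lower_bound( ByLocKey::min( l1 ) );
    std::map<ByLocKey, std::string>::const_iterator end = byLoc.upper_bound( ByLocKey::max( l1 ) );
    for ( ; it != end; ++it )
        std::cout << it->second << std::endl;       // prints "cursor 7" then "cursor 9"
    return 0;
}

Bracketing with lower_bound( ByLocKey::min( loc ) ) and upper_bound( ByLocKey::max( loc ) ) yields exactly the cursors parked on one location, which is presumably the lookup pattern location-based invalidation (e.g. aboutToDelete) performs against a map keyed this way.
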
@@ -108,19 +119,17 @@ namespace mongo { public: CleanupPointer() : _c( 0 ), _id( -1 ) {} void reset( ClientCursor *c = 0 ) { - if ( c == _c ) { + if ( c == _c ) return; - } - if ( _c ) { // be careful in case cursor was deleted by someone else ClientCursor::erase( _id ); } - if ( c ) { _c = c; - _id = c->cursorid; - } else { + _id = c->_cursorid; + } + else { _c = 0; _id = -1; } @@ -135,40 +144,19 @@ namespace mongo { CursorId _id; }; - /*const*/ CursorId cursorid; - const string ns; - const shared_ptr c; - int pos; // # objects into the cursor so far - const BSONObj query; // used for logging diags only; optional in constructor - const int _queryOptions; // see enum QueryOptions dbclient.h - OpTime _slaveReadTill; - Database * const _db; - - ClientCursor(int queryOptions, shared_ptr& _c, const string& _ns, BSONObj _query = BSONObj()) : - _idleAgeMillis(0), _pinValue(0), - _doingDeletes(false), _yieldSometimesTracker(128,10), - ns(_ns), c(_c), - pos(0), query(_query), - _queryOptions(queryOptions), - _db( cc().database() ) - { - assert( _db ); - assert( str::startsWith(_ns, _db->name) ); - if( queryOptions & QueryOption_NoCursorTimeout ) - noTimeout(); - recursive_scoped_lock lock(ccmutex); - cursorid = allocCursorId_inlock(); - clientCursorsById.insert( make_pair(cursorid, this) ); - } + ClientCursor(int queryOptions, const shared_ptr& c, const string& ns, BSONObj query = BSONObj() ); + ~ClientCursor(); - DiskLoc lastLoc() const { - return _lastLoc; - } + // *************** basic accessors ******************* - shared_ptr< ParsedQuery > pq; - shared_ptr< FieldMatcher > fields; // which fields query wants returned - Message originalMessage; // this is effectively an auto ptr for data the matcher points to + CursorId cursorid() const { return _cursorid; } + string ns() const { return _ns; } + Database * db() const { return _db; } + const BSONObj& query() const { return _query; } + int queryOptions() const { return _queryOptions; } + + DiskLoc lastLoc() const { return _lastLoc; } /* Get rid of cursors for namespaces that begin with nsprefix. Used by drop, dropIndexes, dropDatabase. @@ -176,14 +164,14 @@ namespace mongo { static void invalidate(const char *nsPrefix); /** - * @param microsToSleep -1 : ask client + * @param microsToSleep -1 : ask client * >=0 : sleep for that amount - * do a dbtemprelease - * note: caller should check matcher.docMatcher().atomic() first and not yield if atomic - + * do a dbtemprelease + * note: caller should check matcher.docMatcher().atomic() first and not yield if atomic - * we don't do herein as this->matcher (above) is only initialized for true queries/getmore. * (ie not set for remote/update) - * @return if the cursor is still valid. - * if false is returned, then this ClientCursor should be considered deleted - + * @return if the cursor is still valid. + * if false is returned, then this ClientCursor should be considered deleted - * in fact, the whole database could be gone. 
*/ bool yield( int microsToSleep = -1 ); @@ -192,72 +180,82 @@ namespace mongo { * @return same as yield() */ bool yieldSometimes(); - + static int yieldSuggest(); - static void staticYield( int micros ); - + static void staticYield( int micros , const StringData& ns ); + struct YieldData { CursorId _id; bool _doingDeletes; }; bool prepareToYield( YieldData &data ); static bool recoverFromYield( const YieldData &data ); struct YieldLock : boost::noncopyable { explicit YieldLock( ptr cc ) - : _canYield(cc->c->supportYields()) { - if ( _canYield ){ + : _canYield(cc->_c->supportYields()) { + if ( _canYield ) { cc->prepareToYield( _data ); _unlock.reset(new dbtempreleasecond()); } } - ~YieldLock(){ - if ( _unlock ){ + ~YieldLock() { + if ( _unlock ) { log( LL_WARNING ) << "ClientCursor::YieldLock not closed properly" << endl; relock(); } } - - bool stillOk(){ + bool stillOk() { if ( ! _canYield ) return true; - relock(); - return ClientCursor::recoverFromYield( _data ); } - - void relock(){ + void relock() { _unlock.reset(); } - private: - bool _canYield; + const bool _canYield; YieldData _data; - scoped_ptr _unlock; - }; // --- some pass through helpers for Cursor --- - BSONObj indexKeyPattern() { - return c->indexKeyPattern(); - } + Cursor* c() const { return _c.get(); } + int pos() const { return _pos; } - bool ok(){ - return c->ok(); - } + void incPos( int n ) { _pos += n; } // TODO: this is bad + void setPos( int n ) { _pos = n; } // TODO : this is bad too - bool advance(){ - return c->advance(); - } + BSONObj indexKeyPattern() { return _c->indexKeyPattern(); } + bool modifiedKeys() const { return _c->modifiedKeys(); } + bool isMultiKey() const { return _c->isMultiKey(); } - bool currentMatches(){ - if ( ! c->matcher() ) - return true; - return c->matcher()->matchesCurrent( c.get() ); - } + bool ok() { return _c->ok(); } + bool advance() { return _c->advance(); } + BSONObj current() { return _c->current(); } + DiskLoc currLoc() { return _c->currLoc(); } + BSONObj currKey() const { return _c->currKey(); } + + + /** + * same as BSONObj::getFieldsDotted + * if it can be retrieved from key, it is + * @return if this was retrieved from key + */ + bool getFieldsDotted( const string& name, BSONElementSet &ret ); + + /** + * same as BSONObj::getFieldDotted + * if it can be retrieved from key, it is + * @return if this was retrieved from key + */ + BSONElement getFieldDotted( const string& name , bool * fromKey = 0 ); + + bool currentIsDup() { return _c->getsetdup( _c->currLoc() ); } - BSONObj current(){ - return c->current(); + bool currentMatches() { + if ( ! _c->matcher() ) + return true; + return _c->matcher()->matchesCurrent( _c.get() ); } private: @@ -273,12 +271,12 @@ namespace mongo { return it->second; } public: - static ClientCursor* find(CursorId id, bool warn = true) { + static ClientCursor* find(CursorId id, bool warn = true) { recursive_scoped_lock lock(ccmutex); ClientCursor *c = find_inlock(id, warn); - // if this asserts, your code was not thread safe - you either need to set no timeout - // for the cursor or keep a ClientCursor::Pointer in scope for it. - massert( 12521, "internal error: use of an unlocked ClientCursor", c == 0 || c->_pinValue ); + // if this asserts, your code was not thread safe - you either need to set no timeout + // for the cursor or keep a ClientCursor::Pointer in scope for it. 
+ massert( 12521, "internal error: use of an unlocked ClientCursor", c == 0 || c->_pinValue ); return c; } @@ -293,6 +291,11 @@ namespace mongo { return false; } + /** + * @return number of cursors found + */ + static int erase( int n , long long * ids ); + /* call when cursor's location changes so that we can update the cursorsbylocation map. if you are locked and internally iterating, only need to call when you are ready to "unlock". @@ -314,43 +317,82 @@ namespace mongo { void storeOpForSlave( DiskLoc last ); void updateSlaveLocation( CurOp& curop ); - - unsigned idleTime(){ - return _idleAgeMillis; - } + + unsigned idleTime() const { return _idleAgeMillis; } + + void setDoingDeletes( bool doingDeletes ) {_doingDeletes = doingDeletes; } + + void slaveReadTill( const OpTime& t ) { _slaveReadTill = t; } + + public: // static methods static void idleTimeReport(unsigned millis); -private: - // cursors normally timeout after an inactivy period to prevent excess memory use - // setting this prevents timeout of the cursor in question. - void noTimeout() { - _pinValue++; - } - multimap& byLoc() { - return _db->ccByLoc; - } -public: - void setDoingDeletes( bool doingDeletes ){ - _doingDeletes = doingDeletes; - } - static void appendStats( BSONObjBuilder& result ); - static unsigned numCursors() { return clientCursorsById.size(); } - static void informAboutToDeleteBucket(const DiskLoc& b); static void aboutToDelete(const DiskLoc& dl); - static void find( const string& ns , set& all ); + + + private: // methods + + // cursors normally timeout after an inactivy period to prevent excess memory use + // setting this prevents timeout of the cursor in question. + void noTimeout() { _pinValue++; } + + CCByLoc& byLoc() { return _db->ccByLoc; } + + private: + + CursorId _cursorid; + + const string _ns; + Database * _db; + + const shared_ptr _c; + map _indexedFields; // map from indexed field to offset in key object + int _pos; // # objects into the cursor so far + + const BSONObj _query; // used for logging diags only; optional in constructor + int _queryOptions; // see enum QueryOptions dbclient.h + + OpTime _slaveReadTill; + + DiskLoc _lastLoc; // use getter and setter not this (important) + unsigned _idleAgeMillis; // how long has the cursor been around, relative to server idle time + + /* 0 = normal + 1 = no timeout allowed + 100 = in use (pinned) -- see Pointer class + */ + unsigned _pinValue; + + bool _doingDeletes; + ElapsedTracker _yieldSometimesTracker; + + public: + shared_ptr pq; + shared_ptr fields; // which fields query wants returned + Message originalMessage; // this is effectively an auto ptr for data the matcher points to + + + + private: // static members + + static CCById clientCursorsById; + static long long numberTimedOut; + static boost::recursive_mutex ccmutex; // must use this for all statics above! 
+ static CursorId allocCursorId_inlock(); + }; class ClientCursorMonitor : public BackgroundJob { public: + string name() const { return "ClientCursorMonitor"; } void run(); - string name() { return "ClientCursorMonitor"; } }; extern ClientCursorMonitor clientCursorMonitor; - + } // namespace mongo diff --git a/db/cloner.cpp b/db/cloner.cpp index 9177a00..fe57463 100644 --- a/db/cloner.cpp +++ b/db/cloner.cpp @@ -31,7 +31,7 @@ namespace mongo { void ensureHaveIdIndex(const char *ns); - bool replAuthenticate(DBClientConnection *); + bool replAuthenticate(DBClientBase *); class Cloner: boost::noncopyable { auto_ptr< DBClientWithCommands > conn; @@ -40,7 +40,7 @@ namespace mongo { struct Fun; public: Cloner() { } - + /* slaveOk - if true it is ok if the source of the data is !ismaster. useReplAuth - use the credentials we normally use as a replication slave for the cloning snapshot - use $snapshot mode for copying collections. note this should not be used when it isn't required, as it will be slower. @@ -92,14 +92,14 @@ namespace mongo { if ( context ) { context->relocked(); } - + while( i.moreInCurrentBatch() ) { if ( n % 128 == 127 /*yield some*/ ) { dbtemprelease t; } - + BSONObj tmp = i.nextSafe(); - + /* assure object is valid. note this will slow us down a little. */ if ( !tmp.valid() ) { stringstream ss; @@ -109,15 +109,15 @@ namespace mongo { e.validate(); ss << " firstElement: " << e; } - catch( ... ){ + catch( ... ) { ss << " firstElement corrupt"; } out() << ss.str() << endl; continue; } - + ++n; - + BSONObj js = tmp; if ( isindex ) { assert( strstr(from_collection, "system.indexes") ); @@ -125,16 +125,18 @@ namespace mongo { storedForLater->push_back( js.getOwned() ); continue; } - - try { + + try { theDataFileMgr.insertWithObjMod(to_collection, js); if ( logForRepl ) logOp("i", to_collection, js); + + getDur().commitIfNeeded(); } - catch( UserException& e ) { + catch( UserException& e ) { log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; } - + RARELY if ( time( 0 ) - saveLast > 60 ) { log() << n << " objects cloned so far from collection " << from_collection << endl; saveLast = time( 0 ); @@ -146,17 +148,17 @@ namespace mongo { const char *from_collection; const char *to_collection; time_t saveLast; - list *storedForLater; + list *storedForLater; bool logForRepl; Client::Context *context; }; - + /* copy the specified collection isindex - if true, this is system.indexes collection, in which we do some transformation when copying. */ void Cloner::copy(const char *from_collection, const char *to_collection, bool isindex, bool logForRepl, bool masterSameProcess, bool slaveOk, Query query) { list storedForLater; - + Fun f; f.n = 0; f.isindex = isindex; @@ -165,7 +167,7 @@ namespace mongo { f.saveLast = time( 0 ); f.storedForLater = &storedForLater; f.logForRepl = logForRepl; - + int options = QueryOption_NoCursorTimeout | ( slaveOk ? 
QueryOption_SlaveOk : 0 ); { dbtemprelease r; @@ -173,7 +175,9 @@ namespace mongo { DBClientConnection *remote = dynamic_cast< DBClientConnection* >( conn.get() ); if ( remote ) { remote->query( boost::function( f ), from_collection, query, 0, options ); - } else { // no exhaust mode for direct client, so we have this hack + } + else { + // there is no exhaust mode for direct client, so we have this hack auto_ptr c = conn->query( from_collection, query, 0, 0, 0, options ); assert( c.get() ); while( c->more() ) { @@ -182,16 +186,18 @@ namespace mongo { } } } - - if ( storedForLater.size() ){ - for ( list::iterator i = storedForLater.begin(); i!=storedForLater.end(); i++ ){ + + if ( storedForLater.size() ) { + for ( list::iterator i = storedForLater.begin(); i!=storedForLater.end(); i++ ) { BSONObj js = *i; - try { + try { theDataFileMgr.insertWithObjMod(to_collection, js); if ( logForRepl ) logOp("i", to_collection, js); + + getDur().commitIfNeeded(); } - catch( UserException& e ) { + catch( UserException& e ) { log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n'; } } @@ -210,40 +216,44 @@ namespace mongo { return false; conn.reset( myconn.release() ); - + writelock lk(ns); // TODO: make this lower down Client::Context ctx(ns); - { // config + { + // config string temp = ctx.db()->name + ".system.namespaces"; BSONObj config = conn->findOne( temp , BSON( "name" << ns ) ); if ( config["options"].isABSONObj() ) if ( ! userCreateNS( ns.c_str() , config["options"].Obj() , errmsg, true , 0 ) ) return false; } - - { // main data + + { + // main data copy( ns.c_str() , ns.c_str() , /*isindex*/false , logForRepl , false , true , Query(query).snapshot() ); } - + /* TODO : copyIndexes bool does not seem to be implemented! */ - if( !copyIndexes ) { + if( !copyIndexes ) { log() << "ERROR copy collection copyIndexes not implemented? " << ns << endl; } - { // indexes + { + // indexes string temp = ctx.db()->name + ".system.indexes"; copy( temp.c_str() , temp.c_str() , /*isindex*/true , logForRepl , false , true , BSON( "ns" << ns ) ); } + getDur().commitIfNeeded(); return true; } - + extern bool inDBRepair; void ensureIdIndexForNewNs(const char *ns); bool Cloner::go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot) { - massert( 10289 , "useReplAuth is not written to replication log", !useReplAuth || !logForRepl ); + massert( 10289 , "useReplAuth is not written to replication log", !useReplAuth || !logForRepl ); string todb = cc().database()->name; stringstream a,b; @@ -263,23 +273,26 @@ namespace mongo { */ string ns = fromdb + ".system.namespaces"; list toClone; - { + { dbtemprelease r; - + // just using exhaust for collection copying right now auto_ptr c; { if ( conn.get() ) { // nothing to do - } else if ( !masterSameProcess ) { - auto_ptr< DBClientConnection > c( new DBClientConnection() ); - if ( !c->connect( masterHost, errmsg ) ) + } + else if ( !masterSameProcess ) { + ConnectionString cs = ConnectionString::parse( masterHost, errmsg ); + auto_ptr con( cs.connect( errmsg )); + if ( !con.get() ) return false; - if( !replAuthenticate(c.get()) ) + if( !replAuthenticate(con.get()) ) return false; - - conn = c; - } else { + + conn = con; + } + else { conn.reset( new DBDirectClient() ); } c = conn->query( ns.c_str(), BSONObj(), 0, 0, 0, slaveOk ? 
QueryOption_SlaveOk : 0 ); @@ -289,8 +302,8 @@ namespace mongo { errmsg = "query failed " + ns; return false; } - - while ( c->more() ){ + + while ( c->more() ) { BSONObj collection = c->next(); log(2) << "\t cloner got " << collection << endl; @@ -304,23 +317,23 @@ namespace mongo { assert( e.type() == String ); const char *from_name = e.valuestr(); - if( strstr(from_name, ".system.") ) { + if( strstr(from_name, ".system.") ) { /* system.users and s.js is cloned -- but nothing else from system. * system.indexes is handled specially at the end*/ - if( legalClientSystemNS( from_name , true ) == 0 ){ + if( legalClientSystemNS( from_name , true ) == 0 ) { log(2) << "\t\t not cloning because system collection" << endl; continue; } } - if( ! nsDollarCheck( from_name ) ){ + if( ! isANormalNSName( from_name ) ) { log(2) << "\t\t not cloning because has $ " << endl; continue; - } + } toClone.push_back( collection.getOwned() ); } } - for ( list::iterator i=toClone.begin(); i != toClone.end(); i++ ){ + for ( list::iterator i=toClone.begin(); i != toClone.end(); i++ ) { { dbtemprelease r; } @@ -328,7 +341,7 @@ namespace mongo { log(2) << " really will clone: " << collection << endl; const char * from_name = collection["name"].valuestr(); BSONObj options = collection.getObjectField("options"); - + /* change name ".collection" -> .collection */ const char *p = strchr(from_name, '.'); assert(p); @@ -338,17 +351,17 @@ namespace mongo { { string err; const char *toname = to_name.c_str(); - /* we defer building id index for performance - building it in batch is much faster */ + /* we defer building id index for performance - building it in batch is much faster */ userCreateNS(toname, options, err, logForRepl, &wantIdIndex); } log(1) << "\t\t cloning " << from_name << " -> " << to_name << endl; Query q; - if( snapshot ) + if( snapshot ) q.snapshot(); copy(from_name, to_name.c_str(), false, logForRepl, masterSameProcess, slaveOk, q); if( wantIdIndex ) { - /* we need dropDups to be true as we didn't do a true snapshot and this is before applying oplog operations + /* we need dropDups to be true as we didn't do a true snapshot and this is before applying oplog operations that occur during the initial sync. inDBRepair makes dropDups be true. */ bool old = inDBRepair; @@ -357,7 +370,7 @@ namespace mongo { ensureIdIndexForNewNs(to_name.c_str()); inDBRepair = old; } - catch(...) { + catch(...) { inDBRepair = old; throw; } @@ -368,27 +381,26 @@ namespace mongo { string system_indexes_from = fromdb + ".system.indexes"; string system_indexes_to = todb + ".system.indexes"; - /* [dm]: is the ID index sometimes not called "_id_"? There is other code in the system that looks for a "_id" prefix - rather than this exact value. we should standardize. OR, remove names - which is in the bugdb. Anyway, this + /* [dm]: is the ID index sometimes not called "_id_"? There is other code in the system that looks for a "_id" prefix + rather than this exact value. we should standardize. OR, remove names - which is in the bugdb. Anyway, this is dubious here at the moment. */ copy(system_indexes_from.c_str(), system_indexes_to.c_str(), true, logForRepl, masterSameProcess, slaveOk, BSON( "name" << NE << "_id_" ) ); return true; } - + /* slaveOk - if true it is ok if the source of the data is !ismaster. useReplAuth - use the credentials we normally use as a replication slave for the cloning snapshot - use $snapshot mode for copying collections. note this should not be used when it isn't required, as it will be slower. 
for example repairDatabase need not use it. */ - bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, - bool slaveOk, bool useReplAuth, bool snapshot) - { + bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, + bool slaveOk, bool useReplAuth, bool snapshot) { Cloner c; return c.go(masterHost, errmsg, fromdb, logForReplication, slaveOk, useReplAuth, snapshot); } - + /* Usage: mydb.$cmd.findOne( { clone: "fromhost" } ); */ @@ -410,11 +422,11 @@ namespace mongo { /* replication note: we must logOp() not the command, but the cloned data -- if the slave were to clone it would get a different point-in-time and not match. */ - return cloneFrom(from.c_str(), errmsg, dbname, + return cloneFrom(from.c_str(), errmsg, dbname, /*logForReplication=*/!fromRepl, /*slaveok*/false, /*usereplauth*/false, /*snapshot*/true); } } cmdclone; - + class CmdCloneCollection : public Command { public: virtual bool slaveOk() const { @@ -424,10 +436,10 @@ namespace mongo { CmdCloneCollection() : Command("cloneCollection") { } virtual void help( stringstream &help ) const { help << "{ cloneCollection: , from: [,query: ] [,copyIndexes:] }" - "\nCopies a collection from one server to another. Do not use on a single server as the destination " - "is placed at the same db.collection (namespace) as the source.\n" - "Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there." - ; + "\nCopies a collection from one server to another. Do not use on a single server as the destination " + "is placed at the same db.collection (namespace) as the source.\n" + "Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there." + ; } virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string fromhost = cmdObj.getStringField("from"); @@ -437,7 +449,7 @@ namespace mongo { } { HostAndPort h(fromhost); - if( h.isSelf() ) { + if( h.isSelf() ) { errmsg = "can't cloneCollection from self"; return false; } @@ -450,13 +462,13 @@ namespace mongo { BSONObj query = cmdObj.getObjectField("query"); if ( query.isEmpty() ) query = BSONObj(); - + BSONElement copyIndexesSpec = cmdObj.getField("copyindexes"); bool copyIndexes = copyIndexesSpec.isBoolean() ? copyIndexesSpec.boolean() : true; - - log() << "cloneCollection. db:" << dbname << " collection:" << collection << " from: " << fromhost + + log() << "cloneCollection. db:" << dbname << " collection:" << collection << " from: " << fromhost << " query: " << query << " " << ( copyIndexes ? 
"" : ", not copying indexes" ) << endl; - + Cloner c; return c.copyCollection( fromhost , collection , query, errmsg , copyIndexes ); } @@ -557,7 +569,7 @@ namespace mongo { return res; } } cmdcopydb; - + class CmdRenameCollection : public Command { public: CmdRenameCollection() : Command( "renameCollection" ) {} @@ -581,7 +593,7 @@ namespace mongo { errmsg = "invalid command syntax"; return false; } - + bool capped = false; long long size = 0; { @@ -593,10 +605,10 @@ namespace mongo { for( DiskLoc i = nsd->firstExtent; !i.isNull(); i = i.ext()->xnext ) size += i.ext()->length; } - + Client::Context ctx( target ); - - if ( nsdetails( target.c_str() ) ){ + + if ( nsdetails( target.c_str() ) ) { uassert( 10027 , "target namespace exists", cmdObj["dropTarget"].trueValue() ); BSONObjBuilder bb( result.subobjStart( "dropTarget" ) ); dropCollection( target , errmsg , bb ); @@ -623,7 +635,7 @@ namespace mongo { } if ( !userCreateNS( target.c_str(), spec.done(), errmsg, false ) ) return false; - + auto_ptr< DBClientCursor > c; DBDirectClient bridge; @@ -638,7 +650,7 @@ namespace mongo { BSONObj o = c->next(); theDataFileMgr.insertWithObjMod( target.c_str(), o ); } - + char cl[256]; nsToDatabase( source.c_str(), cl ); string sourceIndexes = string( cl ) + ".system.indexes"; @@ -661,7 +673,8 @@ namespace mongo { break; if ( strcmp( e.fieldName(), "ns" ) == 0 ) { b.append( "ns", target ); - } else { + } + else { b.append( e ); } } diff --git a/db/cmdline.cpp b/db/cmdline.cpp index 65ee179..900a782 100644 --- a/db/cmdline.cpp +++ b/db/cmdline.cpp @@ -20,47 +20,92 @@ #include "cmdline.h" #include "commands.h" #include "../util/processinfo.h" +#include "security_key.h" + +#ifdef _WIN32 +#include +#endif namespace po = boost::program_options; +namespace fs = boost::filesystem; namespace mongo { - void setupSignals(); + void setupSignals( bool inFork ); + string getHostNameCached(); BSONArray argvArray; - void CmdLine::addGlobalOptions( boost::program_options::options_description& general , - boost::program_options::options_description& hidden ){ + void CmdLine::addGlobalOptions( boost::program_options::options_description& general , + boost::program_options::options_description& hidden ) { /* support for -vv -vvvv etc. */ for (string s = "vv"; s.length() <= 12; s.append("v")) { hidden.add_options()(s.c_str(), "verbose"); } - + general.add_options() - ("help,h", "show this usage information") - ("version", "show version information") - ("config,f", po::value(), "configuration file specifying additional options") - ("verbose,v", "be more verbose (include multiple times for more verbosity e.g. -vvvvv)") - ("quiet", "quieter output") - ("port", po::value(&cmdLine.port), "specify port number") - ("bind_ip", po::value(&cmdLine.bind_ip), "comma separated list of ip addresses to listen on - all local ips by default") - ("logpath", po::value() , "file to send all output to instead of stdout" ) - ("logappend" , "append to logpath instead of over-writing" ) - ("pidfilepath", po::value(), "full path to pidfile (if not set, no pidfile is created)") + ("help,h", "show this usage information") + ("version", "show version information") + ("config,f", po::value(), "configuration file specifying additional options") + ("verbose,v", "be more verbose (include multiple times for more verbosity e.g. 
-vvvvv)") + ("quiet", "quieter output") + ("port", po::value(&cmdLine.port), "specify port number") + ("bind_ip", po::value(&cmdLine.bind_ip), "comma separated list of ip addresses to listen on - all local ips by default") + ("logpath", po::value() , "log file to send write to instead of stdout - has to be a file, not directory" ) + ("logappend" , "append to logpath instead of over-writing" ) + ("pidfilepath", po::value(), "full path to pidfile (if not set, no pidfile is created)") + ("keyFile", po::value(), "private key for cluster authentication (only for replica sets)") #ifndef _WIN32 - ("fork" , "fork server process" ) + ("unixSocketPrefix", po::value(), "alternative directory for UNIX domain sockets (defaults to /tmp)") + ("fork" , "fork server process" ) #endif - ; - + ; + } - bool CmdLine::store( int argc , char ** argv , +#if defined(_WIN32) + void CmdLine::addWindowsOptions( boost::program_options::options_description& windows , + boost::program_options::options_description& hidden ) { + windows.add_options() + ("install", "install mongodb service") + ("remove", "remove mongodb service") + ("reinstall", "reinstall mongodb service (equivilant of mongod --remove followed by mongod --install)") + ("serviceName", po::value(), "windows service name") + ("serviceDisplayName", po::value(), "windows service display name") + ("serviceDescription", po::value(), "windows service description") + ("serviceUser", po::value(), "user name service executes as") + ("servicePassword", po::value(), "password used to authenticate serviceUser") + ; + hidden.add_options()("service", "start mongodb service"); + } +#endif + + + bool CmdLine::store( int argc , char ** argv , boost::program_options::options_description& visible, boost::program_options::options_description& hidden, boost::program_options::positional_options_description& positional, - boost::program_options::variables_map ¶ms ){ - + boost::program_options::variables_map ¶ms ) { + + + { + // setup binary name + cmdLine.binaryName = argv[0]; + size_t i = cmdLine.binaryName.rfind( '/' ); + if ( i != string::npos ) + cmdLine.binaryName = cmdLine.binaryName.substr( i + 1 ); + + // setup cwd + char buffer[1024]; +#ifdef _WIN32 + assert( _getcwd( buffer , 1000 ) ); +#else + assert( getcwd( buffer , 1000 ) ); +#endif + cmdLine.cwd = buffer; + } + /* don't allow guessing - creates ambiguities when some options are * prefixes of others. allow long disguises and don't allow guessing * to get away with our vvvvvvv trick. */ @@ -69,7 +114,7 @@ namespace mongo { po::command_line_style::allow_long_disguise) ^ po::command_line_style::allow_sticky); - + try { po::options_description all; @@ -80,26 +125,27 @@ namespace mongo { .options( all ) .positional( positional ) .style( style ) - .run(), + .run(), params ); - if ( params.count("config") ){ + if ( params.count("config") ) { ifstream f( params["config"].as().c_str() ); - if ( ! f.is_open() ){ + if ( ! 
f.is_open() ) { cout << "ERROR: could not read from config file" << endl << endl; cout << visible << endl; return false; } - + po::store( po::parse_config_file( f , all ) , params ); f.close(); } - + po::notify(params); - } + } catch (po::error &e) { - cout << "ERROR: " << e.what() << endl << endl; - cout << visible << endl; + cout << "error command line: " << e.what() << endl; + cout << "use --help for help" << endl; + //cout << visible << endl; return false; } @@ -120,44 +166,51 @@ namespace mongo { string logpath; #ifndef _WIN32 + if (params.count("unixSocketPrefix")) { + cmdLine.socket = params["unixSocketPrefix"].as(); + if (!fs::is_directory(cmdLine.socket)) { + cout << cmdLine.socket << " must be a directory" << endl; + ::exit(-1); + } + } + if (params.count("fork")) { - if ( ! params.count( "logpath" ) ){ + if ( ! params.count( "logpath" ) ) { cout << "--fork has to be used with --logpath" << endl; ::exit(-1); } - - { // test logpath + + { + // test logpath logpath = params["logpath"].as(); assert( logpath.size() ); - if ( logpath[0] != '/' ){ - char temp[256]; - assert( getcwd( temp , 256 ) ); - logpath = (string)temp + "/" + logpath; + if ( logpath[0] != '/' ) { + logpath = cmdLine.cwd + "/" + logpath; } FILE * test = fopen( logpath.c_str() , "a" ); - if ( ! test ){ + if ( ! test ) { cout << "can't open [" << logpath << "] for log file: " << errnoWithDescription() << endl; ::exit(-1); } fclose( test ); } - + cout.flush(); cerr.flush(); pid_t c = fork(); - if ( c ){ + if ( c ) { _exit(0); } - if ( chdir("/") < 0 ){ + if ( chdir("/") < 0 ) { cout << "Cant chdir() while forking server process: " << strerror(errno) << endl; ::exit(-1); } setsid(); - + pid_t c2 = fork(); - if ( c2 ){ + if ( c2 ) { cout << "forked process: " << c2 << endl; _exit(0); } @@ -170,19 +223,19 @@ namespace mongo { fclose(stdin); FILE* f = freopen("/dev/null", "w", stderr); - if ( f == NULL ){ + if ( f == NULL ) { cout << "Cant reassign stderr while forking server process: " << strerror(errno) << endl; ::exit(-1); } f = freopen("/dev/null", "r", stdin); - if ( f == NULL ){ + if ( f == NULL ) { cout << "Cant reassign stdin while forking server process: " << strerror(errno) << endl; ::exit(-1); } setupCoreSignals(); - setupSignals(); + setupSignals( true ); } #endif if (params.count("logpath")) { @@ -196,6 +249,18 @@ namespace mongo { writePidFile( params["pidfilepath"].as() ); } + if (params.count("keyFile")) { + const string f = params["keyFile"].as(); + + if (!setUpSecurityKey(f)) { + // error message printed in setUpPrivateKey + dbexit(EXIT_BADOPTIONS); + } + + noauth = false; + } + + { BSONArrayBuilder b; for (int i=0; i < argc; i++) @@ -205,29 +270,51 @@ namespace mongo { return true; } - - void ignoreSignal( int signal ){ - } - void setupCoreSignals(){ + void ignoreSignal( int sig ) {} + + void setupCoreSignals() { #if !defined(_WIN32) assert( signal(SIGUSR1 , rotateLogs ) != SIG_ERR ); assert( signal(SIGHUP , ignoreSignal ) != SIG_ERR ); #endif } - class CmdGetCmdLineOpts : Command{ - public: + class CmdGetCmdLineOpts : Command { + public: CmdGetCmdLineOpts(): Command("getCmdLineOpts") {} void help(stringstream& h) const { h << "get argv"; } virtual LockType locktype() const { return NONE; } virtual bool adminOnly() const { return true; } virtual bool slaveOk() const { return true; } - virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { 
result.append("argv", argvArray); return true; } } cmdGetCmdLineOpts; + + string prettyHostName() { + StringBuilder s(128); + s << getHostNameCached(); + if( cmdLine.port != CmdLine::DefaultDBPort ) + s << ':' << mongo::cmdLine.port; + return s.str(); + } + + ParameterValidator::ParameterValidator( const string& name ) : _name( name ) { + if ( ! _all ) + _all = new map(); + (*_all)[_name] = this; + } + + ParameterValidator * ParameterValidator::get( const string& name ) { + map::iterator i = _all->find( name ); + if ( i == _all->end() ) + return NULL; + return i->second; + } + map * ParameterValidator::_all = 0; + } diff --git a/db/cmdline.h b/db/cmdline.h index ef1bd57..4c8c7c4 100644 --- a/db/cmdline.h +++ b/db/cmdline.h @@ -17,72 +17,134 @@ #pragma once #include "../pch.h" +#include "jsobj.h" namespace mongo { - - /* command line options + + /* command line options */ /* concurrency: OK/READ */ - struct CmdLine { + struct CmdLine { + + CmdLine() : + port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), smallfiles(sizeof(int*) == 4), + quota(false), quotaFiles(8), cpu(false), durOptions(0), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ), + syncdelay(60), socket("/tmp") { + // default may change for this later. +#if defined(_DURABLEDEFAULTON) + dur = true; +#else + dur = false; +#endif + } + + string binaryName; // mongod or mongos + string cwd; // cwd of when process started + int port; // --port + enum { + DefaultDBPort = 27017, + ConfigServerPort = 27019, + ShardServerPort = 27018 + }; + bool isDefaultPort() const { return port == DefaultDBPort; } + string bind_ip; // --bind_ip bool rest; // --rest + bool jsonp; // --jsonp string _replSet; // --replSet[/] - string ourSetName() const { + string ourSetName() const { string setname; size_t sl = _replSet.find('/'); if( sl == string::npos ) return _replSet; return _replSet.substr(0, sl); } + bool usingReplSets() const { return !_replSet.empty(); } + // for master/slave replication string source; // --source string only; // --only - + bool quiet; // --quiet - bool notablescan; // --notablescan - bool prealloc; // --noprealloc - bool smallfiles; // --smallfiles - + bool noTableScan; // --notablescan no table scans allowed + bool prealloc; // --noprealloc no preallocation of data files + bool smallfiles; // --smallfiles allocate smaller data files + bool quota; // --quota int quotaFiles; // --quotaFiles bool cpu; // --cpu show cpu time periodically + bool dur; // --dur durability + + /** --durOptions 7 dump journal and terminate without doing anything further + --durOptions 4 recover and terminate without listening + */ + enum { // bits to be ORed + DurDumpJournal = 1, // dump diagnostics on the journal during recovery + DurScanOnly = 2, // don't do any real work, just scan and dump if dump specified + DurRecoverOnly = 4, // terminate after recovery step + DurParanoid = 8, // paranoid mode enables extra checks + DurAlwaysCommit = 16 // do a group commit every time the writelock is released + }; + int durOptions; // --durOptions for debugging + long long oplogSize; // --oplogSize int defaultProfile; // --profile int slowMS; // --time in ms that is "slow" int pretouch; // --pretouch for replication application (experimental) - bool moveParanoia; // for move chunk paranoia + bool moveParanoia; // for move chunk paranoia + double syncdelay; // seconds between fsyncs - enum { - DefaultDBPort = 27017, - ConfigServerPort = 27019, - ShardServerPort = 27018 - }; + string 
socket; // UNIX domain socket directory - CmdLine() : - port(DefaultDBPort), rest(false), quiet(false), notablescan(false), prealloc(true), smallfiles(false), - quota(false), quotaFiles(8), cpu(false), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ) - { } - - - static void addGlobalOptions( boost::program_options::options_description& general , + static void addGlobalOptions( boost::program_options::options_description& general , boost::program_options::options_description& hidden ); - + static void addWindowsOptions( boost::program_options::options_description& windows , + boost::program_options::options_description& hidden ); + + /** * @return true if should run program, false if should exit */ - static bool store( int argc , char ** argv , + static bool store( int argc , char ** argv , boost::program_options::options_description& visible, boost::program_options::options_description& hidden, boost::program_options::positional_options_description& positional, boost::program_options::variables_map &output ); }; - + extern CmdLine cmdLine; - + void setupCoreSignals(); + + string prettyHostName(); + + + /** + * used for setParameter + * so you can write validation code that lives with code using it + * rather than all in the command place + * also lets you have mongos or mongod specific code + * without pulling it all sorts of things + */ + class ParameterValidator { + public: + ParameterValidator( const string& name ); + virtual ~ParameterValidator() {} + + virtual bool isValid( BSONElement e , string& errmsg ) = 0; + + static ParameterValidator * get( const string& name ); + + private: + string _name; + + // don't need to lock since this is all done in static init + static map * _all; + }; + } diff --git a/db/commands.cpp b/db/commands.cpp index ef219fe..770d035 100644 --- a/db/commands.cpp +++ b/db/commands.cpp @@ -38,7 +38,7 @@ namespace mongo { } ss << "\n"; bool web = _webCommands->count(name) != 0; - if( web ) ss << ""; + if( web ) ss << ""; ss << name; if( web ) ss << ""; ss << "\n"; @@ -55,7 +55,7 @@ namespace mongo { ss << ""; if( helpStr != "no help defined" ) { const char *p = helpStr.c_str(); - while( *p ) { + while( *p ) { if( *p == '<' ) { ss << "<"; p++; continue; @@ -67,7 +67,7 @@ namespace mongo { p++; continue; } - if( strncmp(p, "http:", 5) == 0 ) { + if( strncmp(p, "http:", 5) == 0 ) { ss << ""; @@ -120,7 +120,7 @@ namespace mongo { void Command::help( stringstream& help ) const { help << "no help defined"; } - + bool Command::runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder) { const char *p = strchr(ns, '.'); if ( !p ) return false; @@ -145,7 +145,7 @@ namespace mongo { ok = false; errmsg = "access denied - use admin db"; } - else if ( jsobj.getBoolField( "help" ) ){ + else if ( jsobj.getBoolField( "help" ) ) { stringstream help; help << "help for: " << e.fieldName() << " "; c->help( help ); @@ -161,18 +161,18 @@ namespace mongo { if (!have_ok) anObjBuilder.append( "ok" , ok ? 
1.0 : 0.0 ); - + if ( !ok && !have_errmsg) { anObjBuilder.append("errmsg", errmsg); uassert_nothrow(errmsg.c_str()); } return true; } - + return false; } - Command* Command::findCommand( const string& name ){ + Command* Command::findCommand( const string& name ) { map::iterator i = _commands->find( name ); if ( i == _commands->end() ) return 0; @@ -180,7 +180,7 @@ namespace mongo { } - Command::LockType Command::locktype( const string& name ){ + Command::LockType Command::locktype( const string& name ) { Command * c = findCommand( name ); if ( ! c ) return WRITE; @@ -189,10 +189,10 @@ namespace mongo { void Command::logIfSlow( const Timer& timer, const string& msg ) { int ms = timer.millis(); - if ( ms > cmdLine.slowMS ){ + if ( ms > cmdLine.slowMS ) { out() << msg << " took " << ms << " ms." << endl; } } - - + + } // namespace mongo diff --git a/db/commands.h b/db/commands.h index a8a61c4..42e46a0 100644 --- a/db/commands.h +++ b/db/commands.h @@ -18,7 +18,9 @@ #pragma once #include "../pch.h" + #include "jsobj.h" +#include "../util/timer.h" namespace mongo { @@ -32,7 +34,7 @@ namespace mongo { */ class Command { public: - + enum LockType { READ = -1 , NONE = 0 , WRITE = 1 }; const string name; @@ -47,11 +49,11 @@ namespace mongo { */ virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0; - /* - note: logTheTop() MUST be false if READ + /* + note: logTheTop() MUST be false if READ if NONE, can't use Client::Context setup use with caution - */ + */ virtual LockType locktype() const = 0; /* Return true if only the admin ns has privileges to run this command. */ @@ -61,7 +63,7 @@ namespace mongo { void htmlHelp(stringstream&) const; - /* Like adminOnly, but even stricter: we must either be authenticated for admin db, + /* Like adminOnly, but even stricter: we must either be authenticated for admin db, or, if running without auth, on the local interface. When localHostOnlyIfNoAuth() is true, adminOnly() must also be true. @@ -72,7 +74,7 @@ namespace mongo { (the command directly from a client -- if fromRepl, always allowed). */ virtual bool slaveOk() const = 0; - + /* Return true if the client force a command to be run on a slave by turning on the 'slaveok' option in the command query. */ @@ -89,12 +91,12 @@ namespace mongo { virtual void help( stringstream& help ) const; - /* Return true if authentication and security applies to the commands. Some commands + /* Return true if authentication and security applies to the commands. Some commands (e.g., getnonce, authenticate) can be done by anyone even unauthorized. 
*/ virtual bool requiresAuth() { return true; } - /** @param webUI expose the command in the web ui as localhost:28017/ + /** @param webUI expose the command in the web ui as localhost:28017/ @param oldName an optional old, deprecated name for the command */ Command(const char *_name, bool webUI = false, const char *oldName = 0); @@ -102,7 +104,7 @@ namespace mongo { virtual ~Command() {} protected: - BSONObj getQuery( const BSONObj& cmdObj ){ + BSONObj getQuery( const BSONObj& cmdObj ) { if ( cmdObj["query"].type() == Object ) return cmdObj["query"].embeddedObject(); if ( cmdObj["q"].type() == Object ) diff --git a/db/commands/distinct.cpp b/db/commands/distinct.cpp new file mode 100644 index 0000000..2e26bcd --- /dev/null +++ b/db/commands/distinct.cpp @@ -0,0 +1,150 @@ +// distinct.cpp + +/** +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "../commands.h" +#include "../instance.h" +#include "../queryoptimizer.h" +#include "../clientcursor.h" + +namespace mongo { + + class DistinctCommand : public Command { + public: + DistinctCommand() : Command("distinct") {} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return READ; } + virtual void help( stringstream &help ) const { + help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; + } + + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + Timer t; + string ns = dbname + '.' + cmdObj.firstElement().valuestr(); + + string key = cmdObj["key"].valuestrsafe(); + BSONObj keyPattern = BSON( key << 1 ); + + BSONObj query = getQuery( cmdObj ); + + int bufSize = BSONObjMaxUserSize - 4096; + BufBuilder bb( bufSize ); + char * start = bb.buf(); + + BSONArrayBuilder arr( bb ); + BSONElementSet values; + + long long nscanned = 0; // locations looked at + long long nscannedObjects = 0; // full objects looked at + long long n = 0; // matches + MatchDetails md; + + NamespaceDetails * d = nsdetails( ns.c_str() ); + + if ( ! d ) { + result.appendArray( "values" , BSONObj() ); + result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) ); + return true; + } + + shared_ptr cursor; + if ( ! query.isEmpty() ) { + cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); + } + else { + + // query is empty, so lets see if we can find an index + // with the key so we don't have to hit the raw data + NamespaceDetails::IndexIterator ii = d->ii(); + while ( ii.more() ) { + IndexDetails& idx = ii.next(); + + if ( d->isMultikey( ii.pos() - 1 ) ) + continue; + + if ( idx.inKeyPattern( key ) ) { + cursor = bestGuessCursor( ns.c_str() , BSONObj() , idx.keyPattern() ); + break; + } + + } + + if ( ! 
cursor.get() ) + cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); + + } + + + + scoped_ptr cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); + + while ( cursor->ok() ) { + nscanned++; + bool loadedObject = false; + + if ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() , &md ) ) { + n++; + + BSONElementSet temp; + loadedObject = ! cc->getFieldsDotted( key , temp ); + + for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) { + BSONElement e = *i; + if ( values.count( e ) ) + continue; + + int now = bb.len(); + + uassert(10044, "distinct too big, 4mb cap", ( now + e.size() + 1024 ) < bufSize ); + + arr.append( e ); + BSONElement x( start + now ); + + values.insert( x ); + } + } + + if ( loadedObject || md.loadedObject ) + nscannedObjects++; + + cursor->advance(); + + if (!cc->yieldSometimes()) + break; + + RARELY killCurrentOp.checkForInterrupt(); + } + + assert( start == bb.buf() ); + + result.appendArray( "values" , arr.done() ); + + { + BSONObjBuilder b; + b.appendNumber( "n" , n ); + b.appendNumber( "nscanned" , nscanned ); + b.appendNumber( "nscannedObjects" , nscannedObjects ); + b.appendNumber( "timems" , t.millis() ); + result.append( "stats" , b.obj() ); + } + + return true; + } + + } distinctCmd; + +} diff --git a/db/commands/group.cpp b/db/commands/group.cpp new file mode 100644 index 0000000..0cc6ab3 --- /dev/null +++ b/db/commands/group.cpp @@ -0,0 +1,202 @@ +// group.cpp + +/** +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/ + +#include "pch.h" +#include "../commands.h" +#include "../instance.h" +#include "../queryoptimizer.h" + +namespace mongo { + + class GroupCommand : public Command { + public: + GroupCommand() : Command("group") {} + virtual LockType locktype() const { return READ; } + virtual bool slaveOk() const { return false; } + virtual bool slaveOverrideOk() { return true; } + virtual void help( stringstream &help ) const { + help << "http://www.mongodb.org/display/DOCS/Aggregation"; + } + + BSONObj getKey( const BSONObj& obj , const BSONObj& keyPattern , ScriptingFunction func , double avgSize , Scope * s ) { + if ( func ) { + BSONObjBuilder b( obj.objsize() + 32 ); + b.append( "0" , obj ); + int res = s->invoke( func , b.obj() ); + uassert( 10041 , (string)"invoke failed in $keyf: " + s->getError() , res == 0 ); + int type = s->type("return"); + uassert( 10042 , "return of $key has to be an object" , type == Object ); + return s->getObject( "return" ); + } + return obj.extractFields( keyPattern , true ); + } + + bool group( string realdbname , const string& ns , const BSONObj& query , + BSONObj keyPattern , string keyFunctionCode , string reduceCode , const char * reduceScope , + BSONObj initial , string finalize , + string& errmsg , BSONObjBuilder& result ) { + + + auto_ptr s = globalScriptEngine->getPooledScope( realdbname ); + s->localConnect( realdbname.c_str() ); + + if ( reduceScope ) + s->init( reduceScope ); + + s->setObject( "$initial" , initial , true ); + + s->exec( "$reduce = " + reduceCode , "reduce setup" , false , true , true , 100 ); + s->exec( "$arr = [];" , "reduce setup 2" , false , true , true , 100 ); + ScriptingFunction f = s->createFunction( + "function(){ " + " if ( $arr[n] == null ){ " + " next = {}; " + " Object.extend( next , $key ); " + " Object.extend( next , $initial , true ); " + " $arr[n] = next; " + " next = null; " + " } " + " $reduce( obj , $arr[n] ); " + "}" ); + + ScriptingFunction keyFunction = 0; + if ( keyFunctionCode.size() ) { + keyFunction = s->createFunction( keyFunctionCode.c_str() ); + } + + + double keysize = keyPattern.objsize() * 3; + double keynum = 1; + + map map; + list blah; + + shared_ptr cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); + + while ( cursor->ok() ) { + if ( cursor->matcher() && ! 
cursor->matcher()->matchesCurrent( cursor.get() ) ) { + cursor->advance(); + continue; + } + + BSONObj obj = cursor->current(); + cursor->advance(); + + BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() ); + keysize += key.objsize(); + keynum++; + + int& n = map[key]; + if ( n == 0 ) { + n = map.size(); + s->setObject( "$key" , key , true ); + + uassert( 10043 , "group() can't handle more than 20000 unique keys" , n <= 20000 ); + } + + s->setObject( "obj" , obj , true ); + s->setNumber( "n" , n - 1 ); + if ( s->invoke( f , BSONObj() , 0 , true ) ) { + throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() ); + } + } + + if (!finalize.empty()) { + s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 ); + ScriptingFunction g = s->createFunction( + "function(){ " + " for(var i=0; i < $arr.length; i++){ " + " var ret = $finalize($arr[i]); " + " if (ret !== undefined) " + " $arr[i] = ret; " + " } " + "}" ); + s->invoke( g , BSONObj() , 0 , true ); + } + + result.appendArray( "retval" , s->getObject( "$arr" ) ); + result.append( "count" , keynum - 1 ); + result.append( "keys" , (int)(map.size()) ); + s->exec( "$arr = [];" , "reduce setup 2" , false , true , true , 100 ); + s->gc(); + + return true; + } + + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + + /* db.$cmd.findOne( { group :

} ) */ + const BSONObj& p = jsobj.firstElement().embeddedObjectUserCheck(); + + BSONObj q; + if ( p["cond"].type() == Object ) + q = p["cond"].embeddedObject(); + else if ( p["condition"].type() == Object ) + q = p["condition"].embeddedObject(); + else + q = getQuery( p ); + + if ( p["ns"].type() != String ) { + errmsg = "ns has to be set"; + return false; + } + + string ns = dbname + "." + p["ns"].String(); + + BSONObj key; + string keyf; + if ( p["key"].type() == Object ) { + key = p["key"].embeddedObjectUserCheck(); + if ( ! p["$keyf"].eoo() ) { + errmsg = "can't have key and $keyf"; + return false; + } + } + else if ( p["$keyf"].type() ) { + keyf = p["$keyf"]._asCode(); + } + else { + // no key specified, will use entire object as key + } + + BSONElement reduce = p["$reduce"]; + if ( reduce.eoo() ) { + errmsg = "$reduce has to be set"; + return false; + } + + BSONElement initial = p["initial"]; + if ( initial.type() != Object ) { + errmsg = "initial has to be an object"; + return false; + } + + + string finalize; + if (p["finalize"].type()) + finalize = p["finalize"]._asCode(); + + return group( dbname , ns , q , + key , keyf , reduce._asCode() , reduce.type() != CodeWScope ? 0 : reduce.codeWScopeScopeData() , + initial.embeddedObject() , finalize , + errmsg , result ); + } + + } cmdGroup; + + +} // namespace mongo diff --git a/db/commands/isself.cpp b/db/commands/isself.cpp new file mode 100644 index 0000000..b97f51e --- /dev/null +++ b/db/commands/isself.cpp @@ -0,0 +1,220 @@ +// isself.cpp + +#include "pch.h" +#include "../../util/message.h" +#include "../commands.h" +#include "../../client/dbclient.h" + +#ifndef _WIN32 +# ifndef __sunos__ +# include +# endif +# include +# include +#endif + + +namespace mongo { + +#if !defined(_WIN32) && !defined(__sunos__) + + vector getMyAddrs() { + ifaddrs * addrs; + + int status = getifaddrs(&addrs); + massert(13469, "getifaddrs failure: " + errnoWithDescription(errno), status == 0); + + vector out; + + // based on example code from linux getifaddrs manpage + for (ifaddrs * addr = addrs; addr != NULL; addr = addr->ifa_next) { + if ( addr->ifa_addr == NULL ) continue; + int family = addr->ifa_addr->sa_family; + char host[NI_MAXHOST]; + + if (family == AF_INET || family == AF_INET6) { + status = getnameinfo(addr->ifa_addr, + (family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)), + host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + if ( status != 0 ) { + freeifaddrs( addrs ); + addrs = NULL; + msgasserted( 13470, string("getnameinfo() failed: ") + gai_strerror(status) ); + } + + out.push_back(host); + } + + } + + freeifaddrs( addrs ); + addrs = NULL; + + if (logLevel >= 1) { + log(1) << "getMyAddrs():"; + for (vector::const_iterator it=out.begin(), end=out.end(); it!=end; ++it) { + log(1) << " [" << *it << ']'; + } + log(1) << endl; + } + + return out; + } + + vector getAllIPs(StringData iporhost) { + addrinfo* addrs = NULL; + addrinfo hints; + memset(&hints, 0, sizeof(addrinfo)); + hints.ai_socktype = SOCK_STREAM; + hints.ai_family = (IPv6Enabled() ? 
AF_UNSPEC : AF_INET); + + static string portNum = BSONObjBuilder::numStr(cmdLine.port); + + vector out; + + int ret = getaddrinfo(iporhost.data(), portNum.c_str(), &hints, &addrs); + if ( ret ) { + warning() << "getaddrinfo(\"" << iporhost.data() << "\") failed: " << gai_strerror(ret) << endl; + return out; + } + + for (addrinfo* addr = addrs; addr != NULL; addr = addr->ai_next) { + int family = addr->ai_family; + char host[NI_MAXHOST]; + + if (family == AF_INET || family == AF_INET6) { + int status = getnameinfo(addr->ai_addr, addr->ai_addrlen, host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + + massert(13472, string("getnameinfo() failed: ") + gai_strerror(status), status == 0); + + out.push_back(host); + } + + } + + freeaddrinfo(addrs); + + if (logLevel >= 1) { + log(1) << "getallIPs(\"" << iporhost << "\"):"; + for (vector::const_iterator it=out.begin(), end=out.end(); it!=end; ++it) { + log(1) << " [" << *it << ']'; + } + log(1) << endl; + } + + return out; + } +#endif + + + class IsSelfCommand : public Command { + public: + IsSelfCommand() : Command("_isSelf") , _cacheLock( "IsSelfCommand::_cacheLock" ) {} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream &help ) const { + help << "{ _isSelf : 1 } INTERNAL ONLY"; + } + + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + init(); + result.append( "id" , _id ); + return true; + } + + void init() { + scoped_lock lk( _cacheLock ); + if ( ! _id.isSet() ) + _id.init(); + } + + OID _id; + + mongo::mutex _cacheLock; + map _cache; + } isSelfCommand; + + bool HostAndPort::isSelf() const { + + int p = _port == -1 ? CmdLine::DefaultDBPort : _port; + + if( p != cmdLine.port ) { + // shortcut - ports have to match at the very least + return false; + } + + string host = str::stream() << _host << ":" << p; + + { + // check cache for this host + // debatably something _could_ change, but I'm not sure right now (erh 10/14/2010) + scoped_lock lk( isSelfCommand._cacheLock ); + map::const_iterator i = isSelfCommand._cache.find( host ); + if ( i != isSelfCommand._cache.end() ) + return i->second; + } + +#if !defined(_WIN32) && !defined(__sunos__) + // on linux and os x we can do a quick check for an ip match + + const vector myaddrs = getMyAddrs(); + const vector addrs = getAllIPs(_host); + + for (vector::const_iterator i=myaddrs.begin(), iend=myaddrs.end(); i!=iend; ++i) { + for (vector::const_iterator j=addrs.begin(), jend=addrs.end(); j!=jend; ++j) { + string a = *i; + string b = *j; + + if ( a == b || + ( str::startsWith( a , "127." ) && str::startsWith( b , "127." ) ) // 127. is all loopback + ) { + + // add to cache + scoped_lock lk( isSelfCommand._cacheLock ); + isSelfCommand._cache[host] = true; + return true; + } + } + } + +#endif + + if ( ! Listener::getTimeTracker() ) { + // this ensures we are actually running a server + // this may return true later, so may want to retry + return false; + } + + + try { + + isSelfCommand.init(); + + DBClientConnection conn; + string errmsg; + if ( ! conn.connect( host , errmsg ) ) { + // should this go in the cache? 
+ return false; + } + + BSONObj out; + bool ok = conn.simpleCommand( "admin" , &out , "_isSelf" ); + + bool me = ok && out["id"].type() == jstOID && isSelfCommand._id == out["id"].OID(); + + // add to cache + scoped_lock lk( isSelfCommand._cacheLock ); + isSelfCommand._cache[host] = me; + + return me; + } + catch ( std::exception& e ) { + warning() << "could't check isSelf (" << host << ") " << e.what() << endl; + } + + return false; + } + + + +} diff --git a/db/commands/mr.cpp b/db/commands/mr.cpp new file mode 100644 index 0000000..16c604a --- /dev/null +++ b/db/commands/mr.cpp @@ -0,0 +1,1074 @@ +// mr.cpp + +/** + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "pch.h" +#include "../db.h" +#include "../instance.h" +#include "../commands.h" +#include "../../scripting/engine.h" +#include "../../client/dbclient.h" +#include "../../client/connpool.h" +#include "../../client/parallel.h" +#include "../queryoptimizer.h" +#include "../matcher.h" +#include "../clientcursor.h" +#include "../replpair.h" +#include "../../s/d_chunk_manager.h" +#include "../../s/d_logic.h" + +#include "mr.h" + +namespace mongo { + + namespace mr { + + AtomicUInt Config::JOB_NUMBER; + + JSFunction::JSFunction( string type , const BSONElement& e ) { + _type = type; + _code = e._asCode(); + + if ( e.type() == CodeWScope ) + _wantedScope = e.codeWScopeObject(); + } + + void JSFunction::init( State * state ) { + _scope = state->scope(); + assert( _scope ); + _scope->init( &_wantedScope ); + + _func = _scope->createFunction( _code.c_str() ); + uassert( 13598 , str::stream() << "couldn't compile code for: " << _type , _func ); + } + + void JSMapper::init( State * state ) { + _func.init( state ); + _params = state->config().mapParams; + } + + /** + * Applies the map function to an object, which should internally call emit() + */ + void JSMapper::map( const BSONObj& o ) { + Scope * s = _func.scope(); + assert( s ); + s->setThis( &o ); + if ( s->invoke( _func.func() , _params , 0 , true ) ) + throw UserException( 9014, str::stream() << "map invoke failed: " + s->getError() ); + } + + /** + * Applies the finalize function to a tuple obj (key, val) + * Returns tuple obj {_id: key, value: newval} + */ + BSONObj JSFinalizer::finalize( const BSONObj& o ) { + Scope * s = _func.scope(); + + Scope::NoDBAccess no = s->disableDBAccess( "can't access db inside finalize" ); + s->invokeSafe( _func.func() , o ); + + // don't want to use o.objsize() to size b + // since there are many cases where the point of finalize + // is converting many fields to 1 + BSONObjBuilder b; + b.append( o.firstElement() ); + s->append( b , "value" , "return" ); + return b.obj(); + } + + /** + * Reduces a list of tuple objects (key, value) to a single tuple {"0": key, "1": value} + */ + BSONObj JSReducer::reduce( const BSONList& tuples ) { + if (tuples.size() <= 1) + return tuples[0]; + BSONObj key; + int endSizeEstimate = 16; + _reduce( tuples , key , endSizeEstimate ); + + BSONObjBuilder b(endSizeEstimate); + 
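            // Worked example (values are made up): if _reduce() collapsed the tuples
            //   { "0" : "a", "1" : 1 }  and  { "0" : "a", "1" : 2 }
            // with a summing reduce function, then key.firstElement() is "a" and the
            // scope's "return" slot holds 3, so the object built just below is
            //   { "0" : "a", "1" : 3 }.
            // finalReduce() later emits the user-visible form { _id : "a", value : 3 }.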
b.appendAs( key.firstElement() , "0" ); + _func.scope()->append( b , "1" , "return" ); + return b.obj(); + } + + /** + * Reduces a list of tuple object (key, value) to a single tuple {_id: key, value: val} + * Also applies a finalizer method if present. + */ + BSONObj JSReducer::finalReduce( const BSONList& tuples , Finalizer * finalizer ) { + + BSONObj res; + BSONObj key; + + if (tuples.size() == 1) { + // 1 obj, just use it + key = tuples[0]; + BSONObjBuilder b(key.objsize()); + BSONObjIterator it(key); + b.appendAs( it.next() , "_id" ); + b.appendAs( it.next() , "value" ); + res = b.obj(); + } + else { + // need to reduce + int endSizeEstimate = 16; + _reduce( tuples , key , endSizeEstimate ); + BSONObjBuilder b(endSizeEstimate); + b.appendAs( key.firstElement() , "_id" ); + _func.scope()->append( b , "value" , "return" ); + res = b.obj(); + } + + if ( finalizer ) { + res = finalizer->finalize( res ); + } + + return res; + } + + /** + * actually applies a reduce, to a list of tuples (key, value). + * After the call, tuples will hold a single tuple {"0": key, "1": value} + */ + void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) { + uassert( 10074 , "need values" , tuples.size() ); + + int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128; + + // need to build the reduce args: ( key, [values] ) + BSONObjBuilder reduceArgs( sizeEstimate ); + boost::scoped_ptr valueBuilder; + int sizeSoFar = 0; + unsigned n = 0; + for ( ; n BSONObjMaxUserSize ) { + assert( n > 1 ); // if not, inf. loop + break; + } + + valueBuilder->append( ee ); + sizeSoFar += ee.size(); + } + assert(valueBuilder); + valueBuilder->done(); + BSONObj args = reduceArgs.obj(); + + Scope * s = _func.scope(); + + s->invokeSafe( _func.func() , args ); + + if ( s->type( "return" ) == Array ) { + uasserted( 10075 , "reduce -> multiple not supported yet"); + return; + } + + endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() ); + + if ( n == tuples.size() ) + return; + + // the input list was too large, add the rest of elmts to new tuples and reduce again + // note: would be better to use loop instead of recursion to avoid stack overflow + BSONList x; + for ( ; n < tuples.size(); n++ ) { + x.push_back( tuples[n] ); + } + BSONObjBuilder temp( endSizeEstimate ); + temp.append( key.firstElement() ); + s->append( temp , "1" , "return" ); + x.push_back( temp.obj() ); + _reduce( x , key , endSizeEstimate ); + } + + Config::Config( const string& _dbname , const BSONObj& cmdObj ) { + + dbname = _dbname; + ns = dbname + "." 
+ cmdObj.firstElement().valuestr(); + + verbose = cmdObj["verbose"].trueValue(); + + uassert( 13602 , "outType is no longer a valid option" , cmdObj["outType"].eoo() ); + + if ( cmdObj["out"].type() == String ) { + finalShort = cmdObj["out"].String(); + outType = REPLACE; + } + else if ( cmdObj["out"].type() == Object ) { + BSONObj o = cmdObj["out"].embeddedObject(); + + BSONElement e = o.firstElement(); + string t = e.fieldName(); + + if ( t == "normal" || t == "replace" ) { + outType = REPLACE; + finalShort = e.String(); + } + else if ( t == "merge" ) { + outType = MERGE; + finalShort = e.String(); + } + else if ( t == "reduce" ) { + outType = REDUCE; + finalShort = e.String(); + } + else if ( t == "inline" ) { + outType = INMEMORY; + } + else { + uasserted( 13522 , str::stream() << "unknown out specifier [" << t << "]" ); + } + + if (o.hasElement("db")) { + outDB = o["db"].String(); + } + } + else { + uasserted( 13606 , "'out' has to be a string or an object" ); + } + + if ( outType != INMEMORY ) { // setup names + tempLong = str::stream() << (outDB.empty() ? dbname : outDB) << ".tmp.mr." << cmdObj.firstElement().String() << "_" << finalShort << "_" << JOB_NUMBER++; + + incLong = tempLong + "_inc"; + + finalLong = str::stream() << (outDB.empty() ? dbname : outDB) << "." << finalShort; + } + + { + // scope and code + + if ( cmdObj["scope"].type() == Object ) + scopeSetup = cmdObj["scope"].embeddedObjectUserCheck(); + + mapper.reset( new JSMapper( cmdObj["map"] ) ); + reducer.reset( new JSReducer( cmdObj["reduce"] ) ); + if ( cmdObj["finalize"].type() && cmdObj["finalize"].trueValue() ) + finalizer.reset( new JSFinalizer( cmdObj["finalize"] ) ); + + if ( cmdObj["mapparams"].type() == Array ) { + mapParams = cmdObj["mapparams"].embeddedObjectUserCheck(); + } + + } + + { + // query options + BSONElement q = cmdObj["query"]; + if ( q.type() == Object ) + filter = q.embeddedObjectUserCheck(); + else + uassert( 13608 , "query has to be blank or an Object" , ! q.trueValue() ); + + + BSONElement s = cmdObj["sort"]; + if ( s.type() == Object ) + sort = s.embeddedObjectUserCheck(); + else + uassert( 13609 , "sort has to be blank or an Object" , ! s.trueValue() ); + + if ( cmdObj["limit"].isNumber() ) + limit = cmdObj["limit"].numberLong(); + else + limit = 0; + } + } + + /** + * Create temporary collection, set up indexes + */ + void State::prepTempCollection() { + if ( ! _onDisk ) + return; + + _db.dropCollection( _config.tempLong ); + + { + // create + writelock lock( _config.tempLong.c_str() ); + Client::Context ctx( _config.tempLong.c_str() ); + string errmsg; + if ( ! userCreateNS( _config.tempLong.c_str() , BSONObj() , errmsg , true ) ) { + uasserted( 13630 , str::stream() << "userCreateNS failed for mr tempLong ns: " << _config.tempLong << " err: " << errmsg ); + } + } + + + { + // copy indexes + auto_ptr idx = _db.getIndexes( _config.finalLong ); + while ( idx->more() ) { + BSONObj i = idx->next(); + + BSONObjBuilder b( i.objsize() + 16 ); + b.append( "ns" , _config.tempLong ); + BSONObjIterator j( i ); + while ( j.more() ) { + BSONElement e = j.next(); + if ( str::equals( e.fieldName() , "_id" ) || + str::equals( e.fieldName() , "ns" ) ) + continue; + + b.append( e ); + } + + BSONObj indexToInsert = b.obj(); + insert( Namespace( _config.tempLong.c_str() ).getSisterNS( "system.indexes" ).c_str() , indexToInsert ); + } + + } + + } + + /** + * For inline mode, appends results to output object. 
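        // Sketch of a command document the Config constructor above would accept.
        // Collection, database and field names here are invented, and the map/reduce
        // bodies are ordinary JavaScript carried as string elements (the _asCode()
        // accessor used above accepts Code as well as plain strings):
        //
        //     BSONObj mrCmd = BSON( "mapreduce" << "events"
        //                        << "map"     << "function() { emit( this.user , 1 ); }"
        //                        << "reduce"  << "function( k , vals ) { var n = 0; for ( var i = 0; i < vals.length; i++ ) n += vals[i]; return n; }"
        //                        << "query"   << BSON( "type" << "click" )
        //                        << "out"     << BSON( "merge" << "clicks_by_user" << "db" << "reports" )
        //                        << "verbose" << true );
        //
        // Other forms of "out" parsed above are a bare collection name or
        // { replace : ... } (REPLACE), { reduce : ... } (REDUCE), and { inline : 1 }
        // for purely in-memory results returned in the command reply.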
+ * Makes sure (key, value) tuple is formatted as {_id: key, value: val} + */ + void State::appendResults( BSONObjBuilder& final ) { + if ( _onDisk ) + return; + + uassert( 13604 , "too much data for in memory map/reduce" , _size < ( BSONObjMaxUserSize / 2 ) ); + + BSONArrayBuilder b( (int)(_size * 1.2) ); // _size is data size, doesn't count overhead and keys + + for ( InMemory::iterator i=_temp->begin(); i!=_temp->end(); ++i ) { + BSONObj key = i->first; + BSONList& all = i->second; + + assert( all.size() == 1 ); + + BSONObjIterator vi( all[0] ); + vi.next(); + + BSONObjBuilder temp( b.subobjStart() ); + temp.appendAs( key.firstElement() , "_id" ); + temp.appendAs( vi.next() , "value" ); + temp.done(); + } + + BSONArray res = b.arr(); + uassert( 13605 , "too much data for in memory map/reduce" , res.objsize() < ( BSONObjMaxUserSize * 2 / 3 ) ); + + final.append( "results" , res ); + } + + /** + * Does post processing on output collection. + * This may involve replacing, merging or reducing. + */ + long long State::postProcessCollection() { + if ( _onDisk == false || _config.outType == Config::INMEMORY ) + return _temp->size(); + + dblock lock; + + if ( _config.finalLong == _config.tempLong ) + return _db.count( _config.finalLong ); + + if ( _config.outType == Config::REPLACE || _db.count( _config.finalLong ) == 0 ) { + // replace: just rename from temp to final collection name, dropping previous collection + _db.dropCollection( _config.finalLong ); + BSONObj info; + uassert( 10076 , "rename failed" , + _db.runCommand( "admin" , BSON( "renameCollection" << _config.tempLong << "to" << _config.finalLong ) , info ) ); + _db.dropCollection( _config.tempLong ); + } + else if ( _config.outType == Config::MERGE ) { + // merge: upsert new docs into old collection + auto_ptr cursor = _db.query( _config.tempLong , BSONObj() ); + while ( cursor->more() ) { + BSONObj o = cursor->next(); + Helpers::upsert( _config.finalLong , o ); + getDur().commitIfNeeded(); + } + _db.dropCollection( _config.tempLong ); + } + else if ( _config.outType == Config::REDUCE ) { + // reduce: apply reduce op on new result and existing one + BSONList values; + + auto_ptr cursor = _db.query( _config.tempLong , BSONObj() ); + while ( cursor->more() ) { + BSONObj temp = cursor->next(); + BSONObj old; + + bool found; + { + Client::Context tx( _config.finalLong ); + found = Helpers::findOne( _config.finalLong.c_str() , temp["_id"].wrap() , old , true ); + } + + if ( found ) { + // need to reduce + values.clear(); + values.push_back( temp ); + values.push_back( old ); + Helpers::upsert( _config.finalLong , _config.reducer->finalReduce( values , _config.finalizer.get() ) ); + } + else { + Helpers::upsert( _config.finalLong , temp ); + } + getDur().commitIfNeeded(); + } + _db.dropCollection( _config.tempLong ); + } + + return _db.count( _config.finalLong ); + } + + /** + * Insert doc in collection + */ + void State::insert( const string& ns , BSONObj& o ) { + assert( _onDisk ); + + writelock l( ns ); + Client::Context ctx( ns ); + + theDataFileMgr.insertAndLog( ns.c_str() , o , false ); + } + + /** + * Insert doc into the inc collection + */ + void State::_insertToInc( BSONObj& o ) { + assert( _onDisk ); + theDataFileMgr.insertWithObjMod( _config.incLong.c_str() , o , true ); + getDur().commitIfNeeded(); + } + + State::State( const Config& c ) : _config( c ), _size(0), _numEmits(0) { + _temp.reset( new InMemory() ); + _onDisk = _config.outType != Config::INMEMORY; + } + + bool State::sourceExists() { + return _db.exists( _config.ns 
); + } + + long long State::incomingDocuments() { + return _db.count( _config.ns , _config.filter , QueryOption_SlaveOk , (unsigned) _config.limit ); + } + + State::~State() { + if ( _onDisk ) { + try { + _db.dropCollection( _config.tempLong ); + _db.dropCollection( _config.incLong ); + } + catch ( std::exception& e ) { + error() << "couldn't cleanup after map reduce: " << e.what() << endl; + } + } + } + + /** + * Initialize the mapreduce operation, creating the inc collection + */ + void State::init() { + // setup js + _scope.reset(globalScriptEngine->getPooledScope( _config.dbname ).release() ); + _scope->localConnect( _config.dbname.c_str() ); + + if ( ! _config.scopeSetup.isEmpty() ) + _scope->init( &_config.scopeSetup ); + + _config.mapper->init( this ); + _config.reducer->init( this ); + if ( _config.finalizer ) + _config.finalizer->init( this ); + + _scope->injectNative( "emit" , fast_emit ); + + if ( _onDisk ) { + // clear temp collections + _db.dropCollection( _config.tempLong ); + _db.dropCollection( _config.incLong ); + + // create the inc collection and make sure we have index on "0" key + { + writelock l( _config.incLong ); + Client::Context ctx( _config.incLong ); + string err; + if ( ! userCreateNS( _config.incLong.c_str() , BSON( "autoIndexId" << 0 ) , err , false ) ) { + uasserted( 13631 , str::stream() << "userCreateNS failed for mr incLong ns: " << _config.incLong << " err: " << err ); + } + } + + BSONObj sortKey = BSON( "0" << 1 ); + _db.ensureIndex( _config.incLong , sortKey ); + + } + + } + + /** + * Applies last reduce and finalize on a list of tuples (key, val) + * Inserts single result {_id: key, value: val} into temp collection + */ + void State::finalReduce( BSONList& values ) { + if ( !_onDisk || values.size() == 0 ) + return; + + BSONObj res = _config.reducer->finalReduce( values , _config.finalizer.get() ); + insert( _config.tempLong , res ); + } + + /** + * Applies last reduce and finalize. + * After calling this method, the temp collection will be completed. + * If inline, the results will be in the in memory map + */ + void State::finalReduce( CurOp * op , ProgressMeterHolder& pm ) { + if ( ! 
_onDisk ) { + // all data has already been reduced, just finalize + if ( _config.finalizer ) { + long size = 0; + for ( InMemory::iterator i=_temp->begin(); i!=_temp->end(); ++i ) { + BSONObj key = i->first; + BSONList& all = i->second; + + assert( all.size() == 1 ); + + BSONObj res = _config.finalizer->finalize( all[0] ); + + all.clear(); + all.push_back( res ); + size += res.objsize(); + } + _size = size; + } + return; + } + + // use index on "0" to pull sorted data + assert( _temp->size() == 0 ); + BSONObj sortKey = BSON( "0" << 1 ); + { + bool foundIndex = false; + + auto_ptr idx = _db.getIndexes( _config.incLong ); + while ( idx.get() && idx->more() ) { + BSONObj x = idx->next(); + if ( sortKey.woCompare( x["key"].embeddedObject() ) == 0 ) { + foundIndex = true; + break; + } + } + + assert( foundIndex ); + } + + readlock rl( _config.incLong.c_str() ); + Client::Context ctx( _config.incLong ); + + BSONObj prev; + BSONList all; + + assert( pm == op->setMessage( "m/r: (3/3) final reduce to collection" , _db.count( _config.incLong, BSONObj(), QueryOption_SlaveOk ) ) ); + + shared_ptr temp = bestGuessCursor( _config.incLong.c_str() , BSONObj() , sortKey ); + auto_ptr cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , _config.incLong.c_str() ) ); + + // iterate over all sorted objects + while ( cursor->ok() ) { + BSONObj o = cursor->current().getOwned(); + cursor->advance(); + + pm.hit(); + + if ( o.woSortOrder( prev , sortKey ) == 0 ) { + // object is same as previous, add to array + all.push_back( o ); + if ( pm->hits() % 1000 == 0 ) { + if ( ! cursor->yield() ) { + cursor.release(); + break; + } + killCurrentOp.checkForInterrupt(); + } + continue; + } + + ClientCursor::YieldLock yield (cursor.get()); + // reduce an finalize array + finalReduce( all ); + + all.clear(); + prev = o; + all.push_back( o ); + + if ( ! yield.stillOk() ) { + cursor.release(); + break; + } + + killCurrentOp.checkForInterrupt(); + } + + // we need to release here since we temp release below + cursor.release(); + + { + dbtempreleasecond tl; + if ( ! tl.unlocked() ) + log( LL_WARNING ) << "map/reduce can't temp release" << endl; + // reduce and finalize last array + finalReduce( all ); + } + + pm.finished(); + } + + /** + * Attempts to reduce objects in the memory map. + * A new memory map will be created to hold the results. + * If applicable, objects with unique key may be dumped to inc collection. + * Input and output objects are both {"0": key, "1": val} + */ + void State::reduceInMemory() { + + auto_ptr n( new InMemory() ); // for new data + long nSize = 0; + long dupCount = 0; + + for ( InMemory::iterator i=_temp->begin(); i!=_temp->end(); ++i ) { + BSONObj key = i->first; + BSONList& all = i->second; + + if ( all.size() == 1 ) { + // only 1 value for this key + if ( _onDisk ) { + // this key has low cardinality, so just write to collection + writelock l(_config.incLong); + Client::Context ctx(_config.incLong.c_str()); + _insertToInc( *(all.begin()) ); + } + else { + // add to new map + _add( n.get() , all[0] , nSize, dupCount ); + } + } + else if ( all.size() > 1 ) { + // several values, reduce and add to map + BSONObj res = _config.reducer->reduce( all ); + _add( n.get() , res , nSize, dupCount ); + } + } + + // swap maps + _temp.reset( n.release() ); + _size = nSize; + _dupCount = dupCount; + } + + /** + * Dumps the entire in memory map to the inc collection. + */ + void State::dumpToInc() { + if ( ! 
_onDisk ) + return; + + writelock l(_config.incLong); + Client::Context ctx(_config.incLong); + + for ( InMemory::iterator i=_temp->begin(); i!=_temp->end(); i++ ) { + BSONList& all = i->second; + if ( all.size() < 1 ) + continue; + + for ( BSONList::iterator j=all.begin(); j!=all.end(); j++ ) + _insertToInc( *j ); + } + _temp->clear(); + _size = 0; + + } + + /** + * Adds object to in memory map + */ + void State::emit( const BSONObj& a ) { + _numEmits++; + _add( _temp.get() , a , _size, _dupCount ); + } + + void State::_add( InMemory* im, const BSONObj& a , long& size, long& dupCount ) { + BSONList& all = (*im)[a]; + all.push_back( a ); + size += a.objsize() + 16; + if (all.size() > 1) + ++dupCount; + } + + /** + * this method checks the size of in memory map and potentially flushes to disk + */ + void State::checkSize() { + if ( _size < 1024 * 50 ) + return; + + // attempt to reduce in memory map, if we've seen duplicates + if ( _dupCount > 0) { + long before = _size; + reduceInMemory(); + log(1) << " mr: did reduceInMemory " << before << " -->> " << _size << endl; + } + + if ( ! _onDisk || _size < 1024 * 100 ) + return; + + dumpToInc(); + log(1) << " mr: dumping to db" << endl; + } + + boost::thread_specific_ptr _tl; + + /** + * emit that will be called by js function + */ + BSONObj fast_emit( const BSONObj& args ) { + uassert( 10077 , "fast_emit takes 2 args" , args.nFields() == 2 ); + uassert( 13069 , "an emit can't be more than half max bson size" , args.objsize() < ( BSONObjMaxUserSize / 2 ) ); + (*_tl)->emit( args ); + return BSONObj(); + } + + /** + * This class represents a map/reduce command executed on a single server + */ + class MapReduceCommand : public Command { + public: + MapReduceCommand() : Command("mapReduce", false, "mapreduce") {} + virtual bool slaveOk() const { return !replSet; } + virtual bool slaveOverrideOk() { return true; } + + virtual void help( stringstream &help ) const { + help << "Run a map/reduce operation on the server.\n"; + help << "Note this is used for aggregation, not querying, in MongoDB.\n"; + help << "http://www.mongodb.org/display/DOCS/MapReduce"; + } + virtual LockType locktype() const { return NONE; } + bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + Timer t; + Client::GodScope cg; + Client& client = cc(); + CurOp * op = client.curop(); + + Config config( dbname , cmd ); + + log(1) << "mr ns: " << config.ns << endl; + + bool shouldHaveData = false; + + long long num = 0; + long long inReduce = 0; + + BSONObjBuilder countsBuilder; + BSONObjBuilder timingBuilder; + State state( config ); + + if ( ! 
state.sourceExists() ) { + errmsg = "ns doesn't exist"; + return false; + } + + if (replSet && state.isOnDisk()) { + // this means that it will be doing a write operation, make sure we are on Master + // ideally this check should be in slaveOk(), but at that point config is not known + if (!isMaster(dbname.c_str())) { + errmsg = "not master"; + return false; + } + } + + try { + state.init(); + + { + State** s = new State*(); + s[0] = &state; + _tl.reset( s ); + } + + wassert( config.limit < 0x4000000 ); // see case on next line to 32 bit unsigned + ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , state.incomingDocuments() ) ); + long long mapTime = 0; + { + readlock lock( config.ns ); + Client::Context ctx( config.ns ); + + ShardChunkManagerPtr chunkManager; + if ( shardingState.needShardChunkManager( config.ns ) ) { + chunkManager = shardingState.getShardChunkManager( config.ns ); + } + + // obtain cursor on data to apply mr to, sorted + shared_ptr temp = bestGuessCursor( config.ns.c_str(), config.filter, config.sort ); + auto_ptr cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , config.ns.c_str() ) ); + + Timer mt; + // go through each doc + while ( cursor->ok() ) { + // make sure we dont process duplicates in case data gets moved around during map + if ( cursor->currentIsDup() ) { + cursor->advance(); + continue; + } + + if ( ! cursor->currentMatches() ) { + cursor->advance(); + continue; + } + + BSONObj o = cursor->current(); + cursor->advance(); + + // check to see if this is a new object we don't own yet + // because of a chunk migration + if ( chunkManager && ! chunkManager->belongsToMe( o ) ) + continue; + + // do map + if ( config.verbose ) mt.reset(); + config.mapper->map( o ); + if ( config.verbose ) mapTime += mt.micros(); + + num++; + if ( num % 100 == 0 ) { + // try to yield lock regularly + ClientCursor::YieldLock yield (cursor.get()); + Timer t; + // check if map needs to be dumped to disk + state.checkSize(); + inReduce += t.micros(); + + if ( ! yield.stillOk() ) { + cursor.release(); + break; + } + + killCurrentOp.checkForInterrupt(); + } + pm.hit(); + + if ( config.limit && num >= config.limit ) + break; + } + } + pm.finished(); + + killCurrentOp.checkForInterrupt(); + // update counters + countsBuilder.appendNumber( "input" , num ); + countsBuilder.appendNumber( "emit" , state.numEmits() ); + if ( state.numEmits() ) + shouldHaveData = true; + + timingBuilder.append( "mapTime" , mapTime / 1000 ); + timingBuilder.append( "emitLoop" , t.millis() ); + + op->setMessage( "m/r: (2/3) final reduce in memory" ); + // do reduce in memory + // this will be the last reduce needed for inline mode + state.reduceInMemory(); + // if not inline: dump the in memory map to inc collection, all data is on disk + state.dumpToInc(); + state.prepTempCollection(); + // final reduce + state.finalReduce( op , pm ); + + _tl.reset(); + } + catch ( ... 
) { + log() << "mr failed, removing collection" << endl; + throw; + } + + long long finalCount = state.postProcessCollection(); + state.appendResults( result ); + + timingBuilder.append( "total" , t.millis() ); + + if (!config.outDB.empty()) { + BSONObjBuilder loc; + if ( !config.outDB.empty()) + loc.append( "db" , config.outDB ); + if ( !config.finalShort.empty() ) + loc.append( "collection" , config.finalShort ); + result.append("result", loc.obj()); + } + else { + if ( !config.finalShort.empty() ) + result.append( "result" , config.finalShort ); + } + result.append( "timeMillis" , t.millis() ); + countsBuilder.appendNumber( "output" , finalCount ); + if ( config.verbose ) result.append( "timing" , timingBuilder.obj() ); + result.append( "counts" , countsBuilder.obj() ); + + if ( finalCount == 0 && shouldHaveData ) { + result.append( "cmd" , cmd ); + errmsg = "there were emits but no data!"; + return false; + } + + return true; + } + + } mapReduceCommand; + + /** + * This class represents a map/reduce command executed on the output server of a sharded env + */ + class MapReduceFinishCommand : public Command { + public: + MapReduceFinishCommand() : Command( "mapreduce.shardedfinish" ) {} + virtual bool slaveOk() const { return !replSet; } + virtual bool slaveOverrideOk() { return true; } + + virtual LockType locktype() const { return NONE; } + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe(); + + Config config( dbname , cmdObj.firstElement().embeddedObjectUserCheck() ); + config.incLong = config.tempLong; + + set servers; + + BSONObjBuilder shardCounts; + map counts; + + BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck(); + vector< auto_ptr > shardCursors; + + { + // parse per shard results + BSONObjIterator i( shards ); + while ( i.more() ) { + BSONElement e = i.next(); + string shard = e.fieldName(); + + BSONObj res = e.embeddedObjectUserCheck(); + + uassert( 10078 , "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() ); + servers.insert( shard ); + shardCounts.appendAs( res["counts"] , shard ); + + BSONObjIterator j( res["counts"].embeddedObjectUserCheck() ); + while ( j.more() ) { + BSONElement temp = j.next(); + counts[temp.fieldName()] += temp.numberLong(); + } + + } + + } + + State state(config); + state.prepTempCollection(); + + { + // reduce from each stream + + BSONObj sortKey = BSON( "_id" << 1 ); + + ParallelSortClusteredCursor cursor( servers , dbname + "." 
+ shardedOutputCollection , + Query().sort( sortKey ) ); + cursor.init(); + state.init(); + + BSONList values; + if (!config.outDB.empty()) { + BSONObjBuilder loc; + if ( !config.outDB.empty()) + loc.append( "db" , config.outDB ); + if ( !config.finalShort.empty() ) + loc.append( "collection" , config.finalShort ); + result.append("result", loc.obj()); + } + else { + if ( !config.finalShort.empty() ) + result.append( "result" , config.finalShort ); + } + + while ( cursor.more() ) { + BSONObj t = cursor.next().getOwned(); + + if ( values.size() == 0 ) { + values.push_back( t ); + continue; + } + + if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ) { + values.push_back( t ); + continue; + } + + + state.emit( config.reducer->finalReduce( values , config.finalizer.get() ) ); + values.clear(); + values.push_back( t ); + } + + if ( values.size() ) + state.emit( config.reducer->finalReduce( values , config.finalizer.get() ) ); + } + + + state.dumpToInc(); + state.postProcessCollection(); + state.appendResults( result ); + + for ( set::iterator i=servers.begin(); i!=servers.end(); i++ ) { + ScopedDbConnection conn( i->_server ); + conn->dropCollection( dbname + "." + shardedOutputCollection ); + conn.done(); + } + + result.append( "shardCounts" , shardCounts.obj() ); + + { + BSONObjBuilder c; + for ( map::iterator i=counts.begin(); i!=counts.end(); i++ ) { + c.append( i->first , i->second ); + } + result.append( "counts" , c.obj() ); + } + + return 1; + } + } mapReduceFinishCommand; + + } + +} + diff --git a/db/commands/mr.h b/db/commands/mr.h new file mode 100644 index 0000000..f505a45 --- /dev/null +++ b/db/commands/mr.h @@ -0,0 +1,291 @@ +// mr.h + +/** + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#pragma once + +#include "pch.h" + +namespace mongo { + + namespace mr { + + typedef vector BSONList; + + class State; + + // ------------ function interfaces ----------- + + class Mapper : boost::noncopyable { + public: + virtual ~Mapper() {} + virtual void init( State * state ) = 0; + + virtual void map( const BSONObj& o ) = 0; + }; + + class Finalizer : boost::noncopyable { + public: + virtual ~Finalizer() {} + virtual void init( State * state ) = 0; + + /** + * this takes a tuple and returns a tuple + */ + virtual BSONObj finalize( const BSONObj& tuple ) = 0; + }; + + class Reducer : boost::noncopyable { + public: + virtual ~Reducer() {} + virtual void init( State * state ) = 0; + + virtual BSONObj reduce( const BSONList& tuples ) = 0; + /** this means its a final reduce, even if there is no finalizer */ + virtual BSONObj finalReduce( const BSONList& tuples , Finalizer * finalizer ) = 0; + }; + + // ------------ js function implementations ----------- + + /** + * used as a holder for Scope and ScriptingFunction + * visitor like pattern as Scope is gotten from first access + */ + class JSFunction : boost::noncopyable { + public: + /** + * @param type (map|reduce|finalize) + */ + JSFunction( string type , const BSONElement& e ); + virtual ~JSFunction() {} + + virtual void init( State * state ); + + Scope * scope() const { return _scope; } + ScriptingFunction func() const { return _func; } + + private: + string _type; + string _code; // actual javascript code + BSONObj _wantedScope; // this is for CodeWScope + + Scope * _scope; // this is not owned by us, and might be shared + ScriptingFunction _func; + }; + + class JSMapper : public Mapper { + public: + JSMapper( const BSONElement & code ) : _func( "map" , code ) {} + virtual void map( const BSONObj& o ); + virtual void init( State * state ); + + private: + JSFunction _func; + BSONObj _params; + }; + + class JSReducer : public Reducer { + public: + JSReducer( const BSONElement& code ) : _func( "reduce" , code ) {} + virtual void init( State * state ) { _func.init( state ); } + + virtual BSONObj reduce( const BSONList& tuples ); + virtual BSONObj finalReduce( const BSONList& tuples , Finalizer * finalizer ); + + private: + + /** + * result in "return" + * @param key OUT + * @param endSizeEstimate OUT + */ + void _reduce( const BSONList& values , BSONObj& key , int& endSizeEstimate ); + + JSFunction _func; + + }; + + class JSFinalizer : public Finalizer { + public: + JSFinalizer( const BSONElement& code ) : _func( "finalize" , code ) {} + virtual BSONObj finalize( const BSONObj& o ); + virtual void init( State * state ) { _func.init( state ); } + private: + JSFunction _func; + + }; + + // ----------------- + + + class TupleKeyCmp { + public: + TupleKeyCmp() {} + bool operator()( const BSONObj &l, const BSONObj &r ) const { + return l.firstElement().woCompare( r.firstElement() ) < 0; + } + }; + + typedef map< BSONObj,BSONList,TupleKeyCmp > InMemory; // from key to list of tuples + + /** + * holds map/reduce config information + */ + class Config { + public: + Config( const string& _dbname , const BSONObj& cmdObj ); + + string dbname; + string ns; + + // options + bool verbose; + + // query options + + BSONObj filter; + BSONObj sort; + long long limit; + + // functions + + scoped_ptr mapper; + scoped_ptr reducer; + scoped_ptr finalizer; + + BSONObj mapParams; + BSONObj scopeSetup; + + // output tables + string incLong; + string tempLong; + + string finalShort; + string finalLong; + + string outDB; + + enum { REPLACE , // atomically 
replace the collection + MERGE , // merge keys, override dups + REDUCE , // merge keys, reduce dups + INMEMORY // only store in memory, limited in size + } outType; + + static AtomicUInt JOB_NUMBER; + }; // end MRsetup + + /** + * stores information about intermediate map reduce state + * controls flow of data from map->reduce->finalize->output + */ + class State { + public: + State( const Config& c ); + ~State(); + + void init(); + + // ---- prep ----- + bool sourceExists(); + + long long incomingDocuments(); + + // ---- map stage ---- + + /** + * stages on in in-memory storage + */ + void emit( const BSONObj& a ); + + /** + * if size is big, run a reduce + * if its still big, dump to temp collection + */ + void checkSize(); + + /** + * run reduce on _temp + */ + void reduceInMemory(); + + /** + * transfers in memory storage to temp collection + */ + void dumpToInc(); + + // ------ reduce stage ----------- + + void prepTempCollection(); + + void finalReduce( BSONList& values ); + + void finalReduce( CurOp * op , ProgressMeterHolder& pm ); + + // ------- cleanup/data positioning ---------- + + /** + @return number objects in collection + */ + long long postProcessCollection(); + + /** + * if INMEMORY will append + * may also append stats or anything else it likes + */ + void appendResults( BSONObjBuilder& b ); + + // -------- util ------------ + + /** + * inserts with correct replication semantics + */ + void insert( const string& ns , BSONObj& o ); + + // ------ simple accessors ----- + + /** State maintains ownership, do no use past State lifetime */ + Scope* scope() { return _scope.get(); } + + const Config& config() { return _config; } + + const bool isOnDisk() { return _onDisk; } + + long long numEmits() const { return _numEmits; } + + protected: + + void _insertToInc( BSONObj& o ); + static void _add( InMemory* im , const BSONObj& a , long& size, long& dupCount ); + + scoped_ptr _scope; + const Config& _config; + bool _onDisk; // if the end result of this map reduce is disk or not + + DBDirectClient _db; + + scoped_ptr _temp; + long _size; // bytes in _temp + long _dupCount; // number of duplicate key entries + + long long _numEmits; + }; + + BSONObj fast_emit( const BSONObj& args ); + + } // end mr namespace +} + + diff --git a/db/common.cpp b/db/common.cpp index b7883f5..44bc54d 100644 --- a/db/common.cpp +++ b/db/common.cpp @@ -26,4 +26,8 @@ namespace mongo { /* we use new here so we don't have to worry about destructor orders at program shutdown */ MongoMutex &dbMutex( *(new MongoMutex("rw:dbMutex")) ); + MongoMutex::MongoMutex(const char *name) : _m(name) { + _remapPrivateViewRequested = false; + } + } diff --git a/db/compact.cpp b/db/compact.cpp new file mode 100644 index 0000000..6bafd91 --- /dev/null +++ b/db/compact.cpp @@ -0,0 +1,199 @@ +/* @file compact.cpp + compaction of deleted space in pdfiles (datafiles) +*/ + +/* NOTE 6Oct2010 : this file PRELIMINARY, EXPERIMENTAL, NOT DONE, NOT USED YET (not in SConstruct) */ + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "pdfile.h" +#include "concurrency.h" +#include "commands.h" +#include "curop-inl.h" +#include "../util/concurrency/task.h" + +namespace mongo { + + class CompactJob : public task::Task { + public: + CompactJob(string ns) : _ns(ns) { } + private: + virtual string name() const { return "compact"; } + virtual void doWork(); + NamespaceDetails * beginBlock(); + void doBatch(); + void prep(); + const string _ns; + unsigned long long _nrecords; + unsigned long long _ncompacted; + DiskLoc _firstExtent; + }; + + // lock & set context first. this checks that collection still exists, and that it hasn't + // morphed into a capped collection between locks (which is possible) + NamespaceDetails * CompactJob::beginBlock() { + NamespaceDetails *nsd = nsdetails(_ns.c_str()); + if( nsd == 0 ) throw "ns no longer present"; + if( nsd->firstExtent.isNull() ) + throw "no first extent"; + if( nsd->capped ) + throw "capped collection"; + return nsd; + } + + void CompactJob::doBatch() { + unsigned n = 0; + { + /* pre-touch records in a read lock so that paging happens in read not write lock. + note we are only touching the records though; if indexes aren't in RAM, they will + page later. So the concept is only partial. + */ + readlock lk; + Timer t; + Client::Context ctx(_ns); + NamespaceDetails *nsd = beginBlock(); + if( nsd->firstExtent != _firstExtent ) { + // TEMP DEV - stop after 1st extent + throw "change of first extent"; + } + DiskLoc loc = nsd->firstExtent.ext()->firstRecord; + while( !loc.isNull() ) { + Record *r = loc.rec(); + loc = r->getNext(loc); + if( ++n >= 100 || (n % 8 == 0 && t.millis() > 50) ) + break; + } + } + { + writelock lk; + Client::Context ctx(_ns); + NamespaceDetails *nsd = beginBlock(); + for( unsigned i = 0; i < n; i++ ) { + if( nsd->firstExtent != _firstExtent ) { + // TEMP DEV - stop after 1st extent + throw "change of first extent (or it is now null)"; + } + DiskLoc loc = nsd->firstExtent.ext()->firstRecord; + Record *rec = loc.rec(); + BSONObj o = loc.obj().getOwned(); // todo: inefficient, double mem copy... + try { + theDataFileMgr.deleteRecord(_ns.c_str(), rec, loc, false); + } + catch(DBException&) { throw "error deleting record"; } + try { + theDataFileMgr.insertNoReturnVal(_ns.c_str(), o); + } + catch(DBException&) { + /* todo: save the record somehow??? try again with 'avoid' logic? */ + log() << "compact: error re-inserting record ns:" << _ns << " n:" << _nrecords << " _id:" << o["_id"].toString() << endl; + throw "error re-inserting record"; + } + ++_ncompacted; + if( killCurrentOp.globalInterruptCheck() ) + throw "interrupted"; + } + } + } + + void CompactJob::prep() { + readlock lk; + Client::Context ctx(_ns); + NamespaceDetails *nsd = beginBlock(); + DiskLoc L = nsd->firstExtent; + assert( !L.isNull() ); + _firstExtent = L; + _nrecords = nsd->stats.nrecords; + _ncompacted = 0; + } + + static mutex m("compact"); + static volatile bool running; + + void CompactJob::doWork() { + Client::initThread("compact"); + cc().curop()->reset(); + cc().curop()->setNS(_ns.c_str()); + cc().curop()->markCommand(); + sleepsecs(60); + try { + prep(); + while( _ncompacted < _nrecords ) + doBatch(); + } + catch(const char *p) { + log() << "info: exception compact " << p << endl; + } + catch(...) 
{ + log() << "info: exception compact" << endl; + } + mongo::running = false; + cc().shutdown(); + } + + /* --- CompactCmd --- */ + + class CompactCmd : public Command { + public: + virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + string coll = cmdObj.firstElement().valuestr(); + if( coll.empty() || db.empty() ) { + errmsg = "no collection name specified"; + return false; + } + string ns = db + '.' + coll; + assert( isANormalNSName(ns.c_str()) ); + { + readlock lk; + Client::Context ctx(ns); + if( nsdetails(ns.c_str()) == 0 ) { + errmsg = "namespace " + ns + " does not exist"; + return false; + } + } + { + scoped_lock lk(m); + if( running ) { + errmsg = "a compaction is already running"; + return false; + } + running = true; + task::fork( new CompactJob(ns) ); + return true; + } + errmsg = "not done"; + return false; + } + + virtual LockType locktype() const { return NONE; } + virtual bool adminOnly() const { return false; } + virtual bool slaveOk() const { return true; } + virtual bool logTheOp() { return false; } + virtual void help( stringstream& help ) const { + help << "compact / defragment a collection in the background, slowly, attempting to minimize disruptions to other operations\n" + "{ compact : }"; + } + virtual bool requiresAuth() { return true; } + + /** @param webUI expose the command in the web ui as localhost:28017/ + @param oldName an optional old, deprecated name for the command + */ + CompactCmd() : Command("compact") { } + }; + static CompactCmd compactCmd; + +} diff --git a/db/concurrency.h b/db/concurrency.h index 9b91b0f..39cd853 100644 --- a/db/concurrency.h +++ b/db/concurrency.h @@ -1,3 +1,5 @@ +// @file concurrency.h + /* * Copyright (C) 2010 10gen Inc. * @@ -14,9 +16,7 @@ * along with this program. If not, see . */ -/* concurrency.h - - mongod concurrency rules & notes will be placed here. +/*mongod concurrency rules & notes will be placed here. Mutex heirarchy (1 = "leaf") name level @@ -31,19 +31,22 @@ #include "../util/concurrency/rwlock.h" #include "../util/mmap.h" +#include "../util/time_support.h" namespace mongo { string sayClientState(); bool haveClient(); - - void curopWaitingForLock( int type ); - void curopGotLock(); + + class Client; + Client* curopWaitingForLock( int type ); + void curopGotLock(Client*); /* mutex time stats */ class MutexInfo { - unsigned long long start, enter, timeLocked; // all in microseconds + unsigned long long enter, timeLocked; // microseconds int locked; + unsigned long long start; // last as we touch this least often public: MutexInfo() : timeLocked(0) , locked(0) { @@ -61,215 +64,53 @@ namespace mongo { if ( locked == 0 ) timeLocked += curTimeMicros64() - enter; } - int isLocked() const { - return locked; - } + int isLocked() const { return locked; } void getTimingInfo(unsigned long long &s, unsigned long long &tl) const { s = start; tl = timeLocked; } - unsigned long long getTimeLocked() const { - return timeLocked; - } + unsigned long long getTimeLocked() const { return timeLocked; } }; - class MongoMutex { - MutexInfo _minfo; - RWLock _m; - ThreadLocalValue _state; - - /* we use a separate TLS value for releasedEarly - that is ok as - our normal/common code path, we never even touch it. 
- */ - ThreadLocalValue _releasedEarly; - public: - MongoMutex(const char * name) : _m(name) { } - - /** - * @return - * > 0 write lock - * = 0 no lock - * < 0 read lock - */ - int getState() { return _state.get(); } - bool isWriteLocked() { return getState() > 0; } - void assertWriteLocked() { - assert( getState() > 0 ); - DEV assert( !_releasedEarly.get() ); - } - bool atLeastReadLocked() { return _state.get() != 0; } - void assertAtLeastReadLocked() { assert(atLeastReadLocked()); } - - bool _checkWriteLockAlready(){ - //DEV cout << "LOCK" << endl; - DEV assert( haveClient() ); - - int s = _state.get(); - if( s > 0 ) { - _state.set(s+1); - return true; - } - - massert( 10293 , (string)"internal error: locks are not upgradeable: " + sayClientState() , s == 0 ); - - return false; - } - - void lock() { - if ( _checkWriteLockAlready() ) - return; - - _state.set(1); - - curopWaitingForLock( 1 ); - _m.lock(); - curopGotLock(); - - _minfo.entered(); - - MongoFile::lockAll(); - } - - bool lock_try( int millis ) { - if ( _checkWriteLockAlready() ) - return true; - - curopWaitingForLock( 1 ); - bool got = _m.lock_try( millis ); - curopGotLock(); - - if ( got ){ - _minfo.entered(); - _state.set(1); - MongoFile::lockAll(); - } - - return got; - } - - - void unlock() { - //DEV cout << "UNLOCK" << endl; - int s = _state.get(); - if( s > 1 ) { - _state.set(s-1); - return; - } - if( s != 1 ) { - if( _releasedEarly.get() ) { - _releasedEarly.set(false); - return; - } - massert( 12599, "internal error: attempt to unlock when wasn't in a write lock", false); - } - - MongoFile::unlockAll(); - - _state.set(0); - _minfo.leaving(); - _m.unlock(); - } - - /* unlock (write lock), and when unlock() is called later, - be smart then and don't unlock it again. - */ - void releaseEarly() { - assert( getState() == 1 ); // must not be recursive - assert( !_releasedEarly.get() ); - _releasedEarly.set(true); - unlock(); - } - - void lock_shared() { - //DEV cout << " LOCKSHARED" << endl; - int s = _state.get(); - if( s ) { - if( s > 0 ) { - // already in write lock - just be recursive and stay write locked - _state.set(s+1); - return; - } - else { - // already in read lock - recurse - _state.set(s-1); - return; - } - } - _state.set(-1); - curopWaitingForLock( -1 ); - _m.lock_shared(); - curopGotLock(); - } - - bool lock_shared_try( int millis ) { - int s = _state.get(); - if ( s ){ - // we already have a lock, so no need to try - lock_shared(); - return true; - } +} - bool got = _m.lock_shared_try( millis ); - if ( got ) - _state.set(-1); - return got; - } - - void unlock_shared() { - //DEV cout << " UNLOCKSHARED" << endl; - int s = _state.get(); - if( s > 0 ) { - assert( s > 1 ); /* we must have done a lock write first to have s > 1 */ - _state.set(s-1); - return; - } - if( s < -1 ) { - _state.set(s+1); - return; - } - assert( s == -1 ); - _state.set(0); - _m.unlock_shared(); - } - - MutexInfo& info() { return _minfo; } - }; +#include "mongomutex.h" - extern MongoMutex &dbMutex; +namespace mongo { inline void dbunlocking_write() { } inline void dbunlocking_read() { } struct writelock { - writelock(const string& ns) { - dbMutex.lock(); - } - ~writelock() { + writelock() { dbMutex.lock(); } + writelock(const string& ns) { dbMutex.lock(); } + ~writelock() { DESTRUCTOR_GUARD( dbunlocking_write(); dbMutex.unlock(); ); } }; - + struct readlock { readlock(const string& ns) { dbMutex.lock_shared(); } - ~readlock() { + readlock() { dbMutex.lock_shared(); } + ~readlock() { DESTRUCTOR_GUARD( dbunlocking_read(); 
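        // Usage sketch of these RAII helpers (the same pattern State::insert in
        // db/commands/mr.cpp uses earlier in this patch; the namespace string and
        // document are illustrative):
        //
        //     {
        //         writelock l( "test.events" );          // exclusive dbMutex for the write
        //         Client::Context ctx( "test.events" );  // select the database context
        //         theDataFileMgr.insertAndLog( "test.events" , doc , false );
        //     }   // ~writelock releases dbMutex inside DESTRUCTOR_GUARD
        //
        //     {
        //         readlock r( "test.events" );           // shared dbMutex for reads
        //         Client::Context ctx( "test.events" );
        //         // ... cursor / count / findOne work ...
        //     }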
dbMutex.unlock_shared(); ); } - }; + }; struct readlocktry { - readlocktry( const string&ns , int tryms ){ + readlocktry( const string&ns , int tryms ) { _got = dbMutex.lock_shared_try( tryms ); } ~readlocktry() { - if ( _got ){ + if ( _got ) { dbunlocking_read(); dbMutex.unlock_shared(); } @@ -280,11 +121,11 @@ namespace mongo { }; struct writelocktry { - writelocktry( const string&ns , int tryms ){ + writelocktry( const string&ns , int tryms ) { _got = dbMutex.lock_try( tryms ); } ~writelocktry() { - if ( _got ){ + if ( _got ) { dbunlocking_read(); dbMutex.unlock(); } @@ -294,10 +135,10 @@ namespace mongo { bool _got; }; - struct readlocktryassert : public readlocktry { - readlocktryassert(const string& ns, int tryms) : - readlocktry(ns,tryms) { - uassert(13142, "timeout getting readlock", got()); + struct readlocktryassert : public readlocktry { + readlocktryassert(const string& ns, int tryms) : + readlocktry(ns,tryms) { + uassert(13142, "timeout getting readlock", got()); } }; @@ -305,12 +146,12 @@ namespace mongo { if you have a write lock, that's ok too. */ struct atleastreadlock { - atleastreadlock( const string& ns ){ + atleastreadlock( const string& ns ) { _prev = dbMutex.getState(); if ( _prev == 0 ) dbMutex.lock_shared(); } - ~atleastreadlock(){ + ~atleastreadlock() { if ( _prev == 0 ) dbMutex.unlock_shared(); } @@ -318,6 +159,9 @@ namespace mongo { int _prev; }; + /* parameterized choice of read or write locking + use readlock and writelock instead of this when statically known which you want + */ class mongolock { bool _writelock; public: @@ -328,27 +172,28 @@ namespace mongo { else dbMutex.lock_shared(); } - ~mongolock() { + ~mongolock() { DESTRUCTOR_GUARD( - if( _writelock ) { - dbunlocking_write(); - dbMutex.unlock(); - } else { - dbunlocking_read(); - dbMutex.unlock_shared(); - } + if( _writelock ) { + dbunlocking_write(); + dbMutex.unlock(); + } + else { + dbunlocking_read(); + dbMutex.unlock_shared(); + } ); } /* this unlocks, does NOT upgrade. that works for our current usage */ void releaseAndWriteLock(); }; - - /* use writelock and readlock instead */ + + /* deprecated - use writelock and readlock instead */ struct dblock : public writelock { dblock() : writelock("") { } }; - // eliminate + // eliminate this - we should just type "dbMutex.assertWriteLocked();" instead inline void assertInWriteLock() { dbMutex.assertWriteLocked(); } } diff --git a/db/curop-inl.h b/db/curop-inl.h new file mode 100644 index 0000000..21d6f0a --- /dev/null +++ b/db/curop-inl.h @@ -0,0 +1,42 @@ +// @file curop-inl.h + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "curop.h" + +namespace mongo { + + // todo : move more here + + inline CurOp::CurOp( Client * client , CurOp * wrapped ) { + _client = client; + _wrapped = wrapped; + if ( _wrapped ) + _client->_curOp = this; + _start = _checkpoint = 0; + _active = false; + _reset(); + _op = 0; + // These addresses should never be written to again. 
The zeroes are + // placed here as a precaution because currentOp may be accessed + // without the db mutex. + memset(_ns, 0, sizeof(_ns)); + } + +} diff --git a/db/curop.h b/db/curop.h index bf06a69..c6e949b 100644 --- a/db/curop.h +++ b/db/curop.h @@ -1,4 +1,5 @@ -// curop.h +// @file curop.h + /* * Copyright (C) 2010 10gen Inc. * @@ -18,152 +19,188 @@ #pragma once -#include "namespace.h" +#include "namespace-inl.h" #include "client.h" #include "../bson/util/atomic_int.h" +#include "../util/concurrency/spin_lock.h" +#include "../util/time_support.h" #include "db.h" +#include "../scripting/engine.h" -namespace mongo { +namespace mongo { /* lifespan is different than CurOp because of recursives with DBDirectClient */ class OpDebug { public: StringBuilder str; - - void reset(){ - str.reset(); - } + void reset() { str.reset(); } }; - - /* Current operation (for the current Client). - an embedded member of Client class, and typically used from within the mutex there. */ - class CurOp : boost::noncopyable { - static AtomicUInt _nextOpNum; + + /** + * stores a copy of a bson obj in a fixed size buffer + * if its too big for the buffer, says "too big" + * useful for keeping a copy around indefinitely without wasting a lot of space or doing malloc + */ + class CachedBSONObj { + public: + enum { TOO_BIG_SENTINEL = 1 } ; static BSONObj _tooBig; // { $msg : "query not recording (too large)" } - - Client * _client; - CurOp * _wrapped; - unsigned long long _start; - unsigned long long _checkpoint; - unsigned long long _end; + CachedBSONObj() { + _size = (int*)_buf; + reset(); + } - bool _active; - int _op; - bool _command; - int _lockType; // see concurrency.h for values - bool _waitingForLock; - int _dbprofile; // 0=off, 1=slow, 2=all - AtomicUInt _opNum; - char _ns[Namespace::MaxNsLen+2]; - struct SockAddr _remote; - char _queryBuf[256]; - - void resetQuery(int x=0) { *((int *)_queryBuf) = x; } - - OpDebug _debug; - - ThreadSafeString _message; - ProgressMeter _progressMeter; + void reset( int sz = 0 ) { + _lock.lock(); + _reset( sz ); + _lock.unlock(); + } + + void set( const BSONObj& o ) { + _lock.lock(); + try { + int sz = o.objsize(); + + if ( sz > (int) sizeof(_buf) ) { + _reset(TOO_BIG_SENTINEL); + } + else { + memcpy(_buf, o.objdata(), sz ); + } + + _lock.unlock(); + } + catch ( ... ) { + _lock.unlock(); + throw; + } - void _reset(){ - _command = false; - _lockType = 0; - _dbprofile = 0; - _end = 0; - _waitingForLock = false; - _message = ""; - _progressMeter.finished(); } - void setNS(const char *ns) { - strncpy(_ns, ns, Namespace::MaxNsLen); + int size() const { return *_size; } + bool have() const { return size() > 0; } + + BSONObj get() { + _lock.lock(); + BSONObj o; + try { + o = _get(); + _lock.unlock(); + } + catch ( ... ) { + _lock.unlock(); + throw; + } + return o; + } + + void append( BSONObjBuilder& b , const StringData& name ) { + _lock.lock(); + try { + BSONObj temp = _get(); + b.append( name , temp ); + _lock.unlock(); + } + catch ( ... ) { + _lock.unlock(); + throw; + } } + private: + /** you have to be locked when you call this */ + BSONObj _get() { + int sz = size(); + if ( sz == 0 ) + return BSONObj(); + if ( sz == TOO_BIG_SENTINEL ) + return _tooBig; + return BSONObj( _buf ).copy(); + } + + /** you have to be locked when you call this */ + void _reset( int sz ) { _size[0] = sz; } + + SpinLock _lock; + int * _size; + char _buf[512]; + }; + + /* Current operation (for the current Client). + an embedded member of Client class, and typically used from within the mutex there. 
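       A typical access pattern from the owning thread (illustrative) is
           cc().curop()->markCommand();
           cc().curop()->setMessage( "my phase" );
       while other threads only inspect the structure through the currentOp/killOp
       paths, which is why _ns and the cached query below are kept safe to read
       without the db mutex.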
+ */ + class CurOp : boost::noncopyable { public: - - int querySize() const { return *((int *) _queryBuf); } - bool haveQuery() const { return querySize() != 0; } + CurOp( Client * client , CurOp * wrapped = 0 ); + ~CurOp(); - BSONObj query( bool threadSafe = false); + bool haveQuery() const { return _query.have(); } + BSONObj query() { return _query.get(); } - void ensureStarted(){ + void ensureStarted() { if ( _start == 0 ) - _start = _checkpoint = curTimeMicros64(); + _start = _checkpoint = curTimeMicros64(); } - void enter( Client::Context * context ){ + void enter( Client::Context * context ) { ensureStarted(); setNS( context->ns() ); if ( context->_db && context->_db->profile > _dbprofile ) _dbprofile = context->_db->profile; } - void leave( Client::Context * context ){ + void leave( Client::Context * context ) { unsigned long long now = curTimeMicros64(); Top::global.record( _ns , _op , _lockType , now - _checkpoint , _command ); _checkpoint = now; } - void reset(){ + void reset() { _reset(); _start = _checkpoint = 0; _active = true; _opNum = _nextOpNum++; _ns[0] = '?'; // just in case not set later _debug.reset(); - resetQuery(); + _query.reset(); } - + void reset( const SockAddr & remote, int op ) { reset(); _remote = remote; _op = op; } - - void markCommand(){ - _command = true; - } - void waitingForLock( int type ){ + void markCommand() { _command = true; } + + void waitingForLock( int type ) { _waitingForLock = true; if ( type > 0 ) _lockType = 1; else _lockType = -1; } - void gotLock(){ - _waitingForLock = false; - } - - OpDebug& debug(){ - return _debug; - } - - int profileLevel() const { - return _dbprofile; - } - - const char * getNS() const { - return _ns; - } + void gotLock() { _waitingForLock = false; } + OpDebug& debug() { return _debug; } + int profileLevel() const { return _dbprofile; } + const char * getNS() const { return _ns; } bool shouldDBProfile( int ms ) const { if ( _dbprofile <= 0 ) return false; - + return _dbprofile >= 2 || ms >= cmdLine.slowMS; } - + AtomicUInt opNum() const { return _opNum; } /** if this op is running */ bool active() const { return _active; } - + int getLockType() const { return _lockType; } - bool isWaitingForLock() const { return _waitingForLock; } + bool isWaitingForLock() const { return _waitingForLock; } int getOp() const { return _op; } - - + /** micros */ unsigned long long startTime() { ensureStarted(); @@ -174,75 +211,41 @@ namespace mongo { _active = false; _end = curTimeMicros64(); } - + unsigned long long totalTimeMicros() { massert( 12601 , "CurOp not marked done yet" , ! 
_active ); return _end - startTime(); } - int totalTimeMillis() { - return (int) (totalTimeMicros() / 1000); - } + int totalTimeMillis() { return (int) (totalTimeMicros() / 1000); } int elapsedMillis() { unsigned long long total = curTimeMicros64() - startTime(); return (int) (total / 1000); } - int elapsedSeconds() { - return elapsedMillis() / 1000; - } + int elapsedSeconds() { return elapsedMillis() / 1000; } - void setQuery(const BSONObj& query) { - if( query.objsize() > (int) sizeof(_queryBuf) ) { - resetQuery(1); // flag as too big and return - return; - } - memcpy(_queryBuf, query.objdata(), query.objsize()); - } + void setQuery(const BSONObj& query) { _query.set( query ); } - Client * getClient() const { - return _client; - } + Client * getClient() const { return _client; } - CurOp( Client * client , CurOp * wrapped = 0 ) { - _client = client; - _wrapped = wrapped; - if ( _wrapped ){ - _client->_curOp = this; - } - _start = _checkpoint = 0; - _active = false; - _reset(); - _op = 0; - // These addresses should never be written to again. The zeroes are - // placed here as a precaution because currentOp may be accessed - // without the db mutex. - memset(_ns, 0, sizeof(_ns)); - memset(_queryBuf, 0, sizeof(_queryBuf)); - } - - ~CurOp(); - - BSONObj info() { - if( ! cc().getAuthenticationInfo()->isAuthorized("admin") ) { + BSONObj info() { + if( ! cc().getAuthenticationInfo()->isAuthorized("admin") ) { BSONObjBuilder b; b.append("err", "unauthorized"); return b.obj(); } return infoNoauth(); } - - BSONObj infoNoauth( int attempt = 0 ); - string getRemoteString( bool includePort = true ){ - return _remote.toString(includePort); - } + BSONObj infoNoauth(); - ProgressMeter& setMessage( const char * msg , long long progressMeterTotal = 0 , int secondsBetween = 3 ){ + string getRemoteString( bool includePort = true ) { return _remote.toString(includePort); } - if ( progressMeterTotal ){ - if ( _progressMeter.isActive() ){ + ProgressMeter& setMessage( const char * msg , unsigned long long progressMeterTotal = 0 , int secondsBetween = 3 ) { + if ( progressMeterTotal ) { + if ( _progressMeter.isActive() ) { cout << "about to assert, old _message: " << _message << " new message:" << msg << endl; assert( ! 
_progressMeter.isActive() ); } @@ -251,38 +254,93 @@ namespace mongo { else { _progressMeter.finished(); } - + _message = msg; - + return _progressMeter; } - + string getMessage() const { return _message.toString(); } ProgressMeter& getProgressMeter() { return _progressMeter; } - + CurOp *parent() const { return _wrapped; } + void kill() { _killed = true; } + bool killed() const { return _killed; } + void setNS(const char *ns) { + strncpy(_ns, ns, Namespace::MaxNsLen); + _ns[Namespace::MaxNsLen] = 0; + } friend class Client; + + private: + static AtomicUInt _nextOpNum; + Client * _client; + CurOp * _wrapped; + unsigned long long _start; + unsigned long long _checkpoint; + unsigned long long _end; + bool _active; + int _op; + bool _command; + int _lockType; // see concurrency.h for values + bool _waitingForLock; + int _dbprofile; // 0=off, 1=slow, 2=all + AtomicUInt _opNum; + char _ns[Namespace::MaxNsLen+2]; + struct SockAddr _remote; + CachedBSONObj _query; + OpDebug _debug; + ThreadSafeString _message; + ProgressMeter _progressMeter; + volatile bool _killed; + + void _reset() { + _command = false; + _lockType = 0; + _dbprofile = 0; + _end = 0; + _waitingForLock = false; + _message = ""; + _progressMeter.finished(); + _killed = false; + } }; - /* 0 = ok - 1 = kill current operation and reset this to 0 - future: maybe use this as a "going away" thing on process termination with a higher flag value + /* _globalKill: we are shutting down + otherwise kill attribute set on specified CurOp + this class does not handle races between interruptJs and the checkForInterrupt functions - those must be + handled by the client of this class */ - extern class KillCurrentOp { - enum { Off, On, All } state; - AtomicUInt toKill; + extern class KillCurrentOp { public: - void killAll() { state = All; } - void kill(AtomicUInt i) { toKill = i; state = On; } - - void checkForInterrupt() { - if( state != Off ) { - if( state == All ) - uasserted(11600,"interrupted at shutdown"); - if( cc().curop()->opNum() == toKill ) { - state = Off; - uasserted(11601,"interrupted"); - } - } + void killAll(); + void kill(AtomicUInt i); + + /** @return true if global interrupt and should terminate the operation */ + bool globalInterruptCheck() const { return _globalKill; } + + void checkForInterrupt( bool heedMutex = true ) { + if ( heedMutex && dbMutex.isWriteLocked() ) + return; + if( _globalKill ) + uasserted(11600,"interrupted at shutdown"); + if( cc().curop()->killed() ) + uasserted(11601,"interrupted"); } + + /** @return "" if not interrupted. otherwise, you should stop. 
*/ + const char *checkForInterruptNoAssert( bool heedMutex = true ) { + if ( heedMutex && dbMutex.isWriteLocked() ) + return ""; + if( _globalKill ) + return "interrupted at shutdown"; + if( cc().curop()->killed() ) + return "interrupted"; + return ""; + } + + private: + void interruptJs( AtomicUInt *op ); + volatile bool _globalKill; } killCurrentOp; + } diff --git a/db/cursor.cpp b/db/cursor.cpp index e98cb7a..ac7afc1 100644 --- a/db/cursor.cpp +++ b/db/cursor.cpp @@ -16,7 +16,7 @@ #include "pch.h" #include "pdfile.h" -#include "curop.h" +#include "curop-inl.h" namespace mongo { @@ -24,14 +24,17 @@ namespace mongo { killCurrentOp.checkForInterrupt(); if ( eof() ) { if ( tailable_ && !last.isNull() ) { - curr = s->next( last ); - } else { + curr = s->next( last ); + } + else { return false; } - } else { + } + else { last = curr; curr = s->next( curr ); } + incNscanned(); return ok(); } @@ -72,7 +75,7 @@ namespace mongo { } ForwardCappedCursor::ForwardCappedCursor( NamespaceDetails *_nsd, const DiskLoc &startLoc ) : - nsd( _nsd ) { + nsd( _nsd ) { if ( !nsd ) return; DiskLoc start = startLoc; @@ -89,6 +92,7 @@ namespace mongo { } curr = start; s = this; + incNscanned(); } DiskLoc ForwardCappedCursor::next( const DiskLoc &prev ) const { @@ -112,19 +116,21 @@ namespace mongo { } ReverseCappedCursor::ReverseCappedCursor( NamespaceDetails *_nsd, const DiskLoc &startLoc ) : - nsd( _nsd ) { + nsd( _nsd ) { if ( !nsd ) return; DiskLoc start = startLoc; if ( start.isNull() ) { if ( !nsd->capLooped() ) { start = nsd->lastRecord(); - } else { + } + else { start = nsd->capExtent.ext()->lastRecord; } } curr = start; s = this; + incNscanned(); } DiskLoc ReverseCappedCursor::next( const DiskLoc &prev ) const { @@ -138,7 +144,8 @@ namespace mongo { if ( i == nextLoop( nsd, nsd->capExtent.ext()->lastRecord ) ) { return DiskLoc(); } - } else { + } + else { if ( i == nsd->capExtent.ext()->firstRecord ) { return DiskLoc(); } diff --git a/db/cursor.h b/db/cursor.h index db5d9a3..9797d66 100644 --- a/db/cursor.h +++ b/db/cursor.h @@ -23,14 +23,15 @@ #include "matcher.h" namespace mongo { - + + class NamespaceDetails; class Record; class CoveredIndexMatcher; /* Query cursors, base class. This is for our internal cursors. "ClientCursor" is a separate concept and is for the user's cursor. - WARNING concurrency: the vfunctions below are called back from within a + WARNING concurrency: the vfunctions below are called back from within a ClientCursor::ccmutex. Don't cause a deadlock, you've been warned. */ class Cursor : boost::noncopyable { @@ -49,7 +50,7 @@ namespace mongo { virtual DiskLoc refLoc() = 0; /* Implement these if you want the cursor to be "tailable" */ - + /* Request that the cursor starts tailing after advancing past last record. */ /* The implementation may or may not honor this request. */ virtual void setTailable() {} @@ -76,10 +77,10 @@ namespace mongo { /* called before query getmore block is iterated */ virtual void checkLocation() { } - + virtual bool supportGetMore() = 0; virtual bool supportYields() = 0; - + virtual string toString() { return "abstract?"; } /* used for multikey index traversal to avoid sending back dups. see Matcher::matches(). @@ -87,20 +88,33 @@ namespace mongo { if loc has already been sent, returns true. otherwise, marks loc as sent. @param deep - match was against an array, so we know it is multikey. this is legacy and kept - for backwards datafile compatibility. 'deep' can be eliminated next time we + for backwards datafile compatibility. 
'deep' can be eliminated next time we force a data file conversion. 7Jul09 */ virtual bool getsetdup(DiskLoc loc) = 0; + virtual bool isMultiKey() const = 0; + + /** + * return true if the keys in the index have been modified from the main doc + * if you have { a : 1 , b : [ 1 , 2 ] } + * an index on { a : 1 } would not be modified + * an index on { b : 1 } would be since the values of the array are put in the index + * not the array + */ + virtual bool modifiedKeys() const = 0; + virtual BSONObj prettyIndexBounds() const { return BSONArray(); } virtual bool capped() const { return false; } + virtual long long nscanned() = 0; + // The implementation may return different matchers depending on the // position of the cursor. If matcher() is nonzero at the start, // matcher() should be checked each time advance() is called. virtual CoveredIndexMatcher *matcher() const { return 0; } - + // A convenience function for setting the value of matcher() manually // so it may accessed later. Implementations which must generate // their own matcher() should assert here. @@ -121,20 +135,15 @@ namespace mongo { /* table-scan style cursor */ class BasicCursor : public Cursor { - protected: - DiskLoc curr, last; - const AdvanceStrategy *s; - - private: - bool tailable_; - shared_ptr< CoveredIndexMatcher > _matcher; - void init() { - tailable_ = false; - } public: - bool ok() { - return !curr.isNull(); + BasicCursor(DiskLoc dl, const AdvanceStrategy *_s = forward()) : curr(dl), s( _s ), _nscanned() { + incNscanned(); + init(); } + BasicCursor(const AdvanceStrategy *_s = forward()) : s( _s ), _nscanned() { + init(); + } + bool ok() { return !curr.isNull(); } Record* _current() { assert( ok() ); return curr.rec(); @@ -144,42 +153,33 @@ namespace mongo { BSONObj j(r); return j; } - virtual DiskLoc currLoc() { - return curr; - } - virtual DiskLoc refLoc() { - return curr.isNull() ? last : curr; - } - + virtual DiskLoc currLoc() { return curr; } + virtual DiskLoc refLoc() { return curr.isNull() ? 
last : curr; } bool advance(); - - BasicCursor(DiskLoc dl, const AdvanceStrategy *_s = forward()) : curr(dl), s( _s ) { - init(); - } - BasicCursor(const AdvanceStrategy *_s = forward()) : s( _s ) { - init(); - } - virtual string toString() { - return "BasicCursor"; - } + virtual string toString() { return "BasicCursor"; } virtual void setTailable() { if ( !curr.isNull() || !last.isNull() ) tailable_ = true; } - virtual bool tailable() { - return tailable_; - } + virtual bool tailable() { return tailable_; } virtual bool getsetdup(DiskLoc loc) { return false; } - + virtual bool isMultiKey() const { return false; } + virtual bool modifiedKeys() const { return false; } virtual bool supportGetMore() { return true; } virtual bool supportYields() { return true; } - virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); } - - virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { - _matcher = matcher; - } - + virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { _matcher = matcher; } + virtual long long nscanned() { return _nscanned; } + + protected: + DiskLoc curr, last; + const AdvanceStrategy *s; + void incNscanned() { if ( !curr.isNull() ) { ++_nscanned; } } + private: + bool tailable_; + shared_ptr< CoveredIndexMatcher > _matcher; + long long _nscanned; + void init() { tailable_ = false; } }; /* used for order { $natural: -1 } */ @@ -187,13 +187,9 @@ namespace mongo { public: ReverseCursor(DiskLoc dl) : BasicCursor( dl, reverse() ) { } ReverseCursor() : BasicCursor( reverse() ) { } - virtual string toString() { - return "ReverseCursor"; - } + virtual string toString() { return "ReverseCursor"; } }; - class NamespaceDetails; - class ForwardCappedCursor : public BasicCursor, public AdvanceStrategy { public: ForwardCappedCursor( NamespaceDetails *nsd = 0, const DiskLoc &startLoc = DiskLoc() ); diff --git a/db/database.cpp b/db/database.cpp index dde117f..d164ba5 100644 --- a/db/database.cpp +++ b/db/database.cpp @@ -20,15 +20,29 @@ #include "pdfile.h" #include "database.h" #include "instance.h" +#include "clientcursor.h" namespace mongo { bool Database::_openAllFiles = false; + Database::~Database() { + magic = 0; + size_t n = files.size(); + for ( size_t i = 0; i < n; i++ ) + delete files[i]; + if( ccByLoc.size() ) { + log() << "\n\n\nWARNING: ccByLoc not empty on database close! " << ccByLoc.size() << ' ' << name << endl; + } + } + Database::Database(const char *nm, bool& newDb, const string& _path ) - : name(nm), path(_path), namespaceIndex( path, name ) { - - { // check db name is valid + : name(nm), path(_path), namespaceIndex( path, name ), + profileName(name + ".system.profile") { + try { + + { + // check db name is valid size_t L = strlen(nm); uassert( 10028 , "db name is empty", L > 0 ); uassert( 10029 , "bad db name [1]", *nm != '.' 
); @@ -36,66 +50,184 @@ namespace mongo { uassert( 10031 , "bad char(s) in db name", strchr(nm, ' ') == 0 ); uassert( 10032 , "db name too long", L < 64 ); } - + newDb = namespaceIndex.exists(); profile = 0; - profileName = name + ".system.profile"; { vector others; getDatabaseNames( others , path ); - - for ( unsigned i=0; i 1 && getFile( n - 1 )->getHeader()->isEmpty() ) { + delete files[ n - 1 ]; + files.pop_back(); + } + } + + MongoDataFile* Database::getFile( int n, int sizeNeeded , bool preallocateOnly) { + assert(this); + + namespaceIndex.init(); + if ( n < 0 || n >= DiskLoc::MaxFiles ) { + out() << "getFile(): n=" << n << endl; + massert( 10295 , "getFile(): bad file number value (corrupt db?): run repair", false); + } + DEV { + if ( n > 100 ) + out() << "getFile(): n=" << n << "?" << endl; + } + MongoDataFile* p = 0; + if ( !preallocateOnly ) { + while ( n >= (int) files.size() ) + files.push_back(0); + p = files[n]; + } + if ( p == 0 ) { + boost::filesystem::path fullName = fileName( n ); + string fullNameString = fullName.string(); + p = new MongoDataFile(n); + int minSize = 0; + if ( n != 0 && files[ n - 1 ] ) + minSize = files[ n - 1 ]->getHeader()->fileLength; + if ( sizeNeeded + DataFileHeader::HeaderSize > minSize ) + minSize = sizeNeeded + DataFileHeader::HeaderSize; + try { + p->open( fullNameString.c_str(), minSize, preallocateOnly ); + } + catch ( AssertionException& ) { + delete p; + throw; + } + if ( preallocateOnly ) + delete p; + else + files[n] = p; + } + return preallocateOnly ? 0 : p; + } + + MongoDataFile* Database::addAFile( int sizeNeeded, bool preallocateNextFile ) { + int n = (int) files.size(); + MongoDataFile *ret = getFile( n, sizeNeeded ); + if ( preallocateNextFile ) + preallocateAFile(); + return ret; } + MongoDataFile* Database::suitableFile( int sizeNeeded, bool preallocate ) { - bool Database::setProfilingLevel( int newLevel , string& errmsg ){ + // check existing files + for ( int i=numFiles()-1; i>=0; i-- ) { + MongoDataFile* f = getFile( i ); + if ( f->getHeader()->unusedLength >= sizeNeeded ) + return f; + } + + // allocate files until we either get one big enough or hit maxSize + for ( int i = 0; i < 8; i++ ) { + MongoDataFile* f = addAFile( sizeNeeded, preallocate ); + + if ( f->getHeader()->unusedLength >= sizeNeeded ) + return f; + + if ( f->getHeader()->fileLength >= MongoDataFile::maxSize() ) // this is as big as they get so might as well stop + return f; + } + + return 0; + } + + MongoDataFile* Database::newestFile() { + int n = numFiles(); + if ( n == 0 ) + return 0; + return getFile(n-1); + } + + + Extent* Database::allocExtent( const char *ns, int size, bool capped ) { + Extent *e = DataFileMgr::allocFromFreeList( ns, size, capped ); + if( e ) + return e; + return suitableFile( size, !capped )->createExtent( ns, size, capped ); + } + + + bool Database::setProfilingLevel( int newLevel , string& errmsg ) { if ( profile == newLevel ) return true; - - if ( newLevel < 0 || newLevel > 2 ){ + + if ( newLevel < 0 || newLevel > 2 ) { errmsg = "profiling level has to be >=0 and <= 2"; return false; } - - if ( newLevel == 0 ){ + + if ( newLevel == 0 ) { profile = 0; return true; } - + assert( cc().database() == this ); - if ( ! namespaceIndex.details( profileName.c_str() ) ){ + if ( ! namespaceIndex.details( profileName.c_str() ) ) { log(1) << "creating profile ns: " << profileName << endl; BSONObjBuilder spec; spec.appendBool( "capped", true ); spec.append( "size", 131072.0 ); - if ( ! 
userCreateNS( profileName.c_str(), spec.done(), errmsg , true ) ){ + if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , true ) ) { return false; } } @@ -103,26 +235,57 @@ namespace mongo { return true; } - void Database::finishInit(){ + void Database::finishInit() { if ( cmdLine.defaultProfile == profile ) return; - + string errmsg; massert( 12506 , errmsg , setProfilingLevel( cmdLine.defaultProfile , errmsg ) ); } - bool Database::validDBName( const string& ns ){ + bool Database::validDBName( const string& ns ) { if ( ns.size() == 0 || ns.size() > 64 ) return false; size_t good = strcspn( ns.c_str() , "/\\. \"" ); return good == ns.size(); } - void Database::flushFiles( bool sync ){ + void Database::flushFiles( bool sync ) const { dbMutex.assertAtLeastReadLocked(); - for ( unsigned i=0; iflush( sync ); } } + long long Database::fileSize() const { + long long size=0; + for (int n=0; exists(n); n++) + size += boost::filesystem::file_size( fileName(n) ); + return size; + } + + Database* DatabaseHolder::getOrCreate( const string& ns , const string& path , bool& justCreated ) { + dbMutex.assertWriteLocked(); + DBs& m = _paths[path]; + + string dbname = _todb( ns ); + + Database* & db = m[dbname]; + if ( db ) { + justCreated = false; + return db; + } + + log(1) << "Accessing: " << dbname << " for the first time" << endl; + try { + db = new Database( dbname.c_str() , justCreated , path ); + } + catch ( ... ) { + m.erase( dbname ); + throw; + } + _size++; + return db; + } + } // namespace mongo diff --git a/db/database.h b/db/database.h index c7d72c5..6e72ba8 100644 --- a/db/database.h +++ b/db/database.h @@ -23,6 +23,8 @@ namespace mongo { class ClientCursor; + struct ByLocKey; + typedef map CCByLoc; /** * Database represents a database database @@ -32,176 +34,90 @@ namespace mongo { class Database { public: static bool _openAllFiles; - - Database(const char *nm, bool& newDb, const string& _path = dbpath); - - ~Database() { - magic = 0; - btreeStore->closeFiles(name, path); - size_t n = files.size(); - for ( size_t i = 0; i < n; i++ ) - delete files[i]; - } - + + Database(const char *nm, /*out*/ bool& newDb, const string& _path = dbpath); + private: + ~Database(); + public: + /* you must use this to close - there is essential code in this method that is not in the ~Database destructor. + thus the destructor is private. this could be cleaned up one day... + */ + static void closeDatabase( const char *db, const string& path ); + + void openAllFiles(); + + void finishInit(); + /** * tries to make sure that this hasn't been deleted */ - bool isOk(){ - return magic == 781231; - } + bool isOk() const { return magic == 781231; } - bool isEmpty(){ - return ! namespaceIndex.allocated(); - } + bool isEmpty() { return ! namespaceIndex.allocated(); } - boost::filesystem::path fileName( int n ) { - stringstream ss; - ss << name << '.' 
<< n; - boost::filesystem::path fullName; - fullName = boost::filesystem::path(path); - if ( directoryperdb ) - fullName /= name; - fullName /= ss.str(); - return fullName; - } - - bool exists(int n) { - return boost::filesystem::exists( fileName( n ) ); - } + /** + * total file size of Database in bytes + */ + long long fileSize() const; - void openAllFiles() { - int n = 0; - while( exists(n) ) { - getFile(n); - n++; - } - // If last file is empty, consider it preallocated and make sure it's not mapped - // until a write is requested - if ( n > 1 && getFile( n - 1 )->getHeader()->isEmpty() ) { - delete files[ n - 1 ]; - files.pop_back(); - } - } + int numFiles() const { return (int)files.size(); } - MongoDataFile* getFile( int n, int sizeNeeded = 0, bool preallocateOnly = false ) { - assert(this); - - namespaceIndex.init(); - if ( n < 0 || n >= DiskLoc::MaxFiles ) { - out() << "getFile(): n=" << n << endl; -#if 0 - if( n >= RecCache::Base && n <= RecCache::Base+1000 ) - massert( 10294 , "getFile(): bad file number - using recstore db w/nonrecstore db build?", false); -#endif - massert( 10295 , "getFile(): bad file number value (corrupt db?): run repair", false); - } - DEV { - if ( n > 100 ) - out() << "getFile(): n=" << n << "?" << endl; - } - MongoDataFile* p = 0; - if ( !preallocateOnly ) { - while ( n >= (int) files.size() ) - files.push_back(0); - p = files[n]; - } - if ( p == 0 ) { - boost::filesystem::path fullName = fileName( n ); - string fullNameString = fullName.string(); - p = new MongoDataFile(n); - int minSize = 0; - if ( n != 0 && files[ n - 1 ] ) - minSize = files[ n - 1 ]->getHeader()->fileLength; - if ( sizeNeeded + DataFileHeader::HeaderSize > minSize ) - minSize = sizeNeeded + DataFileHeader::HeaderSize; - try { - p->open( fullNameString.c_str(), minSize, preallocateOnly ); - } - catch ( AssertionException& ) { - delete p; - throw; - } - if ( preallocateOnly ) - delete p; - else - files[n] = p; - } - return preallocateOnly ? 0 : p; - } + /** + * returns file valid for file number n + */ + boost::filesystem::path fileName( int n ) const; - MongoDataFile* addAFile( int sizeNeeded, bool preallocateNextFile ) { - int n = (int) files.size(); - MongoDataFile *ret = getFile( n, sizeNeeded ); - if ( preallocateNextFile ) - preallocateAFile(); - return ret; - } - - // safe to call this multiple times - the implementation will only preallocate one file - void preallocateAFile() { - int n = (int) files.size(); - getFile( n, 0, true ); - } + bool exists(int n) const { return boost::filesystem::exists( fileName( n ) ); } - MongoDataFile* suitableFile( int sizeNeeded, bool preallocate ) { - MongoDataFile* f = newestFile(); - if ( !f ) { - f = addAFile( sizeNeeded, preallocate ); - } - for ( int i = 0; i < 8; i++ ) { - if ( f->getHeader()->unusedLength >= sizeNeeded ) - break; - f = addAFile( sizeNeeded, preallocate ); - if ( f->getHeader()->fileLength >= MongoDataFile::maxSize() ) // this is as big as they get so might as well stop - break; - } - return f; - } + /** + * return file n. 
if it doesn't exist, create it + */ + MongoDataFile* getFile( int n, int sizeNeeded = 0, bool preallocateOnly = false ); + + MongoDataFile* addAFile( int sizeNeeded, bool preallocateNextFile ); + + /** + * makes sure we have an extra file at the end that is empty + * safe to call this multiple times - the implementation will only preallocate one file + */ + void preallocateAFile() { getFile( numFiles() , 0, true ); } + + MongoDataFile* suitableFile( int sizeNeeded, bool preallocate ); + + Extent* allocExtent( const char *ns, int size, bool capped ); + + MongoDataFile* newestFile(); - Extent* allocExtent( const char *ns, int size, bool capped ) { - Extent *e = DataFileMgr::allocFromFreeList( ns, size, capped ); - if( e ) return e; - return suitableFile( size, !capped )->createExtent( ns, size, capped ); - } - - MongoDataFile* newestFile() { - int n = (int) files.size(); - if ( n > 0 ) { - n--; - } else { - return 0; - } - return getFile(n); - } - /** - * @return true if success, false otherwise + * @return true if success. false if bad level or error creating profile ns */ bool setProfilingLevel( int newLevel , string& errmsg ); - void finishInit(); - static bool validDBName( const string& ns ); + void flushFiles( bool sync ) const; - long long fileSize(){ - long long size=0; - for (int n=0; exists(n); n++) - size += boost::filesystem::file_size( fileName(n) ); - return size; + /** + * @return true if ns is part of the database + * ns=foo.bar, db=foo returns true + */ + bool ownsNS( const string& ns ) const { + if ( ! startsWith( ns , name ) ) + return false; + return ns[name.size()] == '.'; } - void flushFiles( bool sync ); - + static bool validDBName( const string& ns ); + + public: // this should be private later + vector files; - string name; // "alleyinsider" - string path; + const string name; // "alleyinsider" + const string path; NamespaceIndex namespaceIndex; int profile; // 0=off. - string profileName; // "alleyinsider.system.profile" - - multimap ccByLoc; - - int magic; // used for making sure the object is still loaded in memory + const string profileName; // "alleyinsider.system.profile" + CCByLoc ccByLoc; + int magic; // used for making sure the object is still loaded in memory }; } // namespace mongo diff --git a/db/db.cpp b/db/db.cpp index d5b9339..548ac14 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -1,4 +1,4 @@ -// @file db.cpp : Defines the entry point for the mongod application. +// @file db.cpp : Defines main() for the mongod program. /** * Copyright (C) 2008 10gen Inc. 
@@ -37,7 +37,10 @@ #include "../util/concurrency/task.h" #include "../util/version.h" #include "client.h" +#include "restapi.h" #include "dbwebserver.h" +#include "dur.h" +#include "concurrency.h" #if defined(_WIN32) # include "../util/ntservice.h" @@ -55,31 +58,25 @@ namespace mongo { extern char *appsrvPath; extern int diagLogging; - extern int lenForNewNsFiles; + extern unsigned lenForNewNsFiles; extern int lockFile; - extern bool checkNsFilesOnLoad; + extern bool checkNsFilesOnLoad; extern string repairpath; -#if defined(_WIN32) - std::wstring windowsServiceName = L"MongoDB"; - std::wstring windowsServiceUser = L""; - std::wstring windowsServicePassword = L""; -#endif - - void setupSignals(); + void setupSignals( bool inFork ); void startReplSets(ReplSetCmdline*); void startReplication(); void pairWith(const char *remoteEnd, const char *arb); void exitCleanly( ExitCode code ); CmdLine cmdLine; - bool useJNI = true; + static bool scriptingEnabled = true; bool noHttpInterface = false; bool shouldRepairDatabases = 0; - bool forceRepair = 0; + static bool forceRepair = 0; Timer startupSrandTimer; - const char *ourgetns() { + const char *ourgetns() { Client *c = currentClient.get(); if ( ! c ) return ""; @@ -102,7 +99,7 @@ namespace mongo { OurListener(const string &ip, int p) : Listener(ip, p) { } virtual void accepted(MessagingPort *mp) { - if ( ! connTicketHolder.tryAcquire() ){ + if ( ! connTicketHolder.tryAcquire() ) { log() << "connection refused because too many open connections: " << connTicketHolder.used() << " of " << connTicketHolder.outof() << endl; // TODO: would be nice if we notified them... mp->shutdown(); @@ -113,12 +110,12 @@ namespace mongo { try { boost::thread thr(boost::bind(&connThread,mp)); } - catch ( boost::thread_resource_error& ){ + catch ( boost::thread_resource_error& ) { log() << "can't create new thread, closing connection" << endl; mp->shutdown(); delete mp; } - catch ( ... ){ + catch ( ... ) { log() << "unkonwn exception starting connThread" << endl; mp->shutdown(); delete mp; @@ -126,14 +123,14 @@ namespace mongo { } }; -/* todo: make this a real test. the stuff in dbtests/ seem to do all dbdirectclient which exhaust doesn't support yet. */ + /* todo: make this a real test. the stuff in dbtests/ seem to do all dbdirectclient which exhaust doesn't support yet. */ // QueryOption_Exhaust #define TESTEXHAUST 0 #if( TESTEXHAUST ) - void testExhaust() { + void testExhaust() { sleepsecs(1); unsigned n = 0; - auto f = [&n](const BSONObj& o) { + auto f = [&n](const BSONObj& o) { assert( o.valid() ); //cout << o << endl; n++; @@ -145,20 +142,20 @@ namespace mongo { db.connect("localhost"); const char *ns = "local.foo"; if( db.count(ns) < 10000 ) - for( int i = 0; i < 20000; i++ ) + for( int i = 0; i < 20000; i++ ) db.insert(ns, BSON("aaa" << 3 << "b" << "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); try { db.query(f, ns, Query() ); } - catch(...) { + catch(...) { cout << "hmmm" << endl; } try { db.query(f, ns, Query() ); } - catch(...) { + catch(...) { cout << "caught" << endl; } @@ -173,7 +170,7 @@ namespace mongo { l.setAsTimeTracker(); startReplication(); if ( !noHttpInterface ) - boost::thread thr(webServerThread); + boost::thread web( boost::bind(&webServerThread, new RestAdminAccess() /* takes ownership */)); #if(TESTEXHAUST) boost::thread thr(testExhaust); @@ -203,8 +200,7 @@ namespace mongo { app server will open a pool of threads. todo: one day, asio... 
*/ - void connThread( MessagingPort * inPort ) - { + void connThread( MessagingPort * inPort ) { TicketHolderReleaser connTicketReleaser( &connTicketHolder ); /* todo: move to Client object */ @@ -221,11 +217,11 @@ namespace mongo { Message m; while ( 1 ) { - m.reset(); + inPort->clearCounters(); if ( !dbMsgPort->recv(m) ) { if( !cmdLine.quiet ) - log() << "end connection " << dbMsgPort->farEnd.toString() << endl; + log() << "end connection " << dbMsgPort->farEnd.toString() << endl; dbMsgPort->shutdown(); break; } @@ -234,27 +230,15 @@ sendmore: log() << "got request after shutdown()" << endl; break; } - + lastError.startRequest( m , le ); DbResponse dbresponse; - if ( !assembleResponse( m, dbresponse, dbMsgPort->farEnd ) ) { - log() << curTimeMillis() % 10000 << " end msg " << dbMsgPort->farEnd.toString() << endl; - /* todo: we may not wish to allow this, even on localhost: very low priv accounts could stop us. */ - if ( dbMsgPort->farEnd.isLocalHost() ) { - dbMsgPort->shutdown(); - sleepmillis(50); - problem() << "exiting end msg" << endl; - dbexit(EXIT_CLEAN); - } - else { - log() << " (not from localhost, ignoring end msg)" << endl; - } - } + assembleResponse( m, dbresponse, dbMsgPort->farEnd ); if ( dbresponse.response ) { dbMsgPort->reply(m, *dbresponse.response, dbresponse.responseTo); - if( dbresponse.exhaust ) { + if( dbresponse.exhaust ) { MsgData *header = dbresponse.response->header(); QueryResult *qr = (QueryResult *) header; long long cursorid = qr->cursorId; @@ -279,6 +263,10 @@ sendmore: } } } + + networkCounter.hit( inPort->getBytesIn() , inPort->getBytesOut() ); + + m.reset(); } } @@ -293,7 +281,7 @@ sendmore: } catch ( const ClockSkewException & ) { exitCleanly( EXIT_CLOCK_SKEW ); - } + } catch ( std::exception &e ) { problem() << "Uncaught std::exception: " << e.what() << ", terminating" << endl; dbexit( EXIT_UNCAUGHT ); @@ -303,91 +291,48 @@ sendmore: dbexit( EXIT_UNCAUGHT ); } - // any thread cleanup can happen here - - if ( currentClient.get() ) - currentClient->shutdown(); - globalScriptEngine->threadDone(); - } - - void msg(const char *m, const char *address, int port, int extras = 0) { - SockAddr db(address, port); - - // SockAddr db("127.0.0.1", DBPort); - // SockAddr db("192.168.37.1", MessagingPort::DBPort); - // SockAddr db("10.0.21.60", MessagingPort::DBPort); - // SockAddr db("172.16.0.179", MessagingPort::DBPort); - - MessagingPort p; - if ( !p.connect(db) ){ - out() << "msg couldn't connect" << endl; - return; - } - - const int Loops = 1; - for ( int q = 0; q < Loops; q++ ) { - Message send; - Message response; - - send.setData( dbMsg , m); - int len = send.header()->dataLen(); - - for ( int i = 0; i < extras; i++ ) - p.say(/*db, */send); - - Timer t; - bool ok = p.call(send, response); - double tm = ((double) t.micros()) + 1; - out() << " ****ok. response.data:" << ok << " time:" << tm / 1000.0 << "ms " - << "len: " << len << " data: " << response.singleData()->_data << endl; - - if ( q+1 < Loops ) { - out() << "\t\tSLEEP 8 then sending again as a test" << endl; - sleepsecs(8); - } + // thread ending... 
+ { + Client * c = currentClient.get(); + if( c ) c->shutdown(); } - sleepsecs(1); - - p.shutdown(); - } - - void msg(const char *m, int extras = 0) { - msg(m, "127.0.0.1", CmdLine::DefaultDBPort, extras); + globalScriptEngine->threadDone(); } - bool doDBUpgrade( const string& dbName , string errmsg , DataFileHeader * h ){ + bool doDBUpgrade( const string& dbName , string errmsg , DataFileHeader * h ) { static DBDirectClient db; - - if ( h->version == 4 && h->versionMinor == 4 ){ + + if ( h->version == 4 && h->versionMinor == 4 ) { assert( VERSION == 4 ); assert( VERSION_MINOR == 5 ); - + list colls = db.getCollectionNames( dbName ); - for ( list::iterator i=colls.begin(); i!=colls.end(); i++){ + for ( list::iterator i=colls.begin(); i!=colls.end(); i++) { string c = *i; log() << "\t upgrading collection:" << c << endl; BSONObj out; bool ok = db.runCommand( dbName , BSON( "reIndex" << c.substr( dbName.size() + 1 ) ) , out ); - if ( ! ok ){ + if ( ! ok ) { errmsg = "reindex failed"; log() << "\t\t reindex failed: " << out << endl; return false; } } - + h->versionMinor = 5; return true; } - + // do this in the general case return repairDatabase( dbName.c_str(), errmsg ); } - - void repairDatabases() { - // LastError * le = lastError.get( true ); + + // ran at startup. + static void repairDatabasesAndCheckVersion() { + // LastError * le = lastError.get( true ); Client::GodScope gs; log(1) << "enter repairDatabases (to check pdfile version #)" << endl; - + //assert(checkNsFilesOnLoad); checkNsFilesOnLoad = false; // we are mainly just checking the header - don't scan the whole .ns file for every db here. @@ -400,33 +345,39 @@ sendmore: Client::Context ctx( dbName ); MongoDataFile *p = cc().database()->getFile( 0 ); DataFileHeader *h = p->getHeader(); - if ( !h->currentVersion() || forceRepair ) { + if ( !h->isCurrentVersion() || forceRepair ) { + + if( h->version <= 0 ) { + uasserted(10000, str::stream() << "db " << dbName << " appears corrupt pdfile version: " << h->version << " info: " << h->versionMinor << ' ' << h->fileLength); + } + log() << "****" << endl; log() << "****" << endl; log() << "need to upgrade database " << dbName << " with pdfile version " << h->version << "." << h->versionMinor << ", " << "new version: " << VERSION << "." << VERSION_MINOR << endl; - if ( shouldRepairDatabases ){ + if ( shouldRepairDatabases ) { // QUESTION: Repair even if file format is higher version than code? log() << "\t starting upgrade" << endl; string errmsg; assert( doDBUpgrade( dbName , errmsg , h ) ); } else { - log() << "\t Not upgrading, exiting!" 
<< endl; + log() << "\t Not upgrading, exiting" << endl; log() << "\t run --upgrade to upgrade dbs, then start again" << endl; log() << "****" << endl; dbexit( EXIT_NEED_UPGRADE ); shouldRepairDatabases = 1; return; } - } else { - closeDatabase( dbName.c_str() ); + } + else { + Database::closeDatabase( dbName.c_str(), dbpath ); } } log(1) << "done repairDatabases" << endl; - if ( shouldRepairDatabases ){ + if ( shouldRepairDatabases ) { log() << "finished checking dbs" << endl; cc().shutdown(); dbexit( EXIT_CLEAN ); @@ -441,11 +392,11 @@ sendmore: i != boost::filesystem::directory_iterator(); ++i ) { string fileName = boost::filesystem::path(*i).leaf(); if ( boost::filesystem::is_directory( *i ) && - fileName.length() && fileName[ 0 ] == '$' ) + fileName.length() && fileName[ 0 ] == '$' ) boost::filesystem::remove_all( *i ); } } - + void clearTmpCollections() { Client::GodScope gs; vector< string > toDelete; @@ -460,35 +411,38 @@ sendmore: cli.dropCollection( *i ); } } - + + void flushDiagLog(); + /** * does background async flushes of mmapped files */ class DataFileSync : public BackgroundJob { public: - string name() { return "DataFileSync"; } - void run(){ - if( _sleepsecs == 0 ) + string name() const { return "DataFileSync"; } + void run() { + if( cmdLine.syncdelay == 0 ) log() << "warning: --syncdelay 0 is not recommended and can have strange performance" << endl; - else if( _sleepsecs == 1 ) + else if( cmdLine.syncdelay == 1 ) log() << "--syncdelay 1" << endl; - else if( _sleepsecs != 60 ) - log(1) << "--syncdelay " << _sleepsecs << endl; + else if( cmdLine.syncdelay != 60 ) + log(1) << "--syncdelay " << cmdLine.syncdelay << endl; int time_flushing = 0; - while ( ! inShutdown() ){ - if ( _sleepsecs == 0 ){ + while ( ! inShutdown() ) { + flushDiagLog(); + if ( cmdLine.syncdelay == 0 ) { // in case at some point we add an option to change at runtime sleepsecs(5); continue; } - sleepmillis( (long long) std::max(0.0, (_sleepsecs * 1000) - time_flushing) ); - - if ( inShutdown() ){ + sleepmillis( (long long) std::max(0.0, (cmdLine.syncdelay * 1000) - time_flushing) ); + + if ( inShutdown() ) { // occasional issue trying to flush during shutdown when sleep interrupted break; } - + Date_t start = jsTime(); int numFiles = MemoryMappedFile::flushAll( true ); time_flushing = (int) (jsTime() - start); @@ -498,12 +452,22 @@ sendmore: log(1) << "flushing mmap took " << time_flushing << "ms " << " for " << numFiles << " files" << endl; } } - - double _sleepsecs; // default value controlled by program options + } dataFileSync; + const char * jsInterruptCallback() { + // should be safe to interrupt in js code, even if we have a write lock + return killCurrentOp.checkForInterruptNoAssert( false ); + } + + unsigned jsGetInterruptSpecCallback() { + return cc().curop()->opNum(); + } + void _initAndListen(int listenPort, const char *appserverLoc = NULL) { + Client::initThread("initandlisten"); + bool is32bit = sizeof(int*) == 4; { @@ -534,38 +498,37 @@ sendmore: ss << "repairpath (" << repairpath << ") does not exist"; uassert( 12590 , ss.str().c_str(), boost::filesystem::exists( repairpath ) ); } - + acquirePathLock(); remove_all( dbpath + "/_tmp/" ); - theFileAllocator().start(); + FileAllocator::get()->start(); BOOST_CHECK_EXCEPTION( clearTmpFiles() ); - Client::initThread("initandlisten"); _diaglog.init(); + dur::startup(); + + if( cmdLine.durOptions & CmdLine::DurRecoverOnly ) + return; + + // comes after getDur().startup() because this reads from the database clearTmpCollections(); 
Module::initAll(); -#if 0 - { - stringstream indexpath; - indexpath << dbpath << "/indexes.dat"; - RecCache::tempStore.init(indexpath.str().c_str(), BucketSize); - } -#endif - - if ( useJNI ) { + if ( scriptingEnabled ) { ScriptEngine::setup(); + globalScriptEngine->setCheckInterruptCallback( jsInterruptCallback ); + globalScriptEngine->setGetInterruptSpecCallback( jsGetInterruptSpecCallback ); } - repairDatabases(); + repairDatabasesAndCheckVersion(); /* we didn't want to pre-open all fiels for the repair check above. for regular operation we do for read/write lock concurrency reasons. - */ + */ Database::_openAllFiles = true; if ( shouldRepairDatabases ) @@ -597,7 +560,7 @@ sendmore: log() << "exception in initAndListen std::exception: " << e.what() << ", terminating" << endl; dbexit( EXIT_UNCAUGHT ); } - catch ( int& n ){ + catch ( int& n ) { log() << "exception in initAndListen int: " << n << ", terminating" << endl; dbexit( EXIT_UNCAUGHT ); } @@ -607,13 +570,13 @@ sendmore: } } - #if defined(_WIN32) +#if defined(_WIN32) bool initService() { ServiceController::reportStatus( SERVICE_RUNNING ); initAndListen( cmdLine.port, appsrvPath ); return true; } - #endif +#endif } // namespace mongo @@ -647,16 +610,17 @@ string arg_error_check(int argc, char* argv[]) { return ""; } -int main(int argc, char* argv[], char *envp[] ) -{ +int main(int argc, char* argv[]) { static StaticObserver staticObserver; getcurns = ourgetns; po::options_description general_options("General options"); - #if defined(_WIN32) - po::options_description windows_scm_options("Windows Service Control Manager options"); - #endif +#if defined(_WIN32) + po::options_description windows_scm_options("Windows Service Control Manager options"); +#endif po::options_description replication_options("Replication options"); + po::options_description ms_options("Master/slave options"); + po::options_description rs_options("Replica set options"); po::options_description sharding_options("Sharding options"); po::options_description visible_options("Allowed options"); po::options_description hidden_options("Hidden options"); @@ -666,94 +630,106 @@ int main(int argc, char* argv[], char *envp[] ) CmdLine::addGlobalOptions( general_options , hidden_options ); general_options.add_options() - ("dbpath", po::value() , "directory for datafiles") - ("directoryperdb", "each database will be stored in a separate directory") - ("repairpath", po::value() , "root directory for repair files - defaults to dbpath" ) - ("cpu", "periodically show cpu and iowait utilization") - ("noauth", "run without security") - ("auth", "run with security") - ("objcheck", "inspect client data for validity on receipt") - ("quota", "enable db quota management") - ("quotaFiles", po::value(), "number of files allower per db, requires --quota") - ("appsrvpath", po::value(), "root directory for the babble app server") - ("nocursors", "diagnostic/debugging option") - ("nohints", "ignore query hints") - ("nohttpinterface", "disable http interface") - ("rest","turn on simple rest api") - ("noscripting", "disable scripting engine") - ("noprealloc", "disable data file preallocation") - ("smallfiles", "use a smaller default file size") - ("nssize", po::value()->default_value(16), ".ns file size (in MB) for new databases") - ("diaglog", po::value(), "0=off 1=W 2=R 3=both 7=W+some reads") - ("sysinfo", "print some diagnostic system information") - ("upgrade", "upgrade db if needed") - ("repair", "run repair on all dbs") - ("notablescan", "do not allow table scans") - 
("syncdelay",po::value(&dataFileSync._sleepsecs)->default_value(60), "seconds between disk syncs (0=never, but not recommended)") - ("profile",po::value(), "0=off 1=slow, 2=all") - ("slowms",po::value(&cmdLine.slowMS)->default_value(100), "value of slow for profile and console log" ) - ("maxConns",po::value(), "max number of simultaneous connections") - #if !defined(_WIN32) - ("nounixsocket", "disable listening on unix sockets") - #endif - ("ipv6", "enable IPv6 support (disabled by default)") - ; - #if defined(_WIN32) - windows_scm_options.add_options() - ("install", "install mongodb service") - ("remove", "remove mongodb service") - ("reinstall", "reinstall mongodb service (equivilant of mongod --remove followed by mongod --install)") - ("service", "start mongodb service") - ("serviceName", po::value(), "windows service name") - ("serviceUser", po::value(), "user name service executes as") - ("servicePassword", po::value(), "password used to authenticate serviceUser") - ; - #endif - - replication_options.add_options() - ("master", "master mode") - ("slave", "slave mode") - ("source", po::value(), "when slave: specify master as ") - ("only", po::value(), "when slave: specify a single database to replicate") - ("pairwith", po::value(), "address of server to pair with") - ("arbiter", po::value(), "address of arbiter server") - ("slavedelay", po::value(), "specify delay (in seconds) to be used when applying master ops to slave") - ("fastsync", "indicate that this instance is starting from a dbpath snapshot of the repl peer") - ("autoresync", "automatically resync if slave data is stale") - ("oplogSize", po::value(), "size limit (in MB) for op log") - ("opIdMem", po::value(), "size limit (in bytes) for in memory storage of op ids") - ; - - sharding_options.add_options() - ("configsvr", "declare this is a config db of a cluster") - ("shardsvr", "declare this is a shard db of a cluster") - ("noMoveParanoia" , "turn off paranoid saving of data for moveChunk. 
this is on by default for now, but default will switch" ) - ; + ("auth", "run with security") + ("cpu", "periodically show cpu and iowait utilization") + ("dbpath", po::value() , "directory for datafiles") + ("diaglog", po::value(), "0=off 1=W 2=R 3=both 7=W+some reads") + ("directoryperdb", "each database will be stored in a separate directory") + ("journal", "enable journaling") + ("journalOptions", po::value(), "journal diagnostic options") + ("ipv6", "enable IPv6 support (disabled by default)") + ("jsonp","allow JSONP access via http (has security implications)") + ("maxConns",po::value(), "max number of simultaneous connections") + ("noauth", "run without security") + ("nohttpinterface", "disable http interface") + ("noprealloc", "disable data file preallocation - will often hurt performance") + ("noscripting", "disable scripting engine") + ("notablescan", "do not allow table scans") +#if !defined(_WIN32) + ("nounixsocket", "disable listening on unix sockets") +#endif + ("nssize", po::value()->default_value(16), ".ns file size (in MB) for new databases") + ("objcheck", "inspect client data for validity on receipt") + ("profile",po::value(), "0=off 1=slow, 2=all") + ("quota", "limits each database to a certain number of files (8 default)") + ("quotaFiles", po::value(), "number of files allower per db, requires --quota") + ("rest","turn on simple rest api") + ("repair", "run repair on all dbs") + ("repairpath", po::value() , "root directory for repair files - defaults to dbpath" ) + ("slowms",po::value(&cmdLine.slowMS)->default_value(100), "value of slow for profile and console log" ) + ("smallfiles", "use a smaller default file size") + ("syncdelay",po::value(&cmdLine.syncdelay)->default_value(60), "seconds between disk syncs (0=never, but not recommended)") + ("sysinfo", "print some diagnostic system information") + ("upgrade", "upgrade db if needed") + ; + +#if defined(_WIN32) + CmdLine::addWindowsOptions( windows_scm_options, hidden_options ); +#endif + + replication_options.add_options() + ("fastsync", "indicate that this instance is starting from a dbpath snapshot of the repl peer") + ("autoresync", "automatically resync if slave data is stale") + ("oplogSize", po::value(), "size limit (in MB) for op log") + ; + + ms_options.add_options() + ("master", "master mode") + ("slave", "slave mode") + ("source", po::value(), "when slave: specify master as ") + ("only", po::value(), "when slave: specify a single database to replicate") + ("slavedelay", po::value(), "specify delay (in seconds) to be used when applying master ops to slave") + ; + + rs_options.add_options() + ("replSet", po::value(), "arg is [/]") + ; + + sharding_options.add_options() + ("configsvr", "declare this is a config db of a cluster; default port 27019; default dir /data/configdb") + ("shardsvr", "declare this is a shard db of a cluster; default port 27018") + ("noMoveParanoia" , "turn off paranoid saving of data for moveChunk. 
this is on by default for now, but default will switch" ) + ; hidden_options.add_options() - ("pretouch", po::value(), "n pretouch threads for applying replicationed operations") - ("replSet", po::value(), "specify repl set seed hostnames format /,,etc...") - ("command", po::value< vector >(), "command") - ("cacheSize", po::value(), "cache size (in MB) for rec store") - ; + ("pretouch", po::value(), "n pretouch threads for applying replicationed operations") + ("command", po::value< vector >(), "command") + ("cacheSize", po::value(), "cache size (in MB) for rec store") + // these move to unhidden later: + ("opIdMem", po::value(), "size limit (in bytes) for in memory storage of op ids for replica pairs DEPRECATED") + ("pairwith", po::value(), "address of server to pair with DEPRECATED") + ("arbiter", po::value(), "address of replica pair arbiter server DEPRECATED") + ("nodur", "disable journaling (currently the default)") + ("appsrvpath", po::value(), "root directory for the babble app server") + ("nocursors", "diagnostic/debugging option that turns off cursors DO NOT USE IN PRODUCTION") + ("nohints", "ignore query hints") + ("dur", "enable journaling") // deprecated version + ("durOptions", po::value(), "durability diagnostic options") // deprecated version + ; positional_options.add("command", 3); visible_options.add(general_options); - #if defined(_WIN32) - visible_options.add(windows_scm_options); - #endif +#if defined(_WIN32) + visible_options.add(windows_scm_options); +#endif visible_options.add(replication_options); + visible_options.add(ms_options); + visible_options.add(rs_options); visible_options.add(sharding_options); Module::addOptions( visible_options ); setupCoreSignals(); - setupSignals(); + setupSignals( false ); dbExecCommand = argv[0]; srand(curTimeMicros()); +#if( BOOST_VERSION >= 104500 ) + boost::filesystem::path::default_name_check( boost::filesystem2::no_check ); +#else boost::filesystem::path::default_name_check( boost::filesystem::no_check ); +#endif { unsigned x = 0x12345678; @@ -764,18 +740,12 @@ int main(int argc, char* argv[], char *envp[] ) } } - UnitTest::runTests(); - if( argc == 1 ) cout << dbExecCommand << " --help for help and startup options" << endl; { - bool installService = false; - bool removeService = false; - bool reinstallService = false; - bool startService = false; po::variables_map params; - + string error_message = arg_error_check(argc, argv); if (error_message != "") { cout << error_message << endl << endl; @@ -795,10 +765,19 @@ int main(int argc, char* argv[], char *envp[] ) printGitVersion(); return 0; } - if ( params.count( "dbpath" ) ) + if ( params.count( "dbpath" ) ) { dbpath = params["dbpath"].as(); - else + if ( params.count( "fork" ) && dbpath[0] != '/' ) { + // we need to change dbpath if we fork since we change + // cwd to "/" + // fork only exists on *nix + // so '/' is safe + dbpath = cmdLine.cwd + "/" + dbpath; + } + } + else { dbpath = "/data/db/"; + } if ( params.count("directoryperdb")) { directoryperdb = true; @@ -819,6 +798,18 @@ int main(int argc, char* argv[], char *envp[] ) cmdLine.quota = true; cmdLine.quotaFiles = params["quotaFiles"].as() - 1; } + if( params.count("nodur") ) { + cmdLine.dur = false; + } + if( params.count("dur") || params.count( "journal" ) ) { + cmdLine.dur = true; + } + if (params.count("durOptions")) { + cmdLine.durOptions = params["durOptions"].as(); + } + if (params.count("journalOptions")) { + cmdLine.durOptions = params["durOptions"].as(); + } if (params.count("objcheck")) { objcheck = true; } 
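(Editorial aside, not part of the patch.) As rendered above, the new --journalOptions branch assigns cmdLine.durOptions from params["durOptions"], so a run that passes only --journalOptions would read a key that was never set. A minimal sketch of the presumably intended parsing follows; it is an assumption, not the upstream code, and the int template arguments (stripped from the po::value()/.as() calls in this rendering) are likewise assumed:

    // hypothetical corrected option handling: read whichever spelling the
    // user actually supplied, checking each key before accessing it
    if ( params.count( "durOptions" ) )
        cmdLine.durOptions = params["durOptions"].as<int>();
    if ( params.count( "journalOptions" ) )
        cmdLine.durOptions = params["journalOptions"].as<int>();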
@@ -828,8 +819,12 @@ int main(int argc, char* argv[], char *envp[] ) } if (params.count("repairpath")) { repairpath = params["repairpath"].as(); - uassert( 12589, "repairpath has to be non-zero", repairpath.size() ); - } else { + if (!repairpath.size()) { + out() << "repairpath has to be non-zero" << endl; + dbexit( EXIT_BADOPTIONS ); + } + } + else { repairpath = dbpath; } if (params.count("nocursors")) { @@ -844,11 +839,15 @@ int main(int argc, char* argv[], char *envp[] ) if (params.count("rest")) { cmdLine.rest = true; } + if (params.count("jsonp")) { + cmdLine.jsonp = true; + } if (params.count("noscripting")) { - useJNI = false; + scriptingEnabled = false; } if (params.count("noprealloc")) { cmdLine.prealloc = false; + cout << "note: noprealloc may hurt performance in many applications" << endl; } if (params.count("smallfiles")) { cmdLine.smallfiles = true; @@ -873,29 +872,7 @@ int main(int argc, char* argv[], char *envp[] ) shouldRepairDatabases = 1; } if (params.count("notablescan")) { - cmdLine.notablescan = true; - } - if (params.count("install")) { - if ( ! params.count( "logpath" ) ){ - cout << "--install has to be used with --logpath" << endl; - ::exit(-1); - } - - installService = true; - } - if (params.count("remove")) { - removeService = true; - } - if (params.count("reinstall")) { - if ( ! params.count( "logpath" ) ){ - cout << "--reinstall has to be used with --logpath" << endl; - ::exit(-1); - } - - reinstallService = true; - } - if (params.count("service")) { - startService = true; + cmdLine.noTableScan = true; } if (params.count("master")) { replSettings.master = true; @@ -916,16 +893,17 @@ int main(int argc, char* argv[], char *envp[] ) /* specifies what the source in local.sources should be */ cmdLine.source = params["source"].as().c_str(); } - if( params.count("pretouch") ) { + if( params.count("pretouch") ) { cmdLine.pretouch = params["pretouch"].as(); } if (params.count("replSet")) { if (params.count("slavedelay")) { - cout << "--slavedelay cannot be used with --replSet" << endl; - ::exit(-1); - } else if (params.count("only")) { - cout << "--only cannot be used with --replSet" << endl; - ::exit(-1); + out() << "--slavedelay cannot be used with --replSet" << endl; + dbexit( EXIT_BADOPTIONS ); + } + else if (params.count("only")) { + out() << "--only cannot be used with --replSet" << endl; + dbexit( EXIT_BADOPTIONS ); } /* seed list of hosts for the repl set */ cmdLine._replSet = params["replSet"].as().c_str(); @@ -937,103 +915,108 @@ int main(int argc, char* argv[], char *envp[] ) cout << "***********************************\n" << "WARNING WARNING WARNING\n" << " replica pairs are deprecated\n" - << " see: http://www.mongodb.org/display/DOCS/Replica+Pairs \n" + << " see: http://www.mongodb.org/display/DOCS/Replica+Pairs \n" << "***********************************" << endl; string paired = params["pairwith"].as(); if (params.count("arbiter")) { string arbiter = params["arbiter"].as(); pairWith(paired.c_str(), arbiter.c_str()); - } else { + } + else { pairWith(paired.c_str(), "-"); } - } else if (params.count("arbiter")) { - uasserted(10999,"specifying --arbiter without --pairwith"); + } + else if (params.count("arbiter")) { + out() << "specifying --arbiter without --pairwith" << endl; + dbexit( EXIT_BADOPTIONS ); } if( params.count("nssize") ) { int x = params["nssize"].as(); - uassert( 10034 , "bad --nssize arg", x > 0 && x <= (0x7fffffff/1024/1024)); + if (x <= 0 || x > (0x7fffffff/1024/1024)) { + out() << "bad --nssize arg" << endl; + dbexit( EXIT_BADOPTIONS ); 
+ } lenForNewNsFiles = x * 1024 * 1024; assert(lenForNewNsFiles > 0); } if (params.count("oplogSize")) { - long x = params["oplogSize"].as(); - uassert( 10035 , "bad --oplogSize arg", x > 0); + long long x = params["oplogSize"].as(); + if (x <= 0) { + out() << "bad --oplogSize arg" << endl; + dbexit( EXIT_BADOPTIONS ); + } + // note a small size such as x==1 is ok for an arbiter. + if( x > 1000 && sizeof(void*) == 4 ) { + out() << "--oplogSize of " << x << "MB is too big for 32 bit version. Use 64 bit build instead." << endl; + dbexit( EXIT_BADOPTIONS ); + } cmdLine.oplogSize = x * 1024 * 1024; assert(cmdLine.oplogSize > 0); } if (params.count("opIdMem")) { long x = params["opIdMem"].as(); - uassert( 10036 , "bad --opIdMem arg", x > 0); + if (x <= 0) { + out() << "bad --opIdMem arg" << endl; + dbexit( EXIT_BADOPTIONS ); + } replSettings.opIdMem = x; assert(replSettings.opIdMem > 0); } if (params.count("cacheSize")) { long x = params["cacheSize"].as(); - uassert( 10037 , "bad --cacheSize arg", x > 0); + if (x <= 0) { + out() << "bad --cacheSize arg" << endl; + dbexit( EXIT_BADOPTIONS ); + } log() << "--cacheSize option not currently supported" << endl; - //setRecCacheSize(x); - } - if (params.count("port") == 0 ) { - if( params.count("configsvr") ) { - cmdLine.port = CmdLine::ConfigServerPort; - } - if( params.count("shardsvr") ) - cmdLine.port = CmdLine::ShardServerPort; - } - else { - if ( cmdLine.port <= 0 || cmdLine.port > 65535 ){ + } + if (params.count("port") == 0 ) { + if( params.count("configsvr") ) { + cmdLine.port = CmdLine::ConfigServerPort; + } + if( params.count("shardsvr") ) + cmdLine.port = CmdLine::ShardServerPort; + } + else { + if ( cmdLine.port <= 0 || cmdLine.port > 65535 ) { out() << "bad --port number" << endl; dbexit( EXIT_BADOPTIONS ); } } - if ( params.count("configsvr" ) ){ + if ( params.count("configsvr" ) ) { + if (cmdLine.usingReplSets() || replSettings.master || replSettings.slave) { + log() << "replication should not be enabled on a config server" << endl; + ::exit(-1); + } if ( params.count( "diaglog" ) == 0 ) _diaglog.level = 1; if ( params.count( "dbpath" ) == 0 ) dbpath = "/data/configdb"; } - if ( params.count( "profile" ) ){ + if ( params.count( "profile" ) ) { cmdLine.defaultProfile = params["profile"].as(); } - if ( params.count( "maxConns" ) ){ + if ( params.count( "maxConns" ) ) { int newSize = params["maxConns"].as(); - uassert( 12507 , "maxConns has to be at least 5" , newSize >= 5 ); - uassert( 12508 , "maxConns can't be greater than 10000000" , newSize < 10000000 ); + if ( newSize < 5 ) { + out() << "maxConns has to be at least 5" << endl; + dbexit( EXIT_BADOPTIONS ); + } + else if ( newSize >= 10000000 ) { + out() << "maxConns can't be greater than 10000000" << endl; + dbexit( EXIT_BADOPTIONS ); + } connTicketHolder.resize( newSize ); } - if (params.count("nounixsocket")){ + if (params.count("nounixsocket")) { noUnixSocket = true; } - if (params.count("ipv6")){ + if (params.count("ipv6")) { enableIPv6(); } - if (params.count("noMoveParanoia")){ + if (params.count("noMoveParanoia")) { cmdLine.moveParanoia = false; } -#if defined(_WIN32) - if (params.count("serviceName")){ - string x = params["serviceName"].as(); - windowsServiceName = wstring(x.size(),L' '); - for ( size_t i=0; i(); - windowsServiceUser = wstring(x.size(),L' '); - for ( size_t i=0; i(); - windowsServicePassword = wstring(x.size(),L' '); - for ( size_t i=0; i command = params["command"].as< vector >(); - if (command[0].compare("msg") == 0) { - const char *m; - - if 
(command.size() < 3) { - cout << "Too few parameters to 'msg' command" << endl; - cout << visible_options << endl; - return 0; - } - - m = command[1].c_str(); - - msg(m, "127.0.0.1", atoi(command[2].c_str())); - return 0; - } if (command[0].compare("run") == 0) { if (command.size() > 1) { cout << "Too many parameters to 'run' command" << endl; @@ -1076,31 +1045,17 @@ int main(int argc, char* argv[], char *envp[] ) return 0; } + if( cmdLine.pretouch ) + log() << "--pretouch " << cmdLine.pretouch << endl; + #if defined(_WIN32) - if ( reinstallService ) { - ServiceController::removeService( windowsServiceName ); - } - if ( installService || reinstallService ) { - if ( !ServiceController::installService( windowsServiceName , L"Mongo DB", L"Mongo DB Server", windowsServiceUser, windowsServicePassword, dbpath, argc, argv ) ) - dbexit( EXIT_NTSERVICE_ERROR ); - dbexit( EXIT_CLEAN ); - } - else if ( removeService ) { - if ( !ServiceController::removeService( windowsServiceName ) ) - dbexit( EXIT_NTSERVICE_ERROR ); - dbexit( EXIT_CLEAN ); - } - else if ( startService ) { - if ( !ServiceController::startService( windowsServiceName , mongo::initService ) ) - dbexit( EXIT_NTSERVICE_ERROR ); - dbexit( EXIT_CLEAN ); + if (serviceParamsCheck( params, dbpath, argc, argv )) { + return 0; } #endif } - if( cmdLine.pretouch ) - log() << "--pretouch " << cmdLine.pretouch << endl; - + UnitTest::runTests(); initAndListen(cmdLine.port, appsrvPath); dbexit(EXIT_CLEAN); return 0; @@ -1113,12 +1068,11 @@ namespace mongo { #undef out void exitCleanly( ExitCode code ) { - goingAway = true; killCurrentOp.killAll(); { dblock lk; log() << "now exiting" << endl; - dbexit( code ); + dbexit( code ); } } @@ -1154,7 +1108,12 @@ namespace mongo { oss << "Backtrace:" << endl; printStackTrace( oss ); rawOut( oss.str() ); - dbexit( EXIT_ABRUBT ); + + if( cmdLine.dur ) { + ::exit(EXIT_ABRUPT); + } + + dbexit( EXIT_ABRUPT ); } sigset_t asyncSignals; @@ -1171,12 +1130,14 @@ namespace mongo { // this will be called in certain c++ error cases, for example if there are two active // exceptions void myterminate() { - rawOut( "terminate() called, printing stack:\n" ); + rawOut( "terminate() called, printing stack:" ); printStackTrace(); abort(); } - - void setupSignals() { + + void setupSignals_ignoreHelper( int signal ) {} + + void setupSignals( bool inFork ) { assert( signal(SIGSEGV, abruptQuit) != SIG_ERR ); assert( signal(SIGFPE, abruptQuit) != SIG_ERR ); assert( signal(SIGABRT, abruptQuit) != SIG_ERR ); @@ -1187,55 +1148,58 @@ namespace mongo { setupSIGTRAPforGDB(); sigemptyset( &asyncSignals ); - sigaddset( &asyncSignals, SIGHUP ); + + if ( inFork ) + assert( signal( SIGHUP , setupSignals_ignoreHelper ) != SIG_ERR ); + else + sigaddset( &asyncSignals, SIGHUP ); + sigaddset( &asyncSignals, SIGINT ); sigaddset( &asyncSignals, SIGTERM ); assert( pthread_sigmask( SIG_SETMASK, &asyncSignals, 0 ) == 0 ); boost::thread it( interruptThread ); - + set_terminate( myterminate ); } #else -void ctrlCTerminate() { - log() << "got kill or ctrl-c signal, will terminate after current cmd ends" << endl; - Client::initThread( "ctrlCTerminate" ); - exitCleanly( EXIT_KILL ); -} -BOOL CtrlHandler( DWORD fdwCtrlType ) -{ - switch( fdwCtrlType ) - { - case CTRL_C_EVENT: - rawOut("Ctrl-C signal\n"); - ctrlCTerminate(); - return( TRUE ); - case CTRL_CLOSE_EVENT: - rawOut("CTRL_CLOSE_EVENT signal\n"); - ctrlCTerminate(); - return( TRUE ); - case CTRL_BREAK_EVENT: - rawOut("CTRL_BREAK_EVENT signal\n"); - ctrlCTerminate(); - return TRUE; - case 
CTRL_LOGOFF_EVENT: - rawOut("CTRL_LOGOFF_EVENT signal (ignored)\n"); - return FALSE; - case CTRL_SHUTDOWN_EVENT: - rawOut("CTRL_SHUTDOWN_EVENT signal (ignored)\n"); - return FALSE; - default: - return FALSE; + void ctrlCTerminate() { + log() << "got kill or ctrl-c signal, will terminate after current cmd ends" << endl; + Client::initThread( "ctrlCTerminate" ); + exitCleanly( EXIT_KILL ); + } + BOOL CtrlHandler( DWORD fdwCtrlType ) { + switch( fdwCtrlType ) { + case CTRL_C_EVENT: + rawOut("Ctrl-C signal"); + ctrlCTerminate(); + return( TRUE ); + case CTRL_CLOSE_EVENT: + rawOut("CTRL_CLOSE_EVENT signal"); + ctrlCTerminate(); + return( TRUE ); + case CTRL_BREAK_EVENT: + rawOut("CTRL_BREAK_EVENT signal"); + ctrlCTerminate(); + return TRUE; + case CTRL_LOGOFF_EVENT: + rawOut("CTRL_LOGOFF_EVENT signal (ignored)"); + return FALSE; + case CTRL_SHUTDOWN_EVENT: + rawOut("CTRL_SHUTDOWN_EVENT signal (ignored)"); + return FALSE; + default: + return FALSE; + } } -} void myPurecallHandler() { - rawOut( "pure virtual method called, printing stack:\n" ); + rawOut( "pure virtual method called, printing stack:" ); printStackTrace(); - abort(); + abort(); } - - void setupSignals() { + + void setupSignals( bool inFork ) { if( SetConsoleCtrlHandler( (PHANDLER_ROUTINE) CtrlHandler, TRUE ) ) ; else @@ -1245,6 +1209,3 @@ BOOL CtrlHandler( DWORD fdwCtrlType ) #endif } // namespace mongo - -//#include "recstore.h" -//#include "reccache.h" diff --git a/db/db.h b/db/db.h index a261f58..7ef7d03 100644 --- a/db/db.h +++ b/db/db.h @@ -26,19 +26,6 @@ namespace mongo { // void jniCallback(Message& m, Message& out); - /* Note the limit here is rather arbitrary and is simply a standard. generally the code works - with any object that fits in ram. - - Also note that the server has some basic checks to enforce this limit but those checks are not exhaustive - for example need to check for size too big after - update $push (append) operation - various db.eval() type operations - - Note also we sometimes do work with objects slightly larger - an object in the replication local.oplog - could be slightly larger. 
- */ - const int MaxBSONObjectSize = 4 * 1024 * 1024; - /** * class to hold path + dbname -> Database * might be able to optimizer further @@ -48,8 +35,7 @@ namespace mongo { typedef map DBs; typedef map Paths; - DatabaseHolder() : _size(0){ - } + DatabaseHolder() : _size(0) { } bool isLoaded( const string& ns , const string& path ) const { dbMutex.assertAtLeastReadLocked(); @@ -57,29 +43,29 @@ namespace mongo { if ( x == _paths.end() ) return false; const DBs& m = x->second; - + string db = _todb( ns ); DBs::const_iterator it = m.find(db); return it != m.end(); } - + Database * get( const string& ns , const string& path ) const { dbMutex.assertAtLeastReadLocked(); Paths::const_iterator x = _paths.find( path ); if ( x == _paths.end() ) return 0; const DBs& m = x->second; - + string db = _todb( ns ); DBs::const_iterator it = m.find(db); - if ( it != m.end() ) + if ( it != m.end() ) return it->second; return 0; } - - void put( const string& ns , const string& path , Database * db ){ + + void put( const string& ns , const string& path , Database * db ) { dbMutex.assertWriteLocked(); DBs& m = _paths[path]; Database*& d = m[_todb(ns)]; @@ -87,35 +73,10 @@ namespace mongo { _size++; d = db; } - - Database* getOrCreate( const string& ns , const string& path , bool& justCreated ){ - dbMutex.assertWriteLocked(); - DBs& m = _paths[path]; - - string dbname = _todb( ns ); - - Database* & db = m[dbname]; - if ( db ){ - justCreated = false; - return db; - } - - log(1) << "Accessing: " << dbname << " for the first time" << endl; - try { - db = new Database( dbname.c_str() , justCreated , path ); - } - catch ( ... ){ - m.erase( dbname ); - throw; - } - _size++; - return db; - } - + Database* getOrCreate( const string& ns , const string& path , bool& justCreated ); - - void erase( const string& ns , const string& path ){ + void erase( const string& ns , const string& path ) { dbMutex.assertWriteLocked(); DBs& m = _paths[path]; _size -= (int)m.erase( _todb( ns ) ); @@ -124,71 +85,77 @@ namespace mongo { /* force - force close even if something underway - use at shutdown */ bool closeAll( const string& path , BSONObjBuilder& result, bool force ); - int size(){ + int size() { return _size; } - + + void forEach(boost::function f) const { + dbMutex.assertAtLeastReadLocked(); + for ( Paths::const_iterator i=_paths.begin(); i!=_paths.end(); i++ ) { + DBs m = i->second; + for( DBs::const_iterator j=m.begin(); j!=m.end(); j++ ) { + f(j->second); + } + } + } + /** * gets all unique db names, ignoring paths */ void getAllShortNames( set& all ) const { dbMutex.assertAtLeastReadLocked(); - for ( Paths::const_iterator i=_paths.begin(); i!=_paths.end(); i++ ){ + for ( Paths::const_iterator i=_paths.begin(); i!=_paths.end(); i++ ) { DBs m = i->second; - for( DBs::const_iterator j=m.begin(); j!=m.end(); j++ ){ + for( DBs::const_iterator j=m.begin(); j!=m.end(); j++ ) { all.insert( j->first ); } } } private: - + string _todb( const string& ns ) const { string d = __todb( ns ); - uassert( 13280 , (string)"invalid db name: " + ns , Database::validDBName( d ) ); + uassert( 13280 , (string)"invalid db name: " + ns , Database::validDBName( d ) ); return d; } string __todb( const string& ns ) const { size_t i = ns.find( '.' 
); - if ( i == string::npos ){ + if ( i == string::npos ) { uassert( 13074 , "db name can't be empty" , ns.size() ); return ns; } uassert( 13075 , "db name can't be empty" , i > 0 ); return ns.substr( 0 , i ); } - + Paths _paths; int _size; - + }; extern DatabaseHolder dbHolder; - // shared functionality for removing references to a database from this program instance - // does not delete the files on disk - void closeDatabase( const char *cl, const string& path = dbpath ); - struct dbtemprelease { Client::Context * _context; int _locktype; - + dbtemprelease() { _context = cc().getContext(); _locktype = dbMutex.getState(); assert( _locktype ); - + if ( _locktype > 0 ) { - massert( 10298 , "can't temprelease nested write lock", _locktype == 1); + massert( 10298 , "can't temprelease nested write lock", _locktype == 1); if ( _context ) _context->unlocked(); dbMutex.unlock(); - } + } else { - massert( 10299 , "can't temprelease nested read lock", _locktype == -1); + massert( 10299 , "can't temprelease nested read lock", _locktype == -1); if ( _context ) _context->unlocked(); dbMutex.unlock_shared(); - } + } } ~dbtemprelease() { @@ -196,11 +163,11 @@ namespace mongo { dbMutex.lock(); else dbMutex.lock_shared(); - + if ( _context ) _context->relocked(); } }; - + /** only does a temp release if we're not nested and have a lock @@ -208,22 +175,22 @@ namespace mongo { struct dbtempreleasecond { dbtemprelease * real; int locktype; - - dbtempreleasecond(){ + + dbtempreleasecond() { real = 0; locktype = dbMutex.getState(); if ( locktype == 1 || locktype == -1 ) real = new dbtemprelease(); } - - ~dbtempreleasecond(){ - if ( real ){ + + ~dbtempreleasecond() { + if ( real ) { delete real; real = 0; } } - - bool unlocked(){ + + bool unlocked() { return real > 0; } }; diff --git a/db/db.sln b/db/db.sln deleted file mode 100644 index b02b79d..0000000 --- a/db/db.sln +++ /dev/null @@ -1,86 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongod", "db.vcproj", "{215B2D68-0A70-4D10-8E75-B31010C62A91}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{4082881B-EB00-486F-906C-843B8EC06E18}" - ProjectSection(SolutionItems) = preProject - driverHelpers.cpp = driverHelpers.cpp - EndProjectSection -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{2B262D59-9DC7-4BF1-A431-1BD4966899A5}" - ProjectSection(SolutionItems) = preProject - ..\tools\bridge.cpp = ..\tools\bridge.cpp - ..\tools\dump.cpp = ..\tools\dump.cpp - ..\tools\export.cpp = ..\tools\export.cpp - ..\tools\files.cpp = ..\tools\files.cpp - ..\tools\import.cpp = ..\tools\import.cpp - ..\tools\restore.cpp = ..\tools\restore.cpp - ..\tools\sniffer.cpp = ..\tools\sniffer.cpp - ..\tools\stat.cpp = ..\tools\stat.cpp - ..\tools\tool.cpp = ..\tools\tool.cpp - ..\tools\tool.h = ..\tools\tool.h - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mongos", "..\s\dbgrid.vcproj", "{E03717ED-69B4-4D21-BC55-DF6690B585C6}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "..\dbtests\test.vcproj", "{215B2D68-0A70-4D10-8E75-B33010C62A91}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "unix files", "unix files", "{2F760952-C71B-4865-998F-AABAE96D1373}" - ProjectSection(SolutionItems) = preProject - ..\util\mmap_posix.cpp = ..\util\mmap_posix.cpp - ..\util\processinfo_darwin.cpp = ..\util\processinfo_darwin.cpp - 
..\util\processinfo_linux2.cpp = ..\util\processinfo_linux2.cpp - ..\util\processinfo_none.cpp = ..\util\processinfo_none.cpp - EndProjectSection -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shell", "shell", "{407B4B88-3451-433C-B74F-31B31FEB5791}" - ProjectSection(SolutionItems) = preProject - ..\shell\dbshell.cpp = ..\shell\dbshell.cpp - ..\shell\mongo_vstudio.cpp = ..\shell\mongo_vstudio.cpp - ..\shell\utils.cpp = ..\shell\utils.cpp - ..\shell\utils.h = ..\shell\utils.h - EndProjectSection -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "other source files", "other source files", "{12B11474-2D74-48C3-BB3D-F03249BEA88F}" - ProjectSection(SolutionItems) = preProject - ..\buildscripts\buildboost.bat = ..\buildscripts\buildboost.bat - ..\buildscripts\buildboost64.bat = ..\buildscripts\buildboost64.bat - ..\SConstruct = ..\SConstruct - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bsondemo", "..\bson\bsondemo\bsondemo.vcproj", "{C9DB5EB7-81AA-4185-BAA1-DA035654402F}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32 - {215B2D68-0A70-4D10-8E75-B31010C62A91}.Debug|Win32.Build.0 = Debug|Win32 - {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Win32.ActiveCfg = Release|Win32 - {215B2D68-0A70-4D10-8E75-B31010C62A91}.Release|Win32.Build.0 = Release|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Win32.ActiveCfg = Debug|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Debug|Win32.Build.0 = Debug|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Win32.ActiveCfg = Release|Win32 - {E03717ED-69B4-4D21-BC55-DF6690B585C6}.Release|Win32.Build.0 = Release|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.Build.0 = Debug|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.ActiveCfg = Release|Win32 - {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.Build.0 = Release|Win32 - {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Win32.ActiveCfg = Debug|Win32 - {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Debug|Win32.Build.0 = Debug|Win32 - {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Win32.ActiveCfg = Release|Win32 - {C9DB5EB7-81AA-4185-BAA1-DA035654402F}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(NestedProjects) = preSolution - {2B262D59-9DC7-4BF1-A431-1BD4966899A5} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} - {2F760952-C71B-4865-998F-AABAE96D1373} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} - {407B4B88-3451-433C-B74F-31B31FEB5791} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} - {4082881B-EB00-486F-906C-843B8EC06E18} = {12B11474-2D74-48C3-BB3D-F03249BEA88F} - EndGlobalSection -EndGlobal diff --git a/db/db.vcproj b/db/db.vcproj deleted file mode 100644 index 2eac6eb..0000000 --- a/db/db.vcproj +++ /dev/null @@ -1,1885 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
diff --git a/db/db.vcxproj b/db/db.vcxproj
index 0cabbd0..ad9c6d2 100644
--- a/db/db.vcxproj
+++ b/db/db.vcxproj
@@ -89,6 +89,10 @@
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters
index bf30b4e..a2011df 100755
--- a/db/db.vcxproj.filters
+++ b/db/db.vcxproj.filters
@@ -1,928 +1,329 @@
- - {d7f08f93-36bf-49cd-9e1c-ba1fec3234ce} - - - {e899caa1-9a90-4604-ac2e-68d5ca12425c} - - - {9775f24c-3a29-4e0d-b5de-991c592cf376} - - - {9aea1b83-cdcb-48a8-97e6-47805cacdc29} - - - {aff20a87-2efe-4861-930f-8780c08cbea5} - - - {2a0924a5-9bd9-4c86-a149-0df09dcb5548} - - - {03b0d798-b13d-48f4-930d-ca827e2a3f00} - - - {3b73f786-d352-446f-a5f5-df49384baf7a} - - - {4a1ea357-1077-4ad1-85b4-db48a6e1eb46} - + - - replSets - - - util\mongoutils - - - db - - - Resource Files - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - - - replSets\testing - + + + + + + + + + + + + + + + + + + + + + + - - libs - - - libs - - - libs - - - libs - - - - - Resource Files - + + + + \ No newline at end of file diff --git a/db/db_10.sln b/db/db_10.sln old mode 100644 new mode 100755 index d68d897..f74ac3d --- a/db/db_10.sln +++ b/db/db_10.sln @@ -8,16 +8,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{40 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{2B262D59-9DC7-4BF1-A431-1BD4966899A5}" ProjectSection(SolutionItems) = preProject - ..\tools\bridge.cpp = ..\tools\bridge.cpp - ..\tools\bsondump.cpp = ..\tools\bsondump.cpp - ..\tools\dump.cpp = ..\tools\dump.cpp ..\tools\export.cpp = ..\tools\export.cpp - ..\tools\import.cpp = ..\tools\import.cpp - ..\tools\restore.cpp = ..\tools\restore.cpp ..\tools\sniffer.cpp = ..\tools\sniffer.cpp - ..\tools\stat.cpp = ..\tools\stat.cpp - ..\tools\tool.cpp = ..\tools\tool.cpp - ..\tools\tool.h = ..\tools\tool.h EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "unix files", "unix files", "{2F760952-C71B-4865-998F-AABAE96D1373}" diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index 7bd7203..8974bd3 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -40,11 +40,13 @@ #include "stats/counters.h" #include "background.h" #include "../util/version.h" +#include "../s/d_writeback.h" +#include "dur_stats.h" namespace mongo { extern int otherTraceLevel; - void flushOpLog( stringstream &ss ); + void flushDiagLog(); /* reset any errors so that getlasterror comes back clean. @@ -54,7 +56,7 @@ namespace mongo { */ class CmdResetError : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool logTheOp() { return false; @@ -74,8 +76,8 @@ namespace mongo { } } cmdResetError; - /* set by replica sets if specified in the configuration. - a pointer is used to avoid any possible locking issues with lockless reading (see below locktype() is NONE + /* set by replica sets if specified in the configuration. + a pointer is used to avoid any possible locking issues with lockless reading (see below locktype() is NONE and would like to keep that) (for now, it simply orphans any old copy as config changes should be extremely rare). note: once non-null, never goes to null again. 
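The comment above describes a publish-once pointer: getLastError runs with locktype NONE, so readers take no lock, and a reconfiguration simply swaps in a new object and deliberately orphans the old one. A minimal sketch of that pattern follows; the names are hypothetical and std::atomic is used only for illustration, this is not the upstream code:

    #include <atomic>
    #include "bson/bson.h"   // assumption: BSONObj comes from the bundled bson headers

    // Illustration only: publish-once default object read without locking.
    static std::atomic<mongo::BSONObj*> defaultWriteConcern(nullptr);

    void setDefaultWriteConcern(const mongo::BSONObj& d) {
        // Never delete the previous value: a lock-free reader may still hold
        // the old pointer, and config changes are rare, so the old copy is
        // simply orphaned (the "orphans any old copy" behavior noted above).
        defaultWriteConcern.store(new mongo::BSONObj(d.getOwned()));
    }

    const mongo::BSONObj* currentDefaultWriteConcern() {
        return defaultWriteConcern.load();   // no lock taken on the read path
    }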
@@ -84,33 +86,38 @@ namespace mongo { class CmdGetLastError : public Command { public: - virtual LockType locktype() const { return NONE; } - virtual bool requiresAuth() { return false; } - virtual bool logTheOp() { - return false; - } - virtual bool slaveOk() const { - return true; - } + CmdGetLastError() : Command("getLastError", false, "getlasterror") { } + virtual LockType locktype() const { return NONE; } + virtual bool requiresAuth() { return false; } + virtual bool logTheOp() { return false; } + virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { - help << "return error status of the last operation on this connection"; + help << "return error status of the last operation on this connection\n" + << "options:\n" + << " fsync - fsync before returning, or wait for journal commit if running with --dur\n" + << " w - await replication to w servers (including self) before returning\n" + << " wtimeout - timeout for w in milliseconds"; } - CmdGetLastError() : Command("getLastError", false, "getlasterror") {} - bool run(const string& dbnamne, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); + + bool err = false; + if ( le->nPrev != 1 ) - LastError::noError.appendSelf( result ); + err = LastError::noError.appendSelf( result , false ); else - le->appendSelf( result ); - + err = le->appendSelf( result , false ); + Client& c = cc(); c.appendLastOp( result ); + result.appendNumber( "connectionId" , c.getConnectionId() ); + BSONObj cmdObj = _cmdObj; - { + { BSONObj::iterator i(_cmdObj); i.next(); - if( !i.more() ) { + if( !i.more() ) { /* empty, use default */ BSONObj *def = getLastErrorDefault; if( def ) @@ -118,13 +125,27 @@ namespace mongo { } } - if ( cmdObj["fsync"].trueValue() ){ - log() << "fsync from getlasterror" << endl; - result.append( "fsyncFiles" , MemoryMappedFile::flushAll( true ) ); + if ( cmdObj["fsync"].trueValue() ) { + Timer t; + if( !getDur().awaitCommit() ) { + // if get here, not running with --dur + log() << "fsync from getlasterror" << endl; + result.append( "fsyncFiles" , MemoryMappedFile::flushAll( true ) ); + } + else { + // this perhaps is temp. how long we wait for the group commit to occur. + result.append( "waited", t.millis() ); + } } - + + if ( err ) { + // doesn't make sense to wait for replication + // if there was an error + return true; + } + BSONElement e = cmdObj["w"]; - if ( e.isNumber() ){ + if ( e.isNumber() ) { int timeout = cmdObj["wtimeout"].numberInt(); Timer t; @@ -132,15 +153,43 @@ namespace mongo { long long passes = 0; char buf[32]; - while ( 1 ){ - if ( opReplicatedEnough( c.getLastOp() , w ) ) - break; + while ( 1 ) { + OpTime op(c.getLastOp()); - if ( timeout > 0 && t.millis() >= timeout ){ + if ( op.isNull() ) { + if ( anyReplEnabled() ) { + result.append( "wnote" , "no write has been done on this connection" ); + } + else if ( w <= 1 ) { + // don't do anything + // w=1 and no repl, so this is fine + } + else { + // w=2 and no repl + result.append( "wnote" , "no replication has been enabled, so w=2+ won't work" ); + result.append( "err", "norepl" ); + return true; + } + break; + } + + // check this first for w=0 or w=1 + if ( opReplicatedEnough( op, w ) ) + break; + + // if replication isn't enabled (e.g., config servers) + if ( ! 
anyReplEnabled() ) { + result.append( "err", "norepl" ); + return true; + } + + + if ( timeout > 0 && t.millis() >= timeout ) { result.append( "wtimeout" , true ); errmsg = "timed out waiting for slaves"; result.append( "waited" , t.millis() ); - return false; + result.append( "err" , "timeout" ); + return true; } assert( sprintf( buf , "w block pass: %lld" , ++passes ) < 30 ); @@ -150,14 +199,15 @@ namespace mongo { } result.appendNumber( "wtime" , t.millis() ); } - + + result.appendNull( "err" ); return true; } } cmdGetLastError; class CmdGetPrevError : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool logTheOp() { return false; @@ -169,7 +219,7 @@ namespace mongo { return true; } CmdGetPrevError() : Command("getPrevError", false, "getpreverror") {} - bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); le->appendSelf( result ); if ( le->valid ) @@ -191,16 +241,16 @@ namespace mongo { virtual bool slaveOk() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdDropDatabase() : Command("dropDatabase") {} - bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); - log() << "dropDatabase " << dbnamne << endl; + log() << "dropDatabase " << dbname << endl; int p = (int) e.number(); if ( p != 1 ) return false; - dropDatabase(dbnamne); - result.append( "dropped" , dbnamne ); + dropDatabase(dbname); + result.append( "dropped" , dbname ); return true; } } cmdDropDatabase; @@ -216,7 +266,7 @@ namespace mongo { virtual void help( stringstream& help ) const { help << "repair database. also compacts. note: slow."; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdRepairDatabase() : Command("repairDatabase") {} bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); @@ -231,7 +281,7 @@ namespace mongo { return repairDatabase( dbname, errmsg, preserveClonedFilesOnFailure, backupOriginalFiles ); } } cmdRepairDatabase; - + /* set db profiling level todo: how do we handle profiling information put in the db with replication? sensibly or not? 
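The getLastError rework above adds per-connection write acknowledgement: fsync (or a journal-commit wait under --dur), w to await replication to N members, wtimeout in milliseconds, and a structured err field that is null on success. A hedged usage sketch from the client side; DBClientConnection and the BSON macro come from client/dbclient.h, the collection name is made up, and field semantics follow the help text above:

    #include "client/dbclient.h"
    using namespace mongo;

    // Sketch: perform a write, then wait until it has replicated to two
    // members (including self) or five seconds have elapsed.
    bool insertAcknowledged(DBClientConnection& c) {
        c.insert("test.people", BSON("name" << "eliot"));

        BSONObj res;
        c.runCommand("test",
                     BSON("getLastError" << 1 << "w" << 2 << "wtimeout" << 5000),
                     res);

        // On success "err" is null; per the hunk above, a timeout now returns
        // ok:1 with wtimeout:true and err:"timeout" instead of failing.
        return res["err"].isNull();
    }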
@@ -245,9 +295,10 @@ namespace mongo { help << "enable or disable performance profiling\n"; help << "{ profile : }\n"; help << "0=off 1=log slow ops 2=log all\n"; + help << "-1 to get current values\n"; help << "http://www.mongodb.org/display/DOCS/Database+Profiler"; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdProfile() : Command("profile") {} bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); @@ -256,7 +307,7 @@ namespace mongo { int p = (int) e.number(); bool ok = false; - + if ( p == -1 ) ok = true; else if ( p >= 0 && p <= 2 ) { @@ -266,7 +317,7 @@ namespace mongo { BSONElement slow = cmdObj["slowms"]; if ( slow.isNumber() ) cmdLine.slowMS = slow.numberInt(); - + return ok; } } cmdProfile; @@ -279,8 +330,8 @@ namespace mongo { CmdServerStatus() : Command("serverStatus", true) { started = time(0); } - - virtual LockType locktype() const { return NONE; } + + virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const { help << "returns lots of administrative server statistics"; @@ -291,9 +342,11 @@ namespace mongo { BSONObjBuilder timeBuilder(128); - bool authed = cc().getAuthenticationInfo()->isAuthorizedReads("admin"); + bool authed = cc().getAuthenticationInfo()->isAuthorizedReads("admin"); + result.append( "host" , prettyHostName() ); result.append("version", versionString); + result.append("process","mongod"); result.append("uptime",(double) (time(0)-started)); result.append("uptimeEstimate",(double) (start/1000)); result.appendDate( "localTime" , jsTime() ); @@ -309,27 +362,41 @@ namespace mongo { t.append("totalTime", tt); t.append("lockTime", tl); t.append("ratio", (tt ? tl/tt : 0)); - - BSONObjBuilder ttt( t.subobjStart( "currentQueue" ) ); - int w=0, r=0; - Client::recommendedYieldMicros( &w , &r ); - ttt.append( "total" , w + r ); - ttt.append( "readers" , r ); - ttt.append( "writers" , w ); - ttt.done(); + + { + BSONObjBuilder ttt( t.subobjStart( "currentQueue" ) ); + int w=0, r=0; + Client::recommendedYieldMicros( &w , &r ); + ttt.append( "total" , w + r ); + ttt.append( "readers" , r ); + ttt.append( "writers" , w ); + ttt.done(); + } + + { + BSONObjBuilder ttt( t.subobjStart( "activeClients" ) ); + int w=0, r=0; + Client::getActiveClientCount( w , r ); + ttt.append( "total" , w + r ); + ttt.append( "readers" , r ); + ttt.append( "writers" , w ); + ttt.done(); + } + + result.append( "globalLock" , t.obj() ); } timeBuilder.appendNumber( "after basic" , Listener::getElapsedTimeMillis() - start ); - if ( authed ){ - + { + BSONObjBuilder t( result.subobjStart( "mem" ) ); - + t.append("bits", ( sizeof(int*) == 4 ? 
32 : 64 ) ); ProcessInfo p; - if ( p.supported() ){ + if ( p.supported() ) { t.appendNumber( "resident" , p.getResidentSize() ); t.appendNumber( "virtual" , p.getVirtualMemorySize() ); t.appendBool( "supported" , true ); @@ -338,14 +405,16 @@ namespace mongo { result.append( "note" , "not all mem info support on this platform" ); t.appendBool( "supported" , false ); } - + + timeBuilder.appendNumber( "middle of mem" , Listener::getElapsedTimeMillis() - start ); + t.appendNumber( "mapped" , MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ) ); t.done(); - + } - timeBuilder.appendNumber( "after is authed" , Listener::getElapsedTimeMillis() - start ); - + timeBuilder.appendNumber( "after mem" , Listener::getElapsedTimeMillis() - start ); + { BSONObjBuilder bb( result.subobjStart( "connections" ) ); bb.append( "current" , connTicketHolder.used() ); @@ -353,15 +422,15 @@ namespace mongo { bb.done(); } timeBuilder.appendNumber( "after connections" , Listener::getElapsedTimeMillis() - start ); - - if ( authed ){ + + { BSONObjBuilder bb( result.subobjStart( "extra_info" ) ); bb.append("note", "fields vary by platform"); ProcessInfo p; p.getExtraInfo(bb); bb.done(); timeBuilder.appendNumber( "after extra info" , Listener::getElapsedTimeMillis() - start ); - + } { @@ -369,31 +438,43 @@ namespace mongo { globalIndexCounters.append( bb ); bb.done(); } - + { BSONObjBuilder bb( result.subobjStart( "backgroundFlushing" ) ); globalFlushCounters.append( bb ); bb.done(); } - + { BSONObjBuilder bb( result.subobjStart( "cursors" ) ); ClientCursor::appendStats( bb ); bb.done(); } - - timeBuilder.appendNumber( "after counters" , Listener::getElapsedTimeMillis() - start ); - if ( anyReplEnabled() ){ + { + BSONObjBuilder bb( result.subobjStart( "network" ) ); + networkCounter.append( bb ); + bb.done(); + } + + + timeBuilder.appendNumber( "after counters" , Listener::getElapsedTimeMillis() - start ); + + if ( anyReplEnabled() ) { BSONObjBuilder bb( result.subobjStart( "repl" ) ); appendReplicationInfo( bb , authed , cmdObj["repl"].numberInt() ); bb.done(); + + if ( ! _isMaster() ) { + result.append( "opcountersRepl" , replOpCounters.getObj() ); + } + } - timeBuilder.appendNumber( "after repl" , Listener::getElapsedTimeMillis() - start ); - + timeBuilder.appendNumber( "after repl" , Listener::getElapsedTimeMillis() - start ); + result.append( "opcounters" , globalOpCounters.getObj() ); - + { BSONObjBuilder asserts( result.subobjStart( "asserts" ) ); asserts.append( "regular" , assertionCount.regular ); @@ -404,12 +485,18 @@ namespace mongo { asserts.done(); } - timeBuilder.appendNumber( "after asserts" , Listener::getElapsedTimeMillis() - start ); + timeBuilder.appendNumber( "after asserts" , Listener::getElapsedTimeMillis() - start ); + + result.append( "writeBacksQueued" , ! writeBackManager.queuesEmpty() ); + + if( cmdLine.dur ) { + result.append("dur", dur::stats.asObj()); + } if ( ! 
authed ) result.append( "note" , "run against admin for more info" ); - - if ( Listener::getElapsedTimeMillis() - start > 1000 ){ + + if ( Listener::getElapsedTimeMillis() - start > 1000 ) { BSONObj t = timeBuilder.obj(); log() << "serverStatus was very slow: " << t << endl; result.append( "timing" , t ); @@ -426,7 +513,7 @@ namespace mongo { return true; } virtual void help( stringstream& help ) const { help << "internal"; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } CmdGetOpTime() : Command("getoptime") { } bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { writelock l( "" ); @@ -456,12 +543,10 @@ namespace mongo { return true; } void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Monitoring+and+Diagnostics#MonitoringandDiagnostics-DatabaseRecord%2FReplay"; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { int was = _diaglog.setLevel( cmdObj.firstElement().numberInt() ); - stringstream ss; - flushOpLog( ss ); - out() << ss.str() << endl; + flushDiagLog(); if ( !cmdLine.quiet ) tlog() << "CMD: diagLogging set to " << _diaglog.level << " from: " << was << endl; result.append( "was" , was ); @@ -471,7 +556,7 @@ namespace mongo { /* remove bit from a bit array - actually remove its slot, not a clear note: this function does not work with x == 63 -- that is ok - but keep in mind in the future if max indexes were extended to + but keep in mind in the future if max indexes were extended to exactly 64 it would be a problem */ unsigned long long removeBit(unsigned long long b, int x) { @@ -499,6 +584,7 @@ namespace mongo { BackgroundOperation::assertNoBgOpInProgForNs(ns); + d = d->writingWithExtra(); d->aboutToDeleteAnIndex(); /* there may be pointers pointing at keys in the btree(s). kill them. */ @@ -513,7 +599,8 @@ namespace mongo { for ( int i = 0; i < d->nIndexes; i++ ) { if ( !mayDeleteIdIndex && d->idx(i).isIdIndex() ) { idIndex = &d->idx(i); - } else { + } + else { d->idx(i).kill_idx(); } } @@ -526,9 +613,9 @@ namespace mongo { /* assuming here that id index is not multikey: */ d->multiKeyIndexBits = 0; assureSysIndexesEmptied(ns, idIndex); - anObjBuilder.append("msg", mayDeleteIdIndex ? - "indexes dropped for collection" : - "non-_id indexes dropped for collection"); + anObjBuilder.append("msg", mayDeleteIdIndex ? + "indexes dropped for collection" : + "non-_id indexes dropped for collection"); } else { // delete just one index @@ -551,9 +638,10 @@ namespace mongo { d->nIndexes--; for ( int i = x; i < d->nIndexes; i++ ) d->idx(i) = d->idx(i+1); - } else { + } + else { int n = removeFromSysIndexes(ns, name); // just in case an orphaned listing there - i.e. should have been repaired but wasn't - if( n ) { + if( n ) { log() << "info: removeFromSysIndexes cleaned up " << n << " entries" << endl; } log() << "dropIndexes: " << name << " not found" << endl; @@ -578,7 +666,7 @@ namespace mongo { return false; } virtual void help( stringstream& help ) const { help << "drop a collection\n{drop : }"; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string nsToDrop = dbname + '.' 
+ cmdObj.firstElement().valuestr(); NamespaceDetails *d = nsdetails(nsToDrop.c_str()); @@ -597,7 +685,7 @@ namespace mongo { /* select count(*) */ class CmdCount : public Command { public: - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } CmdCount() : Command("count") { } virtual bool logTheOp() { return false; @@ -619,7 +707,7 @@ namespace mongo { long long n = runCount(ns.c_str(), cmdObj, err); long long nn = n; bool ok = true; - if ( n == -1 ){ + if ( n == -1 ) { nn = 0; result.appendBool( "missing" , true ); } @@ -647,7 +735,7 @@ namespace mongo { virtual bool adminOnly() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream& help ) const { help << "create a collection"; } @@ -670,7 +758,7 @@ namespace mongo { virtual bool slaveOk() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream& help ) const { help << "drop indexes for a collection"; } @@ -686,9 +774,9 @@ namespace mongo { if ( f.type() == String ) { return dropIndexes( d, toDeleteNs.c_str(), f.valuestr(), errmsg, anObjBuilder, false ); } - else if ( f.type() == Object ){ + else if ( f.type() == Object ) { int idxId = d->findIndexByKeyPattern( f.embeddedObject() ); - if ( idxId < 0 ){ + if ( idxId < 0 ) { errmsg = "can't find index with key:"; errmsg += f.embeddedObject().toString(); return false; @@ -715,7 +803,7 @@ namespace mongo { public: virtual bool logTheOp() { return false; } // only reindexes on the one node virtual bool slaveOk() const { return true; } // can reindex on a secondary - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream& help ) const { help << "re-index a collection"; } @@ -729,7 +817,7 @@ namespace mongo { tlog() << "CMD: reIndex " << toDeleteNs << endl; BackgroundOperation::assertNoBgOpInProgForNs(toDeleteNs.c_str()); - if ( ! d ){ + if ( ! d ) { errmsg = "ns not found"; return false; } @@ -737,7 +825,7 @@ namespace mongo { list all; auto_ptr i = db.getIndexes( toDeleteNs ); BSONObjBuilder b; - while ( i->more() ){ + while ( i->more() ) { BSONObj o = i->next().getOwned(); b.append( BSONObjBuilder::numStr( all.size() ) , o ); all.push_back( o ); @@ -745,12 +833,12 @@ namespace mongo { bool ok = dropIndexes( d, toDeleteNs.c_str(), "*" , errmsg, result, true ); - if ( ! ok ){ + if ( ! 
ok ) { errmsg = "dropIndexes failed"; return false; } - for ( list::iterator i=all.begin(); i!=all.end(); i++ ){ + for ( list::iterator i=all.begin(); i!=all.end(); i++ ) { BSONObj o = *i; theDataFileMgr.insertWithObjMod( Namespace( toDeleteNs.c_str() ).getSisterNS( "system.indexes" ).c_str() , o , true ); } @@ -773,9 +861,9 @@ namespace mongo { virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream& help ) const { help << "list databases on this server"; } - CmdListDatabases() : Command("listDatabases") {} + CmdListDatabases() : Command("listDatabases" , true ) {} bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { vector< string > dbNames; getDatabaseNames( dbNames ); @@ -795,11 +883,11 @@ namespace mongo { seen.insert( i->c_str() ); } - + // TODO: erh 1/1/2010 I think this is broken where path != dbpath ?? set allShortNames; dbHolder.getAllShortNames( allShortNames ); - for ( set::iterator i = allShortNames.begin(); i != allShortNames.end(); i++ ){ + for ( set::iterator i = allShortNames.begin(); i != allShortNames.end(); i++ ) { string name = *i; if ( seen.count( name ) ) @@ -819,33 +907,45 @@ namespace mongo { } } cmdListDatabases; - /* note an access to a database right after this will open it back up - so this is mainly - for diagnostic purposes. + /* note an access to a database right after this will open it back up - so this is mainly + for diagnostic purposes. */ class CmdCloseAllDatabases : public Command { public: virtual void help( stringstream& help ) const { help << "Close all database files.\nA new request will cause an immediate reopening; thus, this is mostly for testing purposes."; } virtual bool adminOnly() const { return true; } virtual bool slaveOk() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } CmdCloseAllDatabases() : Command( "closeAllDatabases" ) {} bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { - return dbHolder.closeAll( dbpath , result, false ); + bool ok; + try { + ok = dbHolder.closeAll( dbpath , result, false ); + } + catch(DBException&) { + throw; + } + catch(...) { + log() << "ERROR uncaught exception in command closeAllDatabases" << endl; + errmsg = "unexpected uncaught exception"; + return false; + } + return ok; } } cmdCloseAllDatabases; class CmdFileMD5 : public Command { public: - CmdFileMD5() : Command( "filemd5" ){} + CmdFileMD5() : Command( "filemd5" ) {} virtual bool slaveOk() const { return true; } virtual void help( stringstream& help ) const { help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; } - virtual LockType locktype() const { return READ; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + virtual LockType locktype() const { return READ; } + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname; ns += "."; { @@ -867,8 +967,8 @@ namespace mongo { scoped_ptr cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns.c_str())); int n = 0; - while ( cursor->ok() ){ - if ( ! cursor->matcher()->matchesCurrent( cursor.get() ) ){ + while ( cursor->ok() ) { + if ( ! 
cursor->matcher()->matchesCurrent( cursor.get() ) ) { log() << "**** NOT MATCHING ****" << endl; PRINT(cursor->current()); cursor->advance(); @@ -884,7 +984,7 @@ namespace mongo { BSONElement ne = obj["n"]; assert(ne.isNumber()); int myn = ne.numberInt(); - if ( n != myn ){ + if ( n != myn ) { log() << "should have chunk: " << n << " have:" << myn << endl; DBDirectClient client; @@ -902,12 +1002,13 @@ namespace mongo { md5_append( &st , (const md5_byte_t*)(data) , len ); n++; - } catch (...) { + } + catch (...) { yield.relock(); // needed before yield goes out of scope throw; } - if ( ! yield.stillOk() ){ + if ( ! yield.stillOk() ) { uasserted(13281, "File deleted during filemd5 command"); } } @@ -932,15 +1033,15 @@ namespace mongo { public: CmdDatasize() : Command( "dataSize", false, "datasize" ) {} virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { help << - "determine data size for a set of data in a certain range" - "\nexample: { datasize:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }" - "\nkeyPattern, min, and max parameters are optional." - "\nnote: This command may take a while to run"; + "determine data size for a set of data in a certain range" + "\nexample: { dataSize:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }" + "\nkeyPattern, min, and max parameters are optional." + "\nnote: This command may take a while to run"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer timer; string ns = jsobj.firstElement().String(); @@ -951,39 +1052,39 @@ namespace mongo { Client::Context ctx( ns ); NamespaceDetails *d = nsdetails(ns.c_str()); - - if ( ! d || d->nrecords == 0 ){ + + if ( ! 
d || d->stats.nrecords == 0 ) { result.appendNumber( "size" , 0 ); result.appendNumber( "numObjects" , 0 ); result.append( "millis" , timer.millis() ); return true; } - + result.appendBool( "estimate" , estimate ); shared_ptr c; if ( min.isEmpty() && max.isEmpty() ) { - if ( estimate ){ - result.appendNumber( "size" , d->datasize ); - result.appendNumber( "numObjects" , d->nrecords ); + if ( estimate ) { + result.appendNumber( "size" , d->stats.datasize ); + result.appendNumber( "numObjects" , d->stats.nrecords ); result.append( "millis" , timer.millis() ); return 1; } c = theDataFileMgr.findAll( ns.c_str() ); - } + } else if ( min.isEmpty() || max.isEmpty() ) { errmsg = "only one of min or max specified"; return false; - } + } else { IndexDetails *idx = cmdIndexDetailsForRange( ns.c_str(), errmsg, min, max, keyPattern ); if ( idx == 0 ) return false; - + c.reset( new BtreeCursor( d, d->idxNo(*idx), *idx, min, max, false, 1 ) ); } - - long long avgObjSize = d->datasize / d->nrecords; + + long long avgObjSize = d->stats.datasize / d->stats.nrecords; long long maxSize = jsobj["maxSize"].numberLong(); long long maxObjects = jsobj["maxObjects"].numberLong(); @@ -996,11 +1097,11 @@ namespace mongo { size += avgObjSize; else size += c->currLoc().rec()->netLength(); - + numObjects++; - - if ( ( maxSize && size > maxSize ) || - ( maxObjects && numObjects > maxObjects ) ){ + + if ( ( maxSize && size > maxSize ) || + ( maxObjects && numObjects > maxObjects ) ) { result.appendBool( "maxReached" , true ); break; } @@ -1010,7 +1111,7 @@ namespace mongo { ostringstream os; os << "Finding size for ns: " << ns; - if ( ! min.isEmpty() ){ + if ( ! min.isEmpty() ) { os << " between " << min << " and " << max; } logIfSlow( timer , os.str() ); @@ -1023,27 +1124,27 @@ namespace mongo { } cmdDatasize; namespace { - long long getIndexSizeForCollection(string db, string ns, BSONObjBuilder* details=NULL, int scale = 1 ){ + long long getIndexSizeForCollection(string db, string ns, BSONObjBuilder* details=NULL, int scale = 1 ) { dbMutex.assertAtLeastReadLocked(); NamespaceDetails * nsd = nsdetails( ns.c_str() ); if ( ! nsd ) return 0; - - long long totalSize = 0; + + long long totalSize = 0; NamespaceDetails::IndexIterator ii = nsd->ii(); - while ( ii.more() ){ + while ( ii.more() ) { IndexDetails& d = ii.next(); string collNS = d.indexNamespace(); NamespaceDetails * mine = nsdetails( collNS.c_str() ); - if ( ! mine ){ + if ( ! mine ) { log() << "error: have index [" << collNS << "] but no NamespaceDetails" << endl; continue; } - totalSize += mine->datasize; + totalSize += mine->stats.datasize; if ( details ) - details->appendNumber( d.indexName() , mine->datasize / scale ); + details->appendNumber( d.indexName() , mine->stats.datasize / scale ); } return totalSize; } @@ -1053,42 +1154,48 @@ namespace mongo { public: CollectionStats() : Command( "collStats", false, "collstats" ) {} virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." 
+ jsobj.firstElement().valuestr(); Client::Context cx( ns ); - + NamespaceDetails * nsd = nsdetails( ns.c_str() ); - if ( ! nsd ){ + if ( ! nsd ) { errmsg = "ns not found"; return false; } result.append( "ns" , ns.c_str() ); - + int scale = 1; - if ( jsobj["scale"].isNumber() ){ + if ( jsobj["scale"].isNumber() ) { scale = jsobj["scale"].numberInt(); - if ( scale <= 0 ){ + if ( scale <= 0 ) { errmsg = "scale has to be > 0"; return false; } - + } - else if ( jsobj["scale"].trueValue() ){ + else if ( jsobj["scale"].trueValue() ) { errmsg = "scale has to be a number > 0"; return false; } - long long size = nsd->datasize / scale; - result.appendNumber( "count" , nsd->nrecords ); + bool verbose = jsobj["verbose"].trueValue(); + + long long size = nsd->stats.datasize / scale; + result.appendNumber( "count" , nsd->stats.nrecords ); result.appendNumber( "size" , size ); - result.append ( "avgObjSize" , double(size) / double(nsd->nrecords) ); + if( nsd->stats.nrecords ) + result.append ( "avgObjSize" , double(size) / double(nsd->stats.nrecords) ); + int numExtents; - result.appendNumber( "storageSize" , nsd->storageSize( &numExtents ) / scale ); + BSONArrayBuilder extents; + + result.appendNumber( "storageSize" , nsd->storageSize( &numExtents , verbose ? &extents : 0 ) / scale ); result.append( "numExtents" , numExtents ); result.append( "nindexes" , nsd->nIndexes ); result.append( "lastExtentSize" , nsd->lastExtentSize / scale ); @@ -1098,12 +1205,15 @@ namespace mongo { BSONObjBuilder indexSizes; result.appendNumber( "totalIndexSize" , getIndexSizeForCollection(dbname, ns, &indexSizes, scale) / scale ); result.append("indexSizes", indexSizes.obj()); - - if ( nsd->capped ){ + + if ( nsd->capped ) { result.append( "capped" , nsd->capped ); result.append( "max" , nsd->max ); } + if ( verbose ) + result.appendArray( "extents" , extents.arr() ); + return true; } } cmdCollectionStatis; @@ -1112,11 +1222,11 @@ namespace mongo { public: DBStats() : Command( "dbStats", false, "dbstats" ) {} virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << " example: { dbstats:1 } "; + help << " example: { dbStats:1 } "; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { list collections; Database* d = cc().database(); if ( d ) @@ -1130,19 +1240,19 @@ namespace mongo { long long indexes = 0; long long indexSize = 0; - for (list::const_iterator it = collections.begin(); it != collections.end(); ++it){ + for (list::const_iterator it = collections.begin(); it != collections.end(); ++it) { const string ns = *it; NamespaceDetails * nsd = nsdetails( ns.c_str() ); - if ( ! nsd ){ + if ( ! nsd ) { errmsg = "missing ns: "; errmsg += ns; return false; } ncollections += 1; - objects += nsd->nrecords; - size += nsd->datasize; + objects += nsd->stats.nrecords; + size += nsd->stats.datasize; int temp; storageSize += nsd->storageSize( &temp ); @@ -1151,10 +1261,11 @@ namespace mongo { indexes += nsd->nIndexes; indexSize += getIndexSizeForCollection(dbname, ns); } - + + result.append ( "db" , dbname ); result.appendNumber( "collections" , ncollections ); result.appendNumber( "objects" , objects ); - result.append ( "avgObjSize" , double(size) / double(objects) ); + result.append ( "avgObjSize" , objects == 0 ? 
0 : double(size) / double(objects) ); result.appendNumber( "dataSize" , size ); result.appendNumber( "storageSize" , storageSize); result.appendNumber( "numExtents" , numExtents ); @@ -1162,7 +1273,7 @@ namespace mongo { result.appendNumber( "indexSize" , indexSize ); result.appendNumber( "fileSize" , d->fileSize() ); - return true; + return true; } } cmdDBStats; @@ -1171,11 +1282,11 @@ namespace mongo { public: CmdCloneCollectionAsCapped() : Command( "cloneCollectionAsCapped" ) {} virtual bool slaveOk() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { help << "{ cloneCollectionAsCapped:, toCollection:, size: }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string from = jsobj.getStringField( "cloneCollectionAsCapped" ); string to = jsobj.getStringField( "toCollection" ); long long size = (long long)jsobj.getField( "size" ).number(); @@ -1189,7 +1300,7 @@ namespace mongo { string toNs = dbname + "." + to; NamespaceDetails *nsd = nsdetails( fromNs.c_str() ); massert( 10301 , "source collection " + fromNs + " does not exist", nsd ); - long long excessSize = nsd->datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size' + long long excessSize = nsd->stats.datasize - size * 2; // datasize and extentSize can't be compared exactly, so add some padding to 'size' DiskLoc extent = nsd->firstExtent; for( ; excessSize > extent.ext()->length && extent != nsd->lastExtent; extent = extent.ext()->xnext ) { excessSize -= extent.ext()->length; @@ -1202,7 +1313,7 @@ namespace mongo { { shared_ptr c = theDataFileMgr.findAll( fromNs.c_str(), startLoc ); ClientCursor *cc = new ClientCursor(0, c, fromNs.c_str()); - id = cc->cursorid; + id = cc->cursorid(); } DBDirectClient client; @@ -1223,20 +1334,20 @@ namespace mongo { } } cmdCloneCollectionAsCapped; - /* jan2010: - Converts the given collection to a capped collection w/ the specified size. - This command is not highly used, and is not currently supported with sharded - environments. + /* jan2010: + Converts the given collection to a capped collection w/ the specified size. + This command is not highly used, and is not currently supported with sharded + environments. */ class CmdConvertToCapped : public Command { public: CmdConvertToCapped() : Command( "convertToCapped" ) {} virtual bool slaveOk() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual void help( stringstream &help ) const { help << "{ convertToCapped:, size: }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { BackgroundOperation::assertNoBgOpInProgForDb(dbname.c_str()); string from = jsobj.getStringField( "convertToCapped" ); @@ -1247,13 +1358,16 @@ namespace mongo { return false; } + string shortTmpName = str::stream() << ".tmp.convertToCapped." << from; + string longTmpName = str::stream() << dbname << "." << shortTmpName; + DBDirectClient client; - client.dropCollection( dbname + "." 
+ from + ".$temp_convertToCapped" ); + client.dropCollection( longTmpName ); BSONObj info; if ( !client.runCommand( dbname , - BSON( "cloneCollectionAsCapped" << from << "toCollection" << ( from + ".$temp_convertToCapped" ) << "size" << double( size ) ), - info ) ) { + BSON( "cloneCollectionAsCapped" << from << "toCollection" << shortTmpName << "size" << double( size ) ), + info ) ) { errmsg = "cloneCollectionAsCapped failed: " + info.toString(); return false; } @@ -1264,9 +1378,9 @@ namespace mongo { } if ( !client.runCommand( "admin", - BSON( "renameCollection" << ( dbname + "." + from + ".$temp_convertToCapped" ) - << "to" << ( dbname + "." + from ) ), - info ) ) { + BSON( "renameCollection" << longTmpName << + "to" << ( dbname + "." + from ) ), + info ) ) { errmsg = "renameCollection failed: " + info.toString(); return false; } @@ -1275,239 +1389,15 @@ namespace mongo { } } cmdConvertToCapped; - class GroupCommand : public Command { - public: - GroupCommand() : Command("group"){} - virtual LockType locktype() const { return READ; } - virtual bool slaveOk() const { return true; } - virtual bool slaveOverrideOk() { return true; } - virtual void help( stringstream &help ) const { - help << "http://www.mongodb.org/display/DOCS/Aggregation"; - } - - BSONObj getKey( const BSONObj& obj , const BSONObj& keyPattern , ScriptingFunction func , double avgSize , Scope * s ){ - if ( func ){ - BSONObjBuilder b( obj.objsize() + 32 ); - b.append( "0" , obj ); - int res = s->invoke( func , b.obj() ); - uassert( 10041 , (string)"invoke failed in $keyf: " + s->getError() , res == 0 ); - int type = s->type("return"); - uassert( 10042 , "return of $key has to be an object" , type == Object ); - return s->getObject( "return" ); - } - return obj.extractFields( keyPattern , true ); - } - - bool group( string realdbname , const string& ns , const BSONObj& query , - BSONObj keyPattern , string keyFunctionCode , string reduceCode , const char * reduceScope , - BSONObj initial , string finalize , - string& errmsg , BSONObjBuilder& result ){ - - - auto_ptr s = globalScriptEngine->getPooledScope( realdbname ); - s->localConnect( realdbname.c_str() ); - - if ( reduceScope ) - s->init( reduceScope ); - - s->setObject( "$initial" , initial , true ); - - s->exec( "$reduce = " + reduceCode , "reduce setup" , false , true , true , 100 ); - s->exec( "$arr = [];" , "reduce setup 2" , false , true , true , 100 ); - ScriptingFunction f = s->createFunction( - "function(){ " - " if ( $arr[n] == null ){ " - " next = {}; " - " Object.extend( next , $key ); " - " Object.extend( next , $initial , true ); " - " $arr[n] = next; " - " next = null; " - " } " - " $reduce( obj , $arr[n] ); " - "}" ); - - ScriptingFunction keyFunction = 0; - if ( keyFunctionCode.size() ){ - keyFunction = s->createFunction( keyFunctionCode.c_str() ); - } - - - double keysize = keyPattern.objsize() * 3; - double keynum = 1; - - map map; - list blah; - - shared_ptr cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); - - while ( cursor->ok() ){ - if ( cursor->matcher() && ! 
cursor->matcher()->matchesCurrent( cursor.get() ) ){ - cursor->advance(); - continue; - } - - BSONObj obj = cursor->current(); - cursor->advance(); - - BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() ); - keysize += key.objsize(); - keynum++; - - int& n = map[key]; - if ( n == 0 ){ - n = map.size(); - s->setObject( "$key" , key , true ); - - uassert( 10043 , "group() can't handle more than 10000 unique keys" , n <= 10000 ); - } - - s->setObject( "obj" , obj , true ); - s->setNumber( "n" , n - 1 ); - if ( s->invoke( f , BSONObj() , 0 , true ) ){ - throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() ); - } - } - - if (!finalize.empty()){ - s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 ); - ScriptingFunction g = s->createFunction( - "function(){ " - " for(var i=0; i < $arr.length; i++){ " - " var ret = $finalize($arr[i]); " - " if (ret !== undefined) " - " $arr[i] = ret; " - " } " - "}" ); - s->invoke( g , BSONObj() , 0 , true ); - } - - result.appendArray( "retval" , s->getObject( "$arr" ) ); - result.append( "count" , keynum - 1 ); - result.append( "keys" , (int)(map.size()) ); - s->exec( "$arr = [];" , "reduce setup 2" , false , true , true , 100 ); - s->gc(); - - return true; - } - - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - - /* db.$cmd.findOne( { group :
<p>
} ) */ - const BSONObj& p = jsobj.firstElement().embeddedObjectUserCheck(); - - BSONObj q; - if ( p["cond"].type() == Object ) - q = p["cond"].embeddedObject(); - else if ( p["condition"].type() == Object ) - q = p["condition"].embeddedObject(); - else - q = getQuery( p ); - - if ( p["ns"].type() != String ){ - errmsg = "ns has to be set"; - return false; - } - - string ns = dbname + "." + p["ns"].String(); - - BSONObj key; - string keyf; - if ( p["key"].type() == Object ){ - key = p["key"].embeddedObjectUserCheck(); - if ( ! p["$keyf"].eoo() ){ - errmsg = "can't have key and $keyf"; - return false; - } - } - else if ( p["$keyf"].type() ){ - keyf = p["$keyf"]._asCode(); - } - else { - // no key specified, will use entire object as key - } - - BSONElement reduce = p["$reduce"]; - if ( reduce.eoo() ){ - errmsg = "$reduce has to be set"; - return false; - } - - BSONElement initial = p["initial"]; - if ( initial.type() != Object ){ - errmsg = "initial has to be an object"; - return false; - } - - - string finalize; - if (p["finalize"].type()) - finalize = p["finalize"]._asCode(); - - return group( dbname , ns , q , - key , keyf , reduce._asCode() , reduce.type() != CodeWScope ? 0 : reduce.codeWScopeScopeData() , - initial.embeddedObject() , finalize , - errmsg , result ); - } - - } cmdGroup; - - - class DistinctCommand : public Command { - public: - DistinctCommand() : Command("distinct"){} - virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return READ; } - virtual void help( stringstream &help ) const { - help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; - } - - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - string ns = dbname + '.' + cmdObj.firstElement().valuestr(); - - string key = cmdObj["key"].valuestrsafe(); - BSONObj keyPattern = BSON( key << 1 ); - - BSONObj query = getQuery( cmdObj ); - - BSONElementSet values; - shared_ptr cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); - scoped_ptr cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); - - while ( cursor->ok() ){ - if ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() ) ){ - BSONObj o = cursor->current(); - o.getFieldsDotted( key, values ); - } - - cursor->advance(); - - if (!cc->yieldSometimes()) - break; - } - - BSONArrayBuilder b( result.subarrayStart( "values" ) ); - for ( BSONElementSet::iterator i = values.begin() ; i != values.end(); i++ ){ - b.append( *i ); - } - BSONObj arr = b.done(); - - uassert(10044, "distinct too big, 4mb cap", - (arr.objsize() + 1024) < (4 * 1024 * 1024)); - - return true; - } - - } distinctCmd; - /* Find and Modify an object returning either the old (default) or new value*/ class CmdFindAndModify : public Command { public: virtual void help( stringstream &help ) const { - help << - "{ findandmodify: \"collection\", query: {processed:false}, update: {$set: {processed:true}}, new: true}\n" - "{ findandmodify: \"collection\", query: {processed:false}, remove: true, sort: {priority:-1}}\n" - "Either update or remove is required, all other fields have default values.\n" - "Output is in the \"value\" field\n"; + help << + "{ findAndModify: \"collection\", query: {processed:false}, update: {$set: {processed:true}}, new: true}\n" + "{ findAndModify: \"collection\", query: {processed:false}, remove: true, sort: {priority:-1}}\n" + "Either update or remove is required, all other fields have default values.\n" + "Output is in the \"value\" field\n"; } 
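        /* Usage sketch for the command above: a C++ client goes through runCommand() rather than a
           dedicated helper. The connection details ("localhost", db "test", collection "jobs") are
           illustrative only and not taken from this patch; field names follow the help text above.

               DBClientConnection conn;
               string errmsg;
               if ( !conn.connect( "localhost", errmsg ) )
                   cout << "connect failed: " << errmsg << endl;

               BSONObj cmd = BSON( "findAndModify" << "jobs"
                                   << "query"  << BSON( "processed" << false )
                                   << "update" << BSON( "$set" << BSON( "processed" << true ) )
                                   << "new"    << true );
               BSONObj info;
               if ( conn.runCommand( "test", cmd, info ) )
                   cout << info["value"] << endl;   // the modified document is returned in "value"
        */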
CmdFindAndModify() : Command("findAndModify", false, "findandmodify") { } @@ -1517,7 +1407,7 @@ namespace mongo { virtual bool slaveOk() const { return false; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return WRITE; } virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { static DBDirectClient db; @@ -1535,8 +1425,8 @@ namespace mongo { const BSONObj* fields = (fieldsHolder.isEmpty() ? NULL : &fieldsHolder); BSONObj out = db.findOne(ns, q, fields); - if (out.isEmpty()){ - if (!upsert){ + if (out.isEmpty()) { + if (!upsert) { errmsg = "No matching object found"; return false; } @@ -1546,9 +1436,13 @@ namespace mongo { uassert(13330, "upsert mode requires query field", !origQuery.isEmpty()); db.update(ns, origQuery, update.embeddedObjectUserCheck(), true); - if (cmdObj["new"].trueValue()){ - BSONObj gle = db.getLastErrorDetailed(); + BSONObj gle = db.getLastErrorDetailed(); + if (gle["err"].type() == String) { + errmsg = gle["err"].String(); + return false; + } + if (cmdObj["new"].trueValue()) { BSONElement _id = gle["upserted"]; if (_id.eoo()) _id = origQuery["_id"]; @@ -1556,33 +1450,46 @@ namespace mongo { out = db.findOne(ns, QUERY("_id" << _id), fields); } - } else { - - Query idQuery = QUERY( "_id" << out["_id"]); + } + else { - if (cmdObj["remove"].trueValue()){ + if (cmdObj["remove"].trueValue()) { uassert(12515, "can't remove and update", cmdObj["update"].eoo()); - db.remove(ns, idQuery, 1); - - } else { // update - - // need to include original query for $ positional operator - BSONObjBuilder b; - b.append(out["_id"]); - BSONObjIterator it(origQuery); - while (it.more()){ - BSONElement e = it.next(); - if (strcmp(e.fieldName(), "_id")) - b.append(e); + db.remove(ns, QUERY("_id" << out["_id"]), 1); + + } + else { // update + + BSONElement queryId = origQuery["_id"]; + if (queryId.eoo() || getGtLtOp(queryId) != BSONObj::Equality) { + // need to include original query for $ positional operator + + BSONObjBuilder b; + b.append(out["_id"]); + BSONObjIterator it(origQuery); + while (it.more()) { + BSONElement e = it.next(); + if (strcmp(e.fieldName(), "_id")) + b.append(e); + } + q = Query(b.obj()); } - q = Query(b.obj()); + + if (q.isComplex()) // update doesn't work with complex queries + q = Query(q.getFilter().getOwned()); BSONElement update = cmdObj["update"]; uassert(12516, "must specify remove or update", !update.eoo()); db.update(ns, q, update.embeddedObjectUserCheck()); + BSONObj gle = db.getLastErrorDetailed(); + if (gle["err"].type() == String) { + errmsg = gle["err"].String(); + return false; + } + if (cmdObj["new"].trueValue()) - out = db.findOne(ns, idQuery, fields); + out = db.findOne(ns, QUERY("_id" << out["_id"]), fields); } } @@ -1591,7 +1498,7 @@ namespace mongo { return true; } } cmdFindAndModify; - + /* Returns client's uri */ class CmdWhatsMyUri : public Command { public: @@ -1599,20 +1506,20 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual void help( stringstream &help ) const { help << "{whatsmyuri:1}"; - } + } virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { BSONObj info = cc().curop()->infoNoauth(); result << "you" << info[ "client" ]; return true; } } cmdWhatsMyUri; - + /* For testing only, not for general use */ class GodInsert : 
public Command { public: @@ -1629,7 +1536,7 @@ namespace mongo { } virtual void help( stringstream &help ) const { help << "internal. for testing only."; - } + } virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "godinsert" ].valuestrsafe(); uassert( 13049, "godinsert must specify a collection", !coll.empty() ); @@ -1642,31 +1549,32 @@ namespace mongo { class DBHashCmd : public Command { public: - DBHashCmd() : Command( "dbHash", false, "dbhash" ){} + DBHashCmd() : Command( "dbHash", false, "dbhash" ) {} virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return READ; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { list colls; Database* db = cc().database(); if ( db ) db->namespaceIndex.getNamespaces( colls ); colls.sort(); - + result.appendNumber( "numCollections" , (long long)colls.size() ); - + result.append( "host" , prettyHostName() ); + md5_state_t globalState; md5_init(&globalState); BSONObjBuilder bb( result.subobjStart( "collections" ) ); - for ( list::iterator i=colls.begin(); i != colls.end(); i++ ){ + for ( list::iterator i=colls.begin(); i != colls.end(); i++ ) { string c = *i; if ( c.find( ".system.profil" ) != string::npos ) continue; - + shared_ptr cursor; NamespaceDetails * nsd = nsdetails( c.c_str() ); - + // debug SERVER-761 NamespaceDetails::IndexIterator ii = nsd->ii(); while( ii.more() ) { @@ -1678,15 +1586,15 @@ namespace mongo { log() << endl; } } - + int idNum = nsd->findIdIndex(); - if ( idNum >= 0 ){ + if ( idNum >= 0 ) { cursor.reset( new BtreeCursor( nsd , idNum , nsd->idx( idNum ) , BSONObj() , BSONObj() , false , 1 ) ); } - else if ( c.find( ".system." ) != string::npos ){ + else if ( c.find( ".system." ) != string::npos ) { continue; } - else if ( nsd->capped ){ + else if ( nsd->capped ) { cursor = findTableScan( c.c_str() , BSONObj() ); } else { @@ -1697,9 +1605,9 @@ namespace mongo { md5_state_t st; md5_init(&st); - + long long n = 0; - while ( cursor->ok() ){ + while ( cursor->ok() ) { BSONObj c = cursor->current(); md5_append( &st , (const md5_byte_t*)c.objdata() , c.objsize() ); n++; @@ -1708,7 +1616,7 @@ namespace mongo { md5digest d; md5_finish(&st, d); string hash = digestToString( d ); - + bb.append( c.c_str() + ( dbname.size() + 1 ) , hash ); md5_append( &globalState , (const md5_byte_t*)hash.c_str() , hash.size() ); @@ -1727,9 +1635,9 @@ namespace mongo { } dbhashCmd; /* for diagnostic / testing purposes. 
*/ - class CmdSleep : public Command { + class CmdSleep : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool adminOnly() const { return true; } virtual bool logTheOp() { return false; } virtual bool slaveOk() const { return true; } @@ -1739,46 +1647,43 @@ namespace mongo { } CmdSleep() : Command("sleep") { } bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( cmdObj.getBoolField("w") ) { + + + int secs = 100; + if ( cmdObj["secs"].isNumber() ) + secs = cmdObj["secs"].numberInt(); + + if( cmdObj.getBoolField("w") ) { writelock lk(""); - sleepsecs(100); + sleepsecs(secs); } else { readlock lk(""); - sleepsecs(100); + sleepsecs(secs); } + return true; } } cmdSleep; - class AvailableQueryOptions : public Command { - public: - AvailableQueryOptions() : Command( "availablequeryoptions" ){} - virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return NONE; } - virtual bool requiresAuth() { return false; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - result << "options" << QueryOption_AllSupported; - return true; - } - } availableQueryOptionsCmd; - // just for testing class CapTrunc : public Command { public: - CapTrunc() : Command( "captrunc" ){} + CapTrunc() : Command( "captrunc" ) {} virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "captrunc" ].valuestrsafe(); uassert( 13416, "captrunc must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; int n = cmdObj.getIntField( "n" ); + + // inclusive range? bool inc = cmdObj.getBoolField( "inc" ); NamespaceDetails *nsd = nsdetails( ns.c_str() ); ReverseCappedCursor c( nsd ); - massert( 13417, "captrunc invalid collection", c.ok() ); + massert( 13417, "captrunc collection not found or empty", c.ok() ); for( int i = 0; i < n; ++i ) { massert( 13418, "captrunc invalid n", c.advance() ); } @@ -1786,16 +1691,16 @@ namespace mongo { nsd->cappedTruncateAfter( ns.c_str(), end, inc ); return true; } - } capTruncCmd; - + } capTruncCmd; + // just for testing class EmptyCapped : public Command { public: - EmptyCapped() : Command( "emptycapped" ){} + EmptyCapped() : Command( "emptycapped" ) {} virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "emptycapped" ].valuestrsafe(); uassert( 13428, "emptycapped must specify a collection", !coll.empty() ); string ns = dbname + "." 
+ coll; @@ -1804,9 +1709,9 @@ namespace mongo { nsd->emptyCappedCollection( ns.c_str() ); return true; } - } emptyCappedCmd; - - /** + } emptyCappedCmd; + + /** * this handles - auth - locking @@ -1814,53 +1719,52 @@ namespace mongo { then calls run() */ bool execCommand( Command * c , - Client& client , int queryOptions , - const char *cmdns, BSONObj& cmdObj , - BSONObjBuilder& result, - bool fromRepl ){ - + Client& client , int queryOptions , + const char *cmdns, BSONObj& cmdObj , + BSONObjBuilder& result, + bool fromRepl ) { + string dbname = nsToDatabase( cmdns ); - - AuthenticationInfo *ai = client.getAuthenticationInfo(); - if( c->adminOnly() && c->localHostOnlyIfNoAuth( cmdObj ) && noauth && !ai->isLocalHost ) { - result.append( "errmsg" , + AuthenticationInfo *ai = client.getAuthenticationInfo(); + + if( c->adminOnly() && c->localHostOnlyIfNoAuth( cmdObj ) && noauth && !ai->isLocalHost ) { + result.append( "errmsg" , "unauthorized: this command must run from localhost when running db without auth" ); log() << "command denied: " << cmdObj.toString() << endl; return false; } - if ( c->adminOnly() && ! fromRepl && dbname != "admin" ) { result.append( "errmsg" , "access denied; use admin db" ); log() << "command denied: " << cmdObj.toString() << endl; return false; - } + } - if ( cmdObj["help"].trueValue() ){ + if ( cmdObj["help"].trueValue() ) { stringstream ss; ss << "help for: " << c->name << " "; c->help( ss ); result.append( "help" , ss.str() ); result.append( "lockType" , c->locktype() ); return true; - } + } - bool canRunHere = + bool canRunHere = isMaster( dbname.c_str() ) || c->slaveOk() || ( c->slaveOverrideOk() && ( queryOptions & QueryOption_SlaveOk ) ) || fromRepl; - if ( ! canRunHere ){ + if ( ! canRunHere ) { result.append( "errmsg" , "not master" ); return false; } if ( c->adminOnly() ) log( 2 ) << "command: " << cmdObj << endl; - - if ( c->locktype() == Command::NONE ){ + + if ( c->locktype() == Command::NONE ) { // we also trust that this won't crash string errmsg; int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl ); @@ -1868,35 +1772,35 @@ namespace mongo { result.append( "errmsg" , errmsg ); return ok; } - + bool needWriteLock = c->locktype() == Command::WRITE; - - if ( ! needWriteLock ){ + + if ( ! needWriteLock ) { assert( ! c->logTheOp() ); } mongolock lk( needWriteLock ); Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() ); - + try { string errmsg; - if ( ! c->run(dbname, cmdObj, errmsg, result, fromRepl ) ){ + if ( ! c->run(dbname, cmdObj, errmsg, result, fromRepl ) ) { result.append( "errmsg" , errmsg ); return false; } } - catch ( DBException& e ){ + catch ( DBException& e ) { stringstream ss; ss << "exception: " << e.what(); result.append( "errmsg" , ss.str() ); result.append( "code" , e.getCode() ); return false; } - - if ( c->logTheOp() && ! fromRepl ){ + + if ( c->logTheOp() && ! fromRepl ) { logOp("c", cmdns, cmdObj); } - + return true; } @@ -1912,9 +1816,9 @@ namespace mongo { cc().curop()->ensureStarted(); string dbname = nsToDatabase( ns ); - if( logLevel >= 1 ) + if( logLevel >= 1 ) log() << "run command " << ns << ' ' << _cmdobj << endl; - + const char *p = strchr(ns, '.'); if ( !p ) return false; if ( strcmp(p, ".$cmd") != 0 ) return false; @@ -1934,14 +1838,14 @@ namespace mongo { bool ok = false; BSONElement e = jsobj.firstElement(); - + Command * c = e.type() ? 
Command::findCommand( e.fieldName() ) : 0; - if ( c ){ + if ( c ) { ok = execCommand( c , client , queryOptions , ns , jsobj , anObjBuilder , fromRepl ); } else { - anObjBuilder.append("errmsg", "no such cmd"); + anObjBuilder.append("errmsg", str::stream() << "no such cmd: " << e.fieldName() ); anObjBuilder.append("bad cmd" , _cmdobj ); } @@ -1953,5 +1857,5 @@ namespace mongo { return true; } - + } // namespace mongo diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp index 2d08ac8..82a9c91 100644 --- a/db/dbcommands_admin.cpp +++ b/db/dbcommands_admin.cpp @@ -25,34 +25,36 @@ #include "pch.h" #include "jsobj.h" #include "pdfile.h" -#include "namespace.h" +#include "namespace-inl.h" #include "commands.h" #include "cmdline.h" #include "btree.h" -#include "curop.h" +#include "curop-inl.h" #include "../util/background.h" +#include "../util/logfile.h" +#include "../util/alignedbuilder.h" #include "../scripting/engine.h" namespace mongo { class CleanCmd : public Command { public: - CleanCmd() : Command( "clean" ){} + CleanCmd() : Command( "clean" ) {} virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return WRITE; } - + virtual LockType locktype() const { return WRITE; } + virtual void help(stringstream& h) const { h << "internal"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string dropns = dbname + "." + cmdObj.firstElement().valuestrsafe(); - + if ( !cmdLine.quiet ) tlog() << "CMD: clean " << dropns << endl; - + NamespaceDetails *d = nsdetails(dropns.c_str()); - - if ( ! d ){ + + if ( ! d ) { errmsg = "ns not found"; return 0; } @@ -63,39 +65,108 @@ namespace mongo { result.append("ns", dropns.c_str()); return 1; } - + } cleanCmd; - + + namespace dur { + filesystem::path getJournalDir(); + } + + class JournalLatencyTestCmd : public Command { + public: + JournalLatencyTestCmd() : Command( "journalLatencyTest" ) {} + + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual bool adminOnly() const { return true; } + virtual void help(stringstream& h) const { h << "test how long to write and fsync to a test file in the journal/ directory"; } + + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + filesystem::path p = dur::getJournalDir(); + p /= "journalLatencyTest"; + + // remove file if already present + try { + remove(p); + } + catch(...) { } + + BSONObjBuilder bb[2]; + for( int pass = 0; pass < 2; pass++ ) { + LogFile f(p.string()); + AlignedBuilder b(1024 * 1024); + { + Timer t; + for( int i = 0 ; i < 100; i++ ) { + f.synchronousAppend(b.buf(), 8192); + } + bb[pass].append("8KB", t.millis() / 100.0); + } + { + const int N = 50; + Timer t2; + long long x = 0; + for( int i = 0 ; i < N; i++ ) { + Timer t; + f.synchronousAppend(b.buf(), 8192); + x += t.micros(); + sleepmillis(4); + } + long long y = t2.micros() - 4*N*1000; + // not really trusting the timer granularity on all platforms so whichever is higher of x and y + bb[pass].append("8KBWithPauses", max(x,y) / (N*1000.0)); + } + { + Timer t; + for( int i = 0 ; i < 20; i++ ) { + f.synchronousAppend(b.buf(), 1024 * 1024); + } + bb[pass].append("1MB", t.millis() / 20.0); + } + // second time around, we are prealloced. 
+ } + result.append("timeMillis", bb[0].obj()); + result.append("timeMillisWithPrealloc", bb[1].obj()); + + try { + remove(p); + } + catch(...) { } + + return 1; + } + } journalLatencyTestCmd; + class ValidateCmd : public Command { public: - ValidateCmd() : Command( "validate" ){} + ValidateCmd() : Command( "validate" ) {} virtual bool slaveOk() const { return true; } - + virtual void help(stringstream& h) const { h << "Validate contents of a namespace by scanning its data structures for correctness. Slow."; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } //{ validate: "collectionnamewithoutthedbpart" [, scandata: ] } */ - - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + + bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." + cmdObj.firstElement().valuestrsafe(); NamespaceDetails * d = nsdetails( ns.c_str() ); if ( !cmdLine.quiet ) tlog() << "CMD: validate " << ns << endl; - if ( ! d ){ + if ( ! d ) { errmsg = "ns not found"; return 0; } - + result.append( "ns", ns ); result.append( "result" , validateNS( ns.c_str() , d, &cmdObj ) ); return 1; } - - + + string validateNS(const char *ns, NamespaceDetails *d, BSONObj *cmdObj) { bool scanData = true; if( cmdObj && cmdObj->hasElement("scandata") && !cmdObj->getBoolField("scandata") ) @@ -106,13 +177,13 @@ namespace mongo { //ss << " details: " << hex << d << " ofs:" << nsindex(ns)->detailsOffset(d) << dec << endl; if ( d->capped ) ss << " capped:" << d->capped << " max:" << d->max << '\n'; - - ss << " firstExtent:" << d->firstExtent.toString() << " ns:" << d->firstExtent.ext()->nsDiagnostic.buf << '\n'; - ss << " lastExtent:" << d->lastExtent.toString() << " ns:" << d->lastExtent.ext()->nsDiagnostic.buf << '\n'; + + ss << " firstExtent:" << d->firstExtent.toString() << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString()<< '\n'; + ss << " lastExtent:" << d->lastExtent.toString() << " ns:" << d->lastExtent.ext()->nsDiagnostic.toString() << '\n'; try { d->firstExtent.ext()->assertOk(); d->lastExtent.ext()->assertOk(); - + DiskLoc el = d->firstExtent; int ne = 0; while( !el.isNull() ) { @@ -123,12 +194,13 @@ namespace mongo { killCurrentOp.checkForInterrupt(); } ss << " # extents:" << ne << '\n'; - } catch (...) { + } + catch (...) { valid=false; ss << " extent asserted "; } - ss << " datasize?:" << d->datasize << " nrecords?:" << d->nrecords << " lastExtentSize:" << d->lastExtentSize << '\n'; + ss << " datasize?:" << d->stats.datasize << " nrecords?:" << d->stats.nrecords << " lastExtentSize:" << d->lastExtentSize << '\n'; ss << " padding:" << d->paddingFactor << '\n'; try { @@ -175,7 +247,7 @@ namespace mongo { else ss << " (OK)"; ss << '\n'; } - ss << " " << n << " objects found, nobj:" << d->nrecords << '\n'; + ss << " " << n << " objects found, nobj:" << d->stats.nrecords << '\n'; ss << " " << len << " bytes data w/headers\n"; ss << " " << nlen << " bytes data wout/headers\n"; } @@ -198,7 +270,7 @@ namespace mongo { ndel++; if ( loc.questionable() ) { - if( d->capped && !loc.isValid() && i == 1 ) { + if( d->capped && !loc.isValid() && i == 1 ) { /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid see comments in namespace.h */ @@ -218,7 +290,8 @@ namespace mongo { k++; killCurrentOp.checkForInterrupt(); } - } catch (...) { + } + catch (...) 
{ ss <<" ?exception in deleted chain for bucket " << i << endl; valid = false; } @@ -236,7 +309,7 @@ namespace mongo { while( i.more() ) { IndexDetails& id = i.next(); ss << " " << id.indexNamespace() << " keys:" << - id.head.btree()->fullValidate(id.head, id.keyPattern()) << endl; + id.head.btree()->fullValidate(id.head, id.keyPattern()) << endl; } } catch (...) { @@ -261,36 +334,36 @@ namespace mongo { extern unsigned lockedForWriting; extern mongo::mutex lockedForWritingMutex; -/* - class UnlockCommand : public Command { - public: - UnlockCommand() : Command( "unlock" ) { } - virtual bool readOnly() { return true; } - virtual bool slaveOk() const { return true; } - virtual bool adminOnly() const { return true; } - virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( lockedForWriting ) { - log() << "command: unlock requested" << endl; - errmsg = "unlock requested"; - unlockRequested = true; - } - else { - errmsg = "not locked, so cannot unlock"; - return 0; + /* + class UnlockCommand : public Command { + public: + UnlockCommand() : Command( "unlock" ) { } + virtual bool readOnly() { return true; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( lockedForWriting ) { + log() << "command: unlock requested" << endl; + errmsg = "unlock requested"; + unlockRequested = true; + } + else { + errmsg = "not locked, so cannot unlock"; + return 0; + } + return 1; } - return 1; - } - - } unlockCommand; -*/ + + } unlockCommand; + */ /* see unlockFsync() for unlocking: db.$cmd.sys.unlock.findOne() */ class FSyncCommand : public Command { - class LockDBJob : public BackgroundJob { + class LockDBJob : public BackgroundJob { protected: - string name() { return "lockdbjob"; } - void run() { + virtual string name() const { return "lockdbjob"; } + void run() { Client::initThread("fsyncjob"); Client& c = cc(); { @@ -301,8 +374,8 @@ namespace mongo { MemoryMappedFile::flushAll(true); log() << "db is now locked for snapshotting, no writes allowed. use db.$cmd.sys.unlock.findOne() to unlock" << endl; _ready = true; - while( 1 ) { - if( unlockRequested ) { + while( 1 ) { + if( unlockRequested ) { unlockRequested = false; break; } @@ -316,54 +389,70 @@ namespace mongo { } public: bool& _ready; - LockDBJob(bool& ready) : _ready(ready) { - deleteSelf = true; + LockDBJob(bool& ready) : BackgroundJob( true /* delete self */ ), _ready(ready) { _ready = false; } }; public: - FSyncCommand() : Command( "fsync" ){} - virtual LockType locktype() const { return WRITE; } + FSyncCommand() : Command( "fsync" ) {} + virtual LockType locktype() const { return WRITE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } - /*virtual bool localHostOnlyIfNoAuth(const BSONObj& cmdObj) { + /*virtual bool localHostOnlyIfNoAuth(const BSONObj& cmdObj) { string x = cmdObj["exec"].valuestrsafe(); return !x.empty(); }*/ virtual void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/fsync+Command"; } virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - /* async means do an fsync, but return immediately */ - bool sync = ! 
cmdObj["async"].trueValue(); + bool sync = !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately bool lock = cmdObj["lock"].trueValue(); log() << "CMD fsync: sync:" << sync << " lock:" << lock << endl; - if( lock ) { + if( lock ) { + // fsync and lock variation + uassert(12034, "fsync: can't lock while an unlock is pending", !unlockRequested); uassert(12032, "fsync: sync option must be true when using lock", sync); - /* With releaseEarly(), we must be extremely careful we don't do anything - where we would have assumed we were locked. profiling is one of those things. - Perhaps at profile time we could check if we released early -- however, + /* With releaseEarly(), we must be extremely careful we don't do anything + where we would have assumed we were locked. profiling is one of those things. + Perhaps at profile time we could check if we released early -- however, we need to be careful to keep that code very fast it's a very common code path when on. */ uassert(12033, "fsync: profiling must be off to enter locked mode", cc().database()->profile == 0); + + // todo future: Perhaps we could do this in the background thread. As is now, writes may interleave between + // the releaseEarly below and the acquisition of the readlock in the background thread. + // However the real problem is that it seems complex to unlock here and then have a window for + // writes before the bg job -- can be done correctly but harder to reason about correctness. + // If this command ran within a read lock in the first place, would it work, and then that + // would be quite easy? + // Or, could we downgrade the write lock to a read lock, wait for ready, then release? + getDur().syncDataAndTruncateJournal(); + bool ready = false; LockDBJob *l = new LockDBJob(ready); + dbMutex.releaseEarly(); + l->go(); - // don't return until background thread has acquired the write lock - while( !ready ) { + // don't return until background thread has acquired the read lock + while( !ready ) { sleepmillis(10); } result.append("info", "now locked against writes, use db.$cmd.sys.unlock.findOne() to unlock"); } else { + // the simple fsync command case + + if (sync) + getDur().commitNow(); result.append( "numFiles" , MemoryMappedFile::flushAll( sync ) ); } return 1; } - + } fsyncCmd; - + } diff --git a/db/dbcommands_generic.cpp b/db/dbcommands_generic.cpp index 25c6a93..a555b6c 100644 --- a/db/dbcommands_generic.cpp +++ b/db/dbcommands_generic.cpp @@ -52,114 +52,192 @@ namespace mongo { CmdBuildInfo() : Command( "buildInfo", true, "buildinfo" ) {} virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual void help( stringstream &help ) const { help << "get version #, etc.\n"; help << "{ buildinfo:1 }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo(); result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 ); - result.appendBool( "debug" , -#ifdef _DEBUG - true -#else - false -#endif - ); + result.appendBool( "debug" , debug ); + result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize); return true; } } cmdBuildInfo; + /** experimental. either remove or add support in repl sets also. 
in a repl set, getting this setting from the + repl set config could make sense. + */ + unsigned replApplyBatchSize = 1; - /* just to check if the db has asserted */ - class CmdAssertInfo : public Command { + class CmdGet : public Command { public: - virtual bool slaveOk() const { + CmdGet() : Command( "getParameter" ) { } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream &help ) const { + help << "get administrative option(s)\nexample:\n"; + help << "{ getParameter:1, notablescan:1 }\n"; + help << "supported so far:\n"; + help << " quiet\n"; + help << " notablescan\n"; + help << " logLevel\n"; + help << " syncdelay\n"; + help << "{ getParameter:'*' } to get everything\n"; + } + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool all = *cmdObj.firstElement().valuestrsafe() == '*'; + + int before = result.len(); + + if( all || cmdObj.hasElement("quiet") ) { + result.append("quiet", cmdLine.quiet ); + } + if( all || cmdObj.hasElement("notablescan") ) { + result.append("notablescan", cmdLine.noTableScan); + } + if( all || cmdObj.hasElement("logLevel") ) { + result.append("logLevel", logLevel); + } + if( all || cmdObj.hasElement("syncdelay") ) { + result.append("syncdelay", cmdLine.syncdelay); + } + if( all || cmdObj.hasElement("replApplyBatchSize") ) { + result.append("replApplyBatchSize", replApplyBatchSize); + } + + if ( before == result.len() ) { + errmsg = "no option found to get"; + return false; + } return true; } - virtual void help( stringstream& help ) const { - help << "check if any asserts have occurred on the server"; + } cmdGet; + + class CmdSet : public Command { + public: + CmdSet() : Command( "setParameter" ) { } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream &help ) const { + help << "set administrative option(s)\nexample:\n"; + help << "{ setParameter:1, notablescan:true }\n"; + help << "supported so far:\n"; + help << " notablescan\n"; + help << " logLevel\n"; + help << " quiet\n"; } - virtual LockType locktype() const { return WRITE; } - CmdAssertInfo() : Command("assertInfo",true,"assertinfo") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - result.appendBool("dbasserted", lastAssert[0].isSet() || lastAssert[1].isSet() || lastAssert[2].isSet()); - result.appendBool("asserted", lastAssert[0].isSet() || lastAssert[1].isSet() || lastAssert[2].isSet() || lastAssert[3].isSet()); - result.append("assert", lastAssert[AssertRegular].toString()); - result.append("assertw", lastAssert[AssertW].toString()); - result.append("assertmsg", lastAssert[AssertMsg].toString()); - result.append("assertuser", lastAssert[AssertUser].toString()); + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + int s = 0; + if( cmdObj.hasElement("notablescan") ) { + result.append("was", cmdLine.noTableScan); + cmdLine.noTableScan = cmdObj["notablescan"].Bool(); + s++; + } + if( cmdObj.hasElement("quiet") ) { + result.append("was", cmdLine.quiet ); + cmdLine.quiet = cmdObj["quiet"].Bool(); + s++; + } + if( cmdObj.hasElement("syncdelay") ) { + result.append("was", cmdLine.syncdelay ); + cmdLine.syncdelay = cmdObj["syncdelay"].Number(); + s++; + } + if( 
cmdObj.hasElement( "logLevel" ) ) { + result.append("was", logLevel ); + logLevel = cmdObj["logLevel"].numberInt(); + s++; + } + if( cmdObj.hasElement( "replApplyBatchSize" ) ) { + result.append("was", replApplyBatchSize ); + BSONElement e = cmdObj["replApplyBatchSize"]; + ParameterValidator * v = ParameterValidator::get( e.fieldName() ); + assert( v ); + if ( ! v->isValid( e , errmsg ) ) + return false; + replApplyBatchSize = e.numberInt(); + s++; + } + + if( s == 0 ) { + errmsg = "no option found to set, use '*' to get all "; + return false; + } + return true; } - } cmdAsserts; + } cmdSet; class PingCommand : public Command { public: - PingCommand() : Command( "ping" ){} + PingCommand() : Command( "ping" ) {} virtual bool slaveOk() const { return true; } virtual void help( stringstream &help ) const { help << "a way to check that the server is alive. responds immediately even if server is in a db lock."; } virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { // IMPORTANT: Don't put anything in here that might lock db - including authentication return true; } } pingCmd; - + class FeaturesCmd : public Command { public: - FeaturesCmd() : Command( "features", true ){} - void help(stringstream& h) const { h << "return on build level feature settings"; } + FeaturesCmd() : Command( "features", true ) {} + void help(stringstream& h) const { h << "return build level feature settings"; } virtual bool slaveOk() const { return true; } - virtual bool readOnly(){ return true; } - virtual LockType locktype() const { return READ; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ - if ( globalScriptEngine ){ + virtual bool readOnly() { return true; } + virtual LockType locktype() const { return NONE; } + virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if ( globalScriptEngine ) { BSONObjBuilder bb( result.subobjStart( "js" ) ); result.append( "utf8" , globalScriptEngine->utf8Ok() ); bb.done(); } - if ( cmdObj["oidReset"].trueValue() ){ - result.append( "oidMachineOld" , OID::staticMachine() ); - OID::newState(); + if ( cmdObj["oidReset"].trueValue() ) { + result.append( "oidMachineOld" , OID::getMachineId() ); + OID::regenMachineId(); } - result.append( "oidMachine" , OID::staticMachine() ); + result.append( "oidMachine" , OID::getMachineId() ); return true; } - + } featuresCmd; class LogRotateCmd : public Command { public: - LogRotateCmd() : Command( "logRotate" ){} - virtual LockType locktype() const { return NONE; } + LogRotateCmd() : Command( "logRotate" ) {} + virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { rotateLogs(); return 1; - } - + } + } logRotateCmd; - + class ListCommandsCmd : public Command { public: virtual void help( stringstream &help ) const { help << "get a list of all db commands"; } - ListCommandsCmd() : Command( "listCommands", false ){} - virtual LockType locktype() const { return NONE; } + ListCommandsCmd() : Command( "listCommands", false ) {} + virtual LockType locktype() const { return NONE; } virtual 
bool slaveOk() const { return true; } virtual bool adminOnly() const { return false; } virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONObjBuilder b( result.subobjStart( "commands" ) ); - for ( map::iterator i=_commands->begin(); i!=_commands->end(); ++i ){ + for ( map::iterator i=_commands->begin(); i!=_commands->end(); ++i ) { Command * c = i->second; // don't show oldnames if (i->first != c->name) continue; - BSONObjBuilder temp( b.subobjStart( c->name.c_str() ) ); + BSONObjBuilder temp( b.subobjStart( c->name ) ); { stringstream help; @@ -174,10 +252,10 @@ namespace mongo { b.done(); return 1; - } + } } listCommandsCmd; - + class CmdShutdown : public Command { public: virtual bool requiresAuth() { return true; } @@ -189,7 +267,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const { help << "shutdown the database. must be ran against admin db and either (1) ran from localhost or (2) authenticated.\n"; } @@ -199,8 +277,11 @@ namespace mongo { if ( c ) { c->shutdown(); } + log() << "terminating, shutdown command received" << endl; - dbexit( EXIT_CLEAN ); // this never returns + + dbexit( EXIT_CLEAN , "shutdown called" , true ); // this never returns + assert(0); return true; } } cmdShutdown; @@ -217,7 +298,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } CmdForceError() : Command("forceerror") {} bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { uassert( 10038 , "forced error", false); @@ -225,6 +306,17 @@ namespace mongo { } } cmdForceError; - + class AvailableQueryOptions : public Command { + public: + AvailableQueryOptions() : Command( "availableQueryOptions" , false , "availablequeryoptions" ) {} + virtual bool slaveOk() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual bool requiresAuth() { return false; } + virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + result << "options" << QueryOption_AllSupported; + return true; + } + } availableQueryOptionsCmd; + } diff --git a/db/dbeval.cpp b/db/dbeval.cpp index e8a42b2..31d5260 100644 --- a/db/dbeval.cpp +++ b/db/dbeval.cpp @@ -37,7 +37,7 @@ namespace mongo { const int edebug=0; - bool dbEval(const char *ns, BSONObj& cmd, BSONObjBuilder& result, string& errmsg) { + bool dbEval(const string& dbName, BSONObj& cmd, BSONObjBuilder& result, string& errmsg) { BSONElement e = cmd.firstElement(); uassert( 10046 , "eval needs Code" , e.type() == Code || e.type() == CodeWScope || e.type() == String ); @@ -60,16 +60,16 @@ namespace mongo { return false; } - auto_ptr s = globalScriptEngine->getPooledScope( ns ); + auto_ptr s = globalScriptEngine->getPooledScope( dbName ); ScriptingFunction f = s->createFunction(code); if ( f == 0 ) { errmsg = (string)"compile failed: " + s->getError(); return false; } - + if ( e.type() == CodeWScope ) s->init( e.codeWScopeScopeData() ); - s->localConnect( cc().database()->name.c_str() ); + s->localConnect( dbName.c_str() ); BSONObj args; { @@ -89,7 +89,7 @@ namespace mongo { res = s->invoke(f,args, cmdLine.quota ? 
10 * 60 * 1000 : 0 ); int m = t.millis(); if ( m > cmdLine.slowMS ) { - out() << "dbeval slow, time: " << dec << m << "ms " << ns << endl; + out() << "dbeval slow, time: " << dec << m << "ms " << dbName << endl; if ( m >= 1000 ) log() << code << endl; else OCCASIONALLY log() << code << endl; } @@ -100,7 +100,7 @@ namespace mongo { errmsg += s->getError(); return false; } - + s->append( result , "retval" , "return" ); return true; @@ -122,16 +122,19 @@ namespace mongo { virtual LockType locktype() const { return NONE; } CmdEval() : Command("eval", false, "$eval") { } bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - + AuthenticationInfo *ai = cc().getAuthenticationInfo(); uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(dbname.c_str()) ); - + + if ( cmdObj["nolock"].trueValue() ) { + return dbEval(dbname, cmdObj, result, errmsg); + } + // write security will be enforced in DBDirectClient mongolock lk( ai->isAuthorized( dbname.c_str() ) ); Client::Context ctx( dbname ); - - return dbEval(dbname.c_str(), cmdObj, result, errmsg); + return dbEval(dbname, cmdObj, result, errmsg); } } cmdeval; diff --git a/db/dbhelpers.cpp b/db/dbhelpers.cpp index 205787e..75db430 100644 --- a/db/dbhelpers.cpp +++ b/db/dbhelpers.cpp @@ -28,39 +28,6 @@ namespace mongo { - CursorIterator::CursorIterator( shared_ptr c , BSONObj filter ) - : _cursor( c ){ - if ( ! filter.isEmpty() ) - _matcher.reset( new CoveredIndexMatcher( filter , BSONObj() ) ); - _advance(); - } - - BSONObj CursorIterator::next(){ - BSONObj o = _o; - _advance(); - return o; - } - - bool CursorIterator::hasNext(){ - return ! _o.isEmpty(); - } - - void CursorIterator::_advance(){ - if ( ! _cursor->ok() ){ - _o = BSONObj(); - return; - } - - while ( _cursor->ok() ){ - _o = _cursor->current(); - _cursor->advance(); - if ( _matcher.get() == 0 || _matcher->matches( _o ) ) - return; - } - - _o = BSONObj(); - } - void Helpers::ensureIndex(const char *ns, BSONObj keyPattern, bool unique, const char *name) { NamespaceDetails *d = nsdetails(ns); if( d == 0 ) @@ -74,7 +41,7 @@ namespace mongo { } } - if( d->nIndexes >= NamespaceDetails::NIndexesMax ) { + if( d->nIndexes >= NamespaceDetails::NIndexesMax ) { problem() << "Helper::ensureIndex fails, MaxIndexes exceeded " << ns << '\n'; return; } @@ -91,6 +58,7 @@ namespace mongo { theDataFileMgr.insert(system_indexes.c_str(), o.objdata(), o.objsize()); } + /** Simple QueryOp implementation to return first match. Does not support yielding. 
*/ class FindOne : public QueryOp { public: FindOne( bool requireIndex ) : requireIndex_( requireIndex ) {} @@ -111,10 +79,15 @@ namespace mongo { one_ = c_->current(); loc_ = c_->currLoc(); setStop(); - } else { + } + else { c_->advance(); } } + virtual long long nscanned() { + assert( c_.get() ); + return c_->nscanned(); + } virtual bool mayRecordPlan() const { return false; } virtual QueryOp *_createChild() const { return new FindOne( requireIndex_ ); } BSONObj one() const { return one_; } @@ -125,11 +98,11 @@ namespace mongo { BSONObj one_; DiskLoc loc_; }; - - /* fetch a single object from collection ns that matches query + + /* fetch a single object from collection ns that matches query set your db SavedContext first */ - bool Helpers::findOne(const char *ns, const BSONObj &query, BSONObj& result, bool requireIndex) { + bool Helpers::findOne(const char *ns, const BSONObj &query, BSONObj& result, bool requireIndex) { MultiPlanScanner s( ns, query, BSONObj(), 0, !requireIndex ); FindOne original( requireIndex ); shared_ptr< FindOne > res = s.runOp( original ); @@ -141,10 +114,10 @@ namespace mongo { return true; } - /* fetch a single object from collection ns that matches query + /* fetch a single object from collection ns that matches query set your db SavedContext first */ - DiskLoc Helpers::findOne(const char *ns, const BSONObj &query, bool requireIndex) { + DiskLoc Helpers::findOne(const char *ns, const BSONObj &query, bool requireIndex) { MultiPlanScanner s( ns, query, BSONObj(), 0, !requireIndex ); FindOne original( requireIndex ); shared_ptr< FindOne > res = s.runOp( original ); @@ -153,15 +126,8 @@ namespace mongo { return res->loc(); } - auto_ptr Helpers::find( const char *ns , BSONObj query , bool requireIndex ){ - uassert( 10047 , "requireIndex not supported in Helpers::find yet" , ! 
requireIndex ); - auto_ptr i; - i.reset( new CursorIterator( DataFileMgr::findAll( ns ) , query ) ); - return i; - } - bool Helpers::findById(Client& c, const char *ns, BSONObj query, BSONObj& result , - bool * nsFound , bool * indexFound ){ + bool * nsFound , bool * indexFound ) { dbMutex.assertAtLeastReadLocked(); Database *database = c.database(); assert( database ); @@ -170,7 +136,7 @@ namespace mongo { return false; if ( nsFound ) *nsFound = 1; - + int idxNo = d->findIdIndex(); if ( idxNo < 0 ) return false; @@ -178,9 +144,9 @@ namespace mongo { *indexFound = 1; IndexDetails& i = d->idx( idxNo ); - + BSONObj key = i.getKeyFromQuery( query ); - + DiskLoc loc = i.head.btree()->findSingle( i , i.head , key ); if ( loc.isNull() ) return false; @@ -188,16 +154,16 @@ namespace mongo { return true; } - DiskLoc Helpers::findById(NamespaceDetails *d, BSONObj idquery) { - int idxNo = d->findIdIndex(); - uassert(13430, "no _id index", idxNo>=0); - IndexDetails& i = d->idx( idxNo ); - BSONObj key = i.getKeyFromQuery( idquery ); - return i.head.btree()->findSingle( i , i.head , key ); + DiskLoc Helpers::findById(NamespaceDetails *d, BSONObj idquery) { + int idxNo = d->findIdIndex(); + uassert(13430, "no _id index", idxNo>=0); + IndexDetails& i = d->idx( idxNo ); + BSONObj key = i.getKeyFromQuery( idquery ); + return i.head.btree()->findSingle( i , i.head , key ); } - bool Helpers::isEmpty(const char *ns) { - Client::Context context(ns); + bool Helpers::isEmpty(const char *ns, bool doAuth) { + Client::Context context(ns, dbpath, NULL, doAuth); shared_ptr c = DataFileMgr::findAll(ns); return !c->ok(); } @@ -221,17 +187,17 @@ namespace mongo { bool Helpers::getLast(const char *ns, BSONObj& result) { Client::Context ctx(ns); shared_ptr c = findTableScan(ns, reverseNaturalObj); - if( !c->ok() ) + if( !c->ok() ) return false; result = c->current(); return true; } - void Helpers::upsert( const string& ns , const BSONObj& o ){ + void Helpers::upsert( const string& ns , const BSONObj& o ) { BSONElement e = o["_id"]; assert( e.type() ); BSONObj id = e.wrap(); - + OpDebug debug; Client::Context context(ns); updateObjects(ns.c_str(), o, /*pattern=*/id, /*upsert=*/true, /*multi=*/false , /*logtheop=*/true , debug ); @@ -249,12 +215,12 @@ namespace mongo { _updateObjects(/*god=*/true, ns, obj, /*pattern=*/BSONObj(), /*upsert=*/true, /*multi=*/false , logTheOp , debug ); } - BSONObj Helpers::toKeyFormat( const BSONObj& o , BSONObj& key ){ + BSONObj Helpers::toKeyFormat( const BSONObj& o , BSONObj& key ) { BSONObjBuilder me; BSONObjBuilder k; BSONObjIterator i( o ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); k.append( e.fieldName() , 1 ); me.appendAs( e , "" ); @@ -262,8 +228,8 @@ namespace mongo { key = k.obj(); return me.obj(); } - - long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback ){ + + long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback ) { BSONObj keya , keyb; BSONObj minClean = toKeyFormat( min , keya ); BSONObj maxClean = toKeyFormat( max , keyb ); @@ -276,33 +242,35 @@ namespace mongo { int ii = nsd->findIndexByKeyPattern( keya ); assert( ii >= 0 ); - + long long num = 0; - + IndexDetails& i = nsd->idx( ii ); shared_ptr c( new BtreeCursor( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); auto_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); 
cc->setDoingDeletes( true ); - - while ( c->ok() ){ + + while ( c->ok() ) { DiskLoc rloc = c->currLoc(); - BSONObj key = c->currKey(); if ( callback ) callback->goingToDelete( c->current() ); - + c->advance(); c->noteLocation(); - + logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() ); theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); num++; c->checkLocation(); - if ( yield && ! cc->yieldSometimes() ){ + getDur().commitIfNeeded(); + + if ( yield && ! cc->yieldSometimes() ) { // cursor got finished by someone else, so we're done + cc.release(); // if the collection/db is dropped, cc may be deleted break; } } @@ -325,11 +293,12 @@ namespace mongo { BSONObjBuilder result; dropCollection( name_, errmsg, result ); } - } catch ( ... ) { + } + catch ( ... ) { problem() << "exception cleaning up DbSet" << endl; } } - + void DbSet::reset( const string &name, const BSONObj &key ) { if ( !name.empty() ) name_ = name; @@ -338,74 +307,77 @@ namespace mongo { Client::Context c( name_.c_str() ); if ( nsdetails( name_.c_str() ) ) { Helpers::emptyCollection( name_.c_str() ); - } else { + } + else { string err; massert( 10303 , err, userCreateNS( name_.c_str(), fromjson( "{autoIndexId:false}" ), err, false ) ); } - Helpers::ensureIndex( name_.c_str(), key_, true, "setIdx" ); + Helpers::ensureIndex( name_.c_str(), key_, true, "setIdx" ); } - + bool DbSet::get( const BSONObj &obj ) const { Client::Context c( name_.c_str() ); BSONObj temp; return Helpers::findOne( name_.c_str(), obj, temp, true ); } - + void DbSet::set( const BSONObj &obj, bool val ) { Client::Context c( name_.c_str() ); if ( val ) { try { BSONObj k = obj; theDataFileMgr.insertWithObjMod( name_.c_str(), k, false ); - } catch ( DBException& ) { + } + catch ( DBException& ) { // dup key - already in set } - } else { + } + else { deleteObjects( name_.c_str(), obj, true, false, false ); - } + } } - RemoveSaver::RemoveSaver( const string& a , const string& b , const string& why) : _out(0){ + RemoveSaver::RemoveSaver( const string& a , const string& b , const string& why) : _out(0) { static int NUM = 0; - + _root = dbpath; if ( a.size() ) _root /= a; if ( b.size() ) _root /= b; assert( a.size() || b.size() ); - + _file = _root; - + stringstream ss; ss << why << "." << terseCurrentTime(false) << "." << NUM++ << ".bson"; _file /= ss.str(); } - - RemoveSaver::~RemoveSaver(){ - if ( _out ){ + + RemoveSaver::~RemoveSaver() { + if ( _out ) { _out->close(); delete _out; _out = 0; } } - - void RemoveSaver::goingToDelete( const BSONObj& o ){ - if ( ! _out ){ + + void RemoveSaver::goingToDelete( const BSONObj& o ) { + if ( ! _out ) { create_directories( _root ); _out = new ofstream(); _out->open( _file.string().c_str() , ios_base::out | ios_base::binary ); - if ( ! _out->good() ){ + if ( ! 
_out->good() ) { log( LL_WARNING ) << "couldn't create file: " << _file.string() << " for remove saving" << endl; delete _out; _out = 0; return; } - + } _out->write( o.objdata() , o.objsize() ); } - - + + } // namespace mongo diff --git a/db/dbhelpers.h b/db/dbhelpers.h index ee9a59c..e793d3f 100644 --- a/db/dbhelpers.h +++ b/db/dbhelpers.h @@ -33,24 +33,10 @@ namespace mongo { class Cursor; class CoveredIndexMatcher; - class CursorIterator { - public: - CursorIterator( shared_ptr c , BSONObj filter = BSONObj() ); - BSONObj next(); - bool hasNext(); - - private: - void _advance(); - - shared_ptr _cursor; - auto_ptr _matcher; - BSONObj _o; - }; - /** all helpers assume locking is handled above them */ - struct Helpers { + struct Helpers { /* ensure the specified index exists. @@ -68,7 +54,7 @@ namespace mongo { /* fetch a single object from collection ns that matches query. set your db SavedContext first. - @param query - the query to perform. note this is the low level portion of query so "orderby : ..." + @param query - the query to perform. note this is the low level portion of query so "orderby : ..." won't work. @param requireIndex if true, complain if no index for the query. a way to guard against @@ -77,21 +63,19 @@ namespace mongo { @return true if object found */ static bool findOne(const char *ns, const BSONObj &query, BSONObj& result, bool requireIndex = false); - static DiskLoc findOne(const char *ns, const BSONObj &query, bool requireIndex); + static DiskLoc findOne(const char *ns, const BSONObj &query, bool requireIndex); /** * @param foundIndex if passed in will be set to 1 if ns and index found * @return true if object found */ - static bool findById(Client&, const char *ns, BSONObj query, BSONObj& result , + static bool findById(Client&, const char *ns, BSONObj query, BSONObj& result , bool * nsFound = 0 , bool * indexFound = 0 ); - /* uasserts if no _id index. + /* uasserts if no _id index. @return null loc if not found */ static DiskLoc findById(NamespaceDetails *d, BSONObj query); - static auto_ptr find( const char *ns , BSONObj query = BSONObj() , bool requireIndex = false ); - /** Get/put the first (or last) object from a collection. Generally only useful if the collection only ever has a single object -- which is a "singleton collection". @@ -103,7 +87,7 @@ namespace mongo { static void putSingleton(const char *ns, BSONObj obj); static void putSingletonGod(const char *ns, BSONObj obj, bool logTheOp); static bool getFirst(const char *ns, BSONObj& result) { return getSingleton(ns, result); } - static bool getLast(const char *ns, BSONObj& result); // get last object int he collection; e.g. {$natural : -1} + static bool getLast(const char *ns, BSONObj& result); // get last object int he collection; e.g. {$natural : -1} /** * you have to lock @@ -115,14 +99,14 @@ namespace mongo { /** You do not need to set the database before calling. @return true if collection is empty. 
*/ - static bool isEmpty(const char *ns); + static bool isEmpty(const char *ns, bool doAuth=true); // TODO: this should be somewhere else probably static BSONObj toKeyFormat( const BSONObj& o , BSONObj& key ); class RemoveCallback { public: - virtual ~RemoveCallback(){} + virtual ~RemoveCallback() {} virtual void goingToDelete( const BSONObj& o ) = 0; }; /* removeRange: operation is oplog'd */ @@ -163,13 +147,13 @@ namespace mongo { ~RemoveSaver(); void goingToDelete( const BSONObj& o ); - + private: path _root; path _file; ofstream* _out; - + }; - + } // namespace mongo diff --git a/db/dbmessage.h b/db/dbmessage.h index 2849de8..cc1d1d8 100644 --- a/db/dbmessage.h +++ b/db/dbmessage.h @@ -18,7 +18,7 @@ #include "diskloc.h" #include "jsobj.h" -#include "namespace.h" +#include "namespace-inl.h" #include "../util/message.h" #include "../client/constants.h" @@ -35,7 +35,7 @@ namespace mongo { */ extern bool objcheck; - + #pragma pack(1) struct QueryResult : public MsgData { long long cursorId; @@ -50,7 +50,7 @@ namespace mongo { int& _resultFlags() { return dataAsInt(); } - void setResultFlagsToOk() { + void setResultFlagsToOk() { _resultFlags() = ResultFlag_AwaitCapable; } }; @@ -63,8 +63,7 @@ namespace mongo { */ class DbMessage { public: - DbMessage(const Message& _m) : m(_m) - { + DbMessage(const Message& _m) : m(_m) , mark(0) { // for received messages, Message has only one buffer theEnd = _m.singleData()->_data + _m.header()->dataLen(); char *r = _m.singleData()->_data; @@ -86,7 +85,7 @@ namespace mongo { const char * afterNS() const { return data + strlen( data ) + 1; } - + int getInt( int num ) const { const int * foo = (const int*)afterNS(); return foo[num]; @@ -96,7 +95,17 @@ namespace mongo { return getInt( 1 ); } - void resetPull(){ nextjsobj = data; } + /** + * get an int64 at specified offsetBytes after ns + */ + long long getInt64( int offsetBytes ) const { + const char * x = afterNS(); + x += offsetBytes; + const long long * ll = (const long long*)x; + return ll[0]; + } + + void resetPull() { nextjsobj = data; } int pullInt() const { return pullInt(); } int& pullInt() { if ( nextjsobj == data ) @@ -140,10 +149,10 @@ namespace mongo { BSONObj js(nextjsobj); massert( 10305 , "Client Error: Invalid object size", js.objsize() > 3 ); massert( 10306 , "Client Error: Next object larger than space left in message", - js.objsize() < ( theEnd - data ) ); + js.objsize() < ( theEnd - data ) ); if ( objcheck && !js.valid() ) { massert( 10307 , "Client Error: bad object in message", false); - } + } nextjsobj += js.objsize(); if ( nextjsobj >= theEnd ) nextjsobj = 0; @@ -152,11 +161,12 @@ namespace mongo { const Message& msg() const { return m; } - void markSet(){ + void markSet() { mark = nextjsobj; } - - void markReset(){ + + void markReset() { + assert( mark ); nextjsobj = mark; } @@ -180,7 +190,7 @@ namespace mongo { int queryOptions; BSONObj query; BSONObj fields; - + /* parses the message into the above fields */ QueryMessage(DbMessage& d) { ns = d.getns(); @@ -232,8 +242,7 @@ namespace mongo { /* object reply helper. 
*/ inline void replyToQuery(int queryResultFlags, AbstractMessagingPort* p, Message& requestMsg, - BSONObj& responseObj) - { + BSONObj& responseObj) { replyToQuery(queryResultFlags, p, requestMsg, (void *) responseObj.objdata(), responseObj.objsize(), 1); diff --git a/db/dbwebserver.cpp b/db/dbwebserver.cpp index f17a283..7aa6148 100644 --- a/db/dbwebserver.cpp +++ b/db/dbwebserver.cpp @@ -32,6 +32,7 @@ #include "../util/version.h" #include "../util/ramlog.h" #include +#include "../util/admin_access.h" #include "dbwebserver.h" #include #undef assert @@ -52,18 +53,20 @@ namespace mongo { }; bool execCommand( Command * c , - Client& client , int queryOptions , - const char *ns, BSONObj& cmdObj , - BSONObjBuilder& result, + Client& client , int queryOptions , + const char *ns, BSONObj& cmdObj , + BSONObjBuilder& result, bool fromRepl ); class DbWebServer : public MiniWebServer { public: - DbWebServer(const string& ip, int port) : MiniWebServer(ip, port) { + DbWebServer(const string& ip, int port, const AdminAccess* webUsers) + : MiniWebServer(ip, port), _webUsers(webUsers) { WebStatusPlugin::initAll(); } private: + const AdminAccess* _webUsers; // not owned here void doUnlockedStuff(stringstream& ss) { /* this is in the header already ss << "port: " << port << '\n'; */ @@ -75,37 +78,35 @@ namespace mongo { ss << ""; } - private: - bool allowed( const char * rq , vector& headers, const SockAddr &from ) { if ( from.isLocalHost() ) return true; - if ( ! webHaveAdminUsers() ) + if ( ! _webUsers->haveAdminUsers() ) return true; string auth = getHeader( rq , "Authorization" ); - if ( auth.size() > 0 && auth.find( "Digest " ) == 0 ){ + if ( auth.size() > 0 && auth.find( "Digest " ) == 0 ) { auth = auth.substr( 7 ) + ", "; map parms; pcrecpp::StringPiece input( auth ); - + string name, val; pcrecpp::RE re("(\\w+)=\"?(.*?)\"?, "); - while ( re.Consume( &input, &name, &val) ){ + while ( re.Consume( &input, &name, &val) ) { parms[name] = val; } - BSONObj user = webGetAdminUser( parms["username"] ); - if ( ! user.isEmpty() ){ + BSONObj user = _webUsers->getAdminUser( parms["username"] ); + if ( ! user.isEmpty() ) { string ha1 = user["pwd"].str(); string ha2 = md5simpledigest( (string)"GET" + ":" + parms["uri"] ); - + stringstream r; r << ha1 << ':' << parms["nonce"]; - if ( parms["nc"].size() && parms["cnonce"].size() && parms["qop"].size() ){ + if ( parms["nc"].size() && parms["cnonce"].size() && parms["qop"].size() ) { r << ':'; r << parms["nc"]; r << ':'; @@ -116,22 +117,20 @@ namespace mongo { r << ':'; r << ha2; string r1 = md5simpledigest( r.str() ); - + if ( r1 == parms["response"] ) return true; } - - } - + stringstream authHeader; - authHeader - << "WWW-Authenticate: " - << "Digest realm=\"mongo\", " - << "nonce=\"abc\", " - << "algorithm=MD5, qop=\"auth\" " - ; - + authHeader + << "WWW-Authenticate: " + << "Digest realm=\"mongo\", " + << "nonce=\"abc\", " + << "algorithm=MD5, qop=\"auth\" " + ; + headers.push_back( authHeader.str() ); return 0; } @@ -144,24 +143,39 @@ namespace mongo { int& responseCode, vector& headers, // if completely empty, content-type: text/html will be added const SockAddr &from - ) - { + ) { if ( url.size() > 1 ) { - + if ( ! allowed( rq , headers, from ) ) { responseCode = 401; headers.push_back( "Content-Type: text/plain" ); responseMsg = "not allowed\n"; return; - } + } { + BSONObj params; + const size_t pos = url.find( "?" 
); + if ( pos != string::npos ) { + MiniWebServer::parseParams( params , url.substr( pos + 1 ) ); + url = url.substr(0, pos); + } + DbWebHandler * handler = DbWebHandler::findHandler( url ); - if ( handler ){ - if ( handler->requiresREST( url ) && ! cmdLine.rest ) + if ( handler ) { + if ( handler->requiresREST( url ) && ! cmdLine.rest ) { _rejectREST( responseMsg , responseCode , headers ); - else - handler->handle( rq , url , responseMsg , responseCode , headers , from ); + } + else { + string callback = params.getStringField("jsonp"); + uassert(13453, "server not started with --jsonp", callback.empty() || cmdLine.jsonp); + + handler->handle( rq , url , params , responseMsg , responseCode , headers , from ); + + if (responseCode == 200 && !callback.empty()) { + responseMsg = callback + '(' + responseMsg + ')'; + } + } return; } } @@ -171,27 +185,27 @@ namespace mongo { _rejectREST( responseMsg , responseCode , headers ); return; } - + responseCode = 404; headers.push_back( "Content-Type: text/html" ); responseMsg = "unknown url\n"; return; } - + // generate home page - if ( ! allowed( rq , headers, from ) ){ + if ( ! allowed( rq , headers, from ) ) { responseCode = 401; responseMsg = "not allowed\n"; return; - } + } responseCode = 200; stringstream ss; string dbname; { stringstream z; - z << "mongod " << prettyHostName(); + z << cmdLine.binaryName << ' ' << prettyHostName(); dbname = z.str(); } ss << start(dbname) << h2(dbname); @@ -202,12 +216,18 @@ namespace mongo { { const map *m = Command::webCommands(); if( m ) { - ss << a("", "These read-only context-less commands can be executed from the web interface. Results are json format, unless ?text is appended in which case the result is output as text for easier human viewing", "Commands") << ": "; - for( map::const_iterator i = m->begin(); i != m->end(); i++ ) { + ss << + a("", + "These read-only context-less commands can be executed from the web interface. " + "Results are json format, unless ?text=1 is appended in which case the result is output as text " + "for easier human viewing", + "Commands") + << ": "; + for( map::const_iterator i = m->begin(); i != m->end(); i++ ) { stringstream h; i->second->help(h); string help = h.str(); - ss << "first << "?text\""; + ss << "first << "?text=1\""; if( help != "no help defined" ) ss << " title=\"" << help << '"'; ss << ">" << i->first << " "; @@ -216,69 +236,67 @@ namespace mongo { } } ss << '\n'; - /* - ss << "HTTP admin port:" << _port << "

\n"; - */ + /* + ss << "HTTP admin port:" << _port << "

\n"; + */ doUnlockedStuff(ss); WebStatusPlugin::runAll( ss ); - + ss << "\n"; responseMsg = ss.str(); - - } - void _rejectREST( string& responseMsg , int& responseCode, vector& headers ){ - responseCode = 403; - stringstream ss; - ss << "REST is not enabled. use --rest to turn on.\n"; - ss << "check that port " << _port << " is secured for the network too.\n"; - responseMsg = ss.str(); - headers.push_back( "Content-Type: text/plain" ); + void _rejectREST( string& responseMsg , int& responseCode, vector& headers ) { + responseCode = 403; + stringstream ss; + ss << "REST is not enabled. use --rest to turn on.\n"; + ss << "check that port " << _port << " is secured for the network too.\n"; + responseMsg = ss.str(); + headers.push_back( "Content-Type: text/plain" ); } }; // --- - - bool prisort( const Prioritizable * a , const Prioritizable * b ){ + + bool prisort( const Prioritizable * a , const Prioritizable * b ) { return a->priority() < b->priority(); } // -- status framework --- - WebStatusPlugin::WebStatusPlugin( const string& secionName , double priority , const string& subheader ) + WebStatusPlugin::WebStatusPlugin( const string& secionName , double priority , const string& subheader ) : Prioritizable(priority), _name( secionName ) , _subHeading( subheader ) { if ( ! _plugins ) _plugins = new vector(); _plugins->push_back( this ); } - void WebStatusPlugin::initAll(){ + void WebStatusPlugin::initAll() { if ( ! _plugins ) return; - + sort( _plugins->begin(), _plugins->end() , prisort ); - + for ( unsigned i=0; i<_plugins->size(); i++ ) (*_plugins)[i]->init(); } - void WebStatusPlugin::runAll( stringstream& ss ){ + void WebStatusPlugin::runAll( stringstream& ss ) { if ( ! _plugins ) return; - - for ( unsigned i=0; i<_plugins->size(); i++ ){ + + for ( unsigned i=0; i<_plugins->size(); i++ ) { WebStatusPlugin * p = (*_plugins)[i]; - ss << "


\n" + ss << "
\n" << "" << p->_name << ""; - + ss << " " << p->_subHeading; ss << "
\n"; - + p->run(ss); } @@ -290,29 +308,30 @@ namespace mongo { class LogPlugin : public WebStatusPlugin { public: - LogPlugin() : WebStatusPlugin( "Log" , 100 ), _log(0){ + LogPlugin() : WebStatusPlugin( "Log" , 100 ), _log(0) { } - - virtual void init(){ + + virtual void init() { assert( ! _log ); _log = new RamLog(); Logstream::get().addGlobalTee( _log ); } - virtual void run( stringstream& ss ){ + virtual void run( stringstream& ss ) { _log->toHTML( ss ); } RamLog * _log; }; - + LogPlugin * logPlugin = new LogPlugin(); // -- handler framework --- DbWebHandler::DbWebHandler( const string& name , double priority , bool requiresREST ) - : Prioritizable(priority), _name(name) , _requiresREST(requiresREST){ + : Prioritizable(priority), _name(name) , _requiresREST(requiresREST) { - { // setup strings + { + // setup strings _defaultUrl = "/"; _defaultUrl += name; @@ -320,8 +339,9 @@ namespace mongo { ss << name << " priority: " << priority << " rest: " << requiresREST; _toString = ss.str(); } - - { // add to handler list + + { + // add to handler list if ( ! _handlers ) _handlers = new vector(); _handlers->push_back( this ); @@ -329,11 +349,11 @@ namespace mongo { } } - DbWebHandler * DbWebHandler::findHandler( const string& url ){ + DbWebHandler * DbWebHandler::findHandler( const string& url ) { if ( ! _handlers ) return 0; - - for ( unsigned i=0; i<_handlers->size(); i++ ){ + + for ( unsigned i=0; i<_handlers->size(); i++ ) { DbWebHandler * h = (*_handlers)[i]; if ( h->handles( url ) ) return h; @@ -341,76 +361,71 @@ namespace mongo { return 0; } - + vector * DbWebHandler::_handlers = 0; // --- basic handlers --- class FavIconHandler : public DbWebHandler { public: - FavIconHandler() : DbWebHandler( "favicon.ico" , 0 , false ){} + FavIconHandler() : DbWebHandler( "favicon.ico" , 0 , false ) {} - virtual void handle( const char *rq, string url, + virtual void handle( const char *rq, string url, BSONObj params, string& responseMsg, int& responseCode, - vector& headers, const SockAddr &from ){ + vector& headers, const SockAddr &from ) { responseCode = 404; headers.push_back( "Content-Type: text/plain" ); responseMsg = "no favicon\n"; } } faviconHandler; - + class StatusHandler : public DbWebHandler { public: - StatusHandler() : DbWebHandler( "_status" , 1 , false ){} - - virtual void handle( const char *rq, string url, + StatusHandler() : DbWebHandler( "_status" , 1 , false ) {} + + virtual void handle( const char *rq, string url, BSONObj params, string& responseMsg, int& responseCode, - vector& headers, const SockAddr &from ){ + vector& headers, const SockAddr &from ) { headers.push_back( "Content-Type: application/json" ); responseCode = 200; - + static vector commands; - if ( commands.size() == 0 ){ + if ( commands.size() == 0 ) { commands.push_back( "serverStatus" ); commands.push_back( "buildinfo" ); } - - BSONObj params; - if ( url.find( "?" ) != string::npos ) { - MiniWebServer::parseParams( params , url.substr( url.find( "?" ) + 1 ) ); - } - + BSONObjBuilder buf(1024); - - for ( unsigned i=0; ilocktype() == 0 ); - + BSONObj co; { BSONObjBuilder b; b.append( cmd , 1 ); - - if ( cmd == "serverStatus" && params["repl"].type() ){ + + if ( cmd == "serverStatus" && params["repl"].type() ) { b.append( "repl" , atoi( params["repl"].valuestr() ) ); } - + co = b.obj(); } - + string errmsg; - + BSONObjBuilder sub; if ( ! 
c->run( "admin.$cmd" , co , errmsg , sub , false ) ) buf.append( cmd , errmsg ); else buf.append( cmd , sub.obj() ); } - + responseMsg = buf.obj().jsonString(); } @@ -419,14 +434,14 @@ namespace mongo { class CommandListHandler : public DbWebHandler { public: - CommandListHandler() : DbWebHandler( "_commands" , 1 , true ){} - - virtual void handle( const char *rq, string url, + CommandListHandler() : DbWebHandler( "_commands" , 1 , true ) {} + + virtual void handle( const char *rq, string url, BSONObj params, string& responseMsg, int& responseCode, - vector& headers, const SockAddr &from ){ + vector& headers, const SockAddr &from ) { headers.push_back( "Content-Type: text/html" ); responseCode = 200; - + stringstream ss; ss << start("Commands List"); ss << p( a("/", "back", "Home") ); @@ -435,41 +450,21 @@ namespace mongo { ss << "S:slave-ok R:read-lock W:write-lock A:admin-only
\n"; ss << table(); ss << "CommandAttributesHelp\n"; - for( map::const_iterator i = m->begin(); i != m->end(); i++ ) + for( map::const_iterator i = m->begin(); i != m->end(); i++ ) i->second->htmlHelp(ss); ss << _table() << _end(); - + responseMsg = ss.str(); } } commandListHandler; class CommandsHandler : public DbWebHandler { public: - CommandsHandler() : DbWebHandler( "DUMMY COMMANDS" , 2 , true ){} - - bool _cmd( const string& url , string& cmd , bool& text ) const { - const char * x = url.c_str(); - - if ( x[0] != '/' ){ - // this should never happen - return false; - } - - if ( strchr( x + 1 , '/' ) ) - return false; - - x++; + CommandsHandler() : DbWebHandler( "DUMMY COMMANDS" , 2 , true ) {} - const char * end = strstr( x , "?text" ); - if ( end ){ - text = true; - cmd = string( x , end - x ); - } - else { - text = false; - cmd = string(x); - } - + bool _cmd( const string& url , string& cmd , bool& text, bo params ) const { + cmd = str::after(url, '/'); + text = params["text"].boolean(); return true; } @@ -477,45 +472,43 @@ namespace mongo { const map *m = Command::webCommands(); if( ! m ) return 0; - + map::const_iterator i = m->find(cmd); if ( i == m->end() ) return 0; - + return i->second; } - virtual bool handles( const string& url ) const { + virtual bool handles( const string& url ) const { string cmd; bool text; - if ( ! _cmd( url , cmd , text ) ) + if ( ! _cmd( url , cmd , text, bo() ) ) return false; - - return _cmd( cmd ); + return _cmd(cmd) != 0; } - - virtual void handle( const char *rq, string url, + + virtual void handle( const char *rq, string url, BSONObj params, string& responseMsg, int& responseCode, - vector& headers, const SockAddr &from ){ - + vector& headers, const SockAddr &from ) { string cmd; bool text = false; - assert( _cmd( url , cmd , text ) ); + assert( _cmd( url , cmd , text, params ) ); Command * c = _cmd( cmd ); assert( c ); BSONObj cmdObj = BSON( cmd << 1 ); Client& client = cc(); - + BSONObjBuilder result; execCommand(c, client, 0, "admin.", cmdObj , result, false); - + responseCode = 200; - - string j = result.done().jsonString(JS, text ); + + string j = result.done().jsonString(Strict, text ); responseMsg = j; - - if( text ){ + + if( text ) { headers.push_back( "Content-Type: text/plain" ); responseMsg += '\n'; } @@ -524,23 +517,16 @@ namespace mongo { } } - + } commandsHandler; // --- external ---- - string prettyHostName() { - stringstream s; - s << getHostName(); - if( mongo::cmdLine.port != CmdLine::DefaultDBPort ) - s << ':' << mongo::cmdLine.port; - return s.str(); - } - - void webServerThread() { + void webServerThread(const AdminAccess* adminAccess) { + boost::scoped_ptr adminAccessPtr(adminAccess); // adminAccess is owned here Client::initThread("websvr"); const int p = cmdLine.port + 1000; - DbWebServer mini(cmdLine.bind_ip, p); + DbWebServer mini(cmdLine.bind_ip, p, adminAccessPtr.get()); log() << "web admin interface listening on port " << p << endl; mini.initAndListen(); cc().shutdown(); diff --git a/db/dbwebserver.h b/db/dbwebserver.h index d1a2f0d..bdbcba2 100644 --- a/db/dbwebserver.h +++ b/db/dbwebserver.h @@ -17,20 +17,22 @@ * along with this program. If not, see . 
*/ +#include "../util/admin_access.h" + namespace mongo { class Prioritizable { public: - Prioritizable( double p ) : _priority(p){} + Prioritizable( double p ) : _priority(p) {} double priority() const { return _priority; } private: double _priority; }; - + class DbWebHandler : public Prioritizable { public: DbWebHandler( const string& name , double priority , bool requiresREST ); - virtual ~DbWebHandler(){} + virtual ~DbWebHandler() {} virtual bool handles( const string& url ) const { return url == _defaultUrl; } @@ -38,20 +40,21 @@ namespace mongo { virtual void handle( const char *rq, // the full request string url, + BSONObj params, // set these and return them: string& responseMsg, int& responseCode, vector& headers, // if completely empty, content-type: text/html will be added const SockAddr &from - ) = 0; - + ) = 0; + string toString() const { return _toString; } static DbWebHandler * findHandler( const string& url ); private: string _name; bool _requiresREST; - + string _defaultUrl; string _toString; @@ -61,8 +64,8 @@ namespace mongo { class WebStatusPlugin : public Prioritizable { public: WebStatusPlugin( const string& secionName , double priority , const string& subheader = "" ); - virtual ~WebStatusPlugin(){} - + virtual ~WebStatusPlugin() {} + virtual void run( stringstream& ss ) = 0; /** called when web server stats up */ virtual void init() = 0; @@ -73,18 +76,10 @@ namespace mongo { string _name; string _subHeading; static vector * _plugins; - + }; - void webServerThread(); + void webServerThread( const AdminAccess* admins ); string prettyHostName(); - - /** @return if there are any admin users. this should not block for long and throw if can't get a lock if needed */ - bool webHaveAdminUsers(); - - /** @return admin user with this name. this should not block for long and throw if can't get a lock if needed */ - BSONObj webGetAdminUser( const string& username ); }; - - diff --git a/db/diskloc.h b/db/diskloc.h index 2747abd..f356c73 100644 --- a/db/diskloc.h +++ b/db/diskloc.h @@ -14,7 +14,7 @@ * along with this program. If not, see . */ -/* storage.h +/* @file diskloc.h Storage subsystem management. Lays out our datafiles on disk, manages disk space. @@ -26,7 +26,6 @@ namespace mongo { - class Record; class DeletedRecord; class Extent; @@ -34,77 +33,64 @@ namespace mongo { class MongoDataFile; #pragma pack(1) + /** represents a disk location/offset on disk in a database. 64 bits. + it is assumed these will be passed around by value a lot so don't do anything to make them large + (such as adding a virtual function) + */ class DiskLoc { - int fileNo; /* this will be volume, file #, etc. */ + int _a; // this will be volume, file #, etc. 
but is a logical value could be anything depending on storage engine int ofs; + public: - // Note: MaxFiles imposes a limit of about 32TB of data per process - enum SentinelValues { MaxFiles=16000, NullOfs = -1 }; - int a() const { - return fileNo; - } + enum SentinelValues { + NullOfs = -1, + MaxFiles=16000 // thus a limit of about 32TB of data per db + }; - DiskLoc(int a, int b) : fileNo(a), ofs(b) { - //assert(ofs!=0); - } + DiskLoc(int a, int b) : _a(a), ofs(b) { } DiskLoc() { Null(); } DiskLoc(const DiskLoc& l) { - fileNo=l.fileNo; + _a=l._a; ofs=l.ofs; } - bool questionable() { + bool questionable() const { return ofs < -1 || - fileNo < -1 || - fileNo > 524288; + _a < -1 || + _a > 524288; } - bool isNull() const { - return fileNo == -1; - // return ofs == NullOfs; - } + bool isNull() const { return _a == -1; } void Null() { - fileNo = -1; - ofs = 0; - } - void assertOk() { - assert(!isNull()); + _a = -1; + ofs = 0; /* note NullOfs is different. todo clean up. see refs to NullOfs in code - use is valid but outside DiskLoc context so confusing as-is. */ } + void assertOk() { assert(!isNull()); } void setInvalid() { - fileNo = -2; + _a = -2; ofs = 0; } - bool isValid() const { - return fileNo != -2; - } + bool isValid() const { return _a != -2; } string toString() const { if ( isNull() ) return "null"; stringstream ss; - ss << hex << fileNo << ':' << ofs; + ss << hex << _a << ':' << ofs; return ss.str(); } - BSONObj toBSONObj() const { - return BSON( "file" << fileNo << "offset" << ofs ); - } + BSONObj toBSONObj() const { return BSON( "file" << _a << "offset" << ofs ); } - int& GETOFS() { - return ofs; - } - int getOfs() const { - return ofs; - } + int a() const { return _a; } + + int& GETOFS() { return ofs; } + int getOfs() const { return ofs; } void set(int a, int b) { - fileNo=a; + _a=a; ofs=b; } - void setOfs(int _fileNo, int _ofs) { - fileNo = _fileNo; - ofs = _ofs; - } void inc(int amt) { assert( !isNull() ); @@ -112,23 +98,23 @@ namespace mongo { } bool sameFile(DiskLoc b) { - return fileNo == b.fileNo; + return _a== b._a; } bool operator==(const DiskLoc& b) const { - return fileNo==b.fileNo && ofs == b.ofs; + return _a==b._a&& ofs == b.ofs; } bool operator!=(const DiskLoc& b) const { return !(*this==b); } const DiskLoc& operator=(const DiskLoc& b) { - fileNo=b.fileNo; + _a=b._a; ofs = b.ofs; //assert(ofs!=0); return *this; } int compare(const DiskLoc& b) const { - int x = fileNo - b.fileNo; + int x = _a - b._a; if ( x ) return x; return ofs - b.ofs; @@ -137,18 +123,27 @@ namespace mongo { return compare(b) < 0; } - /* get the "thing" associated with this disk location. - it is assumed the object is what it is -- you must asure that: - think of this as an unchecked type cast. + /** + * Marks this disk loc for writing + * @returns a non const reference to this disk loc + * This function explicitly signals we are writing and casts away const + */ + DiskLoc& writing() const; // see dur.h + + /* Get the "thing" associated with this disk location. + it is assumed the object is what you say it is -- you must assure that + (think of this as an unchecked type cast) + Note: set your Context first so that the database to which the diskloc applies is known. 
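        // Illustrative sketch only, not from the patch: basic DiskLoc semantics as defined
        // above -- a (file number, offset) pair, with _a == -1 meaning null and -2 invalid.
        static void diskLocSketch() {
            DiskLoc a( 0 , 4096 );                 // file ._0, offset 0x1000
            DiskLoc b( 1 , 0 );                    // anything in file ._1 orders after ._0
            assert( a.a() == 0 && a.getOfs() == 4096 );
            assert( a.compare( b ) < 0 && a != b );
            DiskLoc n;                             // default construction yields the null loc
            assert( n.isNull() && ! a.isNull() );
            n.setInvalid();
            assert( ! n.isValid() );
            log() << a.toString() << endl;         // "0:1000" (offset printed in hex)
        }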
*/ BSONObj obj() const; Record* rec() const; DeletedRecord* drec() const; Extent* ext() const; - BtreeBucket* btree() const; - BtreeBucket* btreemod() const; // marks modified / dirty + const BtreeBucket* btree() const; + // Explicitly signals we are writing and casts away const + BtreeBucket* btreemod() const; - MongoDataFile& pdf() const; + /*MongoDataFile& pdf() const;*/ }; #pragma pack() diff --git a/db/driverHelpers.cpp b/db/driverHelpers.cpp index d8971ad..d98a33b 100644 --- a/db/driverHelpers.cpp +++ b/db/driverHelpers.cpp @@ -24,11 +24,11 @@ #include "pch.h" #include "jsobj.h" #include "pdfile.h" -#include "namespace.h" +#include "namespace-inl.h" #include "commands.h" #include "cmdline.h" #include "btree.h" -#include "curop.h" +#include "curop-inl.h" #include "../util/background.h" #include "../scripting/engine.h" @@ -36,18 +36,18 @@ namespace mongo { class BasicDriverHelper : public Command { public: - BasicDriverHelper( const char * name ) : Command( name ){} - + BasicDriverHelper( const char * name ) : Command( name ) {} + virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } - virtual bool slaveOverrideOk(){ return true; } + virtual bool slaveOverrideOk() { return true; } }; class ObjectIdTest : public BasicDriverHelper { public: - ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ){} - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ - if ( cmdObj.firstElement().type() != jstOID ){ + ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ) {} + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if ( cmdObj.firstElement().type() != jstOID ) { errmsg = "not oid"; return false; } diff --git a/db/dur.cpp b/db/dur.cpp new file mode 100644 index 0000000..15b4565 --- /dev/null +++ b/db/dur.cpp @@ -0,0 +1,635 @@ +// @file dur.cpp durability in the storage engine (crash-safeness / journaling) + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +/* + phases + + PREPLOGBUFFER + we will build an output buffer ourself and then use O_DIRECT + we could be in read lock for this + for very large objects write directly to redo log in situ? + WRITETOJOURNAL + we could be unlocked (the main db lock that is...) for this, with sufficient care, but there is some complexity + have to handle falling behind which would use too much ram (going back into a read lock would suffice to stop that). + for now (1.7.5/1.8.0) we are in read lock which is not ideal. + WRITETODATAFILES + apply the writes back to the non-private MMF after they are for certain in redo log + REMAPPRIVATEVIEW + we could in a write lock quickly flip readers back to the main view, then stay in read lock and do our real + remapping. with many files (e.g., 1000), remapping could be time consuming (several ms), so we don't want + to be too frequent. 
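    in code form, the cycle above is roughly the following (locking elided; the real
    sequencing lives in _groupCommit() and REMAPPRIVATEVIEW() later in this file):

        PREPLOGBUFFER();                    // serialize the declared write intents into the buffer
        WRITETOJOURNAL(commitJob._ab);      // append + fsync the journal -- writes are now recoverable
        commitJob.notifyCommitted();        // safe to acknowledge getLastError waiters here
        WRITETODATAFILES();                 // apply the same bytes to the shared (non-private) views
        commitJob.reset();
        REMAPPRIVATEVIEW();                 // done fractionally, under the write lock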
+ there could be a slow down immediately after remapping as fresh copy-on-writes for commonly written pages will + be required. so doing these remaps fractionally is helpful. + + @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc +*/ + +#include "pch.h" +#include "cmdline.h" +#include "client.h" +#include "dur.h" +#include "dur_journal.h" +#include "dur_commitjob.h" +#include "dur_recover.h" +#include "../util/concurrency/race.h" +#include "../util/mongoutils/hash.h" +#include "../util/mongoutils/str.h" +#include "../util/timer.h" +#include "dur_stats.h" + +using namespace mongoutils; + +namespace mongo { + + namespace dur { + + void WRITETODATAFILES(); + void PREPLOGBUFFER(); + + /** declared later in this file + only used in this file -- use DurableInterface::commitNow() outside + */ + static void groupCommit(); + + CommitJob commitJob; + + Stats stats; + + void Stats::S::reset() { + memset(this, 0, sizeof(*this)); + } + + Stats::Stats() { + _a.reset(); + _b.reset(); + curr = &_a; + _intervalMicros = 3000000; + } + + Stats::S * Stats::other() { + return curr == &_a ? &_b : &_a; + } + + BSONObj Stats::S::_asObj() { + return BSON( + "commits" << _commits << + "journaledMB" << _journaledBytes / 1000000.0 << + "writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 << + "commitsInWriteLock" << _commitsInWriteLock << + "earlyCommits" << _earlyCommits << + "timeMs" << + BSON( "dt" << _dtMillis << + "prepLogBuffer" << (unsigned) (_prepLogBufferMicros/1000) << + "writeToJournal" << (unsigned) (_writeToJournalMicros/1000) << + "writeToDataFiles" << (unsigned) (_writeToDataFilesMicros/1000) << + "remapPrivateView" << (unsigned) (_remapPrivateViewMicros/1000) + ) + ); + } + + BSONObj Stats::asObj() { + return other()->_asObj(); + } + + void Stats::rotate() { + unsigned long long now = curTimeMicros64(); + unsigned long long dt = now - _lastRotate; + if( dt >= _intervalMicros && _intervalMicros ) { + // rotate + curr->_dtMillis = (unsigned) (dt/1000); + _lastRotate = now; + curr = other(); + curr->reset(); + } + } + + void NonDurableImpl::setNoJournal(void *dst, void *src, unsigned len) { + memcpy(dst, src, len); + } + + void DurableImpl::setNoJournal(void *dst, void *src, unsigned len) { + MemoryMappedFile::makeWritable(dst, len); + + // we stay in this mutex for everything to work with DurParanoid/validateSingleMapMatches + // + // this also makes setNoJournal threadsafe, which is good as we call it from a read (not a write) lock + // in class SlaveTracking + // + scoped_lock lk( privateViews._mutex() ); + size_t ofs; + MongoMMF *f = privateViews.find_inlock(dst, ofs); + assert(f); + void *w = (((char *)f->view_write())+ofs); + // first write it to the writable (file) view + memcpy(w, src, len); + if( memcmp(w, dst, len) ) { + // if we get here, a copy-on-write had previously occurred. so write it to the private view too + // to keep them in sync. we do this as we do not want to cause a copy on write unnecessarily. + memcpy(dst, src, len); + } + } + + /** base declare write intent function that all the helpers call. 
*/ + void DurableImpl::declareWriteIntent(void *p, unsigned len) { + commitJob.note(p, len); + } + + static DurableImpl* durableImpl = new DurableImpl(); + static NonDurableImpl* nonDurableImpl = new NonDurableImpl(); + DurableInterface* DurableInterface::_impl = nonDurableImpl; + + void DurableInterface::enableDurability() { + assert(_impl == nonDurableImpl); + _impl = durableImpl; + } + + void DurableInterface::disableDurability() { + assert(_impl == durableImpl); + massert(13616, "can't disable durability with pending writes", !commitJob.hasWritten()); + _impl = nonDurableImpl; + } + + bool DurableImpl::commitNow() { + stats.curr->_earlyCommits++; + groupCommit(); + return true; + } + + bool DurableImpl::awaitCommit() { + commitJob.awaitNextCommit(); + return true; + } + + /** Declare that a file has been created + Normally writes are applied only after journaling, for safety. But here the file + is created first, and the journal will just replay the creation if the create didn't + happen because of crashing. + */ + void DurableImpl::createdFile(string filename, unsigned long long len) { + shared_ptr op( new FileCreatedOp(filename, len) ); + commitJob.noteOp(op); + } + + void* DurableImpl::writingPtr(void *x, unsigned len) { + void *p = x; + declareWriteIntent(p, len); + return p; + } + + /** declare intent to write + @param ofs offset within buf at which we will write + @param len the length at ofs we will write + @return new buffer pointer. + */ + void* DurableImpl::writingAtOffset(void *buf, unsigned ofs, unsigned len) { + char *p = (char *) buf; + declareWriteIntent(p+ofs, len); + return p; + } + + void* DurableImpl::writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges ) { + char *p = (char *) buf; + for( vector< pair< long long, unsigned > >::const_iterator i = ranges.begin(); + i != ranges.end(); ++i ) { + declareWriteIntent( p + i->first, i->second ); + } + return p; + } + + bool DurableImpl::commitIfNeeded() { + DEV commitJob._nSinceCommitIfNeededCall = 0; + if (commitJob.bytes() > UncommittedBytesLimit) { // should this also fire if CmdLine::DurAlwaysCommit? + stats.curr->_earlyCommits++; + groupCommit(); + return true; + } + return false; + } + + /** Used in _DEBUG builds to check that we didn't overwrite the last intent + that was declared. called just before writelock release. we check a few + bytes after the declared region to see if they changed. 
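        // Illustrative sketch only, not from the patch: the calling pattern the write-intent
        // helpers above exist for. Mapped data is never modified directly; the pointer (or a
        // field reference) is routed through the durability interface first, so the changed
        // bytes are captured for the journal. NamespaceDetails/nIndexes is just an example field.
        static void writeIntentSketch( NamespaceDetails *d , DiskLoc &head , DiskLoc newHead ) {
            getDur().writingInt( d->nIndexes ) = 0;         // intent declared, then the store
            getDur().writingDiskLoc( head ) = newHead;      // same idea for a DiskLoc field
            // a plain "d->nIndexes = 0;" would bypass the journal and is unsafe with --dur
        }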
+ + @see MongoMutex::_releasedWriteLock + + SLOW + */ +#if 0 + void DurableImpl::debugCheckLastDeclaredWrite() { + static int n; + ++n; + + assert(debug && cmdLine.dur); + if (commitJob.writes().empty()) + return; + const WriteIntent &i = commitJob.lastWrite(); + size_t ofs; + MongoMMF *mmf = privateViews.find(i.start(), ofs); + if( mmf == 0 ) + return; + size_t past = ofs + i.length(); + if( mmf->length() < past + 8 ) + return; // too close to end of view + char *priv = (char *) mmf->getView(); + char *writ = (char *) mmf->view_write(); + unsigned long long *a = (unsigned long long *) (priv+past); + unsigned long long *b = (unsigned long long *) (writ+past); + if( *a != *b ) { + for( set::iterator it(commitJob.writes().begin()), end((commitJob.writes().begin())); it != end; ++it ) { + const WriteIntent& wi = *it; + char *r1 = (char*) wi.start(); + char *r2 = (char*) wi.end(); + if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { + //log() << "it's ok " << wi.p << ' ' << wi.len << endl; + return; + } + } + log() << "dur data after write area " << i.start() << " does not agree" << endl; + log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; + log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; + log() << " n: " << n << endl; + log() << endl; + } + } +#endif + + /** write the buffer we have built to the journal and fsync it. + outside of lock as that could be slow. + */ + static void WRITETOJOURNAL(AlignedBuilder& ab) { + Timer t; + journal(ab); + stats.curr->_writeToJournalMicros += t.micros(); + } + + // Functor to be called over all MongoFiles + + class validateSingleMapMatches { + public: + validateSingleMapMatches(unsigned long long& bytes) :_bytes(bytes) {} + void operator () (MongoFile *mf) { + if( mf->isMongoMMF() ) { + MongoMMF *mmf = (MongoMMF*) mf; + const char *p = (const char *) mmf->getView(); + const char *w = (const char *) mmf->view_write(); + + if (!p || !w) return; // File not fully opened yet + + _bytes += mmf->length(); + + assert( mmf->length() == (unsigned) mmf->length() ); + { + scoped_lock lk( privateViews._mutex() ); // see setNoJournal + if (memcmp(p, w, (unsigned) mmf->length()) == 0) + return; // next file + } + + unsigned low = 0xffffffff; + unsigned high = 0; + log() << "DurParanoid mismatch in " << mmf->filename() << endl; + int logged = 0; + unsigned lastMismatch = 0xffffffff; + for( unsigned i = 0; i < mmf->length(); i++ ) { + if( p[i] != w[i] ) { + if( lastMismatch != 0xffffffff && lastMismatch+1 != i ) + log() << endl; // separate blocks of mismatches + lastMismatch= i; + if( ++logged < 60 ) { + stringstream ss; + ss << "mismatch ofs:" << hex << i << "\tfilemap:" << setw(2) << (unsigned) w[i] << "\tprivmap:" << setw(2) << (unsigned) p[i]; + if( p[i] > 32 && p[i] <= 126 ) + ss << '\t' << p[i]; + log() << ss.str() << endl; + } + if( logged == 60 ) + log() << "..." << endl; + if( i < low ) low = i; + if( i > high ) high = i; + } + } + if( low != 0xffffffff ) { + std::stringstream ss; + ss << "dur error warning views mismatch " << mmf->filename() << ' ' << (hex) << low << ".." << high << " len:" << high-low+1; + log() << ss.str() << endl; + log() << "priv loc: " << (void*)(p+low) << ' ' << endl; + set& b = commitJob.writes(); + (void)b; // mark as unused. Useful for inspection in debugger + + // should we abort() here so this isn't unnoticed in some circumstances? + massert(13599, "Written data does not match in-memory view. 
Missing WriteIntent?", false); + } + } + } + private: + unsigned long long& _bytes; + }; + + /** (SLOW) diagnostic to check that the private view and the non-private view are in sync. + */ + void debugValidateAllMapsMatch() { + if( ! (cmdLine.durOptions & CmdLine::DurParanoid) ) + return; + + unsigned long long bytes = 0; + Timer t; + MongoFile::forEach(validateSingleMapMatches(bytes)); + OCCASIONALLY log() << "DurParanoid map check " << t.millis() << "ms for " << (bytes / (1024*1024)) << "MB" << endl; + } + + extern size_t privateMapBytes; + + /** We need to remap the private views periodically. otherwise they would become very large. + Call within write lock. + */ + void _REMAPPRIVATEVIEW() { + static unsigned startAt; + static unsigned long long lastRemap; + + dbMutex.assertWriteLocked(); + dbMutex._remapPrivateViewRequested = false; + assert( !commitJob.hasWritten() ); + + // we want to remap all private views about every 2 seconds. there could be ~1000 views so + // we do a little each pass; beyond the remap time, more significantly, there will be copy on write + // faults after remapping, so doing a little bit at a time will avoid big load spikes on + // remapping. + unsigned long long now = curTimeMicros64(); + double fraction = (now-lastRemap)/2000000.0; + lastRemap = now; + + rwlock lk(MongoFile::mmmutex, false); + set& files = MongoFile::getAllFiles(); + unsigned sz = files.size(); + if( sz == 0 ) + return; + + { + // be careful not to use too much memory if the write rate is + // extremely high + double f = privateMapBytes / ((double)UncommittedBytesLimit); + if( f > fraction ) { + fraction = f; + } + privateMapBytes = 0; + } + + unsigned ntodo = (unsigned) (sz * fraction); + if( ntodo < 1 ) ntodo = 1; + if( ntodo > sz ) ntodo = sz; + + const set::iterator b = files.begin(); + const set::iterator e = files.end(); + set::iterator i = b; + // skip to our starting position + for( unsigned x = 0; x < startAt; x++ ) { + i++; + if( i == e ) i = b; + } + startAt = (startAt + ntodo) % sz; // mark where to start next time + + for( unsigned x = 0; x < ntodo; x++ ) { + dassert( i != e ); + if( (*i)->isMongoMMF() ) { + MongoMMF *mmf = (MongoMMF*) *i; + assert(mmf); + if( mmf->willNeedRemap() ) { + mmf->willNeedRemap() = false; + mmf->remapThePrivateView(); + } + i++; + if( i == e ) i = b; + } + } + } + void REMAPPRIVATEVIEW() { + Timer t; + _REMAPPRIVATEVIEW(); + stats.curr->_remapPrivateViewMicros += t.micros(); + } + + mutex groupCommitMutex("groupCommit"); + + /** locking: in read lock when called. */ + static void _groupCommit() { + stats.curr->_commits++; + + if( !commitJob.hasWritten() ) { + // getlasterror request could have came after the data was already committed + commitJob.notifyCommitted(); + return; + } + + // we need to make sure two group commits aren't running at the same time + // (and we are only read locked in the dbMutex, so it could happen) + scoped_lock lk(groupCommitMutex); + + PREPLOGBUFFER(); + + // todo : write to the journal outside locks, as this write can be slow. + // however, be careful then about remapprivateview as that cannot be done + // if new writes are then pending in the private maps. + WRITETOJOURNAL(commitJob._ab); + + // data is now in the journal, which is sufficient for acknowledging getLastError. 
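        // Worked example (illustrative only) of the fractional-remap sizing in
        // _REMAPPRIVATEVIEW() above: with ~1000 mapped views and a pass every ~100ms,
        // fraction = 0.1 / 2.0 = 0.05, so about 50 views are remapped per pass and the
        // full set is cycled roughly every 2 seconds instead of in one large spike.
        static unsigned remapBatchSizeSketch( unsigned nViews , double secsSinceLastPass ) {
            double fraction = secsSinceLastPass / 2.0;      // target: cover everything ~every 2s
            unsigned ntodo = (unsigned)( nViews * fraction );
            if( ntodo < 1 ) ntodo = 1;
            if( ntodo > nViews ) ntodo = nViews;
            return ntodo;                                   // e.g. 1000 views, 0.1s -> 50
        }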
+ // (ok to crash after that) + commitJob.notifyCommitted(); + + WRITETODATAFILES(); + + commitJob.reset(); + + // REMAPPRIVATEVIEW + // + // remapping private views must occur after WRITETODATAFILES otherwise + // we wouldn't see newly written data on reads. + // + DEV assert( !commitJob.hasWritten() ); + if( !dbMutex.isWriteLocked() ) { + // this needs done in a write lock (as there is a short window during remapping when each view + // might not exist) thus we do it on the next acquisition of that instead of here (there is no + // rush if you aren't writing anyway -- but it must happen, if it is done, before any uncommitted + // writes occur). If desired, perhpas this can be eliminated on posix as it may be that the remap + // is race-free there. + // + dbMutex._remapPrivateViewRequested = true; + } + else { + stats.curr->_commitsInWriteLock++; + // however, if we are already write locked, we must do it now -- up the call tree someone + // may do a write without a new lock acquisition. this can happen when MongoMMF::close() calls + // this method when a file (and its views) is about to go away. + // + REMAPPRIVATEVIEW(); + } + } + + /** locking in read lock when called + @see MongoMMF::close() + */ + static void groupCommit() { + // we need to be at least read locked on the dbMutex so that we know the write intent data + // structures are not changing while we work + dbMutex.assertAtLeastReadLocked(); + + try { + _groupCommit(); + } + catch(DBException& e ) { + log() << "dbexception in groupCommit causing immediate shutdown: " << e.toString() << endl; + abort(); + } + catch(std::ios_base::failure& e) { + log() << "ios_base exception in groupCommit causing immediate shutdown: " << e.what() << endl; + abort(); + } + catch(std::bad_alloc& e) { + log() << "bad_alloc exception in groupCommit causing immediate shutdown: " << e.what() << endl; + abort(); + } + catch(std::exception& e) { + log() << "exception in dur::groupCommit causing immediate shutdown: " << e.what() << endl; + abort(); // based on myTerminate() + } + } + + static void go() { + if( !commitJob.hasWritten() ){ + commitJob.notifyCommitted(); + return; + } + + { + readlocktry lk("", 1000); + if( lk.got() ) { + groupCommit(); + return; + } + } + + // starvation on read locks could occur. so if read lock acquisition is slow, try to get a + // write lock instead. 
otherwise journaling could be delayed too long (too much data will + // not accumulate though, as commitIfNeeded logic will have executed in the meantime if there + // has been writes) + writelock lk; + groupCommit(); + } + + /** called when a MongoMMF is closing -- we need to go ahead and group commit in that case before its + views disappear + */ + void closingFileNotification() { + if (!cmdLine.dur) + return; + + if( dbMutex.atLeastReadLocked() ) { + groupCommit(); + } + else { + assert( inShutdown() ); + if( commitJob.hasWritten() ) { + log() << "dur warning files are closing outside locks with writes pending" << endl; + } + } + } + + CodeBlock durThreadMain; + + void durThread() { + Client::initThread("dur"); + const int HowOftenToGroupCommitMs = 90; + while( !inShutdown() ) { + sleepmillis(10); + CodeBlock::Within w(durThreadMain); + try { + int millis = HowOftenToGroupCommitMs; + { + stats.rotate(); + { + Timer t; + journalRotate(); // note we do this part outside of mongomutex + millis -= t.millis(); + assert( millis <= HowOftenToGroupCommitMs ); + if( millis < 5 ) + millis = 5; + } + + // we do this in a couple blocks, which makes it a tiny bit faster (only a little) on throughput, + // but is likely also less spiky on our cpu usage, which is good: + sleepmillis(millis/2); + commitJob.wi()._deferred.invoke(); + sleepmillis(millis/2); + commitJob.wi()._deferred.invoke(); + } + + go(); + } + catch(std::exception& e) { + log() << "exception in durThread causing immediate shutdown: " << e.what() << endl; + abort(); // based on myTerminate() + } + } + cc().shutdown(); + } + + void recover(); + + void releasingWriteLock() { + // implicit commitIfNeeded check on each write unlock + DEV commitJob._nSinceCommitIfNeededCall = 0; // implicit commit if needed + if( commitJob.bytes() > UncommittedBytesLimit || cmdLine.durOptions & CmdLine::DurAlwaysCommit ) { + stats.curr->_earlyCommits++; + groupCommit(); + } + } + + void preallocateFiles(); + + /** at startup, recover, and then start the journal threads */ + void startup() { + if( !cmdLine.dur ) + return; + + DurableInterface::enableDurability(); + + journalMakeDir(); + try { + recover(); + } + catch(...) { + log() << "exception during recovery" << endl; + throw; + } + + preallocateFiles(); + + boost::thread t(durThread); + } + + void DurableImpl::syncDataAndTruncateJournal() { + dbMutex.assertWriteLocked(); + + groupCommit(); + MongoFile::flushAll(true); + journalCleanup(); + + assert(!haveJournalFiles()); // Double check post-conditions + } + + } // namespace dur + +} // namespace mongo diff --git a/db/dur.h b/db/dur.h new file mode 100644 index 0000000..a8035e4 --- /dev/null +++ b/db/dur.h @@ -0,0 +1,201 @@ +// @file dur.h durability support + +#pragma once + +#include "diskloc.h" +#include "mongommf.h" + +namespace mongo { + + class NamespaceDetails; + + namespace dur { + + // a smaller limit is likely better on 32 bit +#if defined(__i386__) || defined(_M_IX86) + const unsigned UncommittedBytesLimit = 50 * 1024 * 1024; +#else + const unsigned UncommittedBytesLimit = 100 * 1024 * 1024; +#endif + + /** Call during startup so durability module can initialize + Throws if fatal error + Does nothing if cmdLine.dur is false + */ + void startup(); + + class DurableInterface : boost::noncopyable { + public: + virtual ~DurableInterface() { log() << "ERROR warning ~DurableInterface not intended to be called" << endl; } + + /** Declare that a file has been created + Normally writes are applied only after journaling, for safety. 
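        // Illustrative sketch only, not from the patch: how a long-running, non-yielding write
        // (an index build, an $atomic update, ...) is expected to use commitIfNeeded(), declared
        // below -- it group-commits early once roughly UncommittedBytesLimit of changes is
        // pending, keeping the private views and the journal backlog bounded.
        static void longWriteLoopSketch( const vector<BSONObj>& docs ) {    // hypothetical caller
            for ( unsigned i = 0; i < docs.size(); i++ ) {
                // ... declare write intents and modify the data files for docs[i] ...
                getDur().commitIfNeeded();      // cheap no-op until enough bytes are pending
            }
        }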
But here the file + is created first, and the journal will just replay the creation if the create didn't + happen because of crashing. + */ + virtual void createdFile(string filename, unsigned long long len) = 0; + + /** Declarations of write intent. + + Use these methods to declare "i'm about to write to x and it should be logged for redo." + + Failure to call writing...() is checked in _DEBUG mode by using a read only mapped view + (i.e., you'll segfault if the code is covered in that situation). The _DEBUG check doesn't + verify that your length is correct though. + */ + + /** declare intent to write to x for up to len + @return pointer where to write. this is modified when testIntent is true. + */ + virtual void* writingPtr(void *x, unsigned len) = 0; + + /** declare write intent; should already be in the write view to work correctly when testIntent is true. + if you aren't, use writingPtr() instead. + */ + virtual void declareWriteIntent(void *x, unsigned len) = 0; + + /** declare intent to write + @param ofs offset within buf at which we will write + @param len the length at ofs we will write + @return new buffer pointer. this is modified when testIntent is true. + */ + virtual void* writingAtOffset(void *buf, unsigned ofs, unsigned len) = 0; + + /** declare intent to write + @param ranges vector of pairs representing ranges. Each pair + comprises an offset from buf where a range begins, then the + range length. + @return new buffer pointer. this is modified when testIntent is true. + */ + virtual void* writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges ) = 0; + + /** Wait for acknowledgement of the next group commit. + @return true if --dur is on. There will be delay. + @return false if --dur is off. + */ + virtual bool awaitCommit() = 0; + + /** Commit immediately. + + Generally, you do not want to do this often, as highly granular committing may affect + performance. + + Does not return until the commit is complete. + + You must be at least read locked when you call this. Ideally, you are not write locked + and then read operations can occur concurrently. + + @return true if --dur is on. + @return false if --dur is off. (in which case there is action) + */ + virtual bool commitNow() = 0; + + /** Commit if enough bytes have been modified. Current threshold is 50MB + + The idea is that long running write operations that dont yield + (like creating an index or update with $atomic) can call this + whenever the db is in a sane state and it will prevent commits + from growing too large. + @return true if commited + */ + virtual bool commitIfNeeded() = 0; + + /** Declare write intent for a DiskLoc. @see DiskLoc::writing() */ + inline DiskLoc& writingDiskLoc(DiskLoc& d) { return *((DiskLoc*) writingPtr(&d, sizeof(d))); } + + /** Declare write intent for an int */ + inline int& writingInt(const int& d) { return *((int*) writingPtr((int*) &d, sizeof(d))); } + + /** "assume i've already indicated write intent, let me write" + redeclaration is fine too, but this is faster. + */ + template + inline + T* alreadyDeclared(T *x) { +#if defined(_TESTINTENT) + return (T*) MongoMMF::switchToPrivateView(x); +#else + return x; +#endif + } + + /** declare intent to write to x for sizeof(*x) */ + template + inline + T* writing(T *x) { + return (T*) writingPtr(x, sizeof(T)); + } + + /** write something that doesn't have to be journaled, as this write is "unimportant". + a good example is paddingFactor. 
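        // Illustrative sketch only, not from the patch: the paddingFactor case mentioned above.
        // An "unimportant" statistic is written with setNoJournal(), so it reaches the datafile
        // without being journaled -- losing it in a crash is acceptable, and journaling every
        // such update would be wasted I/O. (NamespaceDetails::paddingFactor is just the example.)
        static void paddingFactorSketch( NamespaceDetails *d , double newPadding ) {
            getDur().setNoJournal( &d->paddingFactor , &newPadding , sizeof(newPadding) );
        }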
+ can be thought of as memcpy(dst,src,len) + the dur implementation acquires a mutex in this method, so do not assume it is faster + without measuring! + */ + virtual void setNoJournal(void *dst, void *src, unsigned len) = 0; + + /** Commits pending changes, flushes all changes to main data + files, then removes the journal. + + This is useful as a "barrier" to ensure that writes before this + call will never go through recovery and be applied to files + that have had changes made after this call applied. + */ + virtual void syncDataAndTruncateJournal() = 0; + + static DurableInterface& getDur() { return *_impl; } + + private: + /** Intentionally unimplemented method. + It's very easy to manipulate Record::data open ended. Thus a call to writing(Record*) is suspect. + This will override the templated version and yield an unresolved external. + */ + Record* writing(Record* r); + /** Intentionally unimplemented method. BtreeBuckets are allocated in buffers larger than sizeof( BtreeBucket ). */ + BtreeBucket* writing( BtreeBucket* ); + /** Intentionally unimplemented method. NamespaceDetails may be based on references to 'Extra' objects. */ + NamespaceDetails* writing( NamespaceDetails* ); + + static DurableInterface* _impl; // NonDurableImpl at startup() + static void enableDurability(); // makes _impl a DurableImpl + static void disableDurability(); // makes _impl a NonDurableImpl + + // these need to be able to enable/disable Durability + friend void startup(); + friend class TempDisableDurability; + }; // class DurableInterface + + class NonDurableImpl : public DurableInterface { + void* writingPtr(void *x, unsigned len) { return x; } + void* writingAtOffset(void *buf, unsigned ofs, unsigned len) { return buf; } + void* writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges) { return buf; } + void declareWriteIntent(void *, unsigned) { } + void createdFile(string filename, unsigned long long len) { } + bool awaitCommit() { return false; } + bool commitNow() { return false; } + bool commitIfNeeded() { return false; } + void setNoJournal(void *dst, void *src, unsigned len); + void syncDataAndTruncateJournal() {} + }; + + class DurableImpl : public DurableInterface { + void* writingPtr(void *x, unsigned len); + void* writingAtOffset(void *buf, unsigned ofs, unsigned len); + void* writingRangesAtOffsets(void *buf, const vector< pair< long long, unsigned > > &ranges); + void declareWriteIntent(void *, unsigned); + void createdFile(string filename, unsigned long long len); + bool awaitCommit(); + bool commitNow(); + bool commitIfNeeded(); + void setNoJournal(void *dst, void *src, unsigned len); + void syncDataAndTruncateJournal(); + }; + + } // namespace dur + + inline dur::DurableInterface& getDur() { return dur::DurableInterface::getDur(); } + + /** declare that we are modifying a diskloc and this is a datafile write. */ + inline DiskLoc& DiskLoc::writing() const { return getDur().writingDiskLoc(*const_cast< DiskLoc * >( this )); } + +} diff --git a/db/dur_commitjob.cpp b/db/dur_commitjob.cpp new file mode 100644 index 0000000..aed38e8 --- /dev/null +++ b/db/dur_commitjob.cpp @@ -0,0 +1,210 @@ +/* @file dur_commitjob.cpp */ + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "dur_commitjob.h" +#include "taskqueue.h" + +namespace mongo { + + namespace dur { + + BOOST_STATIC_ASSERT( UncommittedBytesLimit > BSONObjMaxInternalSize * 3 ); + BOOST_STATIC_ASSERT( sizeof(void*)==4 || UncommittedBytesLimit > BSONObjMaxInternalSize * 6 ); + + void Writes::D::go(const Writes::D& d) { + commitJob.wi()._insertWriteIntent(d.p, d.len); + } + + void WriteIntent::absorb(const WriteIntent& other) { + dassert(overlaps(other)); + + void* newStart = min(start(), other.start()); + p = max(p, other.p); + len = (char*)p - (char*)newStart; + + dassert(contains(other)); + } + + void Writes::clear() { + dbMutex.assertAtLeastReadLocked(); + + _alreadyNoted.clear(); + _writes.clear(); + _ops.clear(); + _drained = false; +#if defined(DEBUG_WRITE_INTENT) + cout << "_debug clear\n"; + _debug.clear(); +#endif + } + +#if defined(DEBUG_WRITE_INTENT) + void assertAlreadyDeclared(void *p, int len) { + if( commitJob.wi()._debug[p] >= len ) + return; + log() << "assertAlreadyDeclared fails " << (void*)p << " len:" << len << ' ' << commitJob.wi()._debug[p] << endl; + printStackTrace(); + abort(); + } +#endif + + void Writes::_insertWriteIntent(void* p, int len) { + WriteIntent wi(p, len); + + if (_writes.empty()) { + _writes.insert(wi); + return; + } + + typedef set::const_iterator iterator; // shorter + + iterator closest = _writes.lower_bound(wi); + // closest.end() >= wi.end() + + if ((closest != _writes.end() && closest->overlaps(wi)) || // high end + (closest != _writes.begin() && (--closest)->overlaps(wi))) { // low end + if (closest->contains(wi)) + return; // nothing to do + + // find overlapping range and merge into wi + iterator end(closest); + iterator begin(closest); + while ( end->overlaps(wi)) { wi.absorb(*end); ++end; if (end == _writes.end()) break; } // look forwards + while (begin->overlaps(wi)) { wi.absorb(*begin); if (begin == _writes.begin()) break; --begin; } // look backwards + if (!begin->overlaps(wi)) ++begin; // make inclusive + + DEV { // ensure we're not deleting anything we shouldn't + for (iterator it(begin); it != end; ++it) { + assert(wi.contains(*it)); + } + } + + _writes.erase(begin, end); + _writes.insert(wi); + + DEV { // ensure there are no overlaps + // this can be very slow - n^2 - so make it RARELY + RARELY { + for (iterator it(_writes.begin()), end(boost::prior(_writes.end())); it != end; ++it) { + assert(!it->overlaps(*boost::next(it))); + } + } + } + } + else { // no entries overlapping wi + _writes.insert(closest, wi); + } + } + + + /** note an operation other than a "basic write" */ + void CommitJob::noteOp(shared_ptr p) { + DEV dbMutex.assertWriteLocked(); + dassert( cmdLine.dur ); + if( !_hasWritten ) { + assert( !dbMutex._remapPrivateViewRequested ); + _hasWritten = true; + } + _wi._ops.push_back(p); + } + + size_t privateMapBytes = 0; // used by _REMAPPRIVATEVIEW to track how much / how fast to remap + + void CommitJob::reset() { + _hasWritten = false; + _wi.clear(); + _ab.reset(); + privateMapBytes += _bytes; + _bytes = 0; + _nSinceCommitIfNeededCall = 0; + } + + CommitJob::CommitJob() : _ab(4 * 1024 * 1024) , _hasWritten(false), + _bytes(0), 
_nSinceCommitIfNeededCall(0) { } + + void CommitJob::note(void* p, int len) { + // from the point of view of the dur module, it would be fine (i think) to only + // be read locked here. but must be at least read locked to avoid race with + // remapprivateview + DEV dbMutex.assertWriteLocked(); + dassert( cmdLine.dur ); + if( !_wi._alreadyNoted.checkAndSet(p, len) ) { + MemoryMappedFile::makeWritable(p, len); + + if( !_hasWritten ) { + // you can't be writing if one of these is pending, so this is a verification. + assert( !dbMutex._remapPrivateViewRequested ); + + // we don't bother doing a group commit when nothing is written, so we have a var to track that + _hasWritten = true; + } + + /** tips for debugging: + if you have an incorrect diff between data files in different folders + (see jstests/dur/quick.js for example), + turn this on and see what is logged. if you have a copy of its output from before the + regression, a simple diff of these lines would tell you a lot likely. + */ +#if 0 && defined(_DEBUG) + { + static int n; + if( ++n < 10000 ) { + size_t ofs; + MongoMMF *mmf = privateViews._find(w.p, ofs); + if( mmf ) { + log() << "DEBUG note write intent " << w.p << ' ' << mmf->filename() << " ofs:" << hex << ofs << " len:" << w.len << endl; + } + else { + log() << "DEBUG note write intent " << w.p << ' ' << w.len << " NOT FOUND IN privateViews" << endl; + } + } + else if( n == 10000 ) { + log() << "DEBUG stopping write intent logging, too much to log" << endl; + } + } +#endif + + // remember intent. we will journal it in a bit + _wi.insertWriteIntent(p, len); + wassert( _wi._writes.size() < 2000000 ); + assert( _wi._writes.size() < 20000000 ); + + { + // a bit over conservative in counting pagebytes used + static size_t lastPos; // note this doesn't reset with each commit, but that is ok we aren't being that precise + size_t x = ((size_t) p) & ~0xfff; // round off to page address (4KB) + if( x != lastPos ) { + lastPos = x; + unsigned b = (len+4095) & ~0xfff; + _bytes += b; +#if defined(_DEBUG) + _nSinceCommitIfNeededCall++; + if( _nSinceCommitIfNeededCall >= 80 ) { + if( _nSinceCommitIfNeededCall % 40 == 0 ) + log() << "debug nsincecommitifneeded:" << _nSinceCommitIfNeededCall << " bytes:" << _bytes << endl; + } +#endif + uassert(13623, "DR102 too much data written uncommitted", _bytes < UncommittedBytesLimit * 3); + } + } + } + } + + } +} diff --git a/db/dur_commitjob.h b/db/dur_commitjob.h new file mode 100644 index 0000000..104d054 --- /dev/null +++ b/db/dur_commitjob.h @@ -0,0 +1,221 @@ +/* @file dur_commitjob.h used by dur.cpp +*/ + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#include "../util/alignedbuilder.h" +#include "../util/mongoutils/hash.h" +#include "../util/concurrency/synchronization.h" +#include "cmdline.h" +#include "durop.h" +#include "dur.h" +#include "taskqueue.h" + +//#define DEBUG_WRITE_INTENT 1 + +namespace mongo { + namespace dur { + + /** declaration of an intent to write to a region of a memory mapped view + * + * We store the end rather than the start pointer to make operator< faster + * since that is heavily used in set lookup. + */ + struct WriteIntent { /* copyable */ + WriteIntent() : w_ptr(0), p(0) { } + WriteIntent(void *a, unsigned b) : w_ptr(0), p((char*)a+b), len(b) { } + + void* start() const { return (char*)p - len; } + void* end() const { return p; } + unsigned length() const { return len; } + + bool operator < (const WriteIntent& rhs) const { return end() < rhs.end(); } + + // can they be merged? + bool overlaps(const WriteIntent& rhs) const { + return (start() <= rhs.end() && end() >= rhs.start()); + } + + // is merging necessary? + bool contains(const WriteIntent& rhs) const { + return (start() <= rhs.start() && end() >= rhs.end()); + } + + // merge into me + void absorb(const WriteIntent& other); + + friend ostream& operator << (ostream& out, const WriteIntent& wi) { + return (out << "p: " << wi.p << " end: " << wi.end() << " len: " << wi.len); + } + + mutable void *w_ptr; // writable mapping of p. + // mutable because set::iterator is const but this isn't used in op< +#if defined(_EXPERIMENTAL) + mutable unsigned ofsInJournalBuffer; +#endif + private: + void *p; // intent to write up to p + unsigned len; // up to this len + }; + + /** try to remember things we have already marked for journaling. false negatives are ok if infrequent - + we will just log them twice. + */ + template + class Already : boost::noncopyable { + public: + Already() { clear(); } + void clear() { memset(this, 0, sizeof(*this)); } + + /* see if we have Already recorded/indicated our write intent for this region of memory. + automatically upgrades the length if the length was shorter previously. + @return true if already indicated. + */ + bool checkAndSet(void* p, int len) { + unsigned x = mongoutils::hashPointer(p); + pair nd = nodes[x % N]; + if( nd.first == p ) { + if( nd.second < len ) { + nd.second = len; + return false; // haven't indicated this len yet + } + return true; // already indicated + } + nd.first = p; + nd.second = len; + return false; // a new set + } + + private: + enum { N = Prime }; // this should be small the idea is that it fits in the cpu cache easily + pair nodes[N]; + }; + + /** our record of pending/uncommitted write intents */ + class Writes : boost::noncopyable { + struct D { + void *p; + unsigned len; + static void go(const D& d); + }; + public: + TaskQueue _deferred; + Already<127> _alreadyNoted; + set _writes; + vector< shared_ptr > _ops; // all the ops other than basic writes + bool _drained; // _deferred is drained? 
for asserting/testing + + /** reset the Writes structure (empties all the above) */ + void clear(); + + /** merges into set (ie non-deferred version) */ + void _insertWriteIntent(void* p, int len); + + void insertWriteIntent(void* p, int len) { +#if defined(DEBUG_WRITE_INTENT) + if( _debug[p] < len ) + _debug[p] = len; +#endif + D d; + d.p = p; + d.len = len; + _deferred.defer(d); + } + +#ifdef _DEBUG + WriteIntent _last; +#endif +#if defined(DEBUG_WRITE_INTENT) + map _debug; +#endif + }; + +#if defined(DEBUG_WRITE_INTENT) + void assertAlreadyDeclared(void *, int len); +#else + inline void assertAlreadyDeclared(void *, int len) { } +#endif + + /** A commit job object for a group commit. Currently there is one instance of this object. + + concurrency: assumption is caller is appropriately locking. + for example note() invocations are from the write lock. + other uses are in a read lock from a single thread (durThread) + */ + class CommitJob : boost::noncopyable { + public: + AlignedBuilder _ab; // for direct i/o writes to journal + + CommitJob(); + + /** record/note an intent to write */ + void note(void* p, int len); + + /** note an operation other than a "basic write" */ + void noteOp(shared_ptr p); + + set& writes() { + if( !_wi._drained ) { + // generally, you don't want to use the set until it is prepared (after deferred ops are applied) + // thus this assert here. + assert(false); + } + return _wi._writes; + } + + vector< shared_ptr >& ops() { return _wi._ops; } + + /** this method is safe to call outside of locks. when haswritten is false we don't do any group commit and avoid even + trying to acquire a lock, which might be helpful at times. + */ + bool hasWritten() const { return _hasWritten; } + + /** we use the commitjob object over and over, calling reset() rather than reconstructing */ + void reset(); + + /** the commit code calls this when data reaches the journal (on disk) */ + void notifyCommitted() { _notify.notifyAll(); } + + /** Wait until the next group commit occurs. That is, wait until someone calls notifyCommitted. */ + void awaitNextCommit() { + if( hasWritten() ) + _notify.wait(); + } + + /** we check how much written and if it is getting to be a lot, we commit sooner. */ + size_t bytes() const { return _bytes; } + +#if defined(_DEBUG) + const WriteIntent& lastWrite() const { return _wi._last; } +#endif + + Writes& wi() { return _wi; } + private: + bool _hasWritten; + Writes _wi; // todo: fix name + size_t _bytes; + NotifyAll _notify; // for getlasterror fsync:true acknowledgements + public: + unsigned _nSinceCommitIfNeededCall; + }; + + extern CommitJob commitJob; + + } +} diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp new file mode 100644 index 0000000..946f94c --- /dev/null +++ b/db/dur_journal.cpp @@ -0,0 +1,576 @@ +// @file dur_journal.cpp writing to the writeahead logging journal + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
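awaitNextCommit()/notifyCommitted() above let a client thread that needs durability block until the group-commit thread reports that the buffer reached the journal. A standalone sketch of that handshake using the standard library instead of MongoDB's NotifyAll primitive (a simplified analog, not the actual implementation):

#include <condition_variable>
#include <mutex>

// Each group commit bumps a generation counter; waiters block until the counter
// advances past the value they observed when they started waiting.
class CommitNotifier {
    std::mutex _m;
    std::condition_variable _cv;
    unsigned long long _commits = 0;
public:
    // called by the durability thread once the section is on disk
    void notifyCommitted() {
        { std::lock_guard<std::mutex> lk(_m); ++_commits; }
        _cv.notify_all();
    }
    // called by a client thread that wants its writes journaled before returning
    void awaitNextCommit() {
        std::unique_lock<std::mutex> lk(_m);
        unsigned long long seen = _commits;
        _cv.wait(lk, [&] { return _commits != seen; });
    }
};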
+*/ + +#include "pch.h" +#include "client.h" +#include "namespace.h" +#include "dur_journal.h" +#include "dur_journalformat.h" +#include "dur_stats.h" +#include "../util/logfile.h" +#include "../util/timer.h" +#include "../util/alignedbuilder.h" +#include "../util/message.h" // getelapsedtimemillis +#include "../util/concurrency/race.h" +#include +#undef assert +#define assert MONGO_assert +#include "../util/mongoutils/str.h" +#include "dur_journalimpl.h" +#include "../util/file.h" + +using namespace mongoutils; + +namespace mongo { + + class AlignedBuilder; + + namespace dur { + BOOST_STATIC_ASSERT( sizeof(JHeader) == 8192 ); + BOOST_STATIC_ASSERT( sizeof(JSectHeader) == 20 ); + BOOST_STATIC_ASSERT( sizeof(JSectFooter) == 32 ); + BOOST_STATIC_ASSERT( sizeof(JEntry) == 12 ); + BOOST_STATIC_ASSERT( sizeof(LSNFile) == 88 ); + + bool usingPreallocate = false; + + void removeOldJournalFile(path p); + + filesystem::path getJournalDir() { + filesystem::path p(dbpath); + p /= "journal"; + return p; + } + + path lsnPath() { + return getJournalDir()/"lsn"; + } + + extern CodeBlock durThreadMain; + + /** this should be called when something really bad happens so that we can flag appropriately + */ + void journalingFailure(const char *msg) { + /** todo: + (1) don't log too much + (2) make an indicator in the journal dir that something bad happened. + (2b) refuse to do a recovery startup if that is there without manual override. + */ + log() << "journaling error " << msg << endl; + assert(false); + } + + JHeader::JHeader(string fname) { + magic[0] = 'j'; magic[1] = '\n'; + _version = CurrentVersion; + memset(ts, 0, sizeof(ts)); + time_t t = time(0); + strncpy(ts, time_t_to_String_short(t).c_str(), sizeof(ts)-1); + memset(dbpath, 0, sizeof(dbpath)); + strncpy(dbpath, fname.c_str(), sizeof(dbpath)-1); + { + fileId = t&0xffffffff; + fileId |= ((unsigned long long)getRandomNumber()) << 32; + } + memset(reserved3, 0, sizeof(reserved3)); + txt2[0] = txt2[1] = '\n'; + n1 = n2 = n3 = n4 = '\n'; + } + + // class Journal + + Journal j; + + const unsigned long long LsnShutdownSentinel = ~((unsigned long long)0); + + Journal::Journal() : + _curLogFileMutex("JournalLfMutex") { + _written = 0; + _nextFileNumber = 0; + _curLogFile = 0; + _curFileId = 0; + _preFlushTime = 0; + _lastFlushTime = 0; + _writeToLSNNeeded = false; + } + + path Journal::getFilePathFor(int filenumber) const { + filesystem::path p(dir); + p /= string(str::stream() << "j._" << filenumber); + return p; + } + + /** never throws + @return true if journal dir is not empty + */ + bool haveJournalFiles() { + try { + for ( boost::filesystem::directory_iterator i( getJournalDir() ); + i != boost::filesystem::directory_iterator(); + ++i ) { + string fileName = boost::filesystem::path(*i).leaf(); + if( str::startsWith(fileName, "j._") ) + return true; + } + } + catch(...) { } + return false; + } + + /** throws */ + void removeJournalFiles() { + log() << "removeJournalFiles" << endl; + try { + for ( boost::filesystem::directory_iterator i( getJournalDir() ); + i != boost::filesystem::directory_iterator(); + ++i ) { + string fileName = boost::filesystem::path(*i).leaf(); + if( str::startsWith(fileName, "j._") ) { + try { + removeOldJournalFile(*i); + } + catch(std::exception& e) { + log() << "couldn't remove " << fileName << ' ' << e.what() << endl; + throw; + } + } + } + try { + boost::filesystem::remove(lsnPath()); + } + catch(...) 
{ + log() << "couldn't remove " << lsnPath().string() << endl; + throw; + } + } + catch( std::exception& e ) { + log() << "error removing journal files " << e.what() << endl; + throw; + } + assert(!haveJournalFiles()); + log(1) << "removeJournalFiles end" << endl; + } + + /** at clean shutdown */ + bool okToCleanUp = false; // successful recovery would set this to true + void Journal::cleanup() { + if( !okToCleanUp ) + return; + + try { + scoped_lock lk(_curLogFileMutex); + closeCurrentJournalFile(); + removeJournalFiles(); + } + catch(std::exception& e) { + log() << "error couldn't remove journal file during shutdown " << e.what() << endl; + throw; + } + } + void journalCleanup() { j.cleanup(); } + + bool _preallocateIsFaster() { + bool faster = false; + filesystem::path p = getJournalDir() / "tempLatencyTest"; + try { remove(p); } catch(...) { } + try { + AlignedBuilder b(8192); + int millis[2]; + const int N = 50; + for( int pass = 0; pass < 2; pass++ ) { + LogFile f(p.string()); + Timer t; + for( int i = 0 ; i < N; i++ ) { + f.synchronousAppend(b.buf(), 8192); + } + millis[pass] = t.millis(); + // second time through, file exists and is prealloc case + } + int diff = millis[0] - millis[1]; + if( diff > 2 * N ) { + // at least 2ms faster for prealloc case? + faster = true; + log() << "preallocateIsFaster=true " << diff / (1.0*N) << endl; + } + } + catch(...) { + log() << "info preallocateIsFaster couldn't run; returning false" << endl; + } + try { remove(p); } catch(...) { } + return faster; + } + bool preallocateIsFaster() { + return _preallocateIsFaster() && _preallocateIsFaster() && _preallocateIsFaster(); + } + + // throws + void preallocateFile(filesystem::path p, unsigned long long len) { + if( exists(p) ) + return; + + const unsigned BLKSZ = 1024 * 1024; + log() << "preallocating a journal file " << p.string() << endl; + LogFile f(p.string()); + AlignedBuilder b(BLKSZ); + for( unsigned long long x = 0; x < len; x += BLKSZ ) { + f.synchronousAppend(b.buf(), BLKSZ); + } + } + + // throws + void _preallocateFiles() { + for( int i = 0; i <= 2; i++ ) { + string fn = str::stream() << "prealloc." << i; + filesystem::path filepath = getJournalDir() / fn; + + unsigned long long limit = Journal::DataLimit; + if( debug && i == 1 ) { + // moving 32->64, the prealloc files would be short. that is "ok", but we want to exercise that + // case, so we force exercising here when _DEBUG is set by arbitrarily stopping prealloc at a low + // limit for a file. also we want to be able to change in the future the constant without a lot of + // work anyway. + limit = 16 * 1024 * 1024; + } + preallocateFile(filepath, limit); + } + } + + void preallocateFiles() { + if( preallocateIsFaster() || + exists(getJournalDir()/"prealloc.0") || // if enabled previously, keep using + exists(getJournalDir()/"prealloc.1") ) { + usingPreallocate = true; + try { + _preallocateFiles(); + } + catch(...) { + log() << "warning caught exception in preallocateFiles, continuing" << endl; + } + } + j.open(); + } + + void removeOldJournalFile(path p) { + if( usingPreallocate ) { + try { + for( int i = 0; i <= 2; i++ ) { + string fn = str::stream() << "prealloc." << i; + filesystem::path filepath = getJournalDir() / fn; + if( !filesystem::exists(filepath) ) { + // we can recycle this file into this prealloc file location + boost::filesystem::rename(p, filepath); + return; + } + } + } catch(...) 
{ + log() << "warning exception in dur::removeOldJournalFile " << p.string() << endl; + // fall through and try to delete the file + } + } + + // already have 3 prealloc files, so delete this file + try { + boost::filesystem::remove(p); + } + catch(...) { + log() << "warning exception removing " << p.string() << endl; + } + } + + // find a prealloc. file, presumably to take and use + path findPrealloced() { + try { + for( int i = 0; i <= 2; i++ ) { + string fn = str::stream() << "prealloc." << i; + filesystem::path filepath = getJournalDir() / fn; + if( filesystem::exists(filepath) ) + return filepath; + } + } catch(...) { + log() << "warning exception in dur::findPrealloced()" << endl; + } + return path(); + } + + /** assure journal/ dir exists. throws. call during startup. */ + void journalMakeDir() { + j.init(); + + filesystem::path p = getJournalDir(); + j.dir = p.string(); + log() << "journal dir=" << j.dir << endl; + if( !exists(j.dir) ) { + try { + create_directory(j.dir); + } + catch(std::exception& e) { + log() << "error creating directory " << j.dir << ' ' << e.what() << endl; + throw; + } + } + } + + void Journal::_open() { + _curFileId = 0; + assert( _curLogFile == 0 ); + path fname = getFilePathFor(_nextFileNumber); + + // if we have a prealloced file, use it + { + path p = findPrealloced(); + if( !p.empty() ) { + try { + { + // JHeader::fileId must be updated before renaming to be race-safe + LogFile f(p.string()); + JHeader h(p.string()); + AlignedBuilder b(8192); + b.appendStruct(h); + f.synchronousAppend(b.buf(), b.len()); + } + boost::filesystem::rename(p, fname); + } + catch(...) { + log() << "warning couldn't write to / rename file " << p.string() << endl; + } + } + } + + _curLogFile = new LogFile(fname.string()); + _nextFileNumber++; + { + JHeader h(fname.string()); + _curFileId = h.fileId; + assert(_curFileId); + AlignedBuilder b(8192); + b.appendStruct(h); + _curLogFile->synchronousAppend(b.buf(), b.len()); + } + } + + void Journal::init() { + assert( _curLogFile == 0 ); + MongoFile::notifyPreFlush = preFlush; + MongoFile::notifyPostFlush = postFlush; + } + + void Journal::open() { + assert( MongoFile::notifyPreFlush == preFlush ); + mutex::scoped_lock lk(_curLogFileMutex); + _open(); + } + + void LSNFile::set(unsigned long long x) { + memset(this, 0, sizeof(*this)); + lsn = x; + checkbytes = ~x; + } + + /** logs details of the situation, and returns 0, if anything surprising in the LSNFile + if something highly surprising, throws to abort + */ + unsigned long long LSNFile::get() { + uassert(13614, "unexpected version number of lsn file in journal/ directory", ver == 0); + if( ~lsn != checkbytes ) { + log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn << " checkbytes: " << hex << checkbytes << endl; + return 0; + } + return lsn; + } + + /** called during recovery (the error message text below assumes that) + */ + unsigned long long journalReadLSN() { + if( !debug ) { + // in nondebug build, for now, be conservative until more tests written, and apply the whole journal. + // however we will still write the lsn file to exercise that code, and use in _DEBUG build. + return 0; + } + + if( !MemoryMappedFile::exists(lsnPath()) ) { + log() << "info no lsn file in journal/ directory" << endl; + return 0; + } + + try { + // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery. + // however, given we actually close the file when writing, that seems unlikely. 
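LSNFile::set()/get() above guard the stored sequence number with its bitwise complement, so a torn or zeroed lsn file is detected and simply ignored rather than trusted (recovery then starts from the beginning of the journal). A self-contained sketch of that complement check, with illustrative names:

#include <cstdint>
#include <iostream>

struct LsnRecord {
    std::uint64_t lsn;
    std::uint64_t checkbytes;   // ~lsn; a mismatch means the record is not trustworthy

    void set(std::uint64_t x) {
        lsn = x;
        checkbytes = ~x;
    }
    // returns 0 when the record looks corrupt, mirroring LSNFile::get()
    std::uint64_t get() const {
        if (~lsn != checkbytes) {
            std::cerr << "lsn record not valid; recovery will start from the log beginning\n";
            return 0;
        }
        return lsn;
    }
};

int main() {
    LsnRecord r;
    r.set(123456);
    r.checkbytes = 0;                    // simulate a partially written record
    return r.get() == 0 ? 0 : 1;         // the corrupt record is ignored, not trusted
}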
+ LSNFile L; + File f; + f.open(lsnPath().string().c_str()); + assert(f.is_open()); + f.read(0,(char*)&L, sizeof(L)); + unsigned long long lsn = L.get(); + return lsn; + } + catch(std::exception& e) { + uasserted(13611, str::stream() << "can't read lsn file in journal directory : " << e.what()); + } + return 0; + } + + unsigned long long getLastDataFileFlushTime() { + return j.lastFlushTime(); + } + + /** remember "last sequence number" to speed recoveries + concurrency: called by durThread only. + */ + void Journal::updateLSNFile() { + if( !_writeToLSNNeeded ) + return; + durThreadMain.assertWithin(); + _writeToLSNNeeded = false; + try { + // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery. + // however, given we actually close the file, that seems unlikely. + File f; + f.open(lsnPath().string().c_str()); + if( !f.is_open() ) { + // can get 0 if an i/o error + log() << "warning: open of lsn file failed" << endl; + return; + } + log() << "lsn set " << _lastFlushTime << endl; + LSNFile lsnf; + lsnf.set(_lastFlushTime); + f.write(0, (char*)&lsnf, sizeof(lsnf)); + } + catch(std::exception& e) { + log() << "warning: write to lsn file failed " << e.what() << endl; + // keep running (ignore the error). recovery will be slow. + } + } + + void Journal::preFlush() { + j._preFlushTime = Listener::getElapsedTimeMillis(); + } + + void Journal::postFlush() { + j._lastFlushTime = j._preFlushTime; + j._writeToLSNNeeded = true; + } + + // call from within _curLogFileMutex + void Journal::closeCurrentJournalFile() { + if (!_curLogFile) + return; + + JFile jf; + jf.filename = _curLogFile->_name; + jf.lastEventTimeMs = Listener::getElapsedTimeMillis(); + _oldJournalFiles.push_back(jf); + + delete _curLogFile; // close + _curLogFile = 0; + _written = 0; + } + + /** remove older journal files. + be in _curLogFileMutex but not dbMutex when calling + */ + void Journal::removeUnneededJournalFiles() { + while( !_oldJournalFiles.empty() ) { + JFile f = _oldJournalFiles.front(); + + if( f.lastEventTimeMs < _lastFlushTime + ExtraKeepTimeMs ) { + // eligible for deletion + path p( f.filename ); + log() << "old journal file will be removed: " << f.filename << endl; + removeOldJournalFile(p); + } + else { + break; + } + + _oldJournalFiles.pop_front(); + } + } + + /** check if time to rotate files. assure a file is open. + done separately from the journal() call as we can do this part + outside of lock. 
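removeUnneededJournalFiles() above walks the rotated-out files oldest first and stops at the first file that must still be kept; a file is deleted only once the flush threshold has moved past its newest entry. A compact standalone sketch of that pruning loop, with the file removal passed in as a callback (a stand-in for removeOldJournalFile):

#include <deque>
#include <string>

struct RotatedFile {
    std::string filename;
    unsigned long long lastEventTimeMs;   // timestamp of the newest entry in the file
};

// Files are ordered oldest to newest, so we can stop at the first one still needed.
void pruneOldJournalFiles(std::deque<RotatedFile>& oldFiles,
                          unsigned long long lastFlushTimeMs,
                          unsigned long long extraKeepTimeMs,
                          void (*removeFile)(const std::string&)) {
    while (!oldFiles.empty()) {
        const RotatedFile& f = oldFiles.front();
        if (f.lastEventTimeMs < lastFlushTimeMs + extraKeepTimeMs) {
            removeFile(f.filename);   // treated as no longer needed, mirroring the condition above
        } else {
            break;                    // this and all newer files are kept for crash recovery
        }
        oldFiles.pop_front();
    }
}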
+ thread: durThread() + */ + void journalRotate() { + j.rotate(); + } + void Journal::rotate() { + assert( !dbMutex.atLeastReadLocked() ); + durThreadMain.assertWithin(); + + scoped_lock lk(_curLogFileMutex); + + if ( inShutdown() || !_curLogFile ) + return; + + j.updateLSNFile(); + + if( _curLogFile && _written < DataLimit ) + return; + + if( _curLogFile ) { + + closeCurrentJournalFile(); + + removeUnneededJournalFiles(); + } + + try { + Timer t; + _open(); + int ms = t.millis(); + if( ms >= 200 ) { + log() << "DR101 latency warning on journal file open " << ms << "ms" << endl; + } + } + catch(std::exception& e) { + log() << "warning exception opening journal file " << e.what() << endl; + throw; + } + } + + /** write to journal + */ + void journal(const AlignedBuilder& b) { + j.journal(b); + } + void Journal::journal(const AlignedBuilder& b) { + try { + mutex::scoped_lock lk(_curLogFileMutex); + + // must already be open -- so that _curFileId is correct for previous buffer building + assert( _curLogFile ); + + stats.curr->_journaledBytes += b.len(); + _written += b.len(); + _curLogFile->synchronousAppend((void *) b.buf(), b.len()); + } + catch(std::exception& e) { + log() << "warning exception in dur::journal " << e.what() << endl; + throw; + } + } + + } +} + +/* todo + test (and handle) disk full on journal append. best quick thing to do is to terminate. + if we roll back operations, there are nuances such as is ReplSetImpl::lastOpTimeWritten too new in ram then? +*/ diff --git a/db/dur_journal.h b/db/dur_journal.h new file mode 100644 index 0000000..81957b5 --- /dev/null +++ b/db/dur_journal.h @@ -0,0 +1,68 @@ +// @file dur_journal.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +namespace mongo { + class AlignedBuilder; + + namespace dur { + + /** true if ok to cleanup journal files at termination. otherwise, files journal will be retained. + */ + extern bool okToCleanUp; + + /** at termination after db files closed & fsynced */ + void journalCleanup(); + + /** assure journal/ dir exists. throws */ + void journalMakeDir(); + + /** check if time to rotate files; assure a file is open. + done separately from the journal() call as we can do this part + outside of lock. + only called by durThread. + */ + void journalRotate(); + + /** write/append to journal file * + @param buf - a buffer that will be written to the journal. + will not return until on disk + */ + void journal(const AlignedBuilder& buf); + + /** flag that something has gone wrong during writing to the journal + (not for recovery mode) + */ + void journalingFailure(const char *msg); + + /** read lsn from disk from the last run before doing recovery */ + unsigned long long journalReadLSN(); + + unsigned long long getLastDataFileFlushTime(); + + /** never throws. + @return true if there are any journal files in the journal dir. 
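haveJournalFiles() simply looks for any "j._" file in the journal directory. The same test written against C++17 std::filesystem instead of the boost::filesystem calls used in the patch (an illustrative equivalent, not the code above):

#include <filesystem>
#include <string>
#include <system_error>

bool haveJournalFilesIn(const std::filesystem::path& journalDir) {
    std::error_code ec;   // never throw, as the original promises
    for (const auto& entry : std::filesystem::directory_iterator(journalDir, ec)) {
        const std::string name = entry.path().filename().string();
        if (name.rfind("j._", 0) == 0)   // filename starts with "j._"
            return true;
    }
    return false;
}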
+ */ + bool haveJournalFiles(); + + // in case disk controller buffers writes + const long long ExtraKeepTimeMs = 10000; + + } +} diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h new file mode 100644 index 0000000..d29f94d --- /dev/null +++ b/db/dur_journalformat.h @@ -0,0 +1,166 @@ +// @file dur_journalformat.h The format of our journal files. + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../util/md5.hpp" + +namespace mongo { + + namespace dur { + +#pragma pack(1) + /** beginning header for a journal/j._ file + there is nothing important int this header at this time. except perhaps version #. + */ + struct JHeader { + JHeader() { } + JHeader(string fname); + + char magic[2]; // "j\n". j means journal, then a linefeed, fwiw if you were to run "less" on the file or something... + + // x4142 is asci--readable if you look at the file with head/less -- thus the starting values were near + // that. simply incrementing the version # is safe on a fwd basis. + enum { CurrentVersion = 0x4147 }; + unsigned short _version; + + // these are just for diagnostic ease (make header more useful as plain text) + char n1; // '\n' + char ts[20]; // ascii timestamp of file generation. for user reading, not used by code. + char n2; // '\n' + char dbpath[128]; // path/filename of this file for human reading and diagnostics. not used by code. + char n3, n4; // '\n', '\n' + + unsigned long long fileId; // unique identifier that will be in each JSectHeader. important as we recycle prealloced files + + char reserved3[8026]; // 8KB total for the file header + char txt2[2]; // "\n\n" at the end + + bool versionOk() const { return _version == CurrentVersion; } + bool valid() const { return magic[0] == 'j' && txt2[1] == '\n' && fileId; } + }; + + /** "Section" header. A section corresponds to a group commit. + len is length of the entire section including header and footer. + */ + struct JSectHeader { + unsigned len; // length in bytes of the whole section + unsigned long long seqNumber; // sequence number that can be used on recovery to not do too much work + unsigned long long fileId; // matches JHeader::fileId + }; + + /** an individual write operation within a group commit section. Either the entire section should + be applied, or nothing. (We check the md5 for the whole section before doing anything on recovery.) + */ + struct JEntry { + enum OpCodes { + OpCode_Footer = 0xffffffff, + OpCode_DbContext = 0xfffffffe, + OpCode_FileCreated = 0xfffffffd, + OpCode_DropDb = 0xfffffffc, + OpCode_Min = 0xfffff000 + }; + union { + unsigned len; // length in bytes of the data of the JEntry. 
does not include the JEntry header + OpCodes opcode; + }; + + unsigned ofs; // offset in file + + // sentinel and masks for _fileNo + enum { + DotNsSuffix = 0x7fffffff, // ".ns" file + LocalDbBit = 0x80000000 // assuming "local" db instead of using the JDbContext + }; + int _fileNo; // high bit is set to indicate it should be the /local database + // char data[len] follows + + const char * srcData() const { + const int *i = &_fileNo; + return (const char *) (i+1); + } + + int getFileNo() const { return _fileNo & (~LocalDbBit); } + void setFileNo(int f) { _fileNo = f; } + bool isNsSuffix() const { return getFileNo() == DotNsSuffix; } + + void setLocalDbContextBit() { _fileNo |= LocalDbBit; } + bool isLocalDbContext() const { return _fileNo & LocalDbBit; } + void clearLocalDbContextBit() { _fileNo = getFileNo(); } + + static string suffix(int fileno) { + if( fileno == DotNsSuffix ) return "ns"; + stringstream ss; + ss << fileno; + return ss.str(); + } + }; + + /** group commit section footer. md5 is a key field. */ + struct JSectFooter { + JSectFooter(const void* begin, int len) { // needs buffer to compute hash + sentinel = JEntry::OpCode_Footer; + reserved = 0; + magic[0] = magic[1] = magic[2] = magic[3] = '\n'; + + // skip section header since size modified after hashing + (const char*&)begin += sizeof(JSectHeader); + len -= sizeof(JSectHeader); + + md5(begin, len, hash); + } + unsigned sentinel; + md5digest hash; // unsigned char[16] + unsigned long long reserved; + char magic[4]; // "\n\n\n\n" + + bool checkHash(const void* begin, int len) const { + // skip section header since size modified after hashing + (const char*&)begin += sizeof(JSectHeader); + len -= sizeof(JSectHeader); + md5digest current; + md5(begin, len, current); + DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(current, 16) << endl; + return (memcmp(hash, current, sizeof(hash)) == 0); + } + }; + + /** declares "the next entry(s) are for this database / file path prefix" */ + struct JDbContext { + JDbContext() : sentinel(JEntry::OpCode_DbContext) { } + const unsigned sentinel; // compare to JEntry::len -- zero is our sentinel + //char dbname[]; + }; + + /** "last sequence number" */ + struct LSNFile { + unsigned ver; + unsigned reserved2; + unsigned long long lsn; + unsigned long long checkbytes; + unsigned long long reserved[8]; + + void set(unsigned long long lsn); + unsigned long long get(); + }; + +#pragma pack() + + } + +} diff --git a/db/dur_journalimpl.h b/db/dur_journalimpl.h new file mode 100644 index 0000000..9566dff --- /dev/null +++ b/db/dur_journalimpl.h @@ -0,0 +1,101 @@ +// @file dur_journal.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
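JEntry packs the target file number and a "local database" flag into a single field: the high bit selects the local db context and the special value 0x7fffffff addresses the .ns file. A standalone sketch of those accessors following the masks shown above (the patch stores the field as an int; unsigned is used here to keep the bit arithmetic well defined):

#include <cassert>
#include <sstream>
#include <string>

struct EntryFileNo {
    enum : unsigned {
        DotNsSuffix = 0x7fffffffu,   // addresses the ".ns" file instead of a numbered datafile
        LocalDbBit  = 0x80000000u    // high bit: entry belongs to the "local" database
    };
    unsigned fileNo = 0;

    unsigned getFileNo() const        { return fileNo & ~LocalDbBit; }
    void     setFileNo(unsigned f)    { fileNo = f; }
    bool     isNsSuffix() const       { return getFileNo() == DotNsSuffix; }
    void     setLocalDbContextBit()   { fileNo |= LocalDbBit; }
    bool     isLocalDbContext() const { return (fileNo & LocalDbBit) != 0; }

    // suffix used when rebuilding the datafile name during recovery: "ns" or "<n>"
    std::string suffix() const {
        if (isNsSuffix()) return "ns";
        std::ostringstream ss;
        ss << getFileNo();
        return ss.str();
    }
};

int main() {
    EntryFileNo e;
    e.setFileNo(3);
    e.setLocalDbContextBit();
    assert(e.isLocalDbContext() && e.getFileNo() == 3 && e.suffix() == "3");
    return 0;
}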
+*/ + +#pragma once + +#include "../util/logfile.h" + +namespace mongo { + namespace dur { + + /** the writeahead journal for durability */ + class Journal { + public: + string dir; // set by journalMakeDir() during initialization + + Journal(); + + /** call during startup by journalMakeDir() */ + void init(); + + /** check if time to rotate files. assure a file is open. + done separately from the journal() call as we can do this part + outside of lock. + thread: durThread() + */ + void rotate(); + + /** write to journal + */ + void journal(const AlignedBuilder& b); + + boost::filesystem::path getFilePathFor(int filenumber) const; + + unsigned long long lastFlushTime() const { return _lastFlushTime; } + void cleanup(); + + // Rotate after reaching this data size in a journal (j._) file + // We use a smaller size for 32 bit as the journal is mmapped during recovery (only) + // Note if you take a set of datafiles, including journal files, from 32->64 or vice-versa, it must + // work. (and should as-is) + static const unsigned long long DataLimit = (sizeof(void*)==4) ? 256 * 1024 * 1024 : 1 * 1024 * 1024 * 1024; + + unsigned long long curFileId() const { return _curFileId; } + + void assureLogFileOpen() { + mutex::scoped_lock lk(_curLogFileMutex); + if( _curLogFile == 0 ) + _open(); + } + + /** open a journal file to journal operations to. */ + void open(); + + private: + void _open(); + void closeCurrentJournalFile(); + void removeUnneededJournalFiles(); + + unsigned long long _written; // bytes written so far to the current journal (log) file + unsigned _nextFileNumber; + + mutex _curLogFileMutex; + + LogFile *_curLogFile; // use _curLogFileMutex + unsigned long long _curFileId; // current file id see JHeader::fileId + + struct JFile { + string filename; + unsigned long long lastEventTimeMs; + }; + + // files which have been closed but not unlinked (rotated out) yet + // ordered oldest to newest + list _oldJournalFiles; // use _curLogFileMutex + + // lsn related + static void preFlush(); + static void postFlush(); + unsigned long long _preFlushTime; + unsigned long long _lastFlushTime; // data < this time is fsynced in the datafiles (unless hard drive controller is caching) + bool _writeToLSNNeeded; + void updateLSNFile(); + }; + + } +} diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp new file mode 100644 index 0000000..1648e89 --- /dev/null +++ b/db/dur_preplogbuffer.cpp @@ -0,0 +1,192 @@ +// @file dur_preplogbuffer.cpp + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +/* + PREPLOGBUFFER + we will build an output buffer ourself and then use O_DIRECT + we could be in read lock for this + for very large objects write directly to redo log in situ? 
+ @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc +*/ + +#include "pch.h" +#include "cmdline.h" +#include "dur.h" +#include "dur_journal.h" +#include "dur_journalimpl.h" +#include "dur_commitjob.h" +#include "../util/mongoutils/hash.h" +#include "../util/mongoutils/str.h" +#include "../util/alignedbuilder.h" +#include "../util/timer.h" +#include "dur_stats.h" + +using namespace mongoutils; + +namespace mongo { + namespace dur { + + extern Journal j; + + RelativePath local = RelativePath::fromRelativePath("local"); + + MongoMMF* findMMF_inlock(void *ptr, size_t &ofs) { + MongoMMF *f = privateViews.find_inlock(ptr, ofs); + if( f == 0 ) { + string s = str::stream() << "view pointer cannot be resolved " << (size_t) ptr; + journalingFailure(s.c_str()); // asserts + } + return f; + } + + /** put the basic write operation into the buffer (bb) to be journaled */ + void prepBasicWrite_inlock(AlignedBuilder&bb, const WriteIntent *i, RelativePath& lastDbPath) { + size_t ofs = 1; + MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs); + dassert( i->w_ptr == 0 ); + + if( !mmf->willNeedRemap() ) { + // tag this mmf as needed a remap of its private view later. + // usually it will already be dirty/already set, so we do the if above first + // to avoid possibility of cpu cache line contention + mmf->willNeedRemap() = true; + } + + // since we have already looked up the mmf, we go ahead and remember the write view location + // so we don't have to find the MongoMMF again later in WRITETODATAFILES() + dassert( i->w_ptr == 0 ); + i->w_ptr = ((char*)mmf->view_write()) + ofs; + + JEntry e; + e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); //dont write past end of file + assert( ofs <= 0x80000000 ); + e.ofs = (unsigned) ofs; + e.setFileNo( mmf->fileSuffixNo() ); + if( mmf->relativePath() == local ) { + e.setLocalDbContextBit(); + } + else if( mmf->relativePath() != lastDbPath ) { + lastDbPath = mmf->relativePath(); + JDbContext c; + bb.appendStruct(c); + bb.appendStr(lastDbPath.toString()); + } + bb.appendStruct(e); +#if defined(_EXPERIMENTAL) + i->ofsInJournalBuffer = bb.len(); +#endif + bb.appendBuf(i->start(), e.len); + + if (e.len != (unsigned)i->length()) { + log() << "dur info splitting prepBasicWrite at boundary" << endl; + + // This only happens if we write to the last byte in a file and + // the fist byte in another file that is mapped adjacently. I + // think most OSs leave at least a one page gap between + // mappings, but better to be safe. + + WriteIntent next ((char*)i->start() + e.len, i->length() - e.len); + prepBasicWrite_inlock(bb, &next, lastDbPath); + } + } + + /** basic write ops / write intents. note there is no particular order to these : if we have + two writes to the same location during the group commit interval, it is likely + (although not assured) that it is journaled here once. 
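prepBasicWrite_inlock() above never lets a single journal entry run past the end of the mapped file: the length is clamped, and in the rare case a declared write straddles two adjacent mappings the remainder is journaled as a second entry. A simplified standalone sketch of that clamp-and-split step (the names here are stand-ins for the MongoMMF lookup and AlignedBuilder append):

#include <algorithm>
#include <cassert>
#include <cstddef>

struct Split {
    unsigned firstLen;       // bytes that fit in the current mapped file
    unsigned remainderLen;   // bytes that must be re-resolved against the next mapping
};

// A JEntry may not extend past the end of its file, so anything left over becomes a
// separate basic write whose target mapping is looked up again from the spill-over address.
inline Split clampToFile(std::size_t offsetInFile, unsigned intentLen, std::size_t fileLength) {
    assert(offsetInFile <= fileLength);   // assumed by the caller, as in the patch
    unsigned fits = (unsigned)std::min<std::size_t>(intentLen, fileLength - offsetInFile);
    return Split{fits, intentLen - fits};
}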
+ */ + void prepBasicWrites(AlignedBuilder& bb) { + scoped_lock lk(privateViews._mutex()); + + // each time events switch to a different database we journal a JDbContext + RelativePath lastDbPath; + + for( set::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { + prepBasicWrite_inlock(bb, &(*i), lastDbPath); + } + } + + void resetLogBuffer(AlignedBuilder& bb) { + bb.reset(); + + // JSectHeader + JSectHeader h; + h.len = (unsigned) 0xffffffff; // total length, will fill in later + h.seqNumber = getLastDataFileFlushTime(); + h.fileId = j.curFileId(); + + bb.appendStruct(h); + } + + /** we will build an output buffer ourself and then use O_DIRECT + we could be in read lock for this + caller handles locking + */ + void _PREPLOGBUFFER() { + assert( cmdLine.dur ); + + { + // now that we are locked, fully drain deferred notes of write intents + DEV dbMutex.assertAtLeastReadLocked(); + Writes& writes = commitJob.wi(); + writes._deferred.invoke(); + writes._drained = true; + } + + AlignedBuilder& bb = commitJob._ab; + resetLogBuffer(bb); + + // ops other than basic writes (DurOp's) + { + for( vector< shared_ptr >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { + (*i)->serialize(bb); + } + } + + { + prepBasicWrites(bb); + } + + { + JSectFooter f(bb.buf(), bb.len()); + bb.appendStruct(f); + } + + { + // pad to alignment, and set the total section length in the JSectHeader + assert( 0xffffe000 == (~(Alignment-1)) ); + unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); + dassert( L >= (unsigned) bb.len() ); + + *((unsigned*)bb.atOfs(0)) = L; + + unsigned padding = L - bb.len(); + bb.skip(padding); + dassert( bb.len() % Alignment == 0 ); + } + + return; + } + void PREPLOGBUFFER() { + Timer t; + j.assureLogFileOpen(); // so fileId is set + _PREPLOGBUFFER(); + stats.curr->_prepLogBufferMicros += t.micros(); + } + + } +} diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp new file mode 100644 index 0000000..1480a59 --- /dev/null +++ b/db/dur_recover.cpp @@ -0,0 +1,457 @@ +// @file dur_recover.cpp crash recovery via the journal + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" + +#include "dur.h" +#include "dur_recover.h" +#include "dur_journal.h" +#include "dur_journalformat.h" +#include "durop.h" +#include "namespace.h" +#include "../util/mongoutils/str.h" +#include "../util/bufreader.h" +#include "pdfile.h" +#include "database.h" +#include "db.h" +#include "../util/unittest.h" +#include "cmdline.h" +#include "curop.h" +#include "mongommf.h" + +#include +#include + +using namespace mongoutils; + +namespace mongo { + + namespace dur { + + struct ParsedJournalEntry { /*copyable*/ + ParsedJournalEntry() : e(0) { } + + // relative path of database for the operation. 
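_PREPLOGBUFFER() above writes a placeholder length into the section header, then rounds the finished section up to the 8KB Alignment and patches the real length back in at offset 0. A standalone sketch of that finish step over a plain byte vector (the real code does this on an AlignedBuilder, and the constant here mirrors the Alignment value from durop.h):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

const unsigned kAlignment = 8192;

// Round the section buffer up to the alignment boundary and store the final
// length in the first 4 bytes, where the JSectHeader::len field lives.
void finishSection(std::vector<char>& buf) {
    assert(buf.size() >= sizeof(std::uint32_t));
    std::uint32_t total =
        (std::uint32_t)((buf.size() + kAlignment - 1) & ~(std::size_t)(kAlignment - 1));
    buf.resize(total, 0);                            // zero padding up to the boundary
    std::memcpy(buf.data(), &total, sizeof total);   // patch the length placeholder
}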
+ // might be a pointer into mmaped Journal file + const char *dbName; + + // thse are pointers into the memory mapped journal file + const JEntry *e; // local db sentinel is already parsed out here into dbName + + // if not one of the two simple JEntry's above, this is the operation: + shared_ptr op; + }; + + void removeJournalFiles(); + path getJournalDir(); + + /** get journal filenames, in order. throws if unexpected content found */ + static void getFiles(path dir, vector& files) { + map m; + for ( filesystem::directory_iterator i( dir ); + i != filesystem::directory_iterator(); + ++i ) { + filesystem::path filepath = *i; + string fileName = filesystem::path(*i).leaf(); + if( str::startsWith(fileName, "j._") ) { + unsigned u = str::toUnsigned( str::after(fileName, '_') ); + if( m.count(u) ) { + uasserted(13531, str::stream() << "unexpected files in journal directory " << dir.string() << " : " << fileName); + } + m.insert( pair(u,filepath) ); + } + } + for( map::iterator i = m.begin(); i != m.end(); ++i ) { + if( i != m.begin() && m.count(i->first - 1) == 0 ) { + uasserted(13532, + str::stream() << "unexpected file in journal directory " << dir.string() + << " : " << filesystem::path(i->second).leaf() << " : can't find its preceeding file"); + } + files.push_back(i->second); + } + } + + /** read through the memory mapped data of a journal file (journal/j._ file) + throws + */ + class JournalSectionIterator : boost::noncopyable { + public: + JournalSectionIterator(const void *p, unsigned len, bool doDurOps) + : _br(p, len) + , _sectHead(static_cast(_br.skip(sizeof(JSectHeader)))) + , _lastDbName(NULL) + , _doDurOps(doDurOps) + {} + + bool atEof() const { return _br.atEof(); } + + unsigned long long seqNumber() const { return _sectHead->seqNumber; } + + /** get the next entry from the log. this function parses and combines JDbContext and JEntry's. + * @return true if got an entry. false at successful end of section (and no entry returned). + * throws on premature end of section. + */ + bool next(ParsedJournalEntry& e) { + unsigned lenOrOpCode; + _br.read(lenOrOpCode); + + if (lenOrOpCode > JEntry::OpCode_Min) { + switch( lenOrOpCode ) { + + case JEntry::OpCode_Footer: { + if (_doDurOps) { + const char* pos = (const char*) _br.pos(); + pos -= sizeof(lenOrOpCode); // rewind to include OpCode + const JSectFooter& footer = *(const JSectFooter*)pos; + int len = pos - (char*)_sectHead; + if (!footer.checkHash(_sectHead, len)) { + massert(13594, str::stream() << "Journal checksum doesn't match. 
recorded: " + << toHex(footer.hash, sizeof(footer.hash)) + << " actual: " << md5simpledigest(_sectHead, len) + , false); + } + } + return false; // false return value denotes end of section + } + + case JEntry::OpCode_FileCreated: + case JEntry::OpCode_DropDb: { + e.dbName = 0; + boost::shared_ptr op = DurOp::read(lenOrOpCode, _br); + if (_doDurOps) { + e.op = op; + } + return true; + } + + case JEntry::OpCode_DbContext: { + _lastDbName = (const char*) _br.pos(); + const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _br.remaining()); + const unsigned len = strnlen(_lastDbName, limit); + massert(13533, "problem processing journal file during recovery", _lastDbName[len] == '\0'); + _br.skip(len+1); // skip '\0' too + _br.read(lenOrOpCode); + } + // fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet + + default: + // fall through + ; + } + } + + // JEntry - a basic write + assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min ); + _br.rewind(4); + e.e = (JEntry *) _br.skip(sizeof(JEntry)); + e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName; + assert( e.e->len == lenOrOpCode ); + _br.skip(e.e->len); + return true; + } + private: + BufReader _br; + const JSectHeader* _sectHead; + const char *_lastDbName; // pointer into mmaped journal file + const bool _doDurOps; + }; + + static string fileName(const char* dbName, int fileNo) { + stringstream ss; + ss << dbName << '.'; + assert( fileNo >= 0 ); + if( fileNo == JEntry::DotNsSuffix ) + ss << "ns"; + else + ss << fileNo; + + // relative name -> full path name + path full(dbpath); + full /= ss.str(); + return full.string(); + } + + RecoveryJob::~RecoveryJob() { + DESTRUCTOR_GUARD( + if( !_mmfs.empty() ) + close(); + ) + } + + void RecoveryJob::close() { + scoped_lock lk(_mx); + _close(); + } + + void RecoveryJob::_close() { + MongoFile::flushAll(true); + _mmfs.clear(); + } + + void RecoveryJob::write(const ParsedJournalEntry& entry) { + const string fn = fileName(entry.dbName, entry.e->getFileNo()); + MongoFile* file; + { + MongoFileFinder finder; // must release lock before creating new MongoMMF + file = finder.findByPath(fn); + } + + MongoMMF* mmf; + if (file) { + assert(file->isMongoMMF()); + mmf = (MongoMMF*)file; + } + else { + assert(_recovering); + boost::shared_ptr sp (new MongoMMF); + assert(sp->open(fn, false)); + _mmfs.push_back(sp); + mmf = sp.get(); + } + + if ((entry.e->ofs + entry.e->len) <= mmf->length()) { + void* dest = (char*)mmf->view_write() + entry.e->ofs; + memcpy(dest, entry.e->srcData(), entry.e->len); + } + else { + massert(13622, "Trying to write past end of file in WRITETODATAFILES", _recovering); + } + } + + void RecoveryJob::applyEntry(const ParsedJournalEntry& entry, bool apply, bool dump) { + if( entry.e ) { + if( dump ) { + stringstream ss; + ss << " BASICWRITE " << setw(20) << entry.dbName << '.'; + if( entry.e->isNsSuffix() ) + ss << "ns"; + else + ss << setw(2) << entry.e->getFileNo(); + ss << ' ' << setw(6) << entry.e->len << ' ' << /*hex << setw(8) << (size_t) fqe.srcData << dec <<*/ + " " << hexdump(entry.e->srcData(), entry.e->len); + log() << ss.str() << endl; + } + if( apply ) { + write(entry); + } + } + else if(entry.op) { + // a DurOp subclass operation + if( dump ) { + log() << " OP " << entry.op->toString() << endl; + } + if( apply ) { + if( entry.op->needFilesClosed() ) { + _close(); // locked in processSection + } + entry.op->replay(); + } + } + } + + void RecoveryJob::applyEntries(const vector &entries) { + bool apply = 
(cmdLine.durOptions & CmdLine::DurScanOnly) == 0; + bool dump = cmdLine.durOptions & CmdLine::DurDumpJournal; + if( dump ) + log() << "BEGIN section" << endl; + + for( vector::const_iterator i = entries.begin(); i != entries.end(); ++i ) { + applyEntry(*i, apply, dump); + } + + if( dump ) + log() << "END section" << endl; + } + + void RecoveryJob::processSection(const void *p, unsigned len) { + scoped_lock lk(_mx); + + vector entries; + JournalSectionIterator i(p, len, _recovering); + + //DEV log() << "recovery processSection seq:" << i.seqNumber() << endl; + if( _recovering && _lastDataSyncedFromLastRun > i.seqNumber() + ExtraKeepTimeMs ) { + if( i.seqNumber() != _lastSeqMentionedInConsoleLog ) { + log() << "recover skipping application of section seq:" << i.seqNumber() << " < lsn:" << _lastDataSyncedFromLastRun << endl; + _lastSeqMentionedInConsoleLog = i.seqNumber(); + } + return; + } + + // first read all entries to make sure this section is valid + ParsedJournalEntry e; + while( i.next(e) ) { + entries.push_back(e); + } + + // got all the entries for one group commit. apply them: + applyEntries(entries); + } + + /** apply a specific journal file, that is already mmap'd + @param p start of the memory mapped file + @return true if this is detected to be the last file (ends abruptly) + */ + bool RecoveryJob::processFileBuffer(const void *p, unsigned len) { + try { + unsigned long long fileId; + BufReader br(p,len); + + { + // read file header + JHeader h; + br.read(h); + if( !h.versionOk() ) { + log() << "journal file version number mismatch. recover with old version of mongod, terminate cleanly, then upgrade." << endl; + uasserted(13536, str::stream() << "journal version number mismatch " << h._version); + } + uassert(13537, "journal header invalid", h.valid()); + fileId = h.fileId; + if(cmdLine.durOptions & CmdLine::DurDumpJournal) { + log() << "JHeader::fileId=" << fileId << endl; + } + } + + // read sections + while ( !br.atEof() ) { + JSectHeader h; + br.peek(h); + if( h.fileId != fileId ) { + if( debug || (cmdLine.durOptions & CmdLine::DurDumpJournal) ) { + log() << "Ending processFileBuffer at differing fileId want:" << fileId << " got:" << h.fileId << endl; + log() << " sect len:" << h.len << " seqnum:" << h.seqNumber << endl; + } + return true; + } + processSection(br.skip(h.len), h.len); + + // ctrl c check + killCurrentOp.checkForInterrupt(false); + } + } + catch( BufReader::eof& ) { + if( cmdLine.durOptions & CmdLine::DurDumpJournal ) + log() << "ABRUPT END" << endl; + return true; // abrupt end + } + + return false; // non-abrupt end + } + + /** apply a specific journal file */ + bool RecoveryJob::processFile(path journalfile) { + log() << "recover " << journalfile.string() << endl; + MemoryMappedFile f; + void *p = f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL); + massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p); + return processFileBuffer(p, (unsigned) f.length()); + } + + /** @param files all the j._0 style files we need to apply for recovery */ + void RecoveryJob::go(vector& files) { + log() << "recover begin" << endl; + _recovering = true; + + // load the last sequence number synced to the datafiles on disk before the last crash + _lastDataSyncedFromLastRun = journalReadLSN(); + log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl; + + for( unsigned i = 0; i != files.size(); ++i ) { + /*bool abruptEnd = */processFile(files[i]); + /*if( abruptEnd && i+1 < files.size() ) { + 
log() << "recover error: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl; + close(); + uasserted(13535, "recover abrupt journal file end"); + }*/ + } + + close(); + + if( cmdLine.durOptions & CmdLine::DurScanOnly ) { + uasserted(13545, str::stream() << "--durOptions " << (int) CmdLine::DurScanOnly << " (scan only) specified"); + } + + log() << "recover cleaning up" << endl; + removeJournalFiles(); + log() << "recover done" << endl; + okToCleanUp = true; + _recovering = false; + } + + void _recover() { + assert( cmdLine.dur ); + + filesystem::path p = getJournalDir(); + if( !exists(p) ) { + log() << "directory " << p.string() << " does not exist, there will be no recovery startup step" << endl; + okToCleanUp = true; + return; + } + + vector journalFiles; + getFiles(p, journalFiles); + + if( journalFiles.empty() ) { + log() << "recover : no journal files present, no recovery needed" << endl; + okToCleanUp = true; + return; + } + + RecoveryJob::get().go(journalFiles); + } + + extern mutex groupCommitMutex; + + /** recover from a crash + called during startup + throws on error + */ + void recover() { + // we use a lock so that exitCleanly will wait for us + // to finish (or at least to notice what is up and stop) + writelock lk; + + // this is so the mutexdebugger doesn't get confused. we are actually single threaded + // at this point in the program so it wouldn't have been a true problem (I think) + scoped_lock lk2(groupCommitMutex); + + _recover(); // throws on interruption + } + + struct BufReaderY { int a,b; }; + class BufReaderUnitTest : public UnitTest { + public: + void run() { + BufReader r((void*) "abcdabcdabcd", 12); + char x; + BufReaderY y; + r.read(x); //cout << x; // a + assert( x == 'a' ); + r.read(y); + r.read(x); + assert( x == 'b' ); + } + } brunittest; + + // can't free at termination because order of destruction of global vars is arbitrary + RecoveryJob &RecoveryJob::_instance = *(new RecoveryJob()); + + } // namespace dur + +} // namespace mongo + diff --git a/db/dur_recover.h b/db/dur_recover.h new file mode 100644 index 0000000..1022fdc --- /dev/null +++ b/db/dur_recover.h @@ -0,0 +1,45 @@ +// @file dur.h durability support + +#pragma once + +#include "../util/concurrency/mutex.h" +#include "../util/file.h" + +namespace mongo { + class MongoMMF; + + namespace dur { + struct ParsedJournalEntry; + + /** call go() to execute a recovery from existing journal files. 
+ */ + class RecoveryJob : boost::noncopyable { + public: + RecoveryJob() :_lastDataSyncedFromLastRun(0), _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; } + void go(vector& files); + ~RecoveryJob(); + void processSection(const void *, unsigned len); + void close(); // locks and calls _close() + + static RecoveryJob & get() { return _instance; } + private: + void write(const ParsedJournalEntry& entry); // actually writes to the file + void applyEntry(const ParsedJournalEntry& entry, bool apply, bool dump); + void applyEntries(const vector &entries); + bool processFileBuffer(const void *, unsigned len); + bool processFile(path journalfile); + void _close(); // doesn't lock + + list > _mmfs; + + unsigned long long _lastDataSyncedFromLastRun; + unsigned long long _lastSeqMentionedInConsoleLog; + + mongo::mutex _mx; // protects _mmfs + + bool _recovering; // are we in recovery or WRITETODATAFILES + + static RecoveryJob &_instance; + }; + } +} diff --git a/db/dur_stats.h b/db/dur_stats.h new file mode 100644 index 0000000..5f5a188 --- /dev/null +++ b/db/dur_stats.h @@ -0,0 +1,46 @@ +// @file dur_stats.h + +namespace mongo { + namespace dur { + + /** journaling stats. the model here is that the commit thread is the only writer, and that reads are + uncommon (from a serverStatus command and such). Thus, there should not be multicore chatter overhead. + */ + struct Stats { + Stats(); + void rotate(); + BSONObj asObj(); + unsigned _intervalMicros; + struct S { + BSONObj _asObj(); + void reset(); + + unsigned _commits; + unsigned _earlyCommits; // count of early commits from commitIfNeeded() or from getDur().commitNow() + unsigned long long _journaledBytes; + unsigned long long _writeToDataFilesBytes; + + unsigned long long _prepLogBufferMicros; + unsigned long long _writeToJournalMicros; + unsigned long long _writeToDataFilesMicros; + unsigned long long _remapPrivateViewMicros; + + // undesirable to be in write lock for the group commit (it can be done in a read lock), so good if we + // have visibility when this happens. can happen for a couple reasons + // - read lock starvation + // - file being closed + // - data being written faster than the normal group commit interval + unsigned _commitsInWriteLock; + + unsigned _dtMillis; + }; + S *curr; + private: + S _a,_b; + unsigned long long _lastRotate; + S* other(); + }; + extern Stats stats; + + } +} diff --git a/db/dur_writetodatafiles.cpp b/db/dur_writetodatafiles.cpp new file mode 100644 index 0000000..50797ea --- /dev/null +++ b/db/dur_writetodatafiles.cpp @@ -0,0 +1,99 @@ +// @file dur_writetodatafiles.cpp apply the writes back to the non-private MMF after they are for certain in redo log + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
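The Stats object above keeps two S buckets (_a and _b) and flips curr between them, so the commit thread keeps incrementing counters while readers report the interval that just finished. A standalone sketch of that double-buffer rotation; the counter names and the exact swap behavior are illustrative, since only the declaration is shown above:

struct IntervalCounters {
    unsigned commits = 0;
    unsigned long long journaledBytes = 0;
    unsigned long long writeToDataFilesBytes = 0;
    void reset() { *this = IntervalCounters(); }
};

// Two buckets: the commit thread writes into *curr(), readers see the other,
// completed interval. rotate() swaps them, in the spirit of dur::Stats::rotate().
class DoubleBufferedStats {
    IntervalCounters _a, _b;
    IntervalCounters* _curr = &_a;
public:
    IntervalCounters* curr() { return _curr; }
    const IntervalCounters& lastInterval() const { return _curr == &_a ? _b : _a; }
    void rotate() {
        IntervalCounters* other = (_curr == &_a) ? &_b : &_a;
        other->reset();   // the bucket we switch into starts clean; the finished one stays readable
        _curr = other;
    }
};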
+*/ + +#include "pch.h" +#include "dur_commitjob.h" +#include "dur_stats.h" +#include "dur_recover.h" +#include "../util/timer.h" + +namespace mongo { + namespace dur { + + void debugValidateAllMapsMatch(); + + /** apply the writes back to the non-private MMF after they are for certain in redo log + + (1) todo we don't need to write back everything every group commit. we MUST write back + that which is going to be a remapped on its private view - but that might not be all + views. + + (2) todo should we do this using N threads? would be quite easy + see Hackenberg paper table 5 and 6. 2 threads might be a good balance. + + (3) with enough work, we could do this outside the read lock. it's a bit tricky though. + - we couldn't do it from the private views then as they may be changing. would have to then + be from the journal alignedbuffer. + - we need to be careful the file isn't unmapped on us -- perhaps a mutex or something + with MongoMMF on closes or something to coordinate that. + + locking: in read lock when called + + @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en + */ + + void WRITETODATAFILES_Impl1() { + RecoveryJob::get().processSection(commitJob._ab.buf(), commitJob._ab.len()); + } + + // the old implementation + void WRITETODATAFILES_Impl2() { + /* we go backwards as what is at the end is most likely in the cpu cache. it won't be much, but we'll take it. */ + for( set::const_iterator it(commitJob.writes().begin()), end(commitJob.writes().end()); it != end; ++it ) { + const WriteIntent& intent = *it; + stats.curr->_writeToDataFilesBytes += intent.length(); + dassert(intent.w_ptr); + memcpy(intent.w_ptr, intent.start(), intent.length()); + } + } + +#if defined(_EXPERIMENTAL) + void WRITETODATAFILES_Impl3() { + /* we go backwards as what is at the end is most likely in the cpu cache. it won't be much, but we'll take it. */ + for( set::const_iterator it(commitJob.writes().begin()), end(commitJob.writes().end()); it != end; ++it ) { + const WriteIntent& intent = *it; + stats.curr->_writeToDataFilesBytes += intent.length(); + dassert(intent.w_ptr); + memcpy(intent.w_ptr, + commitJob._ab.atOfs(intent.ofsInJournalBuffer), + intent.length()); + } + } +#endif + + void WRITETODATAFILES() { + dbMutex.assertAtLeastReadLocked(); + + MongoFile::markAllWritable(); // for _DEBUG. normally we don't write in a read lock + + Timer t; +#if defined(_EXPERIMENTAL) + WRITETODATAFILES_Impl3(); +#else + WRITETODATAFILES_Impl1(); +#endif + stats.curr->_writeToDataFilesMicros += t.micros(); + + if (!dbMutex.isWriteLocked()) + MongoFile::unmarkAllWritable(); + + debugValidateAllMapsMatch(); + } + + } +} diff --git a/db/durop.cpp b/db/durop.cpp new file mode 100644 index 0000000..344b21e --- /dev/null +++ b/db/durop.cpp @@ -0,0 +1,160 @@ +// @file durop.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . 
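WRITETODATAFILES_Impl2() above is the direct form of the copy-back step: once the journal write is durable, each intent's bytes are copied from the private copy-on-write view to the shared writable mapping recorded earlier in w_ptr. A simplified standalone sketch with the two views represented as plain buffers:

#include <cstring>
#include <vector>

struct PendingWrite {
    const char* privateSrc;   // where the change currently lives (private view)
    char*       sharedDst;    // the corresponding spot in the shared, file-backed view
    unsigned    len;
};

// After the journal entry for these writes is on disk, push the changes through to
// the real datafile mappings (the equivalent of memcpy(w_ptr, start(), length())).
inline unsigned long long writeBackToDataFiles(const std::vector<PendingWrite>& writes) {
    unsigned long long bytes = 0;
    for (const PendingWrite& w : writes) {
        std::memcpy(w.sharedDst, w.privateSrc, w.len);
        bytes += w.len;   // the stats counter _writeToDataFilesBytes tracks this total
    }
    return bytes;
}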
+*/ + +#include "pch.h" +#include "concurrency.h" +#include "../util/alignedbuilder.h" +#include "../util/mongoutils/str.h" +#include "../util/file.h" +#include "mongommf.h" +#include "durop.h" +#include "../util/file_allocator.h" + +using namespace mongoutils; + +namespace mongo { + + extern string dbpath; // --dbpath parm + + void _deleteDataFiles(const char *); + + namespace dur { + + /** read a durop from journal file referenced by br. + @param opcode the opcode which has already been written from the bufreader + */ + shared_ptr DurOp::read(unsigned opcode, BufReader& br) { + shared_ptr op; + switch( opcode ) { + case JEntry::OpCode_FileCreated: + op = shared_ptr( new FileCreatedOp(br) ); + break; + case JEntry::OpCode_DropDb: + op = shared_ptr( new DropDbOp(br) ); + break; + default: + massert(13546, (str::stream() << "dur recover unrecognized opcode in journal " << opcode), false); + } + return op; + } + + void DurOp::serialize(AlignedBuilder& ab) { + ab.appendNum(_opcode); + _serialize(ab); + } + + DropDbOp::DropDbOp(BufReader& log) : DurOp(JEntry::OpCode_DropDb) { + unsigned long long reserved; + log.read(reserved); + log.read(reserved); + log.readStr(_db); + string reservedStr; + log.readStr(reservedStr); + } + + void DropDbOp::_serialize(AlignedBuilder& ab) { + ab.appendNum((unsigned long long) 0); // reserved for future use + ab.appendNum((unsigned long long) 0); // reserved for future use + ab.appendStr(_db); + ab.appendStr(""); // reserved + } + + /** throws */ + void DropDbOp::replay() { + log() << "recover replay drop db " << _db << endl; + _deleteDataFiles(_db.c_str()); + } + + FileCreatedOp::FileCreatedOp(string f, unsigned long long l) : + DurOp(JEntry::OpCode_FileCreated) { + _p = RelativePath::fromFullPath(f); + _len = l; + } + + FileCreatedOp::FileCreatedOp(BufReader& log) : DurOp(JEntry::OpCode_FileCreated) { + unsigned long long reserved; + log.read(reserved); + log.read(reserved); + log.read(_len); // size of file, not length of name + string s; + log.readStr(s); + _p._p = s; + } + + void FileCreatedOp::_serialize(AlignedBuilder& ab) { + ab.appendNum((unsigned long long) 0); // reserved for future use + ab.appendNum((unsigned long long) 0); // reserved for future use + ab.appendNum(_len); + ab.appendStr(_p.toString()); + } + + string FileCreatedOp::toString() { + return str::stream() << "FileCreatedOp " << _p.toString() << ' ' << _len/1024.0/1024.0 << "MB"; + } + + // if an operation deletes or creates a file (or moves etc.), it may need files closed. + bool FileCreatedOp::needFilesClosed() { + return exists( _p.asFullPath() ); + } + + void FileCreatedOp::replay() { + // i believe the code assumes new files are filled with zeros. thus we have to recreate the file, + // or rewrite at least, even if it were the right length. perhaps one day we should change that + // although easier to avoid defects if we assume it is zeros perhaps. + string full = _p.asFullPath(); + if( exists(full) ) { + try { + remove(full); + } + catch(std::exception& e) { + log(1) << "recover info FileCreateOp::replay unlink " << e.what() << endl; + } + } + + log() << "recover create file " << full << ' ' << _len/1024.0/1024.0 << "MB" << endl; + if( MemoryMappedFile::exists(full) ) { + // first delete if exists. + try { + remove(full); + } + catch(...) 
{ + log() << "warning could not delete file " << full << endl; + } + } + ensureParentDirCreated(full); + File f; + f.open(full.c_str()); + massert(13547, str::stream() << "recover couldn't create file " << full, f.is_open()); + unsigned long long left = _len; + const unsigned blksz = 64 * 1024; + scoped_array v( new char[blksz] ); + memset( v.get(), 0, blksz ); + fileofs ofs = 0; + while( left ) { + unsigned long long w = left < blksz ? left : blksz; + f.write(ofs, v.get(), (unsigned) w); + left -= w; + ofs += w; + } + f.fsync(); + massert(13628, str::stream() << "recover failure writing file " << full, !f.bad() ); + } + + } + +} diff --git a/db/durop.h b/db/durop.h new file mode 100644 index 0000000..c4574c2 --- /dev/null +++ b/db/durop.h @@ -0,0 +1,111 @@ +// @file durop.h class DurOp and descendants + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "dur_journalformat.h" +#include "../util/bufreader.h" +#include "../util/paths.h" + +namespace mongo { + + class AlignedBuilder; + + namespace dur { + + const unsigned Alignment = 8192; + + /** DurOp - Operations we journal that aren't just basic writes. + * + * Basic writes are logged as JEntry's, and indicated in ram temporarily as struct dur::WriteIntent. + * We don't make WriteIntent inherit from DurOp to keep it as lean as possible as there will be millions of + * them (we don't want a vtable for example there). + * + * For each op we want to journal, we define a subclass. + */ + class DurOp { /* copyable */ + public: + // @param opcode a sentinel value near max unsigned which uniquely identifies the operation. + // @see dur::JEntry + DurOp(unsigned opcode) : _opcode(opcode) { } + + virtual ~DurOp() { } + + /** serialize the op out to a builder which will then be written (presumably) to the journal */ + void serialize(AlignedBuilder& ab); + + /** read a durop from journal file referenced by br. + @param opcode the opcode which has already been written from the bufreader + */ + static shared_ptr read(unsigned opcode, BufReader& br); + + /** replay the operation (during recovery) + throws + + For now, these are not replayed during the normal WRITETODATAFILES phase, since these + operations are handled in other parts of the code. At some point this may change. + */ + virtual void replay() = 0; + + virtual string toString() = 0; + + /** if the op requires all file to be closed before doing its work, returns true. 
*/ + virtual bool needFilesClosed() { return false; } + + protected: + /** DurOp will have already written the opcode for you */ + virtual void _serialize(AlignedBuilder& ab) = 0; + + private: + const unsigned _opcode; + }; + + /** indicates creation of a new file */ + class FileCreatedOp : public DurOp { + public: + FileCreatedOp(BufReader& log); + /** param f filename to create with path */ + FileCreatedOp(string f, unsigned long long l); + virtual void replay(); + virtual string toString(); + virtual bool needFilesClosed(); + protected: + virtual void _serialize(AlignedBuilder& ab); + private: + RelativePath _p; + unsigned long long _len; // size of file, not length of name + }; + + /** record drop of a database */ + class DropDbOp : public DurOp { + public: + DropDbOp(BufReader& log); + DropDbOp(string db) : + DurOp(JEntry::OpCode_DropDb), _db(db) { } + virtual void replay(); + virtual string toString() { return string("DropDbOp ") + _db; } + virtual bool needFilesClosed() { return true; } + protected: + virtual void _serialize(AlignedBuilder& ab); + private: + string _db; + }; + + } + +} diff --git a/db/extsort.cpp b/db/extsort.cpp index 68e6b52..2e6d8d8 100644 --- a/db/extsort.cpp +++ b/db/extsort.cpp @@ -19,160 +19,160 @@ #include "pch.h" #include "extsort.h" -#include "namespace.h" +#include "namespace-inl.h" #include "../util/file.h" #include #include #include namespace mongo { - + BSONObj BSONObjExternalSorter::extSortOrder; unsigned long long BSONObjExternalSorter::_compares = 0; - + BSONObjExternalSorter::BSONObjExternalSorter( const BSONObj & order , long maxFileSize ) - : _order( order.getOwned() ) , _maxFilesize( maxFileSize ) , - _arraySize(1000000), _cur(0), _curSizeSoFar(0), _sorted(0){ - + : _order( order.getOwned() ) , _maxFilesize( maxFileSize ) , + _arraySize(1000000), _cur(0), _curSizeSoFar(0), _sorted(0) { + stringstream rootpath; rootpath << dbpath; if ( dbpath[dbpath.size()-1] != '/' ) rootpath << "/"; rootpath << "_tmp/esort." << time(0) << "." << rand() << "/"; _root = rootpath.str(); - + log(1) << "external sort root: " << _root.string() << endl; create_directories( _root ); _compares = 0; } - - BSONObjExternalSorter::~BSONObjExternalSorter(){ - if ( _cur ){ + + BSONObjExternalSorter::~BSONObjExternalSorter() { + if ( _cur ) { delete _cur; _cur = 0; } - + unsigned long removed = remove_all( _root ); wassert( removed == 1 + _files.size() ); } - void BSONObjExternalSorter::_sortInMem(){ + void BSONObjExternalSorter::_sortInMem() { // extSortComp needs to use glbals // qsort_r only seems available on bsd, which is what i really want to use dblock l; extSortOrder = _order; _cur->sort( BSONObjExternalSorter::extSortComp ); } - - void BSONObjExternalSorter::sort(){ + + void BSONObjExternalSorter::sort() { uassert( 10048 , "already sorted" , ! _sorted ); - + _sorted = true; - if ( _cur && _files.size() == 0 ){ + if ( _cur && _files.size() == 0 ) { _sortInMem(); log(1) << "\t\t not using file. size:" << _curSizeSoFar << " _compares:" << _compares << endl; return; } - - if ( _cur ){ + + if ( _cur ) { finishMap(); } - - if ( _cur ){ + + if ( _cur ) { delete _cur; _cur = 0; } - + if ( _files.size() == 0 ) return; - + } - void BSONObjExternalSorter::add( const BSONObj& o , const DiskLoc & loc ){ + void BSONObjExternalSorter::add( const BSONObj& o , const DiskLoc & loc ) { uassert( 10049 , "sorted already" , ! _sorted ); - - if ( ! _cur ){ + + if ( ! 
_cur ) { _cur = new InMemory( _arraySize ); } - + Data& d = _cur->getNext(); d.first = o.getOwned(); d.second = loc; - + long size = o.objsize(); _curSizeSoFar += size + sizeof( DiskLoc ) + sizeof( BSONObj ); - - if ( _cur->hasSpace() == false || _curSizeSoFar > _maxFilesize ){ + + if ( _cur->hasSpace() == false || _curSizeSoFar > _maxFilesize ) { finishMap(); log(1) << "finishing map" << endl; } } - - void BSONObjExternalSorter::finishMap(){ + + void BSONObjExternalSorter::finishMap() { uassert( 10050 , "bad" , _cur ); - + _curSizeSoFar = 0; if ( _cur->size() == 0 ) return; - + _sortInMem(); - + stringstream ss; ss << _root.string() << "/file." << _files.size(); string file = ss.str(); - + ofstream out; out.open( file.c_str() , ios_base::out | ios_base::binary ); assertStreamGood( 10051 , (string)"couldn't open file: " + file , out ); - + int num = 0; - for ( InMemory::iterator i=_cur->begin(); i != _cur->end(); ++i ){ + for ( InMemory::iterator i=_cur->begin(); i != _cur->end(); ++i ) { Data p = *i; out.write( p.first.objdata() , p.first.objsize() ); out.write( (char*)(&p.second) , sizeof( DiskLoc ) ); num++; } - + _cur->clear(); - + _files.push_back( file ); out.close(); log(2) << "Added file: " << file << " with " << num << "objects for external sort" << endl; } - + // --------------------------------- BSONObjExternalSorter::Iterator::Iterator( BSONObjExternalSorter * sorter ) : - _cmp( sorter->_order ) , _in( 0 ){ - - for ( list::iterator i=sorter->_files.begin(); i!=sorter->_files.end(); i++ ){ + _cmp( sorter->_order ) , _in( 0 ) { + + for ( list::iterator i=sorter->_files.begin(); i!=sorter->_files.end(); i++ ) { _files.push_back( new FileIterator( *i ) ); _stash.push_back( pair( Data( BSONObj() , DiskLoc() ) , false ) ); } - - if ( _files.size() == 0 && sorter->_cur ){ + + if ( _files.size() == 0 && sorter->_cur ) { _in = sorter->_cur; _it = sorter->_cur->begin(); } - + } - - BSONObjExternalSorter::Iterator::~Iterator(){ + + BSONObjExternalSorter::Iterator::~Iterator() { for ( vector::iterator i=_files.begin(); i!=_files.end(); i++ ) delete *i; _files.clear(); } - - bool BSONObjExternalSorter::Iterator::more(){ + + bool BSONObjExternalSorter::Iterator::more() { if ( _in ) return _it != _in->end(); - + for ( vector::iterator i=_files.begin(); i!=_files.end(); i++ ) if ( (*i)->more() ) return true; @@ -181,34 +181,34 @@ namespace mongo { return true; return false; } - - BSONObjExternalSorter::Data BSONObjExternalSorter::Iterator::next(){ - - if ( _in ){ + + BSONObjExternalSorter::Data BSONObjExternalSorter::Iterator::next() { + + if ( _in ) { Data& d = *_it; ++_it; return d; } - + Data best; int slot = -1; - - for ( unsigned i=0; i<_stash.size(); i++ ){ - if ( ! _stash[i].second ){ + for ( unsigned i=0; i<_stash.size(); i++ ) { + + if ( ! 
_stash[i].second ) { if ( _files[i]->more() ) _stash[i] = pair( _files[i]->next() , true ); else continue; } - - if ( slot == -1 || _cmp( best , _stash[i].first ) == 0 ){ + + if ( slot == -1 || _cmp( best , _stash[i].first ) == 0 ) { best = _stash[i].first; slot = i; } - + } - + assert( slot >= 0 ); _stash[slot].second = false; @@ -216,27 +216,26 @@ namespace mongo { } // ----------------------------------- - - BSONObjExternalSorter::FileIterator::FileIterator( string file ){ - long length; + + BSONObjExternalSorter::FileIterator::FileIterator( string file ) { + unsigned long long length; _buf = (char*)_file.map( file.c_str() , length , MemoryMappedFile::SEQUENTIAL ); massert( 10308 , "mmap failed" , _buf ); - assert( (unsigned long long)length == (unsigned long long)file_size( file ) ); + assert( length == (unsigned long long) file_size( file ) ); _end = _buf + length; } - BSONObjExternalSorter::FileIterator::~FileIterator(){ - } - - bool BSONObjExternalSorter::FileIterator::more(){ + BSONObjExternalSorter::FileIterator::~FileIterator() {} + + bool BSONObjExternalSorter::FileIterator::more() { return _buf < _end; } - - BSONObjExternalSorter::Data BSONObjExternalSorter::FileIterator::next(){ + + BSONObjExternalSorter::Data BSONObjExternalSorter::FileIterator::next() { BSONObj o( _buf ); _buf += o.objsize(); DiskLoc * l = (DiskLoc*)_buf; _buf += 8; return Data( o , *l ); } - + } diff --git a/db/extsort.h b/db/extsort.h index fa0eca4..c0791db 100644 --- a/db/extsort.h +++ b/db/extsort.h @@ -20,8 +20,8 @@ #include "../pch.h" #include "jsobj.h" -#include "namespace.h" -#include "curop.h" +#include "namespace-inl.h" +#include "curop-inl.h" #include "../util/array.h" namespace mongo { @@ -32,13 +32,13 @@ namespace mongo { */ class BSONObjExternalSorter : boost::noncopyable { public: - + typedef pair Data; private: static BSONObj extSortOrder; - static int extSortComp( const void *lv, const void *rv ){ + static int extSortComp( const void *lv, const void *rv ) { RARELY killCurrentOp.checkForInterrupt(); _compares++; Data * l = (Data*)lv; @@ -54,7 +54,7 @@ namespace mongo { FileIterator( string file ); ~FileIterator(); bool more(); - Data next(); + Data next(); private: MemoryMappedFile _file; char * _buf; @@ -63,7 +63,7 @@ namespace mongo { class MyCmp { public: - MyCmp( const BSONObj & order = BSONObj() ) : _order( order ){} + MyCmp( const BSONObj & order = BSONObj() ) : _order( order ) {} bool operator()( const Data &l, const Data &r ) const { RARELY killCurrentOp.checkForInterrupt(); _compares++; @@ -78,50 +78,50 @@ namespace mongo { }; public: - + typedef FastArray InMemory; class Iterator : boost::noncopyable { public: - + Iterator( BSONObjExternalSorter * sorter ); ~Iterator(); bool more(); Data next(); - + private: MyCmp _cmp; vector _files; vector< pair > _stash; - + InMemory * _in; InMemory::iterator _it; - + }; - + BSONObjExternalSorter( const BSONObj & order = BSONObj() , long maxFileSize = 1024 * 1024 * 100 ); ~BSONObjExternalSorter(); - + void add( const BSONObj& o , const DiskLoc & loc ); - void add( const BSONObj& o , int a , int b ){ + void add( const BSONObj& o , int a , int b ) { add( o , DiskLoc( a , b ) ); } /* call after adding values, and before fetching the iterator */ void sort(); - - auto_ptr iterator(){ + + auto_ptr iterator() { uassert( 10052 , "not sorted" , _sorted ); return auto_ptr( new Iterator( this ) ); } - - int numFiles(){ + + int numFiles() { return _files.size(); } - - long getCurSizeSoFar(){ return _curSizeSoFar; } - void hintNumObjects( long long numObjects 
){ + long getCurSizeSoFar() { return _curSizeSoFar; } + + void hintNumObjects( long long numObjects ) { if ( numObjects < _arraySize ) _arraySize = (int)(numObjects + 100); } @@ -129,18 +129,18 @@ namespace mongo { private: void _sortInMem(); - + void sort( string file ); void finishMap(); - + BSONObj _order; long _maxFilesize; path _root; - + int _arraySize; InMemory * _cur; long _curSizeSoFar; - + list _files; bool _sorted; diff --git a/db/filever.h b/db/filever.h index 4aa18d4..e89a824 100644 --- a/db/filever.h +++ b/db/filever.h @@ -20,11 +20,11 @@ namespace mongo { -inline void checkDataFileVersion(NamespaceDetails& d) { -} + inline void checkDataFileVersion(NamespaceDetails& d) { + } -inline void checkIndexFileVersion(NamespaceDetails& d) { -} + inline void checkIndexFileVersion(NamespaceDetails& d) { + } } diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp index 60818fc..934ee80 100644 --- a/db/geo/2d.cpp +++ b/db/geo/2d.cpp @@ -17,14 +17,14 @@ */ #include "pch.h" -#include "../namespace.h" +#include "../namespace-inl.h" #include "../jsobj.h" #include "../index.h" #include "../../util/unittest.h" #include "../commands.h" #include "../pdfile.h" #include "../btree.h" -#include "../curop.h" +#include "../curop-inl.h" #include "../matcher.h" #include "core.h" @@ -33,7 +33,8 @@ namespace mongo { #if 0 # define GEODEBUG(x) cout << x << endl; - inline void PREFIXDEBUG(GeoHash prefix, const GeoConvert* g){ +# define GEODEBUGPRINT(x) PRINT(x) + inline void PREFIXDEBUG(GeoHash prefix, const GeoConvert* g) { if (!prefix.constrains()) { cout << "\t empty prefix" << endl; return ; @@ -46,18 +47,29 @@ namespace mongo { Point center ( (ll._x+tr._x)/2, (ll._y+tr._y)/2 ); double radius = fabs(ll._x - tr._x) / 2; - cout << "\t ll: " << ll.toString() << " tr: " << tr.toString() + cout << "\t ll: " << ll.toString() << " tr: " << tr.toString() << " center: " << center.toString() << " radius: " << radius << endl; } #else -# define GEODEBUG(x) -# define PREFIXDEBUG(x, y) +# define GEODEBUG(x) +# define GEODEBUGPRINT(x) +# define PREFIXDEBUG(x, y) #endif - double EARTH_RADIUS_KM = 6371; - double EARTH_RADIUS_MILES = EARTH_RADIUS_KM * 0.621371192; + const double EARTH_RADIUS_KM = 6371; + const double EARTH_RADIUS_MILES = EARTH_RADIUS_KM * 0.621371192; + enum GeoDistType { + GEO_PLAIN, + GEO_SPHERE + }; + + inline double computeXScanDistance(double y, double maxDistDegrees) { + // TODO: this overestimates for large madDistDegrees far from the equator + return maxDistDegrees / min(cos(deg2rad(min(+89.0, y + maxDistDegrees))), + cos(deg2rad(max(-89.0, y - maxDistDegrees)))); + } GeoBitSets geoBitSets; @@ -66,14 +78,14 @@ namespace mongo { class Geo2dType : public IndexType , public GeoConvert { public: Geo2dType( const IndexPlugin * plugin , const IndexSpec* spec ) - : IndexType( plugin , spec ){ - + : IndexType( plugin , spec ) { + BSONObjBuilder orderBuilder; BSONObjIterator i( spec->keyPattern ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); - if ( e.type() == String && GEO2DNAME == e.valuestr() ){ + if ( e.type() == String && GEO2DNAME == e.valuestr() ) { uassert( 13022 , "can't have 2 geo field" , _geo.size() == 0 ); uassert( 13023 , "2d has to be first in index" , _other.size() == 0 ); _geo = e.fieldName(); @@ -83,16 +95,16 @@ namespace mongo { } orderBuilder.append( "" , 1 ); } - + uassert( 13024 , "no geo field specified" , _geo.size() ); - + _bits = _configval( spec , "bits" , 26 ); // for lat/long, ~ 1ft uassert( 13028 , "can't have more than 32 bits in geo index" , _bits <= 32 ); 
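            // Illustrative arithmetic for the scaling set up just below (assuming the default
            // min/max of -180/180, not values taken from this particular index spec): _scaling is
            // 2^32 / (_max - _min) = 4294967296 / 360, roughly 11.93 million units per degree,
            // and _convert(x) = (x - _min) * _scaling, so _convert(0) lands near 2^31 before
            // hash() interleaves the converted x and y bits into the geohash.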
_max = _configval( spec , "max" , 180 ); _min = _configval( spec , "min" , -180 ); - + _scaling = (1024*1024*1024*4.0)/(_max-_min); _order = orderBuilder.obj(); @@ -103,30 +115,30 @@ namespace mongo { _error = distance(a, b); } - int _configval( const IndexSpec* spec , const string& name , int def ){ + int _configval( const IndexSpec* spec , const string& name , int def ) { BSONElement e = spec->info[name]; if ( e.isNumber() ) return e.numberInt(); return def; } - ~Geo2dType(){ - + ~Geo2dType() { + } - virtual BSONObj fixKey( const BSONObj& in ) { + virtual BSONObj fixKey( const BSONObj& in ) { if ( in.firstElement().type() == BinData ) return in; BSONObjBuilder b(in.objsize()+16); - + if ( in.firstElement().isABSONObj() ) _hash( in.firstElement().embeddedObject() ).append( b , "" ); else if ( in.firstElement().type() == String ) GeoHash( in.firstElement().valuestr() ).append( b , "" ); else if ( in.firstElement().type() == RegEx ) GeoHash( in.firstElement().regex() ).append( b , "" ); - else + else return in; BSONObjIterator i(in); @@ -152,19 +164,44 @@ namespace mongo { _hash( embed ).append( b , "" ); - for ( size_t i=0; i<_other.size(); i++ ){ - BSONElement e = obj[_other[i]]; - if ( e.eoo() ) - e = _spec->missingField(); - b.appendAs( e , "" ); - } + // Go through all the other index keys + for ( vector::const_iterator i = _other.begin(); i != _other.end(); ++i ){ + + // Get *all* fields for the index key + BSONElementSet eSet; + obj.getFieldsDotted( *i, eSet ); + + + if ( eSet.size() == 0 ) + b.appendAs( _spec->missingField(), "" ); + else if ( eSet.size() == 1 ) + b.appendAs( *(eSet.begin()), "" ); + else{ + + // If we have more than one key, store as an array of the objects + // TODO: Store multiple keys? + + BSONArrayBuilder aBuilder; + + for( BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); ++ei ){ + aBuilder.append( *ei ); + } + + BSONArray arr = aBuilder.arr(); + + b.append( "", arr ); + + } + + } + keys.insert( b.obj() ); } - + GeoHash _tohash( const BSONElement& e ) const { if ( e.isABSONObj() ) return _hash( e.embeddedObject() ); - + return GeoHash( e , _bits ); } @@ -174,7 +211,7 @@ namespace mongo { BSONElement x = i.next(); uassert( 13068 , "geo field only has 1 element" , i.more() ); BSONElement y = i.next(); - + uassert( 13026 , "geo values have to be numbers: " + o.toString() , x.isNumber() && y.isNumber() ); return hash( x.number() , y.number() ); @@ -192,33 +229,33 @@ namespace mongo { b.append( "y" , _unconvert( y ) ); return b.obj(); } - + unsigned _convert( double in ) const { uassert( 13027 , "point not in range" , in <= (_max + _error) && in >= (_min - _error) ); in -= _min; assert( in > 0 ); return (unsigned)(in * _scaling); } - + double _unconvert( unsigned in ) const { double x = in; x /= _scaling; x += _min; return x; } - + void unhash( const GeoHash& h , double& x , double& y ) const { unsigned a,b; h.unhash(a,b); x = _unconvert( a ); y = _unconvert( b ); } - + double distance( const GeoHash& a , const GeoHash& b ) const { double ax,ay,bx,by; unhash( a , ax , ay ); unhash( b , bx , by ); - + double dx = bx - ax; double dy = by - ay; @@ -237,6 +274,11 @@ namespace mongo { b.move( 1 , 1 ); unhash( a, ax, ay ); unhash( b, bx, by ); + + // _min and _max are a singularity + if (bx == _min) + bx = _max; + return (fabs(ax-bx)); } @@ -248,10 +290,10 @@ namespace mongo { virtual IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const { BSONElement e = query.getFieldDotted(_geo.c_str()); - switch ( e.type() ){ + switch ( 
e.type() ) { case Object: { BSONObj sub = e.embeddedObject(); - switch ( sub.firstElement().getGtLtOp() ){ + switch ( sub.firstElement().getGtLtOp() ) { case BSONObj::opNEAR: case BSONObj::opWITHIN: return OPTIMAL; @@ -259,6 +301,9 @@ namespace mongo { } } case Array: + // Non-geo index data is stored in a non-standard way, cannot use for exact lookups with + // additional criteria + if ( query.nFields() > 1 ) return USELESS; return HELPFUL; default: return USELESS; @@ -267,7 +312,7 @@ namespace mongo { string _geo; vector _other; - + unsigned _bits; int _max; int _min; @@ -279,38 +324,38 @@ namespace mongo { class Box { public: - + Box( const Geo2dType * g , const GeoHash& hash ) - : _min( g , hash ) , - _max( _min._x + g->sizeEdge( hash ) , _min._y + g->sizeEdge( hash ) ){ + : _min( g , hash ) , + _max( _min._x + g->sizeEdge( hash ) , _min._y + g->sizeEdge( hash ) ) { } - + Box( double x , double y , double size ) - : _min( x , y ) , - _max( x + size , y + size ){ + : _min( x , y ) , + _max( x + size , y + size ) { } Box( Point min , Point max ) - : _min( min ) , _max( max ){ + : _min( min ) , _max( max ) { } - Box(){} + Box() {} string toString() const { StringBuilder buf(64); buf << _min.toString() << " -->> " << _max.toString(); return buf.str(); } - + bool between( double min , double max , double val , double fudge=0) const { return val + fudge >= min && val <= max + fudge; } - + bool mid( double amin , double amax , double bmin , double bmax , bool min , double& res ) const { assert( amin <= amax ); assert( bmin <= bmax ); - if ( amin < bmin ){ + if ( amin < bmin ) { if ( amax < bmin ) return false; res = min ? bmin : amax; @@ -323,16 +368,16 @@ namespace mongo { } double intersects( const Box& other ) const { - + Point boundMin(0,0); Point boundMax(0,0); - + if ( mid( _min._x , _max._x , other._min._x , other._max._x , true , boundMin._x ) == false || - mid( _min._x , _max._x , other._min._x , other._max._x , false , boundMax._x ) == false || - mid( _min._y , _max._y , other._min._y , other._max._y , true , boundMin._y ) == false || - mid( _min._y , _max._y , other._min._y , other._max._y , false , boundMax._y ) == false ) + mid( _min._x , _max._x , other._min._x , other._max._x , false , boundMax._x ) == false || + mid( _min._y , _max._y , other._min._y , other._max._y , true , boundMin._y ) == false || + mid( _min._y , _max._y , other._min._y , other._max._y , false , boundMax._y ) == false ) return 0; - + Box intersection( boundMin , boundMax ); return intersection.area() / ( ( area() + other.area() ) / 2 ); @@ -347,45 +392,49 @@ namespace mongo { ( _min._y + _max._y ) / 2 ); } - bool inside( Point p , double fudge = 0 ){ + bool inside( Point p , double fudge = 0 ) { bool res = inside( p._x , p._y , fudge ); //cout << "is : " << p.toString() << " in " << toString() << " = " << res << endl; return res; } - - bool inside( double x , double y , double fudge = 0 ){ - return + + bool inside( double x , double y , double fudge = 0 ) { + return between( _min._x , _max._x , x , fudge ) && between( _min._y , _max._y , y , fudge ); } - + + bool contains(const Box& other, double fudge=0) { + return inside(other._min, fudge) && inside(other._max, fudge); + } + Point _min; Point _max; }; - + class Geo2dPlugin : public IndexPlugin { public: - Geo2dPlugin() : IndexPlugin( GEO2DNAME ){ + Geo2dPlugin() : IndexPlugin( GEO2DNAME ) { } - + virtual IndexType* generate( const IndexSpec* spec ) const { return new Geo2dType( this , spec ); } } geo2dplugin; - + struct GeoUnitTest : public UnitTest { - 
- int round( double d ){ + + int round( double d ) { return (int)(.5+(d*1000)); } - + #define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); } - void run(){ + void run() { assert( ! GeoHash::isBitSet( 0 , 0 ) ); assert( ! GeoHash::isBitSet( 0 , 31 ) ); assert( GeoHash::isBitSet( 1 , 31 ) ); - + IndexSpec i( BSON( "loc" << "2d" ) ); Geo2dType g( &geo2dplugin , &i ); { @@ -411,7 +460,7 @@ namespace mongo { assert( round( in["x"].number() ) == round( out["x"].number() ) ); assert( round( in["y"].number() ) == round( out["y"].number() ) ); } - + { GeoHash h( "0000" ); h.move( 0 , 1 ); @@ -424,13 +473,13 @@ namespace mongo { GEOHEQ( h , "0100" ); h.move( 0 , -1 ); GEOHEQ( h , "0001" ); - + h.init( "0000" ); h.move( 1 , 0 ); GEOHEQ( h , "0010" ); } - + { Box b( 5 , 5 , 2 ); assert( "(5,5) -->> (7,7)" == b.toString() ); @@ -444,7 +493,7 @@ namespace mongo { b = g.hash( 42 , 44 ); assert( round(10) == round(g.distance( a , b )) ); } - + { GeoHash x("0000"); assert( 0 == x.getHash() ); @@ -454,7 +503,7 @@ namespace mongo { assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); } - + { GeoHash x("1010"); GEOHEQ( x , "1010" ); @@ -462,8 +511,8 @@ namespace mongo { GEOHEQ( y , "101001" ); } - { - + { + GeoHash a = g.hash( 5 , 5 ); GeoHash b = g.hash( 5 , 7 ); GeoHash c = g.hash( 100 , 100 ); @@ -509,13 +558,13 @@ namespace mongo { assert( entry.hasPrefix( GeoHash( "1100" ) ) ); assert( entry.hasPrefix( prefix ) ); } - + { GeoHash a = g.hash( 50 , 50 ); GeoHash b = g.hash( 48 , 54 ); assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); } - + { Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); @@ -534,7 +583,7 @@ namespace mongo { int N = 10000; { Timer t; - for ( int i=0; i 2469 && dist < 2470 ); } + { + Point BNA (-86.67, 36.12); + Point LAX (-118.40, 33.94); + Point JFK (-73.77694444, 40.63861111 ); + assert( spheredist_deg(BNA, BNA) < 1e-6); + assert( spheredist_deg(LAX, LAX) < 1e-6); + assert( spheredist_deg(JFK, JFK) < 1e-6); + + Point zero (0, 0); + Point antizero (0,-180); + + // these were known to cause NaN + assert( spheredist_deg(zero, zero) < 1e-6); + assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6); + assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6); + } } } } geoUnitTest; - + class GeoPoint { public: - GeoPoint(){ + GeoPoint() { } GeoPoint( const KeyNode& node , double distance ) - : _key( node.key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _distance( distance ){ + : _key( node.key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _distance( distance ) { } GeoPoint( const BSONObj& key , DiskLoc loc , double distance ) - : _key(key) , _loc(loc) , _o( loc.obj() ) , _distance( distance ){ + : _key(key) , _loc(loc) , _o( loc.obj() ) , _distance( distance ) { } bool operator<( const GeoPoint& other ) const { @@ -630,44 +695,44 @@ namespace mongo { public: GeoAccumulator( const Geo2dType * g , const BSONObj& filter ) : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) { - if ( ! filter.isEmpty() ){ + if ( ! 
filter.isEmpty() ) { _matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) ); } } - virtual ~GeoAccumulator(){ + virtual ~GeoAccumulator() { } - virtual void add( const KeyNode& node ){ + virtual void add( const KeyNode& node ) { // when looking at other boxes, don't want to look at some object twice pair::iterator,bool> seenBefore = _seen.insert( node.recordLoc ); - if ( ! seenBefore.second ){ + if ( ! seenBefore.second ) { GEODEBUG( "\t\t\t\t already seen : " << node.recordLoc.obj()["_id"] ); return; } _lookedAt++; - + // distance check double d = 0; - if ( ! checkDistance( GeoHash( node.key.firstElement() ) , d ) ){ + if ( ! checkDistance( GeoHash( node.key.firstElement() ) , d ) ) { GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d ); return; - } + } GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << d ); - + // matcher MatchDetails details; - if ( _matcher.get() ){ + if ( _matcher.get() ) { bool good = _matcher->matches( node.key , node.recordLoc , &details ); if ( details.loadedObject ) _objectsLoaded++; - - if ( ! good ){ + + if ( ! good ) { GEODEBUG( "\t\t\t\t didn't match : " << node.recordLoc.obj()["_id"] ); return; } } - + if ( ! details.loadedObject ) // dont double count _objectsLoaded++; @@ -681,7 +746,7 @@ namespace mongo { long long found() const { return _found; } - + const Geo2dType * _g; set _seen; auto_ptr _matcher; @@ -690,82 +755,96 @@ namespace mongo { long long _objectsLoaded; long long _found; }; - + class GeoHopper : public GeoAccumulator { public: typedef multiset Holder; - GeoHopper( const Geo2dType * g , unsigned max , const GeoHash& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits::max() ) - : GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ) { - _farthest = -1; - } + GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits::max() , GeoDistType type=GEO_PLAIN) + : GeoAccumulator( g , filter ) , _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _farthest(-1) + {} - virtual bool checkDistance( const GeoHash& h , double& d ){ - d = _g->distance( _near , h ); + virtual bool checkDistance( const GeoHash& h , double& d ) { + switch (_type) { + case GEO_PLAIN: + d = _near.distance( Point(_g, h) ); + break; + case GEO_SPHERE: + d = spheredist_deg(_near, Point(_g, h)); + break; + default: + assert(0); + } bool good = d < _maxDistance && ( _points.size() < _max || d < farthest() ); - GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near << "\t" << h << "\t" << d + GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near.toString() << "\t" << h << "\t" << d << " ok: " << good << " farthest: " << farthest() ); return good; } - - virtual void addSpecific( const KeyNode& node , double d ){ + + virtual void addSpecific( const KeyNode& node , double d ) { GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d ); _points.insert( GeoPoint( node.key , node.recordLoc , d ) ); - if ( _points.size() > _max ){ + if ( _points.size() > _max ) { _points.erase( --_points.end() ); - } - Holder::iterator i = _points.end(); - i--; - _farthest = i->_distance; + Holder::iterator i = _points.end(); + i--; + _farthest = i->_distance; + } + else { + if (d > _farthest) + _farthest = d; + } } double farthest() const { return _farthest; } + unsigned _max; - GeoHash _near; + Point _near; Holder _points; double _maxDistance; + GeoDistType _type; 
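        // Orientation note on the members above and below (a summary of addSpecific/checkDistance,
        // not additional behavior): _points is a multiset ordered by distance to _near; addSpecific
        // inserts every accepted candidate and, once more than _max are held, erases the current
        // farthest entry and refreshes _farthest from the new last element, so checkDistance can
        // prune any key whose distance already exceeds farthest() once the set is full.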
double _farthest; }; - + struct BtreeLocation { int pos; bool found; DiskLoc bucket; - - BSONObj key(){ + + BSONObj key() { if ( bucket.isNull() ) return BSONObj(); return bucket.btree()->keyNode( pos ).key; } - - bool hasPrefix( const GeoHash& hash ){ + + bool hasPrefix( const GeoHash& hash ) { BSONElement e = key().firstElement(); if ( e.eoo() ) return false; return GeoHash( e ).hasPrefix( hash ); } - - bool advance( int direction , int& totalFound , GeoAccumulator* all ){ + + bool advance( int direction , int& totalFound , GeoAccumulator* all ) { if ( bucket.isNull() ) return false; bucket = bucket.btree()->advance( bucket , pos , direction , "btreelocation" ); - + if ( all ) return checkCur( totalFound , all ); - + return ! bucket.isNull(); } - bool checkCur( int& totalFound , GeoAccumulator* all ){ + bool checkCur( int& totalFound , GeoAccumulator* all ) { if ( bucket.isNull() ) return false; - if ( bucket.btree()->isUsed(pos) ){ + if ( bucket.btree()->isUsed(pos) ) { totalFound++; all->add( bucket.btree()->keyNode( pos ) ); } @@ -776,51 +855,65 @@ namespace mongo { return true; } - string toString(){ + string toString() { stringstream ss; ss << "bucket: " << bucket.toString() << " pos: " << pos << " found: " << found; return ss.str(); } - static bool initial( const IndexDetails& id , const Geo2dType * spec , - BtreeLocation& min , BtreeLocation& max , + static bool initial( const IndexDetails& id , const Geo2dType * spec , + BtreeLocation& min , BtreeLocation& max , GeoHash start , - int & found , GeoAccumulator * hopper ) - { - + int & found , GeoAccumulator * hopper ) { + Ordering ordering = Ordering::make(spec->_order); - min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , + min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , ordering , min.pos , min.found , minDiskLoc ); - min.checkCur( found , hopper ); + if (hopper) min.checkCur( found , hopper ); max = min; - - if ( min.bucket.isNull() || ( !(hopper->found()) ) ){ - min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , + + if ( min.bucket.isNull() || ( hopper && !(hopper->found()) ) ) { + min.bucket = id.head.btree()->locate( id , id.head , start.wrap() , ordering , min.pos , min.found , minDiskLoc , -1 ); - min.checkCur( found , hopper ); + if (hopper) min.checkCur( found , hopper ); } - + return ! min.bucket.isNull() || ! 
max.bucket.isNull(); } }; class GeoSearch { public: - GeoSearch( const Geo2dType * g , const GeoHash& n , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits::max() ) - : _spec( g ) , _n( n ) , _start( n ) , + GeoSearch( const Geo2dType * g , const GeoHash& n , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits::max() , GeoDistType type=GEO_PLAIN) + : _spec( g ) ,_startPt(g,n), _start( n ) , _numWanted( numWanted ) , _filter( filter ) , _maxDistance( maxDistance ) , - _hopper( new GeoHopper( g , numWanted , n , filter , maxDistance ) ) - { + _hopper( new GeoHopper( g , numWanted , _startPt , filter , maxDistance, type ) ), _type(type) { assert( g->getDetails() ); _nscanned = 0; _found = 0; + + if (type == GEO_PLAIN) { + _scanDistance = maxDistance; + } + else if (type == GEO_SPHERE) { + if (maxDistance == numeric_limits::max()) { + _scanDistance = maxDistance; + } + else { + //TODO: consider splitting into x and y scan distances + _scanDistance = computeXScanDistance(_startPt._y, rad2deg(maxDistance)); + } + } + else { + assert(0); + } } - - void exec(){ + + void exec() { const IndexDetails& id = *_spec->getDetails(); - - BtreeBucket * head = id.head.btree(); + + const BtreeBucket * head = id.head.btree(); assert( head ); /* * Search algorithm @@ -829,144 +922,185 @@ namespace mongo { * 3) find optimal set of boxes that complete circle * 4) use regular btree cursors to scan those boxes */ - + GeoHopper * hopper = _hopper.get(); _prefix = _start; - { // 1 regular geo hash algorithm - + BtreeLocation min,max; + { + // 1 regular geo hash algorithm + - BtreeLocation min,max; - if ( ! BtreeLocation::initial( id , _spec , min , max , _n , _found , hopper ) ) + if ( ! BtreeLocation::initial( id , _spec , min , max , _start , _found , NULL ) ) return; - - while ( _hopper->found() < _numWanted ){ + + while ( !_prefix.constrains() || // if next pass would cover universe, just keep going + ( _hopper->found() < _numWanted && _spec->sizeEdge( _prefix ) <= _scanDistance)) { GEODEBUG( _prefix << "\t" << _found << "\t DESC" ); - while ( min.hasPrefix( _prefix ) && min.advance( -1 , _found , hopper ) ) + while ( min.hasPrefix(_prefix) && min.checkCur(_found, hopper) && min.advance(-1, _found, NULL) ) _nscanned++; GEODEBUG( _prefix << "\t" << _found << "\t ASC" ); - while ( max.hasPrefix( _prefix ) && max.advance( 1 , _found , hopper ) ) + while ( max.hasPrefix(_prefix) && max.checkCur(_found, hopper) && max.advance(+1, _found, NULL) ) _nscanned++; - if ( ! _prefix.constrains() ) - break; + + if ( ! _prefix.constrains() ) { + GEODEBUG( "done search w/o part 2" ) + return; + } + + _alreadyScanned = Box(_spec, _prefix); _prefix = _prefix.up(); - - double temp = _spec->distance( _prefix , _start ); - if ( temp > ( _maxDistance * 2 ) ) - break; } } GEODEBUG( "done part 1" ); - if ( _found && _prefix.constrains() ){ + { // 2 - Point center( _spec , _n ); double farthest = hopper->farthest(); - // Phase 1 might not have found any points. 
- if (farthest == -1) - farthest = _spec->sizeDiag( _prefix ); - Box want( center._x - farthest , center._y - farthest , farthest * 2 ); - _prefix = _n; - while ( _spec->sizeEdge( _prefix ) < ( farthest / 2 ) ){ + GEODEBUGPRINT(hopper->farthest()); + if (hopper->found() < _numWanted) { + // Not enough found in Phase 1 + farthest = _scanDistance; + } + else if (_type == GEO_SPHERE) { + farthest = std::min(_scanDistance, computeXScanDistance(_startPt._y, rad2deg(farthest))); + } + GEODEBUGPRINT(farthest); + + Box want( _startPt._x - farthest , _startPt._y - farthest , farthest * 2 ); + GEODEBUGPRINT(want.toString()); + + _prefix = _start; + while (_prefix.constrains() && _spec->sizeEdge( _prefix ) < farthest ) { _prefix = _prefix.up(); } - - if ( logLevel > 0 ){ - log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix ) + + PREFIXDEBUG(_prefix, _spec); + + if (_prefix.getBits() <= 1) { + // TODO consider walking in $natural order + + while ( min.checkCur(_found, hopper) && min.advance(-1, _found, NULL) ) + _nscanned++; + while ( max.checkCur(_found, hopper) && max.advance(+1, _found, NULL) ) + _nscanned++; + + GEODEBUG( "done search after scanning whole collection" ) + return; + } + + if ( logLevel > 0 ) { + log(1) << "want: " << want << " found:" << _found << " nscanned: " << _nscanned << " hash size:" << _spec->sizeEdge( _prefix ) << " farthest: " << farthest << " using box: " << Box( _spec , _prefix ).toString() << endl; } - - for ( int x=-1; x<=1; x++ ){ - for ( int y=-1; y<=1; y++ ){ + + for ( int x=-1; x<=1; x++ ) { + for ( int y=-1; y<=1; y++ ) { GeoHash toscan = _prefix; toscan.move( x , y ); - + // 3 & 4 doBox( id , want , toscan ); } } } GEODEBUG( "done search" ) - + } - void doBox( const IndexDetails& id , const Box& want , const GeoHash& toscan , int depth = 0 ){ + void doBox( const IndexDetails& id , const Box& want , const GeoHash& toscan , int depth = 0 ) { Box testBox( _spec , toscan ); - if ( logLevel > 2 ){ + if ( logLevel > 2 ) { cout << "\t"; for ( int i=0; i_error)) { + GEODEBUG("skipping box: already scanned"); + return; // been here, done this + } double intPer = testBox.intersects( want ); - - if ( intPer <= 0 ) + + if ( intPer <= 0 ) { + GEODEBUG("skipping box: not in want"); return; - + } + bool goDeeper = intPer < .5 && depth < 2; long long myscanned = 0; - + BtreeLocation loc; - loc.bucket = id.head.btree()->locate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) , - loc.pos , loc.found , minDiskLoc ); + loc.bucket = id.head.btree()->locate( id , id.head , toscan.wrap() , Ordering::make(_spec->_order) , + loc.pos , loc.found , minDiskLoc ); loc.checkCur( _found , _hopper.get() ); - while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ){ + while ( loc.hasPrefix( toscan ) && loc.advance( 1 , _found , _hopper.get() ) ) { _nscanned++; - if ( ++myscanned > 100 && goDeeper ){ + if ( ++myscanned > 100 && goDeeper ) { doBox( id , want , toscan + "00" , depth + 1); doBox( id , want , toscan + "01" , depth + 1); doBox( id , want , toscan + "10" , depth + 1); doBox( id , want , toscan + "11" , depth + 1); - return; + return; } } - + } const Geo2dType * _spec; - GeoHash _n; + Point _startPt; GeoHash _start; GeoHash _prefix; int _numWanted; BSONObj _filter; double _maxDistance; + double _scanDistance; shared_ptr _hopper; long long _nscanned; int _found; + GeoDistType _type; + + Box _alreadyScanned; }; class GeoCursorBase : public Cursor { public: GeoCursorBase( const 
Geo2dType * spec ) - : _spec( spec ), _id( _spec->getDetails() ){ + : _spec( spec ), _id( _spec->getDetails() ) { } - virtual DiskLoc refLoc(){ return DiskLoc(); } + virtual DiskLoc refLoc() { return DiskLoc(); } virtual BSONObj indexKeyPattern() { return _spec->keyPattern(); } - virtual void noteLocation() { - assert(0); + virtual void noteLocation() { + // no-op since these are meant to be safe } /* called before query getmore block is iterated */ virtual void checkLocation() { - assert(0); + // no-op since these are meant to be safe } virtual bool supportGetMore() { return false; } virtual bool supportYields() { return false; } - virtual bool getsetdup(DiskLoc loc){ - return false; - } + virtual bool getsetdup(DiskLoc loc) { return false; } + virtual bool modifiedKeys() const { return true; } + virtual bool isMultiKey() const { return false; } + + const Geo2dType * _spec; const IndexDetails * _id; @@ -975,20 +1109,23 @@ namespace mongo { class GeoSearchCursor : public GeoCursorBase { public: GeoSearchCursor( shared_ptr s ) - : GeoCursorBase( s->_spec ) , - _s( s ) , _cur( s->_hopper->_points.begin() ) , _end( s->_hopper->_points.end() ) { + : GeoCursorBase( s->_spec ) , + _s( s ) , _cur( s->_hopper->_points.begin() ) , _end( s->_hopper->_points.end() ), _nscanned() { + if ( _cur != _end ) { + ++_nscanned; + } } - + virtual ~GeoSearchCursor() {} - - virtual bool ok(){ + + virtual bool ok() { return _cur != _end; } - - virtual Record* _current(){ assert(ok()); return _cur->_loc.rec(); } - virtual BSONObj current(){ assert(ok()); return _cur->_o; } - virtual DiskLoc currLoc(){ assert(ok()); return _cur->_loc; } - virtual bool advance(){ _cur++; return ok(); } + + virtual Record* _current() { assert(ok()); return _cur->_loc.rec(); } + virtual BSONObj current() { assert(ok()); return _cur->_o; } + virtual DiskLoc currLoc() { assert(ok()); return _cur->_loc; } + virtual bool advance() { _cur++; incNscanned(); return ok(); } virtual BSONObj currKey() const { return _cur->_key; } virtual string toString() { @@ -996,82 +1133,103 @@ namespace mongo { } - virtual BSONObj prettyStartKey() const { - return BSON( _s->_spec->_geo << _s->_prefix.toString() ); + virtual BSONObj prettyStartKey() const { + return BSON( _s->_spec->_geo << _s->_prefix.toString() ); } - virtual BSONObj prettyEndKey() const { + virtual BSONObj prettyEndKey() const { GeoHash temp = _s->_prefix; temp.move( 1 , 1 ); - return BSON( _s->_spec->_geo << temp.toString() ); + return BSON( _s->_spec->_geo << temp.toString() ); } + virtual long long nscanned() { return _nscanned; } shared_ptr _s; GeoHopper::Holder::iterator _cur; GeoHopper::Holder::iterator _end; + + void incNscanned() { if ( ok() ) { ++_nscanned; } } + long long _nscanned; }; class GeoBrowse : public GeoCursorBase , public GeoAccumulator { public: GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() ) : GeoCursorBase( g ) ,GeoAccumulator( g , filter ) , - _type( type ) , _filter( filter ) , _firstCall(true) { + _type( type ) , _filter( filter ) , _firstCall(true), _nscanned() { } - + virtual string toString() { return (string)"GeoBrowse-" + _type; } - virtual bool ok(){ - if ( _firstCall ){ + virtual bool ok() { + bool first = _firstCall; + if ( _firstCall ) { fillStack(); _firstCall = false; } - if ( ! _cur.isEmpty() || _stack.size() ) + if ( ! _cur.isEmpty() || _stack.size() ) { + if ( first ) { + ++_nscanned; + } return true; + } - while ( moreToDo() ){ + while ( moreToDo() ) { fillStack(); - if ( ! _cur.isEmpty() ) + if ( ! 
_cur.isEmpty() ) { + if ( first ) { + ++_nscanned; + } return true; + } } - + return false; } - - virtual bool advance(){ + + virtual bool advance() { _cur._o = BSONObj(); - - if ( _stack.size() ){ + + if ( _stack.size() ) { _cur = _stack.front(); _stack.pop_front(); + ++_nscanned; return true; } - + if ( ! moreToDo() ) return false; - + while ( _cur.isEmpty() && moreToDo() ) fillStack(); - return ! _cur.isEmpty(); + return ! _cur.isEmpty() && ++_nscanned; } - - virtual Record* _current(){ assert(ok()); return _cur._loc.rec(); } - virtual BSONObj current(){ assert(ok()); return _cur._o; } - virtual DiskLoc currLoc(){ assert(ok()); return _cur._loc; } + + virtual Record* _current() { assert(ok()); return _cur._loc.rec(); } + virtual BSONObj current() { assert(ok()); return _cur._o; } + virtual DiskLoc currLoc() { assert(ok()); return _cur._loc; } virtual BSONObj currKey() const { return _cur._key; } virtual bool moreToDo() = 0; virtual void fillStack() = 0; - virtual void addSpecific( const KeyNode& node , double d ){ + virtual void addSpecific( const KeyNode& node , double d ) { if ( _cur.isEmpty() ) _cur = GeoPoint( node , d ); else _stack.push_back( GeoPoint( node , d ) ); } + virtual long long nscanned() { + if ( _firstCall ) { + ok(); + } + return _nscanned; + } + string _type; BSONObj _filter; list _stack; @@ -1079,25 +1237,28 @@ namespace mongo { GeoPoint _cur; bool _firstCall; + long long _nscanned; + }; class GeoCircleBrowse : public GeoBrowse { public: - + enum State { - START , + START , DOING_EXPAND , DOING_AROUND , DONE } _state; - GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() ) - : GeoBrowse( g , "circle" , filter ){ - + GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center") + : GeoBrowse( g , "circle" , filter ) { + uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 ); BSONObjIterator i(circle); - _startPt = Point(i.next()); - _start = _startPt.hash(g); + BSONElement center = i.next(); + _start = g->_tohash(center); + _startPt = Point(center); _prefix = _start; _maxDistance = i.next().numberDouble(); uassert( 13061 , "need a max distance > 0 " , _maxDistance > 0 ); @@ -1106,17 +1267,42 @@ namespace mongo { _state = START; _found = 0; + if (type == "$center") { + _type = GEO_PLAIN; + _xScanDistance = _maxDistance; + _yScanDistance = _maxDistance; + } + else if (type == "$centerSphere") { + uassert(13461, "Spherical MaxDistance > PI. Are you sure you are using radians?", _maxDistance < M_PI); + + _type = GEO_SPHERE; + _yScanDistance = rad2deg(_maxDistance); + _xScanDistance = computeXScanDistance(_startPt._y, _yScanDistance); + + uassert(13462, "Spherical distance would require wrapping, which isn't implemented yet", + (_startPt._x + _xScanDistance < 180) && (_startPt._x - _xScanDistance > -180) && + (_startPt._y + _yScanDistance < 90) && (_startPt._y - _yScanDistance > -90)); + + GEODEBUGPRINT(_maxDistance); + GEODEBUGPRINT(_xScanDistance); + GEODEBUGPRINT(_yScanDistance); + } + else { + uassert(13460, "invalid $center query type: " + type, false); + } + ok(); } - virtual bool moreToDo(){ + virtual bool moreToDo() { return _state != DONE; } - - virtual void fillStack(){ - if ( _state == START ){ - if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , - _prefix , _found , this ) ){ + + virtual void fillStack() { + + if ( _state == START ) { + if ( ! 
BtreeLocation::initial( *_id , _spec , _min , _max , + _prefix , _found , this ) ) { _state = DONE; return; } @@ -1124,10 +1310,10 @@ namespace mongo { } - if ( _state == DOING_AROUND ){ + if ( _state == DOING_AROUND ) { // TODO could rework and return rather than looping - for (int i=-1; i<=1; i++){ - for (int j=-1; j<=1; j++){ + for (int i=-1; i<=1; i++) { + for (int j=-1; j<=1; j++) { if (i == 0 && j == 0) continue; // main box @@ -1135,10 +1321,11 @@ namespace mongo { newBox.move(i, j); PREFIXDEBUG(newBox, _g); - if (needToCheckBox(newBox)){ + if (needToCheckBox(newBox)) { // TODO consider splitting into quadrants getPointsForPrefix(newBox); - } else { + } + else { GEODEBUG("skipping box"); } } @@ -1147,20 +1334,19 @@ namespace mongo { _state = DONE; return; } - - if (_state == DOING_EXPAND){ + + if (_state == DOING_EXPAND) { GEODEBUG( "circle prefix [" << _prefix << "]" ); PREFIXDEBUG(_prefix, _g); while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _found , this ) ); while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _found , this ) ); - if ( ! _prefix.constrains() ){ + if ( ! _prefix.constrains() ) { GEODEBUG( "\t exhausted the btree" ); _state = DONE; return; } - Point ll (_g, _prefix); GeoHash trHash = _prefix; @@ -1168,50 +1354,52 @@ namespace mongo { Point tr (_g, trHash); double sideLen = fabs(tr._x - ll._x); - if (sideLen > _maxDistance){ // circle must be contained by surrounding squares - if ( (ll._x + _maxDistance < _startPt._x && ll._y + _maxDistance < _startPt._y) && - (tr._x - _maxDistance > _startPt._x && tr._y - _maxDistance > _startPt._y) ) - { + if (sideLen > std::max(_xScanDistance, _yScanDistance)) { // circle must be contained by surrounding squares + if ( (ll._x + _xScanDistance < _startPt._x && ll._y + _yScanDistance < _startPt._y) && + (tr._x - _xScanDistance > _startPt._x && tr._y - _yScanDistance > _startPt._y) ) { GEODEBUG("square fully contains circle"); _state = DONE; - } else if (_prefix.getBits() > 1){ + } + else if (_prefix.getBits() > 1) { GEODEBUG("checking surrounding squares"); _state = DOING_AROUND; - } else { + } + else { GEODEBUG("using simple search"); _prefix = _prefix.up(); } - } else { + } + else { _prefix = _prefix.up(); } return; } - + /* Clients are expected to use moreToDo before calling * fillStack, so DONE is checked for there. If any more * State values are defined, you should handle them - * here. */ + * here. */ assert(0); } - bool needToCheckBox(const GeoHash& prefix){ + bool needToCheckBox(const GeoHash& prefix) { Point ll (_g, prefix); - if (fabs(ll._x - _startPt._x) <= _maxDistance) return true; - if (fabs(ll._y - _startPt._y) <= _maxDistance) return true; + if (fabs(ll._x - _startPt._x) <= _xScanDistance) return true; + if (fabs(ll._y - _startPt._y) <= _yScanDistance) return true; - GeoHash trHash = _prefix; + GeoHash trHash = prefix; trHash.move( 1 , 1 ); Point tr (_g, trHash); - if (fabs(tr._x - _startPt._x) <= _maxDistance) return true; - if (fabs(tr._y - _startPt._y) <= _maxDistance) return true; + if (fabs(tr._x - _startPt._x) <= _xScanDistance) return true; + if (fabs(tr._y - _startPt._y) <= _yScanDistance) return true; return false; } - void getPointsForPrefix(const GeoHash& prefix){ - if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , prefix , _found , this ) ){ + void getPointsForPrefix(const GeoHash& prefix) { + if ( ! 
BtreeLocation::initial( *_id , _spec , _min , _max , prefix , _found , this ) ) { return; } @@ -1219,37 +1407,50 @@ namespace mongo { while ( _max.hasPrefix( prefix ) && _max.advance( 1 , _found , this ) ); } - - virtual bool checkDistance( const GeoHash& h , double& d ){ - d = _g->distance( _start , h ); + + virtual bool checkDistance( const GeoHash& h , double& d ) { + switch (_type) { + case GEO_PLAIN: + d = _g->distance( _start , h ); + break; + case GEO_SPHERE: + d = spheredist_deg(_startPt, Point(_g, h)); + break; + default: + assert(0); + } + GEODEBUG( "\t " << h << "\t" << d ); return d <= _maxDistance; } + GeoDistType _type; GeoHash _start; Point _startPt; - double _maxDistance; - + double _maxDistance; // user input + double _xScanDistance; // effected by GeoDistType + double _yScanDistance; // effected by GeoDistType + int _found; - - GeoHash _prefix; + + GeoHash _prefix; BtreeLocation _min; BtreeLocation _max; - }; + }; class GeoBoxBrowse : public GeoBrowse { public: - + enum State { - START , + START , DOING_EXPAND , DONE } _state; - GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() ) - : GeoBrowse( g , "box" , filter ){ - + GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() ) + : GeoBrowse( g , "box" , filter ) { + uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 ); BSONObjIterator i(box); _bl = g->_tohash( i.next() ); @@ -1265,7 +1466,7 @@ namespace mongo { Point center = _want.center(); _prefix = _g->hash( center._x , center._y ); - + GEODEBUG( "center : " << center.toString() << "\t" << _prefix ); { @@ -1280,42 +1481,43 @@ namespace mongo { ok(); } - virtual bool moreToDo(){ + virtual bool moreToDo() { return _state != DONE; } - - virtual void fillStack(){ - if ( _state == START ){ - if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , - _prefix , _found , this ) ){ + virtual void fillStack() { + if ( _state == START ) { + + if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , + _prefix , _found , this ) ) { _state = DONE; return; } _state = DOING_EXPAND; } - - if ( _state == DOING_EXPAND ){ + + if ( _state == DOING_EXPAND ) { int started = _found; - while ( started == _found || _state == DONE ){ + while ( started == _found || _state == DONE ) { GEODEBUG( "box prefix [" << _prefix << "]" ); while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _found , this ) ); while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _found , this ) ); - + if ( _state == DONE ) return; - if ( ! _prefix.constrains() ){ + if ( ! _prefix.constrains() ) { GEODEBUG( "box exhausted" ); _state = DONE; return; } - if (_g->sizeEdge(_prefix) < _wantLen){ + if (_g->sizeEdge(_prefix) < _wantLen) { _prefix = _prefix.up(); - } else { - for (int i=-1; i<=1; i++){ - for (int j=-1; j<=1; j++){ + } + else { + for (int i=-1; i<=1; i++) { + for (int j=-1; j<=1; j++) { if (i == 0 && j == 0) continue; // main box @@ -1326,36 +1528,37 @@ namespace mongo { PREFIXDEBUG(newBox, _g); Box cur( _g , newBox ); - if (_want.intersects(cur)){ + if (_want.intersects(cur)) { // TODO consider splitting into quadrants getPointsForPrefix(newBox); - } else { + } + else { GEODEBUG("skipping box"); } } } _state = DONE; } - + } return; } } - void getPointsForPrefix(const GeoHash& prefix){ - if ( ! BtreeLocation::initial( *_id , _spec , _min , _max , prefix , _found , this ) ){ + void getPointsForPrefix(const GeoHash& prefix) { + if ( ! 
BtreeLocation::initial( *_id , _spec , _min , _max , prefix , _found , this ) ) { return; } while ( _min.hasPrefix( prefix ) && _min.advance( -1 , _found , this ) ); while ( _max.hasPrefix( prefix ) && _max.advance( 1 , _found , this ) ); } - - virtual bool checkDistance( const GeoHash& h , double& d ){ + + virtual bool checkDistance( const GeoHash& h , double& d ) { bool res = _want.inside( Point( _g , h ) , _fudge ); - GEODEBUG( "\t want : " << _want.toString() - << " point: " << Point( _g , h ).toString() + GEODEBUG( "\t want : " << _want.toString() + << " point: " << Point( _g , h ).toString() << " in : " << res ); return res; } @@ -1366,23 +1569,23 @@ namespace mongo { double _wantLen; int _found; - - GeoHash _prefix; + + GeoHash _prefix; BtreeLocation _min; BtreeLocation _max; double _fudge; - }; + }; shared_ptr Geo2dType::newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const { if ( numWanted < 0 ) numWanted = numWanted * -1; else if ( numWanted == 0 ) - numWanted = 100; - + numWanted = 100; + BSONObjIterator i(query); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); if ( _geo != e.fieldName() ) @@ -1390,13 +1593,27 @@ namespace mongo { if ( e.type() != Object ) continue; - - switch ( e.embeddedObject().firstElement().getGtLtOp() ){ + + switch ( e.embeddedObject().firstElement().getGtLtOp() ) { case BSONObj::opNEAR: { BSONObj n = e.embeddedObject(); e = n.firstElement(); + + const char* suffix = e.fieldName() + 5; // strlen("$near") == 5; + GeoDistType type; + if (suffix[0] == '\0') { + type = GEO_PLAIN; + } + else if (strcmp(suffix, "Sphere") == 0) { + type = GEO_SPHERE; + } + else { + uassert(13464, string("invalid $near search type: ") + e.fieldName(), false); + type = GEO_PLAIN; // prevents uninitialized warning + } + double maxDistance = numeric_limits::max(); - if ( e.isABSONObj() && e.embeddedObject().nFields() > 2 ){ + if ( e.isABSONObj() && e.embeddedObject().nFields() > 2 ) { BSONObjIterator i(e.embeddedObject()); i.next(); i.next(); @@ -1409,32 +1626,30 @@ namespace mongo { if ( e.isNumber() ) maxDistance = e.numberDouble(); } - shared_ptr s( new GeoSearch( this , _tohash(e) , numWanted , query , maxDistance ) ); + shared_ptr s( new GeoSearch( this , _tohash(e) , numWanted , query , maxDistance, type ) ); s->exec(); shared_ptr c; c.reset( new GeoSearchCursor( s ) ); - return c; + return c; } case BSONObj::opWITHIN: { e = e.embeddedObject().firstElement(); uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() ); e = e.embeddedObject().firstElement(); string type = e.fieldName(); - if ( type == "$center" ){ + if ( startsWith(type, "$center") ) { uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() ); - shared_ptr c; - c.reset( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query ) ); - return c; + shared_ptr c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type) ); + return c; } - else if ( type == "$box" ){ + else if ( type == "$box" ) { uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() ); - shared_ptr c; - c.reset( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) ); - return c; + shared_ptr c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) ); + return c; } throw UserException( 13058 , (string)"unknown $with type: " + type ); } - default: + default: break; } } @@ -1448,41 +1663,41 @@ namespace mongo { class Geo2dFindNearCmd : public Command { public: - Geo2dFindNearCmd() : Command( "geoNear" ){} - 
virtual LockType locktype() const { return READ; } + Geo2dFindNearCmd() : Command( "geoNear" ) {} + virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; } bool slaveOverrideOk() { return true; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); - if ( ! d ){ + if ( ! d ) { errmsg = "can't find ns"; return false; } vector idxs; d->findIndexByType( GEO2DNAME , idxs ); - - if ( idxs.size() > 1 ){ + + if ( idxs.size() > 1 ) { errmsg = "more than 1 geo indexes :("; return false; } - - if ( idxs.size() == 0 ){ + + if ( idxs.size() == 0 ) { errmsg = "no geo index :("; return false; } int geoIdx = idxs[0]; - + result.append( "ns" , ns ); IndexDetails& id = d->idx( geoIdx ); Geo2dType * g = (Geo2dType*)id.getSpec().getType(); assert( &id == g->getDetails() ); - + int numWanted = 100; if ( cmdObj["num"].isNumber() ) numWanted = cmdObj["num"].numberInt(); @@ -1499,37 +1714,41 @@ namespace mongo { if ( cmdObj["maxDistance"].isNumber() ) maxDistance = cmdObj["maxDistance"].number(); - GeoSearch gs( g , n , numWanted , filter , maxDistance ); + GeoDistType type = GEO_PLAIN; + if ( cmdObj["spherical"].trueValue() ) + type = GEO_SPHERE; + + GeoSearch gs( g , n , numWanted , filter , maxDistance , type); - if ( cmdObj["start"].type() == String){ + if ( cmdObj["start"].type() == String) { GeoHash start ((string) cmdObj["start"].valuestr()); gs._start = start; } - + gs.exec(); double distanceMultiplier = 1; if ( cmdObj["distanceMultiplier"].isNumber() ) distanceMultiplier = cmdObj["distanceMultiplier"].number(); - + double totalDistance = 0; BSONObjBuilder arr( result.subarrayStart( "results" ) ); int x = 0; - for ( GeoHopper::Holder::iterator i=gs._hopper->_points.begin(); i!=gs._hopper->_points.end(); i++ ){ + for ( GeoHopper::Holder::iterator i=gs._hopper->_points.begin(); i!=gs._hopper->_points.end(); i++ ) { const GeoPoint& p = *i; - + double dis = distanceMultiplier * p._distance; totalDistance += dis; - - BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ).c_str() ) ); + + BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ) ) ); bb.append( "dis" , dis ); bb.append( "obj" , p._o ); bb.done(); } arr.done(); - + BSONObjBuilder stats( result.subobjStart( "stats" ) ); stats.append( "time" , cc().curop()->elapsedMillis() ); stats.appendNumber( "btreelocs" , gs._nscanned ); @@ -1538,23 +1757,23 @@ namespace mongo { stats.append( "avgDistance" , totalDistance / x ); stats.append( "maxDistance" , gs._hopper->farthest() ); stats.done(); - + return true; } - + } geo2dFindNearCmd; class GeoWalkCmd : public Command { public: - GeoWalkCmd() : Command( "geoWalk" ){} - virtual LockType locktype() const { return READ; } + GeoWalkCmd() : Command( "geoWalk" ) {} + virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } bool slaveOverrideOk() { return true; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." 
+ cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); - if ( ! d ){ + if ( ! d ) { errmsg = "can't find ns"; return false; } @@ -1562,10 +1781,10 @@ namespace mongo { int geoIdx = -1; { NamespaceDetails::IndexIterator ii = d->ii(); - while ( ii.more() ){ + while ( ii.more() ) { IndexDetails& id = ii.next(); - if ( id.getSpec().getTypeName() == GEO2DNAME ){ - if ( geoIdx >= 0 ){ + if ( id.getSpec().getTypeName() == GEO2DNAME ) { + if ( geoIdx >= 0 ) { errmsg = "2 geo indexes :("; return false; } @@ -1573,12 +1792,12 @@ namespace mongo { } } } - - if ( geoIdx < 0 ){ + + if ( geoIdx < 0 ) { errmsg = "no geo index :("; return false; } - + IndexDetails& id = d->idx( geoIdx ); Geo2dType * g = (Geo2dType*)id.getSpec().getType(); @@ -1587,12 +1806,12 @@ namespace mongo { int max = 100000; BtreeCursor c( d , geoIdx , id , BSONObj() , BSONObj() , true , 1 ); - while ( c.ok() && max-- ){ + while ( c.ok() && max-- ) { GeoHash h( c.currKey().firstElement() ); int len; cout << "\t" << h.toString() - << "\t" << c.current()[g->_geo] - << "\t" << hex << h.getHash() + << "\t" << c.current()[g->_geo] + << "\t" << hex << h.getHash() << "\t" << hex << ((long long*)c.currKey().firstElement().binData(len))[0] << "\t" << c.current()["_id"] << endl; @@ -1601,7 +1820,7 @@ namespace mongo { return true; } - + } geoWalkCmd; } diff --git a/db/geo/core.h b/db/geo/core.h index 13f3636..602b513 100644 --- a/db/geo/core.h +++ b/db/geo/core.h @@ -31,23 +31,23 @@ namespace mongo { class GeoBitSets { public: - GeoBitSets(){ - for ( int i=0; i<32; i++ ){ + GeoBitSets() { + for ( int i=0; i<32; i++ ) { masks32[i] = ( 1 << ( 31 - i ) ); } - for ( int i=0; i<64; i++ ){ + for ( int i=0; i<64; i++ ) { masks64[i] = ( 1LL << ( 63 - i ) ); } - - for ( unsigned i=0; i<16; i++ ){ + + for ( unsigned i=0; i<16; i++ ) { unsigned fixed = 0; - for ( int j=0; j<4; j++ ){ + for ( int j=0; j<4; j++ ) { if ( i & ( 1 << j ) ) fixed |= ( 1 << ( j * 2 ) ); } hashedToNormal[fixed] = i; } - + } int masks32[32]; long long masks64[64]; @@ -56,24 +56,24 @@ namespace mongo { }; extern GeoBitSets geoBitSets; - + class GeoHash { public: GeoHash() - : _hash(0),_bits(0){ + : _hash(0),_bits(0) { } - explicit GeoHash( const char * hash ){ + explicit GeoHash( const char * hash ) { init( hash ); } - explicit GeoHash( const string& hash ){ + explicit GeoHash( const string& hash ) { init( hash ); } - explicit GeoHash( const BSONElement& e , unsigned bits=32 ){ + explicit GeoHash( const BSONElement& e , unsigned bits=32 ) { _bits = bits; - if ( e.type() == BinData ){ + if ( e.type() == BinData ) { int len = 0; _copy( (char*)&_hash , e.binData( len ) ); assert( len == 8 ); @@ -85,26 +85,26 @@ namespace mongo { } _fix(); } - - GeoHash( unsigned x , unsigned y , unsigned bits=32){ + + GeoHash( unsigned x , unsigned y , unsigned bits=32) { init( x , y , bits ); } - GeoHash( const GeoHash& old ){ + GeoHash( const GeoHash& old ) { _hash = old._hash; _bits = old._bits; } GeoHash( long long hash , unsigned bits ) - : _hash( hash ) , _bits( bits ){ + : _hash( hash ) , _bits( bits ) { _fix(); } - void init( unsigned x , unsigned y , unsigned bits ){ + void init( unsigned x , unsigned y , unsigned bits ) { assert( bits <= 32 ); _hash = 0; _bits = bits; - for ( unsigned i=0; i> (64-(other._bits*2)); return x == 0; } - - string toString() const { + + string toString() const { StringBuilder buf( _bits * 2 ); for ( unsigned x=0; x<_bits*2; x++ ) buf.append( _hash & geoBitSets.masks64[x] ? 
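/* Illustrative sketch, not part of the patch: judging from the masks32/masks64 tables
   above, GeoHash::init(x, y, bits) interleaves the top `bits` bits of x and y into a
   64-bit value, x on the even bit positions from the top (masks64[2*i]) and y on the
   odd ones (masks64[2*i + 1]). A simplified standalone version, without the trailing
   _fix() masking. */

#include <cstdint>

uint64_t interleaveTopBits(uint32_t x, uint32_t y, unsigned bits /* <= 32 */) {
    uint64_t hash = 0;
    for (unsigned i = 0; i < bits; i++) {
        if (x & (1u << (31 - i)))           // masks32[i]
            hash |= 1ULL << (63 - 2 * i);   // masks64[2*i]
        if (y & (1u << (31 - i)))
            hash |= 1ULL << (62 - 2 * i);   // masks64[2*i + 1]
    }
    return hash;
}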
"1" : "0" ); @@ -172,7 +172,7 @@ namespace mongo { return ss.str(); } - void init( const string& s ){ + void init( const string& s ) { _hash = 0; _bits = s.size() / 2; for ( unsigned pos=0; pos 0; } - - void move( int x , int y ){ + + void move( int x , int y ) { assert( _bits ); _move( 0 , x ); _move( 1 , y ); } - void _move( unsigned offset , int d ){ + void _move( unsigned offset , int d ) { if ( d == 0 ) return; assert( d <= 1 && d>= -1 ); // TEMP - + bool from, to; - if ( d > 0 ){ + if ( d > 0 ) { from = 0; to = 1; } @@ -238,34 +238,34 @@ namespace mongo { unsigned pos = ( _bits * 2 ) - 1; if ( offset == 0 ) pos--; - while ( true ){ - if ( getBit(pos) == from ){ + while ( true ) { + if ( getBit(pos) == from ) { setBit( pos , to ); return; } - if ( pos < 2 ){ + if ( pos < 2 ) { // overflow - for ( ; pos < ( _bits * 2 ) ; pos += 2 ){ + for ( ; pos < ( _bits * 2 ) ; pos += 2 ) { setBit( pos , from ); } return; } - + setBit( pos , from ); pos -= 2; } - + assert(0); } - GeoHash& operator=(const GeoHash& h) { + GeoHash& operator=(const GeoHash& h) { _hash = h._hash; _bits = h._bits; return *this; } - - bool operator==(const GeoHash& h ){ + + bool operator==(const GeoHash& h ) { return _hash == h._hash && _bits == h._bits; } @@ -273,7 +273,7 @@ namespace mongo { unsigned pos = _bits * 2; _bits += strlen(s) / 2; assert( _bits <= 32 ); - while ( s[0] ){ + while ( s[0] ) { if ( s[0] == '1' ) setBit( pos , 1 ); pos++; @@ -288,19 +288,19 @@ namespace mongo { n+=s; return n; } - - void _fix(){ + + void _fix() { static long long FULL = 0xFFFFFFFFFFFFFFFFLL; long long mask = FULL << ( 64 - ( _bits * 2 ) ); _hash &= mask; } - + void append( BSONObjBuilder& b , const char * name ) const { char buf[8]; _copy( buf , (char*)&_hash ); b.appendBinData( name , 8 , bdtCustom , buf ); } - + long long getHash() const { return _hash; } @@ -311,9 +311,9 @@ namespace mongo { GeoHash commonPrefix( const GeoHash& other ) const { unsigned i=0; - for ( ; i<_bits && iunhash( hash , _x , _y ); } - - explicit Point( const BSONElement& e ){ + + explicit Point( const BSONElement& e ) { BSONObjIterator i(e.Obj()); _x = i.next().number(); _y = i.next().number(); } - explicit Point( const BSONObj& o ){ + explicit Point( const BSONObj& o ) { BSONObjIterator i(o); _x = i.next().number(); _y = i.next().number(); } Point( double x , double y ) - : _x( x ) , _y( y ){ + : _x( x ) , _y( y ) { } - - Point() : _x(0),_y(0){ + + Point() : _x(0),_y(0) { } - GeoHash hash( const GeoConvert * g ){ + GeoHash hash( const GeoConvert * g ) { return g->hash( _x , _y ); } @@ -380,12 +380,12 @@ namespace mongo { double b = _y - p._y; return sqrt( ( a * a ) + ( b * b ) ); } - + string toString() const { StringBuilder buf(32); buf << "(" << _x << "," << _y << ")"; return buf.str(); - + } double _x; @@ -393,8 +393,11 @@ namespace mongo { }; - extern double EARTH_RADIUS_KM; - extern double EARTH_RADIUS_MILES; + extern const double EARTH_RADIUS_KM; + extern const double EARTH_RADIUS_MILES; + + inline double deg2rad(double deg) { return deg * (M_PI/180); } + inline double rad2deg(double rad) { return rad * (180/M_PI); } // WARNING: _x and _y MUST be longitude and latitude in that order // note: multiply by earth radius for distance @@ -407,20 +410,26 @@ namespace mongo { double sin_y1(sin(p1._y)), cos_y1(cos(p1._y)); double sin_x2(sin(p2._x)), cos_x2(cos(p2._x)); double sin_y2(sin(p2._y)), cos_y2(cos(p2._y)); - - double cross_prod = + + double cross_prod = (cos_y1*cos_x1 * cos_y2*cos_x2) + (cos_y1*sin_x1 * cos_y2*sin_x2) + (sin_y1 * sin_y2); + if 
(cross_prod >= 1 || cross_prod <= -1) { + // fun with floats + assert( fabs(cross_prod)-1 < 1e-6 ); + return cross_prod > 0 ? 0 : M_PI; + } + return acos(cross_prod); } // note: return is still in radians as that can be multiplied by radius to get arc length inline double spheredist_deg( const Point& p1, const Point& p2 ) { return spheredist_rad( - Point( p1._x * (M_PI/180), p1._y * (M_PI/180)), - Point( p2._x * (M_PI/180), p2._y * (M_PI/180)) + Point( deg2rad(p1._x), deg2rad(p1._y) ), + Point( deg2rad(p2._x), deg2rad(p2._y) ) ); } diff --git a/db/geo/haystack.cpp b/db/geo/haystack.cpp index 4a1d4a7..7f278ca 100644 --- a/db/geo/haystack.cpp +++ b/db/geo/haystack.cpp @@ -17,14 +17,14 @@ */ #include "pch.h" -#include "../namespace.h" +#include "../namespace-inl.h" #include "../jsobj.h" #include "../index.h" #include "../../util/unittest.h" #include "../commands.h" #include "../pdfile.h" #include "../btree.h" -#include "../curop.h" +#include "../curop-inl.h" #include "../matcher.h" #include "core.h" @@ -38,29 +38,29 @@ * should not be used for finding the closest restaurants that are open */ namespace mongo { - + string GEOSEARCHNAME = "geoHaystack"; - + class GeoHaystackSearchHopper { public: GeoHaystackSearchHopper( const BSONObj& n , double maxDistance , unsigned limit , const string& geoField ) - : _near( n ) , _maxDistance( maxDistance ) , _limit( limit ) , _geoField(geoField){ - + : _near( n ) , _maxDistance( maxDistance ) , _limit( limit ) , _geoField(geoField) { + } - - void got( const DiskLoc& loc ){ + + void got( const DiskLoc& loc ) { Point p( loc.obj().getFieldDotted( _geoField ) ); if ( _near.distance( p ) > _maxDistance ) return; _locs.push_back( loc ); } - int append( BSONArrayBuilder& b ){ + int append( BSONArrayBuilder& b ) { for ( unsigned i=0; i<_locs.size() && i<_limit; i++ ) b.append( _locs[i].obj() ); return _locs.size(); } - + Point _near; double _maxDistance; unsigned _limit; @@ -70,22 +70,22 @@ namespace mongo { }; class GeoHaystackSearchIndex : public IndexType { - + public: - + GeoHaystackSearchIndex( const IndexPlugin* plugin , const IndexSpec* spec ) - : IndexType( plugin , spec ){ - + : IndexType( plugin , spec ) { + BSONElement e = spec->info["bucketSize"]; uassert( 13321 , "need bucketSize" , e.isNumber() ); _bucketSize = e.numberDouble(); - + BSONObjBuilder orderBuilder; - + BSONObjIterator i( spec->keyPattern ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); - if ( e.type() == String && GEOSEARCHNAME == e.valuestr() ){ + if ( e.type() == String && GEOSEARCHNAME == e.valuestr() ) { uassert( 13314 , "can't have 2 geo fields" , _geo.size() == 0 ); uassert( 13315 , "2d has to be first in index" , _other.size() == 0 ); _geo = e.fieldName(); @@ -95,13 +95,13 @@ namespace mongo { } orderBuilder.append( "" , 1 ); } - + uassert( 13316 , "no geo field specified" , _geo.size() ); uassert( 13317 , "no other fields specified" , _other.size() ); uassert( 13326 , "quadrant search can only have 1 other field for now" , _other.size() == 1 ); _order = orderBuilder.obj(); } - + int hash( const BSONElement& e ) const { uassert( 13322 , "not a number" , e.isNumber() ); return hash( e.numberDouble() ); @@ -126,18 +126,18 @@ namespace mongo { buf.appendNull( "" ); else buf.appendAs( e , "" ); - + BSONObj key = buf.obj(); GEOQUADDEBUG( obj << "\n\t" << root << "\n\t" << key ); keys.insert( key ); } void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { - + BSONElement loc = obj.getFieldDotted( _geo ); if ( loc.eoo() ) return; - + uassert( 
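/* Illustrative sketch, not part of the patch: the clamp added above protects acos()
   from cross products that drift marginally outside [-1, 1] through floating-point
   error. A standalone version of the same spherical-law-of-cosines distance; the
   result is in radians, and multiplying by an earth radius (for example the
   EARTH_RADIUS_KM constant declared above) gives arc length. */

#include <cassert>
#include <cmath>

struct PointRad { double x, y; };   // longitude, latitude, already in radians

double sphereDistRad(const PointRad& p1, const PointRad& p2) {
    double cross_prod =
        cos(p1.y) * cos(p1.x) * cos(p2.y) * cos(p2.x) +
        cos(p1.y) * sin(p1.x) * cos(p2.y) * sin(p2.x) +
        sin(p1.y) * sin(p2.y);
    if (cross_prod >= 1 || cross_prod <= -1) {
        assert(fabs(cross_prod) - 1 < 1e-6);   // should only be out of range by rounding noise
        return cross_prod > 0 ? 0 : M_PI;
    }
    return acos(cross_prod);
}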
13323 , "latlng not an array" , loc.isABSONObj() ); string root; { @@ -146,34 +146,34 @@ namespace mongo { BSONElement y = i.next(); root = makeString( hash(x) , hash(y) ); } - - + + assert( _other.size() == 1 ); - + BSONElementSet all; obj.getFieldsDotted( _other[0] , all ); - - if ( all.size() == 0 ){ + + if ( all.size() == 0 ) { _add( obj , root , BSONElement() , keys ); } else { - for ( BSONElementSet::iterator i=all.begin(); i!=all.end(); ++i ){ + for ( BSONElementSet::iterator i=all.begin(); i!=all.end(); ++i ) { _add( obj , root , *i , keys ); } } - + } - + shared_ptr newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const { shared_ptr c; assert(0); return c; } - - void searchCommand( NamespaceDetails* nsd , int idxNo , - const BSONObj& n /*near*/ , double maxDistance , const BSONObj& search , - BSONObjBuilder& result , unsigned limit ){ - + + void searchCommand( NamespaceDetails* nsd , int idxNo , + const BSONObj& n /*near*/ , double maxDistance , const BSONObj& search , + BSONObjBuilder& result , unsigned limit ) { + Timer t; log(1) << "SEARCH near:" << n << " maxDistance:" << maxDistance << " search: " << search << endl; @@ -184,33 +184,33 @@ namespace mongo { y = hash( i.next() ); } int scale = (int)ceil( maxDistance / _bucketSize ); - + GeoHaystackSearchHopper hopper(n,maxDistance,limit,_geo); - + long long btreeMatches = 0; - for ( int a=-scale; a<=scale; a++ ){ - for ( int b=-scale; b<=scale; b++ ){ + for ( int a=-scale; a<=scale; a++ ) { + for ( int b=-scale; b<=scale; b++ ) { BSONObjBuilder bb; bb.append( "" , makeString( x + a , y + b ) ); - for ( unsigned i=0; i<_other.size(); i++ ){ + for ( unsigned i=0; i<_other.size(); i++ ) { BSONElement e = search.getFieldDotted( _other[i] ); if ( e.eoo() ) bb.appendNull( "" ); else bb.appendAs( e , "" ); } - + BSONObj key = bb.obj(); - + GEOQUADDEBUG( "KEY: " << key ); - + set thisPass; BtreeCursor cursor( nsd , idxNo , *getDetails() , key , key , true , 1 ); - while ( cursor.ok() ){ + while ( cursor.ok() ) { pair::iterator, bool> p = thisPass.insert( cursor.currLoc() ); - if ( p.second ){ + if ( p.second ) { hopper.got( cursor.currLoc() ); GEOQUADDEBUG( "\t" << cursor.current() ); btreeMatches++; @@ -221,10 +221,10 @@ namespace mongo { } - BSONArrayBuilder arr( result.subarrayStart( "results" ) ); + BSONArrayBuilder arr( result.subarrayStart( "results" ) ); int num = hopper.append( arr ); arr.done(); - + { BSONObjBuilder b( result.subobjStart( "stats" ) ); b.append( "time" , t.millis() ); @@ -237,20 +237,20 @@ namespace mongo { const IndexDetails* getDetails() const { return _spec->getDetails(); } - + string _geo; vector _other; - + BSONObj _order; double _bucketSize; }; - + class GeoHaystackSearchIndexPlugin : public IndexPlugin { public: - GeoHaystackSearchIndexPlugin() : IndexPlugin( GEOSEARCHNAME ){ + GeoHaystackSearchIndexPlugin() : IndexPlugin( GEOSEARCHNAME ) { } - + virtual IndexType* generate( const IndexSpec* spec ) const { return new GeoHaystackSearchIndex( this , spec ); } @@ -259,38 +259,38 @@ namespace mongo { class GeoHaystackSearchCommand : public Command { - public: - GeoHaystackSearchCommand() : Command( "geoSearch" ){} - virtual LockType locktype() const { return READ; } + public: + GeoHaystackSearchCommand() : Command( "geoSearch" ) {} + virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } bool slaveOverrideOk() const { return true; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ - + bool 
run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + string ns = dbname + "." + cmdObj.firstElement().valuestr(); - + NamespaceDetails * d = nsdetails( ns.c_str() ); - if ( ! d ){ + if ( ! d ) { errmsg = "can't find ns"; return false; } - + vector idxs; d->findIndexByType( GEOSEARCHNAME , idxs ); - if ( idxs.size() == 0 ){ + if ( idxs.size() == 0 ) { errmsg = "no geoSearch index"; return false; } - if ( idxs.size() > 1 ){ + if ( idxs.size() > 1 ) { errmsg = "more than 1 geosearch index"; return false; } - + int idxNum = idxs[0]; - + IndexDetails& id = d->idx( idxNum ); GeoHaystackSearchIndex * si = (GeoHaystackSearchIndex*)id.getSpec().getType(); - assert( &id == si->getDetails() ); - + assert( &id == si->getDetails() ); + BSONElement n = cmdObj["near"]; BSONElement maxDistance = cmdObj["maxDistance"]; BSONElement search = cmdObj["search"]; @@ -298,20 +298,20 @@ namespace mongo { uassert( 13318 , "near needs to be an array" , n.isABSONObj() ); uassert( 13319 , "maxDistance needs a number" , maxDistance.isNumber() ); uassert( 13320 , "search needs to be an object" , search.type() == Object ); - + unsigned limit = 50; if ( cmdObj["limit"].isNumber() ) limit = (unsigned)cmdObj["limit"].numberInt(); si->searchCommand( d , idxNum , n.Obj() , maxDistance.numberDouble() , search.Obj() , result , limit ); - + return 1; } - - } nameSearchCommand; + + } nameSearchCommand; + + - - } diff --git a/db/helpers/dblogger.h b/db/helpers/dblogger.h index 572169b..4d6ee6d 100644 --- a/db/helpers/dblogger.h +++ b/db/helpers/dblogger.h @@ -18,14 +18,14 @@ #pragma once -namespace mongo { +namespace mongo { /** helper to log (and read log) of a capped collection in the database */ class DBLogger { bool _inited; public: const string _ns; - DBLogger(string ns) : _inited(false), _ns(ns){ } + DBLogger(string ns) : _inited(false), _ns(ns) { } }; } diff --git a/db/index.cpp b/db/index.cpp index 04eca73..c696e27 100644 --- a/db/index.cpp +++ b/db/index.cpp @@ -17,15 +17,16 @@ */ #include "pch.h" -#include "namespace.h" +#include "namespace-inl.h" #include "index.h" #include "btree.h" #include "query.h" #include "background.h" +#include "repl/rs.h" namespace mongo { - int removeFromSysIndexes(const char *ns, const char *idxName) { + int removeFromSysIndexes(const char *ns, const char *idxName) { string system_indexes = cc().database()->name + ".system.indexes"; BSONObjBuilder b; b.append("ns", ns); @@ -34,24 +35,36 @@ namespace mongo { return (int) deleteObjects(system_indexes.c_str(), cond, false, false, true); } - /* this is just an attempt to clean up old orphaned stuff on a delete all indexes - call. repair database is the clean solution, but this gives one a lighter weight + /* this is just an attempt to clean up old orphaned stuff on a delete all indexes + call. repair database is the clean solution, but this gives one a lighter weight partial option. 
see dropIndexes() */ - void assureSysIndexesEmptied(const char *ns, IndexDetails *idIndex) { + void assureSysIndexesEmptied(const char *ns, IndexDetails *idIndex) { string system_indexes = cc().database()->name + ".system.indexes"; BSONObjBuilder b; b.append("ns", ns); - if( idIndex ) { + if( idIndex ) { b.append("name", BSON( "$ne" << idIndex->indexName().c_str() )); } BSONObj cond = b.done(); int n = (int) deleteObjects(system_indexes.c_str(), cond, false, false, true); - if( n ) { + if( n ) { log() << "info: assureSysIndexesEmptied cleaned up " << n << " entries" << endl; } } + int IndexDetails::keyPatternOffset( const string& key ) const { + BSONObjIterator i( keyPattern() ); + int n = 0; + while ( i.more() ) { + BSONElement e = i.next(); + if ( key == e.fieldName() ) + return n; + n++; + } + return -1; + } + const IndexSpec& IndexDetails::getSpec() const { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); return NamespaceDetailsTransient::get_inlock( info.obj()["ns"].valuestr() ).getIndexSpec( this ); @@ -62,29 +75,35 @@ namespace mongo { */ void IndexDetails::kill_idx() { string ns = indexNamespace(); // e.g. foo.coll.$ts_1 + try { - string pns = parentNS(); // note we need a copy, as parentNS() won't work after the drop() below - - // clean up parent namespace index cache - NamespaceDetailsTransient::get_w( pns.c_str() ).deletedIndex(); + string pns = parentNS(); // note we need a copy, as parentNS() won't work after the drop() below - string name = indexName(); + // clean up parent namespace index cache + NamespaceDetailsTransient::get_w( pns.c_str() ).deletedIndex(); + + string name = indexName(); + + /* important to catch exception here so we can finish cleanup below. */ + try { + dropNS(ns.c_str()); + } + catch(DBException& ) { + log(2) << "IndexDetails::kill(): couldn't drop ns " << ns << endl; + } + head.setInvalid(); + info.setInvalid(); + + // clean up in system.indexes. we do this last on purpose. + int n = removeFromSysIndexes(pns.c_str(), name.c_str()); + wassert( n == 1 ); - /* important to catch exception here so we can finish cleanup below. */ - try { - btreeStore->drop(ns.c_str()); } - catch(DBException& ) { - log(2) << "IndexDetails::kill(): couldn't drop ns " << ns << endl; + catch ( DBException &e ) { + log() << "exception in kill_idx: " << e << ", ns: " << ns << endl; } - head.setInvalid(); - info.setInvalid(); - - // clean up in system.indexes. we do this last on purpose. - int n = removeFromSysIndexes(pns.c_str(), name.c_str()); - wassert( n == 1 ); } - + void IndexDetails::getKeysFromObject( const BSONObj& obj, BSONObjSetDefaultOrder& keys) const { getSpec().getKeys( obj, keys ); } @@ -105,7 +124,7 @@ namespace mongo { } } - void getIndexChanges(vector& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj, bool &changedId) { + void getIndexChanges(vector& v, NamespaceDetails& d, BSONObj newObj, BSONObj oldObj, bool &changedId) { int z = d.nIndexesBeingBuilt(); v.resize(z); NamespaceDetails::IndexIterator i = d.ii(); @@ -115,7 +134,7 @@ namespace mongo { IndexChanges& ch = v[i]; idx.getKeysFromObject(oldObj, ch.oldkeys); idx.getKeysFromObject(newObj, ch.newkeys); - if( ch.newkeys.size() > 1 ) + if( ch.newkeys.size() > 1 ) d.setIndexIsMultikey(i); setDifference(ch.oldkeys, ch.newkeys, ch.removed); setDifference(ch.newkeys, ch.oldkeys, ch.added); @@ -133,12 +152,12 @@ namespace mongo { } } - // should be { : , .keyp.. } - static bool validKeyPattern(BSONObj kp) { + // should be { : , .keyp.. 
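/* Illustrative sketch, not part of the patch: keyPatternOffset, added above, walks the
   key pattern in declaration order and returns the position of a field name, or -1 if
   absent; inKeyPattern is just offset >= 0. The same logic over a plain vector of
   field names standing in for the BSON key pattern. */

#include <string>
#include <vector>

int keyPatternOffset(const std::vector<std::string>& keyPattern, const std::string& key) {
    int n = 0;
    for (const std::string& field : keyPattern) {
        if (key == field)
            return n;
        n++;
    }
    return -1;
}

// For an index on { a : 1, b : 1 }:
//   keyPatternOffset({"a", "b"}, "b") == 1
//   keyPatternOffset({"a", "b"}, "c") == -1, so inKeyPattern("c") is false.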
} + static bool validKeyPattern(BSONObj kp) { BSONObjIterator i(kp); - while( i.moreWithEOO() ) { + while( i.moreWithEOO() ) { BSONElement e = i.next(); - if( e.type() == Object || e.type() == Array ) + if( e.type() == Object || e.type() == Array ) return false; } return true; @@ -154,29 +173,23 @@ namespace mongo { throws DBException - @return - true if ok to continue. when false we stop/fail silently (index already exists) - sourceNS - source NS we are indexing - sourceCollection - its details ptr + @param sourceNS - source NS we are indexing + @param sourceCollection - its details ptr + @return true if ok to continue. when false we stop/fail silently (index already exists) */ - bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection) { + bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection, BSONObj& fixedIndexObject ) { sourceCollection = 0; // logical name of the index. todo: get rid of the name, we don't need it! - const char *name = io.getStringField("name"); + const char *name = io.getStringField("name"); uassert(12523, "no index name specified", *name); // the collection for which we are building an index - sourceNS = io.getStringField("ns"); + sourceNS = io.getStringField("ns"); uassert(10096, "invalid ns to index", sourceNS.find( '.' ) != string::npos); - uassert(10097, "bad table to index name on add index attempt", - cc().database()->name == nsToDatabase(sourceNS.c_str())); + uassert(10097, "bad table to index name on add index attempt", + cc().database()->name == nsToDatabase(sourceNS.c_str())); - /* we can't build a new index for the ns if a build is already in progress in the background - - EVEN IF this is a foreground build. - */ - uassert(12588, "cannot add index with a background operation in progress", - !BackgroundOperation::inProgForNs(sourceNS.c_str())); BSONObj key = io.getObjectField("key"); uassert(12524, "index key pattern too large", key.objsize() <= 2048); @@ -187,7 +200,7 @@ namespace mongo { if ( sourceNS.empty() || key.isEmpty() ) { log(2) << "bad add index attempt name:" << (name?name:"") << "\n ns:" << - sourceNS << "\n idxobj:" << io.toString() << endl; + sourceNS << "\n idxobj:" << io.toString() << endl; string s = "bad add index attempt " + sourceNS + " key:" + key.toString(); uasserted(12504, s); } @@ -201,7 +214,7 @@ namespace mongo { return false; } sourceCollection = nsdetails(sourceNS.c_str()); - tlog() << "info: creating collection " << sourceNS << " on add index\n"; + tlog() << "info: creating collection " << sourceNS << " on add index" << endl; assert( sourceCollection ); } @@ -222,24 +235,55 @@ namespace mongo { uasserted(12505,s); } - /* this is because we want key patterns like { _id : 1 } and { _id : } to + /* we can't build a new index for the ns if a build is already in progress in the background - + EVEN IF this is a foreground build. + */ + uassert(12588, "cannot add index with a background operation in progress", + !BackgroundOperation::inProgForNs(sourceNS.c_str())); + + /* this is because we want key patterns like { _id : 1 } and { _id : } to all be treated as the same pattern. */ - if ( !god && IndexDetails::isIdIndexPattern(key) ) { - ensureHaveIdIndex( sourceNS.c_str() ); - return false; + if ( IndexDetails::isIdIndexPattern(key) ) { + if( !god ) { + ensureHaveIdIndex( sourceNS.c_str() ); + return false; + } + } + else { + /* is buildIndexes:false set for this replica set member? 
+ if so we don't build any indexes except _id + */ + if( theReplSet && !theReplSet->buildIndexes() ) + return false; + } + + string pluginName = IndexPlugin::findPluginName( key ); + IndexPlugin * plugin = pluginName.size() ? IndexPlugin::get( pluginName ) : 0; + + if ( plugin ) { + fixedIndexObject = plugin->adjustIndexSpec( io ); + } + else if ( io["v"].eoo() ) { + // add "v" if it doesn't exist + // if it does - leave whatever value was there + // this is for testing and replication + BSONObjBuilder b( io.objsize() + 32 ); + b.appendElements( io ); + b.append( "v" , 0 ); + fixedIndexObject = b.obj(); } return true; } - void IndexSpec::reset( const IndexDetails * details ){ + void IndexSpec::reset( const IndexDetails * details ) { _details = details; reset( details->info ); } - void IndexSpec::reset( const DiskLoc& loc ){ + void IndexSpec::reset( const DiskLoc& loc ) { info = loc.obj(); keyPattern = info["key"].embeddedObjectUserCheck(); if ( keyPattern.objsize() == 0 ) { diff --git a/db/index.h b/db/index.h index a2d7e7e..8578ed3 100644 --- a/db/index.h +++ b/db/index.h @@ -25,20 +25,27 @@ namespace mongo { - /* Details about a particular index. There is one of these effectively for each object in - system.namespaces (although this also includes the head pointer, which is not in that - collection). + /* Details about a particular index. There is one of these effectively for each object in + system.namespaces (although this also includes the head pointer, which is not in that + collection). ** MemoryMapped Record ** (i.e., this is on disk data) - */ + */ class IndexDetails { public: - DiskLoc head; /* btree head disk location */ + /** + * btree head disk location + * TODO We should make this variable private, since btree operations + * may change its value and we don't want clients to rely on an old + * value. If we create a btree class, we can provide a btree object + * to clients instead of 'head'. + */ + DiskLoc head; /* Location of index info object. Format: { name:"nameofindex", ns:"parentnsname", key: {keypattobject} - [, unique: , background: ] + [, unique: , background: ] } This object is in the system.indexes collection. Note that since we @@ -70,6 +77,13 @@ namespace mongo { return info.obj().getObjectField("key"); } + /** + * @return offset into keyPattern for key + -1 if doesn't exist + */ + int keyPatternOffset( const string& key ) const; + bool inKeyPattern( const string& key ) const { return keyPatternOffset( key ) >= 0; } + /* true if the specified key is in the index */ bool hasKey(const BSONObj& key); bool wouldCreateDup(const BSONObj& key, DiskLoc self); @@ -96,11 +110,11 @@ namespace mongo { BSONObjIterator i(pattern); BSONElement e = i.next(); if( strcmp(e.fieldName(), "_id") != 0 ) return false; - return i.next().eoo(); + return i.next().eoo(); } - + /* returns true if this is the _id index. */ - bool isIdIndex() const { + bool isIdIndex() const { return isIdIndexPattern( keyPattern() ); } @@ -112,11 +126,11 @@ namespace mongo { return io.getStringField("ns"); } - bool unique() const { + bool unique() const { BSONObj io = info.obj(); - return io["unique"].trueValue() || - /* temp: can we juse make unique:true always be there for _id and get rid of this? */ - isIdIndex(); + return io["unique"].trueValue() || + /* temp: can we juse make unique:true always be there for _id and get rid of this? 
*/ + isIdIndex(); } /* if set, when building index, if any duplicates, drop the duplicating object */ @@ -128,7 +142,7 @@ namespace mongo { (system.indexes or system.namespaces) -- only NamespaceIndex. */ void kill_idx(); - + const IndexSpec& getSpec() const; string toString() const { @@ -136,13 +150,13 @@ namespace mongo { } }; - struct IndexChanges/*on an update*/ { + struct IndexChanges { /*on an update*/ BSONObjSetDefaultOrder oldkeys; BSONObjSetDefaultOrder newkeys; vector removed; // these keys were removed as part of the change vector added; // these keys were added as part of the change - /** @curObjLoc - the object we want to add's location. if it is already in the + /** @curObjLoc - the object we want to add's location. if it is already in the index, that is allowed here (for bg indexing case). */ void dupCheck(IndexDetails& idx, DiskLoc curObjLoc) { diff --git a/db/indexkey.cpp b/db/indexkey.cpp index 70dd770..34f30fa 100644 --- a/db/indexkey.cpp +++ b/db/indexkey.cpp @@ -17,7 +17,7 @@ */ #include "pch.h" -#include "namespace.h" +#include "namespace-inl.h" #include "index.h" #include "btree.h" #include "query.h" @@ -28,98 +28,136 @@ namespace mongo { map * IndexPlugin::_plugins; IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec ) - : _plugin( plugin ) , _spec( spec ){ - + : _plugin( plugin ) , _spec( spec ) { + } - IndexType::~IndexType(){ + IndexType::~IndexType() { } - - const BSONObj& IndexType::keyPattern() const { - return _spec->keyPattern; + + const BSONObj& IndexType::keyPattern() const { + return _spec->keyPattern; } IndexPlugin::IndexPlugin( const string& name ) - : _name( name ){ + : _name( name ) { if ( ! _plugins ) _plugins = new map(); (*_plugins)[name] = this; } - - int IndexType::compare( const BSONObj& l , const BSONObj& r ) const { - return l.woCompare( r , _spec->keyPattern ); - } - void IndexSpec::_init(){ - assert( keyPattern.objsize() ); - + string IndexPlugin::findPluginName( const BSONObj& keyPattern ) { string pluginName = ""; BSONObjIterator i( keyPattern ); - BSONObjBuilder nullKeyB; + while( i.more() ) { BSONElement e = i.next(); - _fieldNames.push_back( e.fieldName() ); - _fixed.push_back( BSONElement() ); - nullKeyB.appendNull( "" ); - if ( e.type() == String ){ - uassert( 13007 , "can only have 1 index plugin / bad index key pattern" , pluginName.size() == 0 ); - pluginName = e.valuestr(); - } - + if ( e.type() != String ) + continue; + + uassert( 13007 , "can only have 1 index plugin / bad index key pattern" , pluginName.size() == 0 || pluginName == e.String() ); + pluginName = e.String(); } - - _nullKey = nullKeyB.obj(); - - BSONObjBuilder b; - b.appendNull( "" ); - _nullObj = b.obj(); - _nullElt = _nullObj.firstElement(); - - if ( pluginName.size() ){ - IndexPlugin * plugin = IndexPlugin::get( pluginName ); - if ( ! plugin ){ - log() << "warning: can't find plugin [" << pluginName << "]" << endl; + + return pluginName; + } + + int IndexType::compare( const BSONObj& l , const BSONObj& r ) const { + return l.woCompare( r , _spec->keyPattern ); + } + + void IndexSpec::_init() { + assert( keyPattern.objsize() ); + + // some basics + _nFields = keyPattern.nFields(); + _sparse = info["sparse"].trueValue(); + uassert( 13529 , "sparse only works for single field keys" , ! 
_sparse || _nFields ); + + + { + // build _nullKey + + BSONObjBuilder b; + BSONObjIterator i( keyPattern ); + + while( i.more() ) { + BSONElement e = i.next(); + _fieldNames.push_back( e.fieldName() ); + _fixed.push_back( BSONElement() ); + b.appendNull( "" ); } - else { - _indexType.reset( plugin->generate( this ) ); + _nullKey = b.obj(); + } + + { + // _nullElt + BSONObjBuilder b; + b.appendNull( "" ); + _nullObj = b.obj(); + _nullElt = _nullObj.firstElement(); + } + + { + // handle plugins + string pluginName = IndexPlugin::findPluginName( keyPattern ); + if ( pluginName.size() ) { + IndexPlugin * plugin = IndexPlugin::get( pluginName ); + if ( ! plugin ) { + log() << "warning: can't find plugin [" << pluginName << "]" << endl; + } + else { + _indexType.reset( plugin->generate( this ) ); + } } } + _finishedInit = true; } - + void IndexSpec::getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { - if ( _indexType.get() ){ + if ( _indexType.get() ) { _indexType->getKeys( obj , keys ); return; } vector fieldNames( _fieldNames ); vector fixed( _fixed ); _getKeys( fieldNames , fixed , obj, keys ); - if ( keys.empty() ) + if ( keys.empty() && ! _sparse ) keys.insert( _nullKey ); } void IndexSpec::_getKeys( vector fieldNames , vector fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const { BSONElement arrElt; unsigned arrIdx = ~0; + int numNotFound = 0; + for( unsigned i = 0; i < fieldNames.size(); ++i ) { if ( *fieldNames[ i ] == '\0' ) continue; + BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); - if ( e.eoo() ) + + if ( e.eoo() ) { e = _nullElt; // no matching field + numNotFound++; + } + if ( e.type() != Array ) fieldNames[ i ] = ""; // no matching field or non-array match + if ( *fieldNames[ i ] == '\0' ) fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) + if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here arrIdx = i; arrElt = e; } + // enforce single array path here - if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ){ + if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) { stringstream ss; ss << "cannot index parallel arrays [" << e.fieldName() << "] [" << arrElt.fieldName() << "]"; uasserted( 10088 , ss.str() ); @@ -127,13 +165,19 @@ namespace mongo { } bool allFound = true; // have we found elements for all field names in the key spec? 
- for( vector::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ){ - if ( **i != '\0' ){ + for( vector::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) { + if ( **i != '\0' ) { allFound = false; break; } } + if ( _sparse && numNotFound == _nFields ) { + // we didn't find any fields + // so we're not going to index this document + return; + } + bool insertArrayNull = false; if ( allFound ) { @@ -143,11 +187,11 @@ namespace mongo { for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) b.appendAs( *i, "" ); keys.insert( b.obj() ); - } + } else { // terminal array element to expand, so generate all keys BSONObjIterator i( arrElt.embeddedObject() ); - if ( i.more() ){ + if ( i.more() ) { while( i.more() ) { BSONObjBuilder b(_sizeTracker); for( unsigned j = 0; j < fixed.size(); ++j ) { @@ -159,18 +203,19 @@ namespace mongo { keys.insert( b.obj() ); } } - else if ( fixed.size() > 1 ){ + else if ( fixed.size() > 1 ) { insertArrayNull = true; } } - } else { + } + else { // nonterminal array element to expand, so recurse assert( !arrElt.eoo() ); BSONObjIterator i( arrElt.embeddedObject() ); - if ( i.more() ){ + if ( i.more() ) { while( i.more() ) { BSONElement e = i.next(); - if ( e.type() == Object ){ + if ( e.type() == Object ) { _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); } } @@ -179,12 +224,12 @@ namespace mongo { insertArrayNull = true; } } - + if ( insertArrayNull ) { // x : [] - need to insert undefined BSONObjBuilder b(_sizeTracker); for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ){ + if ( j == arrIdx ) { b.appendUndefined( "" ); } else { @@ -199,12 +244,12 @@ namespace mongo { } } - bool anyElementNamesMatch( const BSONObj& a , const BSONObj& b ){ + bool anyElementNamesMatch( const BSONObj& a , const BSONObj& b ) { BSONObjIterator x(a); - while ( x.more() ){ + while ( x.more() ) { BSONElement e = x.next(); BSONObjIterator y(b); - while ( y.more() ){ + while ( y.more() ) { BSONElement f = y.next(); FieldCompareResult res = compareDottedFieldNames( e.fieldName() , f.fieldName() ); if ( res == SAME || res == LEFT_SUBFIELD || res == RIGHT_SUBFIELD ) @@ -213,13 +258,13 @@ namespace mongo { } return false; } - + IndexSuitability IndexSpec::suitability( const BSONObj& query , const BSONObj& order ) const { if ( _indexType.get() ) return _indexType->suitability( query , order ); return _suitability( query , order ); } - + IndexSuitability IndexSpec::_suitability( const BSONObj& query , const BSONObj& order ) const { // TODO: optimize if ( anyElementNamesMatch( keyPattern , query ) == 0 && anyElementNamesMatch( keyPattern , order ) == 0 ) diff --git a/db/indexkey.h b/db/indexkey.h index e73d9de..be73171 100644 --- a/db/indexkey.h +++ b/db/indexkey.h @@ -46,16 +46,16 @@ namespace mongo { virtual void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const = 0; virtual shared_ptr newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const = 0; - + /** optional op : changes query to match what's in the index */ virtual BSONObj fixKey( const BSONObj& in ) { return in; } /** optional op : compare 2 objects with regards to this index */ - virtual int compare( const BSONObj& l , const BSONObj& r ) const; + virtual int compare( const BSONObj& l , const BSONObj& r ) const; /** @return plugin */ const IndexPlugin * getPlugin() const { return _plugin; } - + const BSONObj& keyPattern() const; virtual IndexSuitability suitability( const BSONObj& query , const BSONObj& order ) const ; @@ 
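/* Illustrative sketch, not part of the patch: with the sparse support added above, a
   document containing none of the indexed fields generates no key at all (the
   numNotFound == _nFields case), while a document missing only some fields still gets
   a key with nulls in the missing positions. A single-level simplification with no
   array expansion; std::map and std::optional stand in for BSON and jstNULL. */

#include <map>
#include <optional>
#include <string>
#include <vector>

typedef std::vector<std::optional<std::string>> IndexKey;

std::vector<IndexKey> getKeysSimplified(const std::vector<std::string>& fieldNames,
                                        const std::map<std::string, std::string>& doc,
                                        bool sparse) {
    IndexKey key;
    size_t numNotFound = 0;
    for (const std::string& f : fieldNames) {
        auto it = doc.find(f);
        if (it == doc.end()) {
            key.push_back(std::nullopt);   // missing field indexes as null
            numNotFound++;
        }
        else {
            key.push_back(it->second);
        }
    }
    if (sparse && numNotFound == fieldNames.size())
        return {};                         // sparse index: skip the document entirely
    return { key };
}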
-66,7 +66,7 @@ namespace mongo { const IndexPlugin * _plugin; const IndexSpec * _spec; }; - + /** * this represents a plugin * a plugin could be something like full text search, sparse index, etc... @@ -76,11 +76,21 @@ namespace mongo { class IndexPlugin : boost::noncopyable { public: IndexPlugin( const string& name ); - virtual ~IndexPlugin(){} - + virtual ~IndexPlugin() {} + virtual IndexType* generate( const IndexSpec * spec ) const = 0; - static IndexPlugin* get( const string& name ){ + string getName() const { return _name; } + + /** + * @return new keyPattern + * if nothing changes, should return keyPattern + */ + virtual BSONObj adjustIndexSpec( const BSONObj& spec ) const { return spec; } + + // ------- static below ------- + + static IndexPlugin* get( const string& name ) { if ( ! _plugins ) return 0; map::iterator i = _plugins->find( name ); @@ -89,7 +99,12 @@ namespace mongo { return i->second; } - string getName() const { return _name; } + /** + * @param keyPattern { x : "fts" } + * @return "" or the name + */ + static string findPluginName( const BSONObj& keyPattern ); + private: string _name; static map * _plugins; @@ -102,31 +117,31 @@ namespace mongo { public: BSONObj keyPattern; // e.g., { name : 1 } BSONObj info; // this is the same as IndexDetails::info.obj() - + IndexSpec() - : _details(0) , _finishedInit(false){ + : _details(0) , _finishedInit(false) { } IndexSpec( const BSONObj& k , const BSONObj& m = BSONObj() ) - : keyPattern(k) , info(m) , _details(0) , _finishedInit(false){ + : keyPattern(k) , info(m) , _details(0) , _finishedInit(false) { _init(); } - + /** this is a DiscLoc of an IndexDetails info - should have a key field + should have a key field */ - IndexSpec( const DiskLoc& loc ){ + IndexSpec( const DiskLoc& loc ) { reset( loc ); } - + void reset( const DiskLoc& loc ); void reset( const IndexDetails * details ); - + void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const; BSONElement missingField() const { return _nullElt; } - + string getTypeName() const { if ( _indexType.get() ) return _indexType->getPlugin()->getName(); @@ -148,20 +163,24 @@ namespace mongo { IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ; void _getKeys( vector fieldNames , vector fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const; - + BSONSizeTracker _sizeTracker; vector _fieldNames; vector _fixed; - BSONObj _nullKey; - - BSONObj _nullObj; - BSONElement _nullElt; - + + BSONObj _nullKey; // a full key with all fields null + + BSONObj _nullObj; // only used for _nullElt + BSONElement _nullElt; // jstNull + + int _nFields; // number of fields in the index + bool _sparse; // if the index is sparse + shared_ptr _indexType; const IndexDetails * _details; - + void _init(); public: diff --git a/db/instance.cpp b/db/instance.cpp index a6873f2..3b668ee 100644 --- a/db/instance.cpp +++ b/db/instance.cpp @@ -27,7 +27,6 @@ #include "lasterror.h" #include "security.h" #include "json.h" -//#include "reccache.h" #include "replpair.h" #include "../s/d_logic.h" #include "../util/file_allocator.h" @@ -38,6 +37,8 @@ #endif #include "stats/counters.h" #include "background.h" +#include "dur_journal.h" +#include "dur_recover.h" namespace mongo { @@ -61,29 +62,30 @@ namespace mongo { bool useCursors = true; bool useHints = true; - - void flushOpLog( stringstream &ss ) { + + void flushDiagLog() { if( _diaglog.f && _diaglog.f->is_open() ) { - ss << "flushing op log and files\n"; + log() << "flushing diag log" << endl; _diaglog.flush(); } } - int 
ctr = 0; - KillCurrentOp killCurrentOp; - + int lockFile = 0; +#ifdef WIN32 + HANDLE lockFileHandle; +#endif // see FSyncCommand: - unsigned lockedForWriting; + unsigned lockedForWriting; mongo::mutex lockedForWritingMutex("lockedForWriting"); bool unlockRequested = false; void inProgCmd( Message &m, DbResponse &dbresponse ) { BSONObjBuilder b; - if( ! cc().isAdmin() ){ + if( ! cc().isAdmin() ) { BSONObjBuilder b; b.append("err", "unauthorized"); } @@ -95,12 +97,13 @@ namespace mongo { { Client& me = cc(); scoped_lock bl(Client::clientsMutex); - for( set::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { + for( set::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { Client *c = *i; assert( c ); - if ( c == &me ) - continue; CurOp* co = c->curop(); + if ( c == &me && !co ) { + continue; + } assert( co ); if( all || co->active() ) vals.push_back( co->infoNoauth() ); @@ -113,26 +116,26 @@ namespace mongo { b.append("info", "use db.$cmd.sys.unlock.findOne() to terminate the fsync write/snapshot lock"); } } - + replyToQuery(0, m, dbresponse, b.obj()); } - + void killOp( Message &m, DbResponse &dbresponse ) { BSONObj obj; - if( ! cc().isAdmin() ){ + if( ! cc().isAdmin() ) { obj = fromjson("{\"err\":\"unauthorized\"}"); } - /*else if( !dbMutexInfo.isLocked() ) + /*else if( !dbMutexInfo.isLocked() ) obj = fromjson("{\"info\":\"no op in progress/not locked\"}"); */ else { DbMessage d(m); QueryMessage q(d); BSONElement e = q.query.getField("op"); - if( !e.isNumber() ) { + if( !e.isNumber() ) { obj = fromjson("{\"err\":\"no op number field specified?\"}"); } - else { + else { log() << "going to kill op: " << e << endl; obj = fromjson("{\"info\":\"attempting to kill op\"}"); killCurrentOp.kill( (unsigned) e.number() ); @@ -143,23 +146,23 @@ namespace mongo { void unlockFsync(const char *ns, Message& m, DbResponse &dbresponse) { BSONObj obj; - if( ! cc().isAdmin() || strncmp(ns, "admin.", 6) != 0 ) { + if( ! cc().isAdmin() || strncmp(ns, "admin.", 6) != 0 ) { obj = fromjson("{\"err\":\"unauthorized\"}"); } else { - if( lockedForWriting ) { - log() << "command: unlock requested" << endl; + if( lockedForWriting ) { + log() << "command: unlock requested" << endl; obj = fromjson("{ok:1,\"info\":\"unlock requested\"}"); unlockRequested = true; } - else { + else { obj = fromjson("{ok:0,\"errmsg\":\"not locked\"}"); } } replyToQuery(0, m, dbresponse, obj); } - static bool receivedQuery(Client& c, DbResponse& dbresponse, Message& m ){ + static bool receivedQuery(Client& c, DbResponse& dbresponse, Message& m ) { bool ok = true; MSGID responseTo = m.header()->id; @@ -168,7 +171,7 @@ namespace mongo { auto_ptr< Message > resp( new Message() ); CurOp& op = *(c.curop()); - + try { dbresponse.exhaust = runQuery(m, q, op, *resp); assert( !resp->empty() ); @@ -176,9 +179,9 @@ namespace mongo { catch ( AssertionException& e ) { ok = false; op.debug().str << " exception "; - LOGSOME { + LOGSOME { log() << "assertion " << e.toString() << " ns:" << q.ns << " query:" << - (q.query.valid() ? q.query.toString() : "query object is corrupt") << endl; + (q.query.valid() ? 
q.query.toString() : "query object is corrupt") << endl; if( q.ntoskip || q.ntoreturn ) log() << " ntoskip:" << q.ntoskip << " ntoreturn:" << q.ntoreturn << endl; } @@ -207,18 +210,18 @@ namespace mongo { resp->setData( msgdata, true ); } - if ( op.shouldDBProfile( 0 ) ){ + if ( op.shouldDBProfile( 0 ) ) { op.debug().str << " bytes:" << resp->header()->dataLen(); } - + dbresponse.response = resp.release(); dbresponse.responseTo = responseTo; - + return ok; } // Returns false when request includes 'end' - bool assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client ) { + void assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client ) { // before we lock... int op = m.operation(); @@ -228,18 +231,18 @@ namespace mongo { if( strstr(ns, ".$cmd") ) { isCommand = true; opwrite(m); - if( strstr(ns, ".$cmd.sys.") ) { + if( strstr(ns, ".$cmd.sys.") ) { if( strstr(ns, "$cmd.sys.inprog") ) { inProgCmd(m, dbresponse); - return true; + return; } - if( strstr(ns, "$cmd.sys.killop") ) { + if( strstr(ns, "$cmd.sys.killop") ) { killOp(m, dbresponse); - return true; + return; } - if( strstr(ns, "$cmd.sys.unlock") ) { + if( strstr(ns, "$cmd.sys.unlock") ) { unlockFsync(ns, m, dbresponse); - return true; + return; } } } @@ -253,30 +256,30 @@ namespace mongo { else { opwrite(m); } - + globalOpCounters.gotOp( op , isCommand ); - + Client& c = cc(); - + auto_ptr nestedOp; CurOp* currentOpP = c.curop(); - if ( currentOpP->active() ){ + if ( currentOpP->active() ) { nestedOp.reset( new CurOp( &c , currentOpP ) ); currentOpP = nestedOp.get(); } CurOp& currentOp = *currentOpP; currentOp.reset(client,op); - + OpDebug& debug = currentOp.debug(); StringBuilder& ss = debug.str; ss << opToString( op ) << " "; int logThreshold = cmdLine.slowMS; bool log = logLevel >= 1; - + if ( op == dbQuery ) { if ( handlePossibleShardedMessage( m , &dbresponse ) ) - return true; + return; receivedQuery(c , dbresponse, m ); } else if ( op == dbGetMore ) { @@ -289,7 +292,7 @@ namespace mongo { int len = strlen(p); if ( len > 400 ) out() << curTimeMillis() % 10000 << - " long msg received, len:" << len << endl; + " long msg received, len:" << len << endl; Message *resp = new Message(); if ( strcmp( "end" , p ) == 0 ) @@ -304,7 +307,7 @@ namespace mongo { const char *ns = m.singleData()->_data + 4; char cl[256]; nsToDatabase(ns, cl); - if( ! c.getAuthenticationInfo()->isAuthorized(cl) ) { + if( ! c.getAuthenticationInfo()->isAuthorized(cl) ) { uassert_nothrow("unauthorized"); } else { @@ -330,37 +333,40 @@ namespace mongo { log = true; } } + catch ( UserException& ue ) { + tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << ue.toString() << endl; + ss << " exception " << ue.toString(); + } catch ( AssertionException& e ) { - static int n; - tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing" << endl; - ss << " exception " + e.toString(); - log = ++n < 10; + tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << e.toString() << endl; + ss << " exception " << e.toString(); + log = true; } } } currentOp.ensureStarted(); currentOp.done(); int ms = currentOp.totalTimeMillis(); - - log = log || (logLevel >= 2 && ++ctr % 512 == 0); - //DEV log = true; + + //DEV log = true; if ( log || ms > logThreshold ) { if( logLevel < 3 && op == dbGetMore && strstr(ns, ".oplog.") && ms < 3000 && !log ) { /* it's normal for getMore on the oplog to be slow because of use of awaitdata flag. 
*/ - } else { + } + else { ss << ' ' << ms << "ms"; mongo::tlog() << ss.str() << endl; } } - - if ( currentOp.shouldDBProfile( ms ) ){ + + if ( currentOp.shouldDBProfile( ms ) ) { // performance profiling is on - if ( dbMutex.getState() < 0 ){ + if ( dbMutex.getState() < 0 ) { mongo::log(1) << "note: not profiling because recursive read lock" << endl; } else { - mongolock lk(true); - if ( dbHolder.isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ){ + writelock lk; + if ( dbHolder.isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) { Client::Context c( currentOp.getNS() ); profile(ss.str().c_str(), ms); } @@ -370,37 +376,44 @@ namespace mongo { } } - return true; } /* assembleResponse() */ - void killCursors(int n, long long *ids); void receivedKillCursors(Message& m) { int *x = (int *) m.singleData()->_data; x++; // reserved int n = *x++; + + assert( m.dataSize() == 8 + ( 8 * n ) ); + uassert( 13004 , "sent 0 cursors to kill" , n >= 1 ); if ( n > 2000 ) { log( n < 30000 ? LL_WARNING : LL_ERROR ) << "receivedKillCursors, n=" << n << endl; assert( n < 30000 ); } - killCursors(n, (long long *) x); + + int found = ClientCursor::erase(n, (long long *) x); + + if ( logLevel > 0 || found != n ) { + log( found == n ) << "killcursors: found " << found << " of " << n << endl; + } + } /* db - database name path - db directory */ - void closeDatabase( const char *db, const string& path ) { + /*static*/ void Database::closeDatabase( const char *db, const string& path ) { assertInWriteLock(); - + Client::Context * ctx = cc().getContext(); assert( ctx ); assert( ctx->inDB( db , path ) ); Database *database = ctx->db(); assert( database->name == db ); - - oplogCheckCloseDatabase( database ); - if( BackgroundOperation::inProgForDb(db) ) { + oplogCheckCloseDatabase( database ); // oplog caches some things, dirty its caches + + if( BackgroundOperation::inProgForDb(db) ) { log() << "warning: bg op in prog during close db? " << db << endl; } @@ -412,8 +425,8 @@ namespace mongo { NamespaceDetailsTransient::clearForPrefix( prefix.c_str() ); dbHolder.erase( db, path ); - delete database; // closes files ctx->clear(); + delete database; // closes files } void receivedUpdate(Message& m, CurOp& op) { @@ -428,7 +441,7 @@ namespace mongo { assert( d.moreJSObjs() ); assert( query.objsize() < m.header()->dataLen() ); BSONObj toupdate = d.nextJsObj(); - uassert( 10055 , "update object too large", toupdate.objsize() <= MaxBSONObjectSize); + uassert( 10055 , "update object too large", toupdate.objsize() <= BSONObjMaxUserSize); assert( toupdate.objsize() < m.header()->dataLen() ); assert( query.objsize() + toupdate.objsize() < m.header()->dataLen() ); bool upsert = flags & UpdateOption_Upsert; @@ -436,15 +449,15 @@ namespace mongo { bool broadcast = flags & UpdateOption_Broadcast; { string s = query.toString(); - /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down. - instead, let's just story the query BSON in the debug object, and it can toString() + /* todo: we shouldn't do all this ss stuff when we don't need it, it will slow us down. + instead, let's just story the query BSON in the debug object, and it can toString() lazily */ op.debug().str << " query: " << s; op.setQuery(query); - } + } - mongolock lk(1); + writelock lk; // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit if ( ! 
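/* Illustrative sketch, not part of the patch: receivedKillCursors above now insists that
   the message body is exactly 4 reserved bytes, a 4-byte count n, and n 8-byte cursor
   ids (dataSize == 8 + 8 * n). A standalone check of that layout over a raw buffer,
   little-endian encoding assumed. */

#include <cstdint>
#include <cstring>
#include <stdexcept>

int validateKillCursors(const char* data, int dataSize) {
    if (dataSize < 8)
        throw std::runtime_error("killCursors message too short");
    int32_t n;
    std::memcpy(&n, data + 4, 4);            // skip the 4 reserved bytes
    if (n < 1 || dataSize != 8 + 8 * n)      // same invariant as the assert above
        throw std::runtime_error("bad killCursors cursor count");
    // the n cursor ids follow as 8-byte integers starting at data + 8
    return n;
}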
broadcast && handlePossibleShardedMessage( m , 0 ) ) @@ -461,6 +474,7 @@ namespace mongo { const char *ns = d.getns(); assert(*ns); uassert( 10056 , "not master", isMasterNs( ns ) ); + op.debug().str << ns << ' '; int flags = d.pullInt(); bool justOne = flags & RemoveOption_JustOne; bool broadcast = flags & RemoveOption_Broadcast; @@ -470,63 +484,63 @@ namespace mongo { string s = pattern.toString(); op.debug().str << " query: " << s; op.setQuery(pattern); - } + } writelock lk(ns); // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit if ( ! broadcast & handlePossibleShardedMessage( m , 0 ) ) return; - + Client::Context ctx(ns); - + long long n = deleteObjects(ns, pattern, justOne, true); lastError.getSafe()->recordDelete( n ); } - + QueryResult* emptyMoreResult(long long); bool receivedGetMore(DbResponse& dbresponse, Message& m, CurOp& curop ) { StringBuilder& ss = curop.debug().str; bool ok = true; - + DbMessage d(m); const char *ns = d.getns(); int ntoreturn = d.pullInt(); long long cursorid = d.pullInt64(); - + ss << ns << " cid:" << cursorid; - if( ntoreturn ) + if( ntoreturn ) ss << " ntoreturn:" << ntoreturn; - time_t start = 0; - int pass = 0; + time_t start = 0; + int pass = 0; bool exhaust = false; QueryResult* msgdata; while( 1 ) { try { - mongolock lk(false); + readlock lk; Client::Context ctx(ns); msgdata = processGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust); } - catch ( GetMoreWaitException& ) { + catch ( GetMoreWaitException& ) { exhaust = false; massert(13073, "shutting down", !inShutdown() ); - if( pass == 0 ) { - start = time(0); - } - else { - if( time(0) - start >= 4 ) { - // after about 4 seconds, return. this is a sanity check. pass stops at 1000 normally - // for DEV this helps and also if sleep is highly inaccurate on a platform. we want to - // return occasionally so slave can checkpoint. - pass = 10000; - } - } + if( pass == 0 ) { + start = time(0); + } + else { + if( time(0) - start >= 4 ) { + // after about 4 seconds, return. this is a sanity check. pass stops at 1000 normally + // for DEV this helps and also if sleep is highly inaccurate on a platform. we want to + // return occasionally so slave can checkpoint. 
+ pass = 10000; + } + } pass++; - DEV - sleepmillis(20); - else + DEV + sleepmillis(20); + else sleepmillis(2); continue; } @@ -545,8 +559,8 @@ namespace mongo { ss << " nreturned:" << msgdata->nReturned; dbresponse.response = resp; dbresponse.responseTo = m.header()->id; - if( exhaust ) { - ss << " exhaust "; + if( exhaust ) { + ss << " exhaust "; dbresponse.exhaust = ns; } return ok; @@ -554,8 +568,8 @@ namespace mongo { void receivedInsert(Message& m, CurOp& op) { DbMessage d(m); - const char *ns = d.getns(); - assert(*ns); + const char *ns = d.getns(); + assert(*ns); uassert( 10058 , "not master", isMasterNs( ns ) ); op.debug().str << ns; @@ -564,31 +578,32 @@ namespace mongo { if ( handlePossibleShardedMessage( m , 0 ) ) return; - Client::Context ctx(ns); + Client::Context ctx(ns); + int n = 0; while ( d.moreJSObjs() ) { BSONObj js = d.nextJsObj(); - uassert( 10059 , "object to insert too large", js.objsize() <= MaxBSONObjectSize); + uassert( 10059 , "object to insert too large", js.objsize() <= BSONObjMaxUserSize); + + { + // check no $ modifiers + BSONObjIterator i( js ); + while ( i.more() ) { + BSONElement e = i.next(); + uassert( 13511 , "object to insert can't have $ modifiers" , e.fieldName()[0] != '$' ); + } + } + theDataFileMgr.insertWithObjMod(ns, js, false); logOp("i", ns, js); - globalOpCounters.gotInsert(); + + if( ++n % 4 == 0 ) { + // if we are inserting quite a few, we may need to commit along the way + getDur().commitIfNeeded(); + } } + globalOpCounters.incInsertInWriteLock(n); } - class JniMessagingPort : public AbstractMessagingPort { - public: - JniMessagingPort(Message& _container) : container(_container) { } - void reply(Message& received, Message& response, MSGID) { - container = response; - } - void reply(Message& received, Message& response) { - container = response; - } - unsigned remotePort(){ - return 1; - } - Message & container; - }; - void getDatabaseNames( vector< string > &names , const string& usePath ) { boost::filesystem::path path( usePath ); for ( boost::filesystem::directory_iterator i( path ); @@ -599,7 +614,8 @@ namespace mongo { p /= ( dbName + ".ns" ); if ( MMF::exists( p ) ) names.push_back( dbName ); - } else { + } + else { string fileName = boost::filesystem::path(*i).leaf(); if ( fileName.length() > 3 && fileName.substr( fileName.length() - 3, 3 ) == ".ns" ) names.push_back( fileName.substr( 0, fileName.length() - 3 ) ); @@ -607,14 +623,14 @@ namespace mongo { } } - /* returns true if there is data on this server. useful when starting replication. + /* returns true if there is data on this server. useful when starting replication. local database does NOT count except for rsoplog collection. */ - bool replHasDatabases() { + bool replHasDatabases() { vector names; getDatabaseNames(names); if( names.size() >= 2 ) return true; - if( names.size() == 1 ){ + if( names.size() == 1 ) { if( names[0] != "local" ) return true; // we have a local database. 
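/* Illustrative sketch, not part of the patch: receivedInsert above now rejects documents
   whose top-level field names start with '$' and asks the durability layer to commit
   every fourth object when inserting a large batch. The same shape in isolation;
   insertOne and commitIfNeeded are assumed stand-ins for theDataFileMgr/logOp and
   getDur().commitIfNeeded(). */

#include <map>
#include <stdexcept>
#include <string>
#include <vector>

typedef std::map<std::string, std::string> Doc;   // stand-in for a BSON object

void insertOne(const Doc&) { /* stand-in: write the object and log the op */ }
void commitIfNeeded()      { /* stand-in: let the journal commit if its buffer is full */ }

void insertBatch(const std::vector<Doc>& docs) {
    int n = 0;
    for (const Doc& js : docs) {
        for (const auto& field : js)               // no $ modifiers at the top level
            if (!field.first.empty() && field.first[0] == '$')
                throw std::invalid_argument("object to insert can't have $ modifiers");
        insertOne(js);
        if (++n % 4 == 0)
            commitIfNeeded();                      // commit along the way for big batches
    }
}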
return true if oplog isn't empty @@ -628,7 +644,7 @@ namespace mongo { return false; } - bool DBDirectClient::call( Message &toSend, Message &response, bool assertOk ) { + bool DBDirectClient::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) { if ( lastError._get() ) lastError.startRequest( toSend, lastError._get() ); DbResponse dbResponse; @@ -636,6 +652,7 @@ namespace mongo { assert( dbResponse.response ); dbResponse.response->concat(); // can get rid of this if we make response handling smarter response = *dbResponse.response; + getDur().commitIfNeeded(); return true; } @@ -644,11 +661,12 @@ namespace mongo { lastError.startRequest( toSend, lastError._get() ); DbResponse dbResponse; assembleResponse( toSend, dbResponse ); + getDur().commitIfNeeded(); } auto_ptr DBDirectClient::query(const string &ns, Query query, int nToReturn , int nToSkip , - const BSONObj *fieldsToReturn , int queryOptions ){ - + const BSONObj *fieldsToReturn , int queryOptions ) { + //if ( ! query.obj.isEmpty() || nToReturn != 0 || nToSkip != 0 || fieldsToReturn || queryOptions ) return DBClientBase::query( ns , query , nToReturn , nToSkip , fieldsToReturn , queryOptions ); // @@ -656,128 +674,181 @@ namespace mongo { //throw UserException( (string)"yay:" + ns ); } - void DBDirectClient::killCursor( long long id ){ + void DBDirectClient::killCursor( long long id ) { ClientCursor::erase( id ); } - DBClientBase * createDirectClient(){ - return new DBDirectClient(); + unsigned long long DBDirectClient::count(const string &ns, const BSONObj& query, int options, int limit, int skip ) { + readlock lk( ns ); + string errmsg; + long long res = runCount( ns.c_str() , _countCmd( ns , query , options , limit , skip ) , errmsg ); + if ( res == -1 ) + return 0; + uassert( 13637 , str::stream() << "count failed in DBDirectClient: " << errmsg , res >= 0 ); + return (unsigned long long )res; } - //void recCacheCloseAll(); + DBClientBase * createDirectClient() { + return new DBDirectClient(); + } mongo::mutex exitMutex("exit"); int numExitCalls = 0; - void shutdown(); - bool inShutdown(){ + bool inShutdown() { return numExitCalls > 0; } - void tryToOutputFatal( const string& s ){ + void tryToOutputFatal( const string& s ) { try { rawOut( s ); return; } - catch ( ... ){} + catch ( ... ) {} try { cerr << s << endl; return; } - catch ( ... ){} - + catch ( ... ) {} + // uh - oh, not sure there is anything else we can do... } + /** also called by ntservice.cpp */ + void shutdownServer() { + + log() << "shutdown: going to close listening sockets..." << endl; + ListeningSockets::get()->closeAll(); + + log() << "shutdown: going to flush diaglog..." << endl; + flushDiagLog(); + + /* must do this before unmapping mem or you may get a seg fault */ + log() << "shutdown: going to close sockets..." << endl; + boost::thread close_socket_thread( boost::bind(MessagingPort::closeAllSockets, 0) ); + + // wait until file preallocation finishes + // we would only hang here if the file_allocator code generates a + // synchronous signal, which we don't expect + log() << "shutdown: waiting for fs preallocator..." << endl; + FileAllocator::get()->waitUntilFinished(); + + if( cmdLine.dur ) { + log() << "shutdown: lock for final commit..." << endl; + { + int n = 10; + while( 1 ) { + // we may already be in a read lock from earlier in the call stack, so do read lock here + // to be consistent with that. + readlocktry w("", 20000); + if( w.got() ) { + log() << "shutdown: final commit..." 
<< endl; + getDur().commitNow(); + break; + } + if( --n <= 0 ) { + log() << "shutdown: couldn't acquire write lock, aborting" << endl; + abort(); + } + log() << "shutdown: waiting for write lock..." << endl; + } + } + MemoryMappedFile::flushAll(true); + } + + log() << "shutdown: closing all files..." << endl; + stringstream ss3; + MemoryMappedFile::closeAllFiles( ss3 ); + rawOut( ss3.str() ); + + if( cmdLine.dur ) { + log() << "shutdown: journalCleanup..." << endl; + dur::journalCleanup(); + } + +#if !defined(__sunos__) + if ( lockFile ) { + log() << "shutdown: removing fs lock..." << endl; + /* This ought to be an unlink(), but Eliot says the last + time that was attempted, there was a race condition + with acquirePathLock(). */ +#ifdef WIN32 + if( _chsize( lockFile , 0 ) ) + log() << "couldn't remove fs lock " << getLastError() << endl; + CloseHandle(lockFileHandle); +#else + if( ftruncate( lockFile , 0 ) ) + log() << "couldn't remove fs lock " << errnoWithDescription() << endl; + flock( lockFile, LOCK_UN ); +#endif + } +#endif + } + /* not using log() herein in case we are already locked */ - void dbexit( ExitCode rc, const char *why) { + void dbexit( ExitCode rc, const char *why, bool tryToGetLock ) { + + auto_ptr wlt; + if ( tryToGetLock ) { + wlt.reset( new writelocktry( "" , 2 * 60 * 1000 ) ); + uassert( 13455 , "dbexit timed out getting lock" , wlt->got() ); + } + Client * c = currentClient.get(); { scoped_lock lk( exitMutex ); if ( numExitCalls++ > 0 ) { - if ( numExitCalls > 5 ){ + if ( numExitCalls > 5 ) { // this means something horrible has happened ::_exit( rc ); } stringstream ss; - ss << "dbexit: " << why << "; exiting immediately" << endl; + ss << "dbexit: " << why << "; exiting immediately"; tryToOutputFatal( ss.str() ); if ( c ) c->shutdown(); - ::exit( rc ); + ::exit( rc ); } } - - stringstream ss; - ss << "dbexit: " << why << endl; - tryToOutputFatal( ss.str() ); - + + { + stringstream ss; + ss << "dbexit: " << why; + tryToOutputFatal( ss.str() ); + } + try { - shutdown(); // gracefully shutdown instance + shutdownServer(); // gracefully shutdown instance } - catch ( ... ){ + catch ( ... ) { tryToOutputFatal( "shutdown failed with exception" ); } - try { + try { mutexDebugger.programEnding(); } catch (...) { } - + tryToOutputFatal( "dbexit: really exiting now" ); if ( c ) c->shutdown(); ::exit(rc); } - - void shutdown() { - - log() << "shutdown: going to close listening sockets..." << endl; - ListeningSockets::get()->closeAll(); - log() << "shutdown: going to flush oplog..." << endl; - stringstream ss2; - flushOpLog( ss2 ); - rawOut( ss2.str() ); - - /* must do this before unmapping mem or you may get a seg fault */ - log() << "shutdown: going to close sockets..." << endl; - boost::thread close_socket_thread( boost::bind(MessagingPort::closeAllSockets, 0) ); - - // wait until file preallocation finishes - // we would only hang here if the file_allocator code generates a - // synchronous signal, which we don't expect - log() << "shutdown: waiting for fs preallocator..." << endl; - theFileAllocator().waitUntilFinished(); - - log() << "shutdown: closing all files..." << endl; - stringstream ss3; - MemoryMappedFile::closeAllFiles( ss3 ); - rawOut( ss3.str() ); - - // should we be locked here? we aren't. might be ok as-is. - //recCacheCloseAll(); - -#if !defined(_WIN32) && !defined(__sunos__) - if ( lockFile ){ - log() << "shutdown: removing fs lock..." 
<< endl; - if( ftruncate( lockFile , 0 ) ) - log() << "couldn't remove fs lock " << errnoWithDescription() << endl; - flock( lockFile, LOCK_UN ); - } -#endif - } - -#if !defined(_WIN32) && !defined(__sunos__) +#if !defined(__sunos__) void writePid(int fd) { stringstream ss; ss << getpid() << endl; string s = ss.str(); const char * data = s.c_str(); +#ifdef WIN32 + assert ( _write( fd, data, strlen( data ) ) ); +#else assert ( write( fd, data, strlen( data ) ) ); +#endif } void acquirePathLock() { - string name = ( boost::filesystem::path( dbpath ) / "mongod.lock" ).native_file_string(); + string name = ( boost::filesystem::path( dbpath ) / "mongod.lock" ).native_file_string(); bool oldFile = false; @@ -785,37 +856,117 @@ namespace mongo { oldFile = true; } +#ifdef WIN32 + lockFileHandle = CreateFileA( name.c_str(), GENERIC_READ | GENERIC_WRITE, + 0 /* do not allow anyone else access */, NULL, + OPEN_ALWAYS /* success if fh can open */, 0, NULL ); + + if (lockFileHandle == INVALID_HANDLE_VALUE) { + DWORD code = GetLastError(); + char *msg; + FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPSTR)&msg, 0, NULL); + uasserted( 13627 , msg ); + } + lockFile = _open_osfhandle((intptr_t)lockFileHandle, 0); +#else lockFile = open( name.c_str(), O_RDWR | O_CREAT , S_IRWXU | S_IRWXG | S_IRWXO ); - if( lockFile <= 0 ) { - uasserted( 10309 , str::stream() << "Unable to create / open lock file for lockfilepath: " << name << ' ' << errnoWithDescription()); + if( lockFile <= 0 ) { + uasserted( 10309 , str::stream() << "Unable to create / open lock file for lockfilepath: " << name << ' ' << errnoWithDescription()); } if (flock( lockFile, LOCK_EX | LOCK_NB ) != 0) { close ( lockFile ); lockFile = 0; uassert( 10310 , "Unable to acquire lock for lockfilepath: " + name, 0 ); } +#endif - if ( oldFile ){ + if ( oldFile ) { // we check this here because we want to see if we can get the lock // if we can't, then its probably just another mongod running - cout << "************** \n" - << "old lock file: " << name << ". probably means unclean shutdown\n" - << "recommend removing file and running --repair\n" - << "see: http://dochub.mongodb.org/core/repair for more information\n" - << "*************" << endl; - close ( lockFile ); - lockFile = 0; - uassert( 12596 , "old lock file" , 0 ); + + string errmsg; + if (cmdLine.dur) { + if (!dur::haveJournalFiles()) { + + vector dbnames; + getDatabaseNames( dbnames ); + + if ( dbnames.size() == 0 ) { + // this means that mongod crashed + // between initial startup and when journaling was initialized + // it is safe to continue + } + else { + errmsg = str::stream() + << "************** \n" + << "old lock file: " << name << ". probably means unclean shutdown,\n" + << "but there are no journal files to recover.\n" + << "this is likely human error or filesystem corruption.\n" + << "found " << dbnames.size() << " dbs.\n" + << "see: http://dochub.mongodb.org/core/repair for more information\n" + << "*************"; + } + + + } + } + else { + errmsg = str::stream() + << "************** \n" + << "old lock file: " << name << ". 
probably means unclean shutdown\n" + << "recommend removing file and running --repair\n" + << "see: http://dochub.mongodb.org/core/repair for more information\n" + << "*************"; + } + + if (!errmsg.empty()) { + cout << errmsg << endl; +#ifdef WIN32 + CloseHandle( lockFileHandle ); +#else + close ( lockFile ); +#endif + lockFile = 0; + uassert( 12596 , "old lock file" , 0 ); + } + } + + // Not related to lock file, but this is where we handle unclean shutdown + if( !cmdLine.dur && dur::haveJournalFiles() ) { + cout << "**************" << endl; + cout << "Error: journal files are present in journal directory, yet starting without --dur enabled." << endl; + cout << "It is recommended that you start with journaling enabled so that recovery may occur." << endl; + cout << "Alternatively (not recommended), you can backup everything, then delete the journal files, and run --repair" << endl; + cout << "**************" << endl; + uasserted(13597, "can't start without --dur enabled when journal/ files are present"); } +#ifdef WIN32 + uassert( 13625, "Unable to truncate lock file", _chsize(lockFile, 0) == 0); + writePid( lockFile ); + _commit( lockFile ); +#else uassert( 13342, "Unable to truncate lock file", ftruncate(lockFile, 0) == 0); writePid( lockFile ); fsync( lockFile ); +#endif } #else void acquirePathLock() { - // TODO - this is very bad + // TODO - this is very bad that the code above not running here. + + // Not related to lock file, but this is where we handle unclean shutdown + if( !cmdLine.dur && dur::haveJournalFiles() ) { + cout << "**************" << endl; + cout << "Error: journal files are present in journal directory, yet starting without --dur enabled." << endl; + cout << "It is recommended that you start with journaling enabled so that recovery may occur." << endl; + cout << "Alternatively (not recommended), you can backup everything, then delete the journal files, and run --repair" << endl; + cout << "**************" << endl; + uasserted(13618, "can't start without --dur enabled when journal/ files are present"); + } } -#endif - +#endif + } // namespace mongo diff --git a/db/instance.h b/db/instance.h index 5458fc1..2516aec 100644 --- a/db/instance.h +++ b/db/instance.h @@ -21,7 +21,7 @@ #include "../client/dbclient.h" -#include "curop.h" +#include "curop-inl.h" #include "security.h" #include "cmdline.h" #include "client.h" @@ -40,7 +40,7 @@ namespace mongo { DiagLog() : f(0) , level(0), mutex("DiagLog") { } void init() { - if ( ! f && level ){ + if ( ! f && level ) { log() << "diagLogging = " << level << endl; stringstream ss; ss << dbpath << "/diaglog." 
<< hex << time(0); @@ -55,20 +55,20 @@ namespace mongo { /** * @return old */ - int setLevel( int newLevel ){ + int setLevel( int newLevel ) { int old = level; level = newLevel; init(); return old; } void flush() { - if ( level ){ + if ( level ) { scoped_lock lk(mutex); f->flush(); } } void write(char *data,int len) { - if ( level & 1 ){ + if ( level & 1 ) { scoped_lock lk(mutex); f->write(data,len); } @@ -77,7 +77,7 @@ namespace mongo { if ( level & 2 ) { bool log = (level & 4) == 0; OCCASIONALLY log = true; - if ( log ){ + if ( log ) { scoped_lock lk(mutex); assert( f ); f->write(data,len); @@ -102,52 +102,56 @@ namespace mongo { } ~DbResponse() { delete response; } }; - - bool assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client = unknownAddress ); + + void assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client = unknownAddress ); void getDatabaseNames( vector< string > &names , const string& usePath = dbpath ); - /* returns true if there is no data on this server. useful when starting replication. - local database does NOT count. + /* returns true if there is no data on this server. useful when starting replication. + local database does NOT count. */ bool replHasDatabases(); -// --- local client --- - + /** "embedded" calls to the local server directly. + Caller does not need to lock, that is handled within. + */ class DBDirectClient : public DBClientBase { - public: virtual auto_ptr query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); - + virtual bool isFailed() const { return false; } virtual string toString() { return "DBDirectClient"; } - virtual string getServerAddress() const{ + virtual string getServerAddress() const { return "localhost"; // TODO: should this have the port? 
} - virtual bool call( Message &toSend, Message &response, bool assertOk=true ); + virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 ); virtual void say( Message &toSend ); virtual void sayPiggyBack( Message &toSend ) { // don't need to piggy back when connected locally return say( toSend ); } - + virtual void killCursor( long long cursorID ); - - virtual bool callRead( Message& toSend , Message& response ){ + + virtual bool callRead( Message& toSend , Message& response ) { return call( toSend , response ); } - - virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } - virtual bool isMember( const DBConnector * conn ) const { return this == conn; }; + + virtual unsigned long long count(const string &ns, const BSONObj& query = BSONObj(), int options=0, int limit=0, int skip=0 ); + + virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } }; extern int lockFile; +#ifdef WIN32 + extern HANDLE lockFileHandle; +#endif void acquirePathLock(); void maybeCreatePidFile(); - + } // namespace mongo diff --git a/db/introspect.cpp b/db/introspect.cpp index d72bb3f..cee0da8 100644 --- a/db/introspect.cpp +++ b/db/introspect.cpp @@ -26,8 +26,7 @@ namespace mongo { - void profile( const char *str, int millis) - { + void profile( const char *str, int millis) { BSONObjBuilder b; b.appendDate("ts", jsTime()); b.append("info", str); diff --git a/db/jsobj.cpp b/db/jsobj.cpp index 9f613c7..25ab8a8 100644 --- a/db/jsobj.cpp +++ b/db/jsobj.cpp @@ -18,6 +18,7 @@ */ #include "pch.h" +#include "../bson/oid.h" #include "jsobj.h" #include "nonce.h" #include "../bson/util/atomic_int.h" @@ -34,6 +35,7 @@ #define assert MONGO_assert // make sure our assumptions are valid +BOOST_STATIC_ASSERT( sizeof(short) == 2 ); BOOST_STATIC_ASSERT( sizeof(int) == 4 ); BOOST_STATIC_ASSERT( sizeof(long long) == 8 ); BOOST_STATIC_ASSERT( sizeof(double) == 8 ); @@ -48,6 +50,9 @@ namespace mongo { DateNowLabeler DATENOW; + MinKeyLabeler MINKEY; + MaxKeyLabeler MAXKEY; + string escape( string s , bool escape_slash=false) { StringBuilder ret; for ( string::iterator i = s.begin(); i != s.end(); ++i ) { @@ -81,7 +86,8 @@ namespace mongo { //TODO: these should be utf16 code-units not bytes char c = *i; ret << "\\u00" << toHexLower(&c, 1); - } else { + } + else { ret << *i; } } @@ -111,7 +117,8 @@ namespace mongo { number() <= numeric_limits< double >::max() ) { s.precision( 16 ); s << number(); - } else { + } + else { StringBuilder ss; ss << "Number " << number() << " cannot be represented in JSON"; string message = ss.str(); @@ -170,13 +177,15 @@ namespace mongo { case jstOID: if ( format == TenGen ) { s << "ObjectId( "; - } else { + } + else { s << "{ \"$oid\" : "; } s << '"' << __oid() << '"'; if ( format == TenGen ) { s << " )"; - } else { + } + else { s << " }"; } break; @@ -203,7 +212,8 @@ namespace mongo { if( d == 0 ) s << '0'; else s << '"' << date().toString() << '"'; - } else + } + else s << date(); if ( format == Strict ) s << " }"; @@ -211,13 +221,14 @@ namespace mongo { s << " )"; break; case RegEx: - if ( format == Strict ){ + if ( format == Strict ) { s << "{ \"$regex\" : \"" << escape( regex() ); s << "\", \"$options\" : \"" << regexFlags() << "\" }"; - } else { + } + else { s << "/" << escape( regex() , true ) << "/"; // FIXME Worry about alpha order? 
- for ( const char *f = regexFlags(); *f; ++f ){ + for ( const char *f = regexFlags(); *f; ++f ) { switch ( *f ) { case 'g': case 'i': @@ -232,7 +243,7 @@ namespace mongo { case CodeWScope: { BSONObj scope = codeWScopeObject(); - if ( ! scope.isEmpty() ){ + if ( ! scope.isEmpty() ) { s << "{ \"$code\" : " << _asCode() << " , " << " \"$scope\" : " << scope.jsonString() << " }"; break; @@ -243,7 +254,7 @@ namespace mongo { case Code: s << _asCode(); break; - + case Timestamp: s << "{ \"t\" : " << timestampTime() << " , \"i\" : " << timestampInc() << " }"; break; @@ -259,7 +270,7 @@ namespace mongo { default: StringBuilder ss; ss << "Cannot create a properly formatted JSON string with " - << "element: " << toString() << " of type: " << type(); + << "element: " << toString() << " of type: " << type(); string message = ss.str(); massert( 10312 , message.c_str(), false ); } @@ -279,13 +290,13 @@ namespace mongo { else if ( fn[3] == 'e' && fn[4] == 0 ) return BSONObj::LTE; } } - else if ( fn[1] == 'n' && fn[2] == 'e' ){ + else if ( fn[1] == 'n' && fn[2] == 'e' ) { if ( fn[3] == 0 ) return BSONObj::NE; - if ( fn[3] == 'a' && fn[4] == 'r' && fn[5] == 0 ) + if ( fn[3] == 'a' && fn[4] == 'r') // matches anything with $near prefix return BSONObj::opNEAR; } - else if ( fn[1] == 'm' ){ + else if ( fn[1] == 'm' ) { if ( fn[2] == 'o' && fn[3] == 'd' && fn[4] == 0 ) return BSONObj::opMOD; if ( fn[2] == 'a' && fn[3] == 'x' && fn[4] == 'D' && fn[5] == 'i' && fn[6] == 's' && fn[7] == 't' && fn[8] == 'a' && fn[9] == 'n' && fn[10] == 'c' && fn[11] == 'e' && fn[12] == 0 ) @@ -301,7 +312,7 @@ namespace mongo { return BSONObj::opALL; else if ( fn[1] == 's' && fn[2] == 'i' && fn[3] == 'z' && fn[4] == 'e' && fn[5] == 0 ) return BSONObj::opSIZE; - else if ( fn[1] == 'e' ){ + else if ( fn[1] == 'e' ) { if ( fn[2] == 'x' && fn[3] == 'i' && fn[4] == 's' && fn[5] == 't' && fn[6] == 's' && fn[7] == 0 ) return BSONObj::opEXISTS; if ( fn[2] == 'l' && fn[3] == 'e' && fn[4] == 'm' && fn[5] == 'M' && fn[6] == 'a' && fn[7] == 't' && fn[8] == 'c' && fn[9] == 'h' && fn[10] == 0 ) @@ -370,22 +381,24 @@ namespace mongo { double left = l.number(); double right = r.number(); bool lNan = !( left <= numeric_limits< double >::max() && - left >= -numeric_limits< double >::max() ); + left >= -numeric_limits< double >::max() ); bool rNan = !( right <= numeric_limits< double >::max() && - right >= -numeric_limits< double >::max() ); + right >= -numeric_limits< double >::max() ); if ( lNan ) { if ( rNan ) { return 0; - } else { + } + else { return -1; } - } else if ( rNan ) { + } + else if ( rNan ) { return 1; } x = left - right; if ( x < 0 ) return -1; return x == 0 ? 
0 : 1; - } + } case jstOID: return memcmp(l.value(), r.value(), 12); case Code: @@ -408,8 +421,7 @@ namespace mongo { if ( lsz - rsz != 0 ) return lsz - rsz; return memcmp(l.value()+4, r.value()+4, lsz+1); } - case RegEx: - { + case RegEx: { int c = strcmp(l.regex(), r.regex()); if ( c ) return c; @@ -462,11 +474,14 @@ namespace mongo { return fe.getGtLtOp(); } - FieldCompareResult compareDottedFieldNames( const string& l , const string& r ){ + FieldCompareResult compareDottedFieldNames( const string& l , const string& r ) { + static int maxLoops = 1024 * 1024; + size_t lstart = 0; size_t rstart = 0; - while ( 1 ){ - if ( lstart >= l.size() ){ + + for ( int i=0; i= l.size() ) { if ( rstart >= r.size() ) return SAME; return RIGHT_SUBFIELD; @@ -493,6 +508,10 @@ namespace mongo { lstart = lend + 1; rstart = rend + 1; } + + log() << "compareDottedFieldNames ERROR l: " << l << " r: " << r << " TOO MANY LOOPS" << endl; + assert(0); + return SAME; // will never get here } /* BSONObj ------------------------------------------------------------*/ @@ -534,33 +553,35 @@ namespace mongo { return s.str(); } -// todo: can be a little faster if we don't use toString() here. bool BSONObj::valid() const { - try{ + try { BSONObjIterator it(*this); - while( it.moreWithEOO() ){ + while( it.moreWithEOO() ) { // both throw exception on failure BSONElement e = it.next(true); e.validate(); - if (e.eoo()){ + if (e.eoo()) { if (it.moreWithEOO()) return false; return true; - }else if (e.isABSONObj()){ + } + else if (e.isABSONObj()) { if(!e.embeddedObject().valid()) return false; - }else if (e.type() == CodeWScope){ + } + else if (e.type() == CodeWScope) { if(!e.codeWScopeObject().valid()) return false; } } - } catch (...) { + } + catch (...) { } return false; } - int BSONObj::woCompare(const BSONObj& r, const Ordering &o, bool considerFieldName) const { + int BSONObj::woCompare(const BSONObj& r, const Ordering &o, bool considerFieldName) const { if ( isEmpty() ) return r.isEmpty() ? 0 : -1; if ( r.isEmpty() ) @@ -619,13 +640,13 @@ namespace mongo { return 1; int x; -/* - if( ordered && o.type() == String && strcmp(o.valuestr(), "ascii-proto") == 0 && - l.type() == String && r.type() == String ) { - // note: no negative support yet, as this is just sort of a POC - x = _stricmp(l.valuestr(), r.valuestr()); - } - else*/ { + /* + if( ordered && o.type() == String && strcmp(o.valuestr(), "ascii-proto") == 0 && + l.type() == String && r.type() == String ) { + // note: no negative support yet, as this is just sort of a POC + x = _stricmp(l.valuestr(), r.valuestr()); + } + else*/ { x = l.woCompare( r, considerFieldName ); if ( ordered && o.number() < 0 ) x = -x; @@ -639,7 +660,7 @@ namespace mongo { BSONObj staticNull = fromjson( "{'':null}" ); /* well ordered compare */ - int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const{ + int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const { if ( isEmpty() ) return other.isEmpty() ? 0 : -1; if ( other.isEmpty() ) @@ -648,7 +669,7 @@ namespace mongo { uassert( 10060 , "woSortOrder needs a non-empty sortKey" , ! 
sortKey.isEmpty() ); BSONObjIterator i(sortKey); - while ( 1 ){ + while ( 1 ) { BSONElement f = i.next(); if ( f.eoo() ) return 0; @@ -678,36 +699,41 @@ namespace mongo { const char* next = p+1; BSONElement e = getField( left.c_str() ); - if (e.type() == Object){ + if (e.type() == Object) { e.embeddedObject().getFieldsDotted(next, ret); - } else if (e.type() == Array) { + } + else if (e.type() == Array) { bool allDigits = false; - if ( isdigit( *next ) ){ + if ( isdigit( *next ) ) { const char * temp = next + 1; while ( isdigit( *temp ) ) temp++; - allDigits = *temp == '.'; + allDigits = (*temp == '.' || *temp == '\0'); } if (allDigits) { e.embeddedObject().getFieldsDotted(next, ret); - } else { + } + else { BSONObjIterator i(e.embeddedObject()); - while ( i.more() ){ + while ( i.more() ) { BSONElement e2 = i.next(); if (e2.type() == Object || e2.type() == Array) e2.embeddedObject().getFieldsDotted(next, ret); } } - } else { + } + else { // do nothing: no match } } - } else { - if (e.type() == Array){ + } + else { + if (e.type() == Array) { BSONObjIterator i(e.embeddedObject()); while ( i.more() ) ret.insert(i.next()); - } else { + } + else { ret.insert(e); } } @@ -715,15 +741,18 @@ namespace mongo { BSONElement BSONObj::getFieldDottedOrArray(const char *&name) const { const char *p = strchr(name, '.'); - string left; + + BSONElement sub; + if ( p ) { - left = string(name, p-name); + sub = getField( string(name, p-name) ); name = p + 1; - } else { - left = string(name); + } + else { + sub = getField( name ); name = name + strlen(name); } - BSONElement sub = getField(left.c_str()); + if ( sub.eoo() ) return nullElement; else if ( sub.type() == Array || name[0] == '\0') @@ -778,7 +807,7 @@ namespace mongo { break; BSONElement x = filter.getField( e.fieldName() ); if ( ( x.eoo() && !inFilter ) || - ( !x.eoo() && inFilter ) ) + ( !x.eoo() && inFilter ) ) b.append( e ); } return b.obj(); @@ -858,7 +887,8 @@ namespace mongo { gotId = gotId || strcmp(fname, "_id")==0; if ( n == N && gotId ) break; - } else if ( strcmp(fname, "_id")==0 ) { + } + else if ( strcmp(fname, "_id")==0 ) { b.append(e); gotId = true; if ( n == N && gotId ) @@ -882,20 +912,20 @@ namespace mongo { if ( e.eoo() ) break; switch( e.type() ) { - case MinKey: { - BSONObjBuilder m; - m.append( "$minElement", 1 ); - b.append( e.fieldName(), m.done() ); - break; - } - case MaxKey: { - BSONObjBuilder m; - m.append( "$maxElement", 1 ); - b.append( e.fieldName(), m.done() ); - break; - } - default: - b.append( e ); + case MinKey: { + BSONObjBuilder m; + m.append( "$minElement", 1 ); + b.append( e.fieldName(), m.done() ); + break; + } + case MaxKey: { + BSONObjBuilder m; + m.append( "$maxElement", 1 ); + b.append( e.fieldName(), m.done() ); + break; + } + default: + b.append( e ); } } return b.obj(); @@ -913,7 +943,8 @@ namespace mongo { if ( !f.eoo() ) { b.appendAs( e, f.fieldName() ); f = j.next(); - } else { + } + else { b.append( e ); } } @@ -922,20 +953,20 @@ namespace mongo { bool BSONObj::okForStorage() const { BSONObjIterator i( *this ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); const char * name = e.fieldName(); - + if ( strchr( name , '.' ) || - strchr( name , '$' ) ){ - return + strchr( name , '$' ) ) { + return strcmp( name , "$ref" ) == 0 || strcmp( name , "$id" ) == 0 ; } - - if ( e.mayEncapsulate() ){ - switch ( e.type() ){ + + if ( e.mayEncapsulate() ) { + switch ( e.type() ) { case Object: case Array: if ( ! 
e.embeddedObject().okForStorage() ) @@ -948,7 +979,7 @@ namespace mongo { default: uassert( 12579, "unhandled cases in BSONObj okForStorage" , 0 ); } - + } } return true; @@ -982,25 +1013,26 @@ namespace mongo { return ss.str(); } - void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base){ + void nested2dotted(BSONObjBuilder& b, const BSONObj& obj, const string& base) { BSONObjIterator it(obj); - while (it.more()){ + while (it.more()) { BSONElement e = it.next(); - if (e.type() == Object){ + if (e.type() == Object) { string newbase = base + e.fieldName() + "."; nested2dotted(b, e.embeddedObject(), newbase); - }else{ + } + else { string newbase = base + e.fieldName(); b.appendAs(e, newbase); } } } - void dotted2nested(BSONObjBuilder& b, const BSONObj& obj){ + void dotted2nested(BSONObjBuilder& b, const BSONObj& obj) { //use map to sort fields BSONMap sorted = bson2map(obj); EmbeddedBuilder eb(&b); - for(BSONMap::const_iterator it=sorted.begin(); it!=sorted.end(); ++it){ + for(BSONMap::const_iterator it=sorted.begin(); it!=sorted.end(); ++it) { eb.appendAs(it->second, it->first); } eb.done(); @@ -1037,16 +1069,16 @@ namespace mongo { } minkeydata; BSONObj minKey((const char *) &minkeydata); -/* - struct JSObj0 { - JSObj0() { - totsize = 5; - eoo = EOO; - } - int totsize; - char eoo; - } js0; -*/ + /* + struct JSObj0 { + JSObj0() { + totsize = 5; + eoo = EOO; + } + int totsize; + char eoo; + } js0; + */ #pragma pack() struct BsonUnitTest : public UnitTest { @@ -1078,7 +1110,7 @@ namespace mongo { assert( b == id ); } - void testbounds(){ + void testbounds() { BSONObj l , r; { BSONObjBuilder b; @@ -1101,7 +1133,7 @@ namespace mongo { assert( r.woCompare( l ) > 0 ); } - void testorder(){ + void testorder() { { BSONObj x,y,z; { BSONObjBuilder b; b.append( "x" , (long long)2 ); x = b.obj(); } @@ -1176,84 +1208,6 @@ namespace mongo { } } bson_unittest; -/* - BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const char * value ) { - _builder->append( _fieldName , value ); - return *_builder; - } - - BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const int value ) { - _builder->append( _fieldName , value ); - return *_builder; - } - - BSONObjBuilder& BSONObjBuilderValueStream::operator<<( const double value ) { - _builder->append( _fieldName , value ); - return *_builder; - } -*/ - - void OID::init() { - static AtomicUInt inc = getRandomNumber(); - unsigned t = (unsigned) time(0); - char *T = (char *) &t; - data[0] = T[3]; - data[1] = T[2]; - data[2] = T[1]; - data[3] = T[0]; - - (unsigned&) data[4] = _machine; - - int new_inc = inc++; - T = (char *) &new_inc; - char * raw = (char*)&b; - raw[0] = T[3]; - raw[1] = T[2]; - raw[2] = T[1]; - raw[3] = T[0]; - } - - unsigned OID::_machine = (unsigned) security.getNonceInitSafe(); - void OID::newState(){ - unsigned before = _machine; - // using fresh Security object to avoid buffered devrandom - _machine = (unsigned)security.getNonce(); - assert( _machine != before ); - } - - void OID::init( string s ){ - assert( s.size() == 24 ); - const char *p = s.c_str(); - for( int i = 0; i < 12; i++ ) { - data[i] = fromHex(p); - p += 2; - } - } - - void OID::init(Date_t date, bool max){ - int time = (int) (date / 1000); - char* T = (char *) &time; - data[0] = T[3]; - data[1] = T[2]; - data[2] = T[1]; - data[3] = T[0]; - - if (max) - *(long long*)(data + 4) = 0xFFFFFFFFFFFFFFFFll; - else - *(long long*)(data + 4) = 0x0000000000000000ll; - } - - time_t OID::asTimeT(){ - int time; - char* T = (char *) &time; - T[0] = data[3]; - T[1] = 
data[2]; - T[2] = data[1]; - T[3] = data[0]; - return time; - } - Labeler::Label GT( "$gt" ); Labeler::Label GTE( "$gte" ); Labeler::Label LT( "$lt" ); @@ -1268,21 +1222,20 @@ namespace mongo { timestamp = OpTime::now().asDate(); } - void BSONObjBuilder::appendMinForType( const StringData& fieldName , int t ){ - switch ( t ){ + void BSONObjBuilder::appendMinForType( const StringData& fieldName , int t ) { + switch ( t ) { case MinKey: appendMinKey( fieldName ); return; case MaxKey: appendMinKey( fieldName ); return; case NumberInt: case NumberDouble: case NumberLong: append( fieldName , - numeric_limits::max() ); return; - case jstOID: - { - OID o; - memset(&o, 0, sizeof(o)); - appendOID( fieldName , &o); - return; - } + case jstOID: { + OID o; + memset(&o, 0, sizeof(o)); + appendOID( fieldName , &o); + return; + } case Bool: appendBool( fieldName , false); return; case Date: appendDate( fieldName , 0); return; case jstNULL: appendNull( fieldName ); return; @@ -1296,13 +1249,12 @@ namespace mongo { case Undefined: appendUndefined( fieldName ); return; case RegEx: appendRegex( fieldName , "" ); return; - case DBRef: - { - OID o; - memset(&o, 0, sizeof(o)); - appendDBRef( fieldName , "" , o ); - return; - } + case DBRef: { + OID o; + memset(&o, 0, sizeof(o)); + appendDBRef( fieldName , "" , o ); + return; + } case Code: appendCode( fieldName , "" ); return; case CodeWScope: appendCodeWScope( fieldName , "" , BSONObj() ); return; case Timestamp: appendTimestamp( fieldName , 0); return; @@ -1312,8 +1264,8 @@ namespace mongo { uassert( 10061 , "type not supported for appendMinElementForType" , false ); } - void BSONObjBuilder::appendMaxForType( const StringData& fieldName , int t ){ - switch ( t ){ + void BSONObjBuilder::appendMaxForType( const StringData& fieldName , int t ) { + switch ( t ) { case MinKey: appendMaxKey( fieldName ); break; case MaxKey: appendMaxKey( fieldName ); break; case NumberInt: @@ -1324,13 +1276,12 @@ namespace mongo { case BinData: appendMinForType( fieldName , jstOID ); break; - case jstOID: - { - OID o; - memset(&o, 0xFF, sizeof(o)); - appendOID( fieldName , &o); - break; - } + case jstOID: { + OID o; + memset(&o, 0xFF, sizeof(o)); + appendOID( fieldName , &o); + break; + } case Undefined: case jstNULL: appendMinForType( fieldName , NumberInt ); @@ -1349,7 +1300,7 @@ namespace mongo { } const string BSONObjBuilder::numStrs[] = { - "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", @@ -1361,77 +1312,77 @@ namespace mongo { "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", }; - bool BSONObjBuilder::appendAsNumber( const StringData& fieldName , const string& data ){ + bool BSONObjBuilder::appendAsNumber( const StringData& fieldName , const string& data ) { if ( data.size() == 0 || data == "-") return false; - + unsigned int pos=0; if ( data[0] == '-' ) pos++; - + bool hasDec = false; - - for ( ; pos( data ); append( fieldName , num ); return true; } - catch(bad_lexical_cast &){ + catch(bad_lexical_cast &) { return false; } } - void BSONObjBuilder::appendKeys( const BSONObj& keyPattern , const BSONObj& values ){ + void BSONObjBuilder::appendKeys( const BSONObj& keyPattern , const BSONObj& values ) { BSONObjIterator i(keyPattern); BSONObjIterator j(values); - - while ( i.more() && j.more() ){ + + while ( i.more() && j.more() ) { 
appendAs( j.next() , i.next().fieldName() ); } - + assert( ! i.more() ); assert( ! j.more() ); } - int BSONElementFieldSorter( const void * a , const void * b ){ + int BSONElementFieldSorter( const void * a , const void * b ) { const char * x = *((const char**)a); const char * y = *((const char**)b); x++; y++; return lexNumCmp( x , y ); } - - BSONObjIteratorSorted::BSONObjIteratorSorted( const BSONObj& o ){ + + BSONObjIteratorSorted::BSONObjIteratorSorted( const BSONObj& o ) { _nfields = o.nFields(); _fields = new const char*[_nfields]; int x = 0; BSONObjIterator i( o ); - while ( i.more() ){ + while ( i.more() ) { _fields[x++] = i.next().rawdata(); assert( _fields[x-1] ); } @@ -1441,10 +1392,10 @@ namespace mongo { } /** transform a BSON array into a vector of BSONElements. - we match array # positions with their vector position, and ignore - any non-numeric fields. + we match array # positions with their vector position, and ignore + any fields with non-numeric field names. */ - vector BSONElement::Array() const { + vector BSONElement::Array() const { chk(mongo::Array); vector v; BSONObjIterator i(Obj()); @@ -1453,7 +1404,7 @@ namespace mongo { const char *f = e.fieldName(); try { unsigned u = stringToNum(f); - assert( u < 4096 ); + assert( u < 1000000 ); if( u >= v.size() ) v.resize(u+1); v[u] = e; diff --git a/db/jsobj.h b/db/jsobj.h index 258a952..a6472d5 100644 --- a/db/jsobj.h +++ b/db/jsobj.h @@ -1,4 +1,4 @@ -/** @file jsobj.h +/** @file jsobj.h BSON classes */ @@ -40,7 +40,7 @@ #include "../bson/bsonmisc.h" #include "../bson/bsonobjbuilder.h" #include "../bson/bsonobjiterator.h" -#include "../bson/bsoninlines.h" +#include "../bson/bson-inl.h" #include "../bson/ordering.h" #include "../bson/stringdata.h" diff --git a/db/jsobjmanipulator.h b/db/jsobjmanipulator.h index c43e876..0b3c0c2 100644 --- a/db/jsobjmanipulator.h +++ b/db/jsobjmanipulator.h @@ -19,6 +19,7 @@ #pragma once #include "jsobj.h" +#include "dur.h" namespace mongo { @@ -35,41 +36,68 @@ namespace mongo { OpTime::now().asDate() */ void initTimestamp(); - + /** Change the value, in place, of the number. 
*/ void setNumber(double d) { if ( _element.type() == NumberDouble ) *reinterpret_cast< double * >( value() ) = d; else if ( _element.type() == NumberInt ) *reinterpret_cast< int * >( value() ) = (int) d; else assert(0); } - void setLong(long long n) { + void SetNumber(double d) { + if ( _element.type() == NumberDouble ) + *getDur().writing( reinterpret_cast< double * >( value() ) ) = d; + else if ( _element.type() == NumberInt ) + *getDur().writing( reinterpret_cast< int * >( value() ) ) = (int) d; + else assert(0); + } + void setLong(long long n) { assert( _element.type() == NumberLong ); *reinterpret_cast< long long * >( value() ) = n; } - void setInt(int n) { + void SetLong(long long n) { + assert( _element.type() == NumberLong ); + *getDur().writing( reinterpret_cast< long long * >(value()) ) = n; + } + void setInt(int n) { assert( _element.type() == NumberInt ); *reinterpret_cast< int * >( value() ) = n; } + void SetInt(int n) { + assert( _element.type() == NumberInt ); + getDur().writingInt( *reinterpret_cast< int * >( value() ) ) = n; + } + - /** Replace the type and value of the element with the type and value of e, preserving the original fieldName */ void replaceTypeAndValue( const BSONElement &e ) { *data() = e.type(); memcpy( value(), e.value(), e.valuesize() ); } - - static void lookForTimestamps( const BSONObj& obj ){ + + /* dur:: version */ + void ReplaceTypeAndValue( const BSONElement &e ) { + char *d = data(); + char *v = value(); + int valsize = e.valuesize(); + int ofs = (int) (v-d); + dassert( ofs > 0 ); + char *p = (char *) getDur().writingPtr(d, valsize + ofs); + *p = e.type(); + memcpy( p + ofs, e.value(), valsize ); + } + + static void lookForTimestamps( const BSONObj& obj ) { // If have a Timestamp field as the first or second element, // update it to a Date field set to OpTime::now().asDate(). The // replacement policy is a work in progress. 
- + BSONObjIterator i( obj ); for( int j = 0; i.moreWithEOO() && j < 2; ++j ) { BSONElement e = i.next(); if ( e.eoo() ) break; - if ( e.type() == Timestamp ){ + if ( e.type() == Timestamp ) { BSONElementManipulator( e ).initTimestamp(); break; } diff --git a/db/json.cpp b/db/json.cpp index 185a8ca..4a6fad8 100644 --- a/db/json.cpp +++ b/db/json.cpp @@ -43,12 +43,12 @@ using namespace boost::spirit; namespace mongo { struct ObjectBuilder : boost::noncopyable { - ~ObjectBuilder(){ + ~ObjectBuilder() { unsigned i = builders.size(); - if ( i ){ + if ( i ) { i--; - for ( ; i>=1; i-- ){ - if ( builders[i] ){ + for ( ; i>=1; i-- ) { + if ( builders[i] ) { builders[i]->done(); } } @@ -205,7 +205,8 @@ namespace mongo { else if ( first < 0x08 ) { b.ss << char( 0xc0 | ( ( first << 2 ) | ( second >> 6 ) ) ); b.ss << char( 0x80 | ( ~0xc0 & second ) ); - } else { + } + else { b.ss << char( 0xe0 | ( first >> 4 ) ); b.ss << char( 0x80 | ( ~0xc0 & ( ( first << 2 ) | ( second >> 6 ) ) ) ); b.ss << char( 0x80 | ( ~0xc0 & second ) ); @@ -342,7 +343,7 @@ namespace mongo { struct dbrefEnd { dbrefEnd( ObjectBuilder &_b ) : b( _b ) {} void operator() ( const char *start, const char *end ) const { - b.back()->appendDBRef( b.fieldName(), b.ns.c_str(), b.oid ); + b.back()->appendDBRef( b.fieldName(), b.ns, b.oid ); } ObjectBuilder &b; }; @@ -417,8 +418,7 @@ namespace mongo { struct regexEnd { regexEnd( ObjectBuilder &_b ) : b( _b ) {} void operator() ( const char *start, const char *end ) const { - b.back()->appendRegex( b.fieldName(), b.regex.c_str(), - b.regexOptions.c_str() ); + b.back()->appendRegex( b.fieldName(), b.regex, b.regexOptions ); } ObjectBuilder &b; }; @@ -438,7 +438,7 @@ namespace mongo { // in the original z example on line 3, if the input was "ab", foo() would only // be called once. 
struct JsonGrammar : public grammar< JsonGrammar > { -public: + public: JsonGrammar( ObjectBuilder &_b ) : b( _b ) {} template < typename ScannerT > @@ -472,32 +472,32 @@ public: str = lexeme_d[ ch_p( '"' )[ chClear( self.b ) ] >> *( ( ch_p( '\\' ) >> ( - ch_p( 'b' )[ chE( self.b ) ] | - ch_p( 'f' )[ chE( self.b ) ] | - ch_p( 'n' )[ chE( self.b ) ] | - ch_p( 'r' )[ chE( self.b ) ] | - ch_p( 't' )[ chE( self.b ) ] | - ch_p( 'v' )[ chE( self.b ) ] | - ( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) | - ( ~ch_p('x') & (~range_p('0','9'))[ ch( self.b ) ] ) // hex and octal aren't supported + ch_p( 'b' )[ chE( self.b ) ] | + ch_p( 'f' )[ chE( self.b ) ] | + ch_p( 'n' )[ chE( self.b ) ] | + ch_p( 'r' )[ chE( self.b ) ] | + ch_p( 't' )[ chE( self.b ) ] | + ch_p( 'v' )[ chE( self.b ) ] | + ( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) | + ( ~ch_p('x') & (~range_p('0','9'))[ ch( self.b ) ] ) // hex and octal aren't supported ) ) | ( ~range_p( 0x00, 0x1f ) & ~ch_p( '"' ) & ( ~ch_p( '\\' ) )[ ch( self.b ) ] ) ) >> '"' ]; singleQuoteStr = lexeme_d[ ch_p( '\'' )[ chClear( self.b ) ] >> - *( ( ch_p( '\\' ) >> - ( - ch_p( 'b' )[ chE( self.b ) ] | - ch_p( 'f' )[ chE( self.b ) ] | - ch_p( 'n' )[ chE( self.b ) ] | - ch_p( 'r' )[ chE( self.b ) ] | - ch_p( 't' )[ chE( self.b ) ] | - ch_p( 'v' )[ chE( self.b ) ] | - ( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) | - ( ~ch_p('x') & (~range_p('0','9'))[ ch( self.b ) ] ) // hex and octal aren't supported - ) - ) | - ( ~range_p( 0x00, 0x1f ) & ~ch_p( '\'' ) & ( ~ch_p( '\\' ) )[ ch( self.b ) ] ) ) >> '\'' ]; + *( ( ch_p( '\\' ) >> + ( + ch_p( 'b' )[ chE( self.b ) ] | + ch_p( 'f' )[ chE( self.b ) ] | + ch_p( 'n' )[ chE( self.b ) ] | + ch_p( 'r' )[ chE( self.b ) ] | + ch_p( 't' )[ chE( self.b ) ] | + ch_p( 'v' )[ chE( self.b ) ] | + ( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) | + ( ~ch_p('x') & (~range_p('0','9'))[ ch( self.b ) ] ) // hex and octal aren't supported + ) + ) | + ( ~range_p( 0x00, 0x1f ) & ~ch_p( '\'' ) & ( ~ch_p( '\\' ) )[ ch( self.b ) ] ) ) >> '\'' ]; // real_p accepts numbers with nonsignificant zero prefixes, which // aren't allowed in JSON. Oh well. 
@@ -548,8 +548,8 @@ public: >> ( *( ch_p( 'i' ) | ch_p( 'g' ) | ch_p( 'm' ) ) )[ regexOptions( self.b ) ] ]; } rule< ScannerT > object, members, array, elements, value, str, number, integer, - dbref, dbrefS, dbrefT, oid, oidS, oidT, bindata, date, dateS, dateT, - regex, regexS, regexT, quotedOid, fieldName, unquotedFieldName, singleQuoteStr; + dbref, dbrefS, dbrefT, oid, oidS, oidT, bindata, date, dateS, dateT, + regex, regexS, regexT, quotedOid, fieldName, unquotedFieldName, singleQuoteStr; const rule< ScannerT > &start() const { return object; } @@ -558,7 +558,7 @@ public: }; BSONObj fromjson( const char *str , int* len) { - if ( str[0] == '\0' ){ + if ( str[0] == '\0' ) { if (len) *len = 0; return BSONObj(); } @@ -568,7 +568,8 @@ public: parse_info<> result = parse( str, parser, space_p ); if (len) { *len = result.stop - str; - } else if ( !result.full ) { + } + else if ( !result.full ) { int limit = strnlen(result.stop , 10); if (limit == -1) limit = 10; msgasserted(10340, "Failure parsing JSON string near: " + string( result.stop, limit )); diff --git a/db/lasterror.cpp b/db/lasterror.cpp index 12fc694..ba52111 100644 --- a/db/lasterror.cpp +++ b/db/lasterror.cpp @@ -34,28 +34,37 @@ namespace mongo { void raiseError(int code , const char *msg) { LastError *le = lastError.get(); if ( le == 0 ) { - /* might be intentional (non-user thread) */ + /* might be intentional (non-user thread) */ DEV { static unsigned n; if( ++n < 4 && !isShell ) log() << "dev: lastError==0 won't report:" << msg << endl; } - } else if ( le->disabled ) { + } + else if ( le->disabled ) { log() << "lastError disabled, can't report: " << code << ":" << msg << endl; - } else { + } + else { le->raiseError(code, msg); } } - - void LastError::appendSelf( BSONObjBuilder &b ) { + + bool LastError::appendSelf( BSONObjBuilder &b , bool blankErr ) { if ( !valid ) { - b.appendNull( "err" ); + if ( blankErr ) + b.appendNull( "err" ); b.append( "n", 0 ); - return; + return false; } - if ( msg.empty() ) - b.appendNull( "err" ); - else + + if ( msg.empty() ) { + if ( blankErr ) { + b.appendNull( "err" ); + } + } + else { b.append( "err", msg ); + } + if ( code ) b.append( "code" , code ); if ( updatedExisting != NotUpdate ) @@ -65,13 +74,24 @@ namespace mongo { if ( writebackId.isSet() ) b.append( "writeback" , writebackId ); b.appendNumber( "n", nObjects ); + + return ! msg.empty(); + } + + LastErrorHolder::~LastErrorHolder() { + for ( IDMap::iterator i = _ids.begin(); i != _ids.end(); ++i ) { + delete i->second.lerr; + i->second.lerr = 0; + } + _ids.clear(); } - void LastErrorHolder::setID( int id ){ + + void LastErrorHolder::setID( int id ) { _id.set( id ); } - - int LastErrorHolder::getID(){ + + int LastErrorHolder::getID() { return _id.get(); } @@ -89,24 +109,24 @@ namespace mongo { return ret; return 0; } - - LastError * LastErrorHolder::_get( bool create ){ + + LastError * LastErrorHolder::_get( bool create ) { int id = _id.get(); - if ( id == 0 ){ + if ( id == 0 ) { LastError * le = _tl.get(); - if ( ! le && create ){ + if ( ! le && create ) { le = new LastError(); _tl.reset( le ); } return le; } - scoped_lock lock(_idsmutex); + scoped_lock lock(_idsmutex); map::iterator i = _ids.find( id ); - if ( i == _ids.end() ){ + if ( i == _ids.end() ) { if ( ! 
create ) return 0; - + LastError * le = new LastError(); Status s; s.time = time(0); @@ -114,42 +134,42 @@ namespace mongo { _ids[id] = s; return le; } - + Status &status = i->second; status.time = time(0); return status.lerr; } - void LastErrorHolder::remove( int id ){ + void LastErrorHolder::remove( int id ) { scoped_lock lock(_idsmutex); map::iterator i = _ids.find( id ); if ( i == _ids.end() ) return; - + delete i->second.lerr; _ids.erase( i ); } - void LastErrorHolder::release(){ + void LastErrorHolder::release() { int id = _id.get(); - if ( id == 0 ){ + if ( id == 0 ) { _tl.release(); return; } - + remove( id ); } /** ok to call more than once. */ - void LastErrorHolder::initThread() { + void LastErrorHolder::initThread() { if( _tl.get() ) return; assert( _id.get() == 0 ); _tl.reset( new LastError() ); } - - void LastErrorHolder::reset( LastError * le ){ + + void LastErrorHolder::reset( LastError * le ) { int id = _id.get(); - if ( id == 0 ){ + if ( id == 0 ) { _tl.reset( le ); return; } @@ -159,17 +179,18 @@ namespace mongo { status.time = time(0); status.lerr = le; } - + void prepareErrForNewRequest( Message &m, LastError * err ) { // a killCursors message shouldn't affect last error if ( m.operation() == dbKillCursors ) { err->disabled = true; - } else { + } + else { err->disabled = false; err->nPrev++; - } + } } - + LastError * LastErrorHolder::startRequest( Message& m , int clientId ) { assert( clientId ); setID( clientId ); @@ -183,33 +204,33 @@ namespace mongo { prepareErrForNewRequest( m, connectionOwned ); } - void LastErrorHolder::disconnect( int clientId ){ + void LastErrorHolder::disconnect( int clientId ) { if ( clientId ) remove(clientId); } struct LastErrorHolderTest : public UnitTest { public: - - void test( int i ){ + + void test( int i ) { _tl.set( i ); assert( _tl.get() == i ); } - - void tlmaptest(){ + + void tlmaptest() { test( 1 ); test( 12123123 ); test( -123123 ); test( numeric_limits::min() ); test( numeric_limits::max() ); } - - void run(){ + + void run() { tlmaptest(); LastError * a = new LastError(); LastError * b = new LastError(); - + LastErrorHolder holder; holder.reset( a ); assert( a == holder.get() ); @@ -219,10 +240,10 @@ namespace mongo { assert( b == holder.get() ); holder.setID( 0 ); assert( a == holder.get() ); - + holder.remove( 1 ); } - + ThreadLocalValue _tl; } lastErrorHolderTest; diff --git a/db/lasterror.h b/db/lasterror.h index 2006f1c..c77ec74 100644 --- a/db/lasterror.h +++ b/db/lasterror.h @@ -33,7 +33,7 @@ namespace mongo { int nPrev; bool valid; bool disabled; - void writeback( OID& oid ){ + void writeback( OID& oid ) { reset( true ); writebackId = oid; } @@ -42,13 +42,13 @@ namespace mongo { code = _code; msg = _msg; } - void recordUpdate( bool _updateObjects , long long _nObjects , OID _upsertedId ){ + void recordUpdate( bool _updateObjects , long long _nObjects , OID _upsertedId ) { reset( true ); nObjects = _nObjects; updatedExisting = _updateObjects ? 
True : False; if ( _upsertedId.isSet() ) upsertedId = _upsertedId; - + } void recordDelete( long long nDeleted ) { reset( true ); @@ -68,20 +68,25 @@ namespace mongo { upsertedId.clear(); writebackId.clear(); } - void appendSelf( BSONObjBuilder &b ); + + /** + * @return if there is an err + */ + bool appendSelf( BSONObjBuilder &b , bool blankErr = true ); struct Disabled : boost::noncopyable { - Disabled( LastError * le ){ + Disabled( LastError * le ) { _le = le; - if ( _le ){ + if ( _le ) { _prev = _le->disabled; _le->disabled = true; - } else { + } + else { _prev = false; } } - - ~Disabled(){ + + ~Disabled() { if ( _le ) _le->disabled = _prev; } @@ -89,18 +94,19 @@ namespace mongo { LastError * _le; bool _prev; }; - + static LastError noError; }; extern class LastErrorHolder { public: LastErrorHolder() : _id( 0 ) {} + ~LastErrorHolder(); LastError * get( bool create = false ); - LastError * getSafe(){ + LastError * getSafe() { LastError * le = get(false); - if ( ! le ){ + if ( ! le ) { log( LL_ERROR ) << " no LastError! id: " << getID() << endl; assert( le ); } @@ -122,11 +128,11 @@ namespace mongo { void remove( int id ); void release(); - + /** when db receives a message/request, call this */ void startRequest( Message& m , LastError * connectionOwned ); LastError * startRequest( Message& m , int clientId ); - + void disconnect( int clientId ); // used to disable lastError reporting while processing a killCursors message @@ -135,13 +141,15 @@ namespace mongo { private: ThreadLocalValue _id; boost::thread_specific_ptr _tl; - + struct Status { time_t time; LastError *lerr; }; + typedef map IDMap; + static mongo::mutex _idsmutex; - map _ids; + IDMap _ids; } lastError; void raiseError(int code , const char *msg); diff --git a/db/matcher.cpp b/db/matcher.cpp index cd62563..38e8e05 100644 --- a/db/matcher.cpp +++ b/db/matcher.cpp @@ -30,7 +30,7 @@ #include "pdfile.h" namespace { - inline pcrecpp::RE_Options flags2options(const char* flags){ + inline pcrecpp::RE_Options flags2options(const char* flags) { pcrecpp::RE_Options options; options.set_utf8(true); while ( flags && *flags ) { @@ -52,7 +52,7 @@ namespace { namespace mongo { extern BSONObj staticNull; - + class Where { public: Where() { @@ -64,22 +64,22 @@ namespace mongo { if ( scope.get() ) scope->execSetup( "_mongo.readOnly = false;" , "make not read only" ); - if ( jsScope ){ + if ( jsScope ) { delete jsScope; jsScope = 0; } func = 0; } - + auto_ptr scope; ScriptingFunction func; BSONObj *jsScope; - + void setFunc(const char *code) { massert( 10341 , "scope has to be created first!" 
, scope.get() ); func = scope->createFunction( code ); } - + }; Matcher::~Matcher() { @@ -87,37 +87,48 @@ namespace mongo { where = 0; } - ElementMatcher::ElementMatcher( BSONElement _e , int _op, bool _isNot ) : toMatch( _e ) , compareOp( _op ), isNot( _isNot ) { - if ( _op == BSONObj::opMOD ){ + ElementMatcher::ElementMatcher( BSONElement _e , int _op, bool _isNot ) + : toMatch( _e ) , compareOp( _op ), isNot( _isNot ), subMatcherOnPrimitives(false) { + if ( _op == BSONObj::opMOD ) { BSONObj o = _e.embeddedObject(); mod = o["0"].numberInt(); modm = o["1"].numberInt(); - + uassert( 10073 , "mod can't be 0" , mod ); } - else if ( _op == BSONObj::opTYPE ){ + else if ( _op == BSONObj::opTYPE ) { type = (BSONType)(_e.numberInt()); } - else if ( _op == BSONObj::opELEM_MATCH ){ + else if ( _op == BSONObj::opELEM_MATCH ) { BSONElement m = _e; uassert( 12517 , "$elemMatch needs an Object" , m.type() == Object ); - subMatcher.reset( new Matcher( m.embeddedObject() ) ); + BSONObj x = m.embeddedObject(); + if ( x.firstElement().getGtLtOp() == 0 ) { + subMatcher.reset( new Matcher( x ) ); + subMatcherOnPrimitives = false; + } + else { + // meant to act on primitives + subMatcher.reset( new Matcher( BSON( "" << x ) ) ); + subMatcherOnPrimitives = true; + } } } - ElementMatcher::ElementMatcher( BSONElement _e , int _op , const BSONObj& array, bool _isNot ) - : toMatch( _e ) , compareOp( _op ), isNot( _isNot ) { - + ElementMatcher::ElementMatcher( BSONElement _e , int _op , const BSONObj& array, bool _isNot ) + : toMatch( _e ) , compareOp( _op ), isNot( _isNot ), subMatcherOnPrimitives(false) { + myset.reset( new set() ); - + BSONObjIterator i( array ); while ( i.more() ) { BSONElement ie = i.next(); - if ( _op == BSONObj::opALL && ie.type() == Object && ie.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ){ + if ( _op == BSONObj::opALL && ie.type() == Object && ie.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) { shared_ptr s; s.reset( new Matcher( ie.embeddedObject().firstElement().embeddedObjectUserCheck() ) ); allMatchers.push_back( s ); - } else if ( ie.type() == RegEx ) { + } + else if ( ie.type() == RegEx ) { if ( !myregex.get() ) { myregex.reset( new vector< RegexMatcher >() ); } @@ -132,19 +143,20 @@ namespace mongo { string prefix = simpleRegex(rm.regex, rm.flags, &purePrefix); if (purePrefix) rm.prefix = prefix; - } else { + } + else { myset->insert(ie); } } - - if ( allMatchers.size() ){ + + if ( allMatchers.size() ) { uassert( 13020 , "with $all, can't mix $elemMatch and others" , myset->size() == 0 && !myregex.get()); } - + } - - - void Matcher::addRegex(const char *fieldName, const char *regex, const char *flags, bool isNot){ + + + void Matcher::addRegex(const char *fieldName, const char *regex, const char *flags, bool isNot) { if ( nRegex >= 4 ) { out() << "ERROR: too many regexes in query" << endl; @@ -158,106 +170,106 @@ namespace mongo { rm.isNot = isNot; nRegex++; - if (!isNot){ //TODO something smarter + if (!isNot) { //TODO something smarter bool purePrefix; string prefix = simpleRegex(regex, flags, &purePrefix); if (purePrefix) rm.prefix = prefix; } - } + } } - + bool Matcher::addOp( const BSONElement &e, const BSONElement &fe, bool isNot, const char *& regex, const char *&flags ) { const char *fn = fe.fieldName(); int op = fe.getGtLtOp( -1 ); - if ( op == -1 ){ - if ( !isNot && fn[1] == 'r' && fn[2] == 'e' && fn[3] == 'f' && fn[4] == 0 ){ + if ( op == -1 ) { + if ( !isNot && fn[1] == 'r' && fn[2] == 'e' && fn[3] == 'f' && fn[4] == 0 ) { 
return false; // { $ref : xxx } - treat as normal object } uassert( 10068 , (string)"invalid operator: " + fn , op != -1 ); } - - switch ( op ){ - case BSONObj::GT: - case BSONObj::GTE: - case BSONObj::LT: - case BSONObj::LTE:{ - shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); - _builders.push_back( b ); - b->appendAs(fe, e.fieldName()); - addBasic(b->done().firstElement(), op, isNot); - break; - } - case BSONObj::NE:{ - haveNeg = true; - shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); - _builders.push_back( b ); - b->appendAs(fe, e.fieldName()); - addBasic(b->done().firstElement(), BSONObj::NE, isNot); - break; - } - case BSONObj::opALL: - all = true; - case BSONObj::opIN: - uassert( 13276 , "$in needs an array" , fe.isABSONObj() ); - basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) ); - break; - case BSONObj::NIN: - uassert( 13277 , "$nin needs an array" , fe.isABSONObj() ); - haveNeg = true; - basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) ); - break; - case BSONObj::opMOD: - case BSONObj::opTYPE: - case BSONObj::opELEM_MATCH: { - shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); - _builders.push_back( b ); - b->appendAs(fe, e.fieldName()); - // these are types where ElementMatcher has all the info - basics.push_back( ElementMatcher( b->done().firstElement() , op, isNot ) ); - break; - } - case BSONObj::opSIZE:{ - shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); - _builders.push_back( b ); - b->appendAs(fe, e.fieldName()); - addBasic(b->done().firstElement(), BSONObj::opSIZE, isNot); - haveSize = true; - break; - } - case BSONObj::opEXISTS:{ - shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); - _builders.push_back( b ); - b->appendAs(fe, e.fieldName()); - addBasic(b->done().firstElement(), BSONObj::opEXISTS, isNot); - break; - } - case BSONObj::opREGEX:{ - uassert( 13032, "can't use $not with $regex, use BSON regex type instead", !isNot ); - if ( fe.type() == RegEx ){ - regex = fe.regex(); - flags = fe.regexFlags(); - } - else { - regex = fe.valuestrsafe(); - } - break; - } - case BSONObj::opOPTIONS:{ - uassert( 13029, "can't use $not with $options, use BSON regex type instead", !isNot ); - flags = fe.valuestrsafe(); - break; - } - case BSONObj::opNEAR: - case BSONObj::opWITHIN: - case BSONObj::opMAX_DISTANCE: - break; - default: - uassert( 10069 , (string)"BUG - can't operator for: " + fn , 0 ); - } + + switch ( op ) { + case BSONObj::GT: + case BSONObj::GTE: + case BSONObj::LT: + case BSONObj::LTE: { + shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); + _builders.push_back( b ); + b->appendAs(fe, e.fieldName()); + addBasic(b->done().firstElement(), op, isNot); + break; + } + case BSONObj::NE: { + haveNeg = true; + shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); + _builders.push_back( b ); + b->appendAs(fe, e.fieldName()); + addBasic(b->done().firstElement(), BSONObj::NE, isNot); + break; + } + case BSONObj::opALL: + all = true; + case BSONObj::opIN: + uassert( 13276 , "$in needs an array" , fe.isABSONObj() ); + basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) ); + break; + case BSONObj::NIN: + uassert( 13277 , "$nin needs an array" , fe.isABSONObj() ); + haveNeg = true; + basics.push_back( ElementMatcher( e , op , fe.embeddedObject(), isNot ) ); + break; + case BSONObj::opMOD: + case BSONObj::opTYPE: + case BSONObj::opELEM_MATCH: { + shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); + _builders.push_back( b ); + b->appendAs(fe, e.fieldName()); + // these are 
types where ElementMatcher has all the info + basics.push_back( ElementMatcher( b->done().firstElement() , op, isNot ) ); + break; + } + case BSONObj::opSIZE: { + shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); + _builders.push_back( b ); + b->appendAs(fe, e.fieldName()); + addBasic(b->done().firstElement(), BSONObj::opSIZE, isNot); + haveSize = true; + break; + } + case BSONObj::opEXISTS: { + shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() ); + _builders.push_back( b ); + b->appendAs(fe, e.fieldName()); + addBasic(b->done().firstElement(), BSONObj::opEXISTS, isNot); + break; + } + case BSONObj::opREGEX: { + uassert( 13032, "can't use $not with $regex, use BSON regex type instead", !isNot ); + if ( fe.type() == RegEx ) { + regex = fe.regex(); + flags = fe.regexFlags(); + } + else { + regex = fe.valuestrsafe(); + } + break; + } + case BSONObj::opOPTIONS: { + uassert( 13029, "can't use $not with $options, use BSON regex type instead", !isNot ); + flags = fe.valuestrsafe(); + break; + } + case BSONObj::opNEAR: + case BSONObj::opWITHIN: + case BSONObj::opMAX_DISTANCE: + break; + default: + uassert( 10069 , (string)"BUG - can't operator for: " + fn , 0 ); + } return true; } - + void Matcher::parseOr( const BSONElement &e, bool subMatcher, list< shared_ptr< Matcher > > &matchers ) { uassert( 13090, "nested $or/$nor not allowed", !subMatcher ); uassert( 13086, "$or/$nor must be a nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 ); @@ -276,14 +288,16 @@ namespace mongo { return false; if ( ef[ 1 ] == 'o' && ef[ 2 ] == 'r' && ef[ 3 ] == 0 ) { parseOr( e, subMatcher, _orMatchers ); - } else if ( ef[ 1 ] == 'n' && ef[ 2 ] == 'o' && ef[ 3 ] == 'r' && ef[ 4 ] == 0 ) { + } + else if ( ef[ 1 ] == 'n' && ef[ 2 ] == 'o' && ef[ 3 ] == 'r' && ef[ 4 ] == 0 ) { parseOr( e, subMatcher, _norMatchers ); - } else { + } + else { return false; } return true; } - + /* _jsobj - the query pattern */ Matcher::Matcher(const BSONObj &_jsobj, bool subMatcher) : @@ -293,6 +307,8 @@ namespace mongo { while ( i.more() ) { BSONElement e = i.next(); + uassert( 13629 , "can't have undefined in a query expression" , e.type() != Undefined ); + if ( parseOrNor( e, subMatcher ) ) { continue; } @@ -301,7 +317,7 @@ namespace mongo { // $where: function()... uassert( 10066 , "$where occurs twice?", where == 0 ); uassert( 10067 , "$where query, but no script engine", globalScriptEngine ); - massert( 13089 , "no current client needed for $where" , haveClient() ); + massert( 13089 , "no current client needed for $where" , haveClient() ); where = new Where(); where->scope = globalScriptEngine->getPooledScope( cc().ns() ); where->scope->localConnect( cc().database()->name.c_str() ); @@ -314,7 +330,7 @@ namespace mongo { const char *code = e.valuestr(); where->setFunc(code); } - + where->scope->execSetup( "_mongo.readOnly = true;" , "make read only" ); continue; @@ -324,7 +340,7 @@ namespace mongo { addRegex( e.fieldName(), e.regex(), e.regexFlags() ); continue; } - + // greater than / less than... 
// e.g., e == { a : { $gt : 3 } } // or @@ -333,35 +349,36 @@ namespace mongo { // support {$regex:"a|b", $options:"imx"} const char* regex = NULL; const char* flags = ""; - + // e.g., fe == { $gt : 3 } BSONObjIterator j(e.embeddedObject()); bool isOperator = false; while ( j.more() ) { BSONElement fe = j.next(); const char *fn = fe.fieldName(); - + if ( fn[0] == '$' && fn[1] ) { isOperator = true; - + if ( fn[1] == 'n' && fn[2] == 'o' && fn[3] == 't' && fn[4] == 0 ) { haveNeg = true; switch( fe.type() ) { - case Object: { - BSONObjIterator k( fe.embeddedObject() ); - uassert( 13030, "$not cannot be empty", k.more() ); - while( k.more() ) { - addOp( e, k.next(), true, regex, flags ); - } - break; + case Object: { + BSONObjIterator k( fe.embeddedObject() ); + uassert( 13030, "$not cannot be empty", k.more() ); + while( k.more() ) { + addOp( e, k.next(), true, regex, flags ); } - case RegEx: - addRegex( e.fieldName(), fe.regex(), fe.regexFlags(), true ); - break; - default: - uassert( 13031, "invalid use of $not", false ); + break; + } + case RegEx: + addRegex( e.fieldName(), fe.regex(), fe.regexFlags(), true ); + break; + default: + uassert( 13031, "invalid use of $not", false ); } - } else { + } + else { if ( !addOp( e, fe, false, regex, flags ) ) { isOperator = false; break; @@ -373,43 +390,43 @@ namespace mongo { break; } } - if (regex){ + if (regex) { addRegex(e.fieldName(), regex, flags); } if ( isOperator ) continue; } - if ( e.type() == Array ){ + if ( e.type() == Array ) { hasArray = true; } else if( strcmp(e.fieldName(), "$atomic") == 0 ) { _atomic = e.trueValue(); continue; } - + // normal, simple case e.g. { a : "foo" } addBasic(e, BSONObj::Equality, false); } } - + Matcher::Matcher( const Matcher &other, const BSONObj &key ) : - where(0), constrainIndexKey_( key ), haveSize(), all(), hasArray(0), haveNeg(), _atomic(false), nRegex(0) { + where(0), constrainIndexKey_( key ), haveSize(), all(), hasArray(0), haveNeg(), _atomic(false), nRegex(0) { // do not include fields which would make keyMatch() false for( vector< ElementMatcher >::const_iterator i = other.basics.begin(); i != other.basics.end(); ++i ) { if ( key.hasField( i->toMatch.fieldName() ) ) { switch( i->compareOp ) { - case BSONObj::opSIZE: - case BSONObj::opALL: - case BSONObj::NE: - case BSONObj::NIN: - break; - default: { - if ( !i->isNot && i->toMatch.type() != Array ) { - basics.push_back( *i ); - } + case BSONObj::opSIZE: + case BSONObj::opALL: + case BSONObj::NE: + case BSONObj::NIN: + break; + default: { + if ( !i->isNot && i->toMatch.type() != Array ) { + basics.push_back( *i ); } } + } } } for( int i = 0; i < other.nRegex; ++i ) { @@ -421,29 +438,29 @@ namespace mongo { _orMatchers.push_back( shared_ptr< Matcher >( new Matcher( **i, key ) ) ); } } - + inline bool regexMatches(const RegexMatcher& rm, const BSONElement& e) { - switch (e.type()){ - case String: - case Symbol: - if (rm.prefix.empty()) - return rm.re->PartialMatch(e.valuestr()); - else - return !strncmp(e.valuestr(), rm.prefix.c_str(), rm.prefix.size()); - case RegEx: - return !strcmp(rm.regex, e.regex()) && !strcmp(rm.flags, e.regexFlags()); - default: - return false; + switch (e.type()) { + case String: + case Symbol: + if (rm.prefix.empty()) + return rm.re->PartialMatch(e.valuestr()); + else + return !strncmp(e.valuestr(), rm.prefix.c_str(), rm.prefix.size()); + case RegEx: + return !strcmp(rm.regex, e.regex()) && !strcmp(rm.flags, e.regexFlags()); + default: + return false; } } - + inline int Matcher::valuesMatch(const BSONElement& l, const 
BSONElement& r, int op, const ElementMatcher& bm) { assert( op != BSONObj::NE && op != BSONObj::NIN ); - + if ( op == BSONObj::Equality ) { return l.valuesEqual(r); } - + if ( op == BSONObj::opIN ) { // { $in : [1,2,3] } int count = bm.myset->count(l); @@ -471,15 +488,15 @@ namespace mongo { } return count == r.number(); } - - if ( op == BSONObj::opMOD ){ + + if ( op == BSONObj::opMOD ) { if ( ! l.isNumber() ) return false; - + return l.numberLong() % bm.mod == bm.modm; } - - if ( op == BSONObj::opTYPE ){ + + if ( op == BSONObj::opTYPE ) { return bm.type == l.type(); } @@ -506,7 +523,7 @@ namespace mongo { return 0; return bm.toMatch.boolean() ? -1 : 1; } - + /* Check if a particular field matches. fieldName - field to match "a.b" if we are reaching into an embedded object. @@ -519,8 +536,8 @@ namespace mongo { { "a.b" : 3 } means obj.a.b == 3 { a : { $lt : 3 } } means obj.a < 3 - { a : { $in : [1,2] } } means [1,2].contains(obj.a) - + { a : { $in : [1,2] } } means [1,2].contains(obj.a) + return value -1 mismatch 0 missing element @@ -529,20 +546,20 @@ namespace mongo { int Matcher::matchesDotted(const char *fieldName, const BSONElement& toMatch, const BSONObj& obj, int compareOp, const ElementMatcher& em , bool isArr, MatchDetails * details ) { DEBUGMATCHER( "\t matchesDotted : " << fieldName << " hasDetails: " << ( details ? "yes" : "no" ) ); if ( compareOp == BSONObj::opALL ) { - - if ( em.allMatchers.size() ){ + + if ( em.allMatchers.size() ) { BSONElement e = obj.getFieldDotted( fieldName ); uassert( 13021 , "$all/$elemMatch needs to be applied to array" , e.type() == Array ); - - for ( unsigned i=0; imatches( f.embeddedObject() ) ){ + if ( em.allMatchers[i]->matches( f.embeddedObject() ) ) { found = true; break; } @@ -551,36 +568,32 @@ namespace mongo { if ( ! found ) return -1; } - + return 1; } - + if ( em.myset->size() == 0 && !em.myregex.get() ) return -1; // is this desired? 
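A minimal sketch, using stand-in types rather than the real BSONElement/BSONElementSet classes, of the set-membership idea behind the $in and $all handling in the surrounding hunks: the query's values are preloaded into a set, and matching reduces to membership tests against the values found at the (dotted) field.

    #include <set>
    #include <string>
    #include <vector>

    typedef std::string Value;   // illustrative stand-in for BSONElement

    // $in : true if the document's value appears in the query set
    bool inMatches( const std::set<Value>& querySet, const Value& docValue ) {
        return querySet.count( docValue ) > 0;
    }

    // $all : true only if every queried value appears among the document's values
    bool allMatches( const std::set<Value>& docValues, const std::vector<Value>& queryValues ) {
        for ( std::vector<Value>::const_iterator i = queryValues.begin(); i != queryValues.end(); ++i )
            if ( docValues.count( *i ) == 0 )
                return false;
        return true;
    }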
- - BSONObjSetDefaultOrder actualKeys; - IndexSpec( BSON( fieldName << 1 ) ).getKeys( obj, actualKeys ); - if ( actualKeys.size() == 0 ) - return 0; - + + BSONElementSet myValues; + obj.getFieldsDotted( fieldName , myValues ); + for( set< BSONElement, element_lt >::const_iterator i = em.myset->begin(); i != em.myset->end(); ++i ) { // ignore nulls if ( i->type() == jstNULL ) continue; - // parallel traversal would be faster worst case I guess - BSONObjBuilder b; - b.appendAs( *i, "" ); - if ( !actualKeys.count( b.done() ) ) + + if ( myValues.count( *i ) == 0 ) return -1; } if ( !em.myregex.get() ) return 1; - + for( vector< RegexMatcher >::const_iterator i = em.myregex->begin(); i != em.myregex->end(); ++i ) { bool match = false; - for( BSONObjSetDefaultOrder::const_iterator j = actualKeys.begin(); j != actualKeys.end(); ++j ) { - if ( regexMatches( *i, j->firstElement() ) ) { + for( BSONElementSet::const_iterator j = myValues.begin(); j != myValues.end(); ++j ) { + if ( regexMatches( *i, *j ) ) { match = true; break; } @@ -588,10 +601,10 @@ namespace mongo { if ( !match ) return -1; } - + return 1; } // end opALL - + if ( compareOp == BSONObj::NE ) return matchesNe( fieldName, toMatch, obj, em , details ); if ( compareOp == BSONObj::NIN ) { @@ -613,18 +626,19 @@ namespace mongo { } return 1; } - + BSONElement e; bool indexed = !constrainIndexKey_.isEmpty(); if ( indexed ) { e = obj.getFieldUsingIndexNames(fieldName, constrainIndexKey_); - if( e.eoo() ){ + if( e.eoo() ) { cout << "obj: " << obj << endl; cout << "fieldName: " << fieldName << endl; cout << "constrainIndexKey_: " << constrainIndexKey_ << endl; assert( !e.eoo() ); } - } else { + } + else { const char *p = strchr(fieldName, '.'); if ( p ) { @@ -662,7 +676,7 @@ namespace mongo { if ( details ) details->elemMatchKey = z.fieldName(); return 1; - } + } else if ( cmp < 0 ) { found = true; } @@ -671,7 +685,7 @@ namespace mongo { return found ? -1 : retMissing( em ); } - if( p ) { + if( p ) { return retMissing( em ); } else { @@ -681,21 +695,31 @@ namespace mongo { if ( compareOp == BSONObj::opEXISTS ) { return ( e.eoo() ^ ( toMatch.boolean() ^ em.isNot ) ) ? 
1 : -1; - } else if ( ( e.type() != Array || indexed || compareOp == BSONObj::opSIZE ) && - valuesMatch(e, toMatch, compareOp, em ) ) { + } + else if ( ( e.type() != Array || indexed || compareOp == BSONObj::opSIZE ) && + valuesMatch(e, toMatch, compareOp, em ) ) { return 1; - } else if ( e.type() == Array && compareOp != BSONObj::opSIZE ) { + } + else if ( e.type() == Array && compareOp != BSONObj::opSIZE ) { BSONObjIterator ai(e.embeddedObject()); while ( ai.moreWithEOO() ) { BSONElement z = ai.next(); - - if ( compareOp == BSONObj::opELEM_MATCH ){ - // SERVER-377 - if ( z.type() == Object && em.subMatcher->matches( z.embeddedObject() ) ){ - if ( details ) - details->elemMatchKey = z.fieldName(); - return 1; + + if ( compareOp == BSONObj::opELEM_MATCH ) { + if ( z.type() == Object ) { + if ( em.subMatcher->matches( z.embeddedObject() ) ) { + if ( details ) + details->elemMatchKey = z.fieldName(); + return 1; + } + } + else if ( em.subMatcherOnPrimitives ) { + if ( z.type() && em.subMatcher->matches( z.wrap( "" ) ) ) { + if ( details ) + details->elemMatchKey = z.fieldName(); + return 1; + } } } else { @@ -707,12 +731,12 @@ namespace mongo { } } - - if ( compareOp == BSONObj::Equality && e.woCompare( toMatch , false ) == 0 ){ + + if ( compareOp == BSONObj::Equality && e.woCompare( toMatch , false ) == 0 ) { // match an entire array to itself return 1; } - + } else if ( e.eoo() ) { // 0 indicates "missing element" @@ -745,7 +769,8 @@ namespace mongo { if ( ( bm.compareOp == BSONObj::NE ) ^ bm.isNot ) { return false; } - } else { + } + else { if ( !bm.isNot ) { return false; } @@ -760,7 +785,8 @@ namespace mongo { BSONElement e = jsobj.getFieldUsingIndexNames(rm.fieldName, constrainIndexKey_); if ( !e.eoo() ) s.insert( e ); - } else { + } + else { jsobj.getFieldsDotted( rm.fieldName, s ); } bool match = false; @@ -770,11 +796,11 @@ namespace mongo { if ( !match ^ rm.isNot ) return false; } - + if ( _orMatchers.size() > 0 ) { bool match = false; for( list< shared_ptr< Matcher > >::const_iterator i = _orMatchers.begin(); - i != _orMatchers.end(); ++i ) { + i != _orMatchers.end(); ++i ) { // SERVER-205 don't submit details - we don't want to track field // matched within $or, and at this point we've already loaded the // whole document @@ -787,55 +813,56 @@ namespace mongo { return false; } } - + if ( _norMatchers.size() > 0 ) { for( list< shared_ptr< Matcher > >::const_iterator i = _norMatchers.begin(); - i != _norMatchers.end(); ++i ) { + i != _norMatchers.end(); ++i ) { // SERVER-205 don't submit details - we don't want to track field // matched within $nor, and at this point we've already loaded the // whole document if ( (*i)->matches( jsobj ) ) { return false; } - } + } } - + for( vector< shared_ptr< FieldRangeVector > >::const_iterator i = _orConstraints.begin(); - i != _orConstraints.end(); ++i ) { + i != _orConstraints.end(); ++i ) { if ( (*i)->matches( jsobj ) ) { return false; } } - + if ( where ) { if ( where->func == 0 ) { uassert( 10070 , "$where compile error", false); return false; // didn't compile } - - if ( where->jsScope ){ + + if ( where->jsScope ) { where->scope->init( where->jsScope ); } where->scope->setThis( const_cast< BSONObj * >( &jsobj ) ); where->scope->setObject( "obj", const_cast< BSONObj & >( jsobj ) ); where->scope->setBoolean( "fullObject" , true ); // this is a hack b/c fullObject used to be relevant - + int err = where->scope->invoke( where->func , BSONObj() , 1000 * 60 , false ); where->scope->setThis( 0 ); if ( err == -3 ) { // INVOKE_ERROR stringstream 
ss; - ss << "error on invocation of $where function:\n" + ss << "error on invocation of $where function:\n" << where->scope->getError(); uassert( 10071 , ss.str(), false); return false; - } else if ( err != 0 ) { // ! INVOKE_SUCCESS + } + else if ( err != 0 ) { // ! INVOKE_SUCCESS uassert( 10072 , "unknown error in invocation of $where function", false); - return false; + return false; } return where->scope->getBoolean( "return" ) != 0; } - + return true; } @@ -880,9 +907,9 @@ namespace mongo { } } return true; - } - - + } + + /*- just for testing -- */ #pragma pack(1) struct JSObj1 { @@ -946,7 +973,7 @@ namespace mongo { assert( !n.matches(j2) ); BSONObj j0 = BSONObj(); -// BSONObj j0((const char *) &js0); +// BSONObj j0((const char *) &js0); Matcher p(j0); assert( p.matches(j1) ); assert( p.matches(j2) ); @@ -959,7 +986,7 @@ namespace mongo { RXTest() { } - + void run() { /* static const boost::regex e("(\\d{4}[- ]){3}\\d{4}"); @@ -969,7 +996,7 @@ namespace mongo { */ int ret = 0; - + pcre_config( PCRE_CONFIG_UTF8 , &ret ); massert( 10342 , "pcre not compiled with utf8 support" , ret ); @@ -987,7 +1014,7 @@ namespace mongo { pcre_config( PCRE_CONFIG_UNICODE_PROPERTIES , &ret ); if ( ! ret ) cout << "warning: some regex utf8 things will not work. pcre build doesn't have --enable-unicode-properties" << endl; - + } } rxtest; diff --git a/db/matcher.h b/db/matcher.h index a4e1667..d242df6 100644 --- a/db/matcher.h +++ b/db/matcher.h @@ -24,7 +24,7 @@ #include namespace mongo { - + class Cursor; class CoveredIndexMatcher; class Matcher; @@ -40,11 +40,9 @@ namespace mongo { bool isNot; RegexMatcher() : isNot() {} }; - - struct element_lt - { - bool operator()(const BSONElement& l, const BSONElement& r) const - { + + struct element_lt { + bool operator()(const BSONElement& l, const BSONElement& r) const { int x = (int) l.canonicalType() - (int) r.canonicalType(); if ( x < 0 ) return true; else if ( x > 0 ) return false; @@ -52,17 +50,17 @@ namespace mongo { } }; - + class ElementMatcher { public: - + ElementMatcher() { } - + ElementMatcher( BSONElement _e , int _op, bool _isNot ); - + ElementMatcher( BSONElement _e , int _op , const BSONObj& array, bool _isNot ); - + ~ElementMatcher() { } BSONElement toMatch; @@ -70,13 +68,14 @@ namespace mongo { bool isNot; shared_ptr< set > myset; shared_ptr< vector > myregex; - + // these are for specific operators int mod; int modm; BSONType type; shared_ptr subMatcher; + bool subMatcherOnPrimitives ; vector< shared_ptr > allMatchers; }; @@ -85,15 +84,15 @@ namespace mongo { class DiskLoc; struct MatchDetails { - MatchDetails(){ + MatchDetails() { reset(); } - - void reset(){ + + void reset() { loadedObject = false; elemMatchKey = 0; } - + string toString() const { stringstream ss; ss << "loadedObject: " << loadedObject << " "; @@ -129,7 +128,7 @@ namespace mongo { const char *fieldName, const BSONElement &toMatch, const BSONObj &obj, const ElementMatcher&bm, MatchDetails * details ); - + public: static int opDirection(int op) { return op <= BSONObj::LTE ? 
-1 : 1; @@ -140,14 +139,14 @@ namespace mongo { ~Matcher(); bool matches(const BSONObj& j, MatchDetails * details = 0 ); - + // fast rough check to see if we must load the real doc - we also // compare field counts against covereed index matcher; for $or clauses // we just compare field counts bool keyMatch() const { return !all && !haveSize && !hasArray && !haveNeg; } bool atomic() const { return _atomic; } - + bool hasType( BSONObj::MatchType type ) const; string toString() const { @@ -157,18 +156,18 @@ namespace mongo { void addOrConstraint( const shared_ptr< FieldRangeVector > &frv ) { _orConstraints.push_back( frv ); } - + void popOrClause() { _orMatchers.pop_front(); } - + bool sameCriteriaCount( const Matcher &other ) const; - + private: // Only specify constrainIndexKey if matches() will be called with // index keys having empty string field names. Matcher( const Matcher &other, const BSONObj &constrainIndexKey ); - + void addBasic(const BSONElement &e, int c, bool isNot) { // TODO May want to selectively ignore these element types based on op type. if ( e.type() == MinKey || e.type() == MaxKey ) @@ -178,7 +177,7 @@ namespace mongo { void addRegex(const char *fieldName, const char *regex, const char *flags, bool isNot = false); bool addOp( const BSONElement &e, const BSONElement &fe, bool isNot, const char *& regex, const char *&flags ); - + int valuesMatch(const BSONElement& l, const BSONElement& r, int op, const ElementMatcher& bm); bool parseOrNor( const BSONElement &e, bool subMatcher ); @@ -194,7 +193,7 @@ namespace mongo { bool haveNeg; /* $atomic - if true, a multi document operation (some removes, updates) - should be done atomically. in that case, we do not yield - + should be done atomically. in that case, we do not yield - i.e. we stay locked the whole time. http://www.mongodb.org/display/DOCS/Removing[ */ @@ -211,26 +210,27 @@ namespace mongo { friend class CoveredIndexMatcher; }; - + // If match succeeds on index key, then attempt to match full document. class CoveredIndexMatcher : boost::noncopyable { public: CoveredIndexMatcher(const BSONObj &pattern, const BSONObj &indexKeyPattern , bool alwaysUseRecord=false ); - bool matches(const BSONObj &o){ return _docMatcher->matches( o ); } - bool matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details = 0 ); + bool matches(const BSONObj &o) { return _docMatcher->matches( o ); } + bool matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details = 0 , bool keyUsable = true ); bool matchesCurrent( Cursor * cursor , MatchDetails * details = 0 ); - bool needRecord(){ return _needRecord; } - + bool needRecord() { return _needRecord; } + Matcher& docMatcher() { return *_docMatcher; } // once this is called, shouldn't use this matcher for matching any more void advanceOrClause( const shared_ptr< FieldRangeVector > &frv ) { _docMatcher->addOrConstraint( frv ); - // TODO this is not an optimal optimization, since we could skip an entire + // TODO this is not yet optimal. Since we could skip an entire // or clause (if a match is impossible) between calls to advanceOrClause() + // we may not pop all the clauses we can. 
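The keyMatch() predicate above decides whether a query can be answered from index keys alone. A hedged illustration of which query shapes trip each flag; the flag names are the ones used in this header, the example queries are illustrative only:

    // all      : { a : { $all : [ 1, 2 ] } }   -- needs the full document
    // haveSize : { a : { $size : 3 } }         -- array length is not recoverable from one key
    // hasArray : { a : [ 1, 2 ] }              -- whole-array equality
    // haveNeg  : { a : { $ne : 5 } }           -- negations need the document
    struct KeyMatchFlags { bool all, haveSize, hasArray, haveNeg; };
    inline bool keyOnly( const KeyMatchFlags& f ) {
        return !f.all && !f.haveSize && !f.hasArray && !f.haveNeg;
    }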
_docMatcher->popOrClause(); } - + CoveredIndexMatcher *nextClauseMatcher( const BSONObj &indexKeyPattern, bool alwaysUseRecord=false ) { return new CoveredIndexMatcher( _docMatcher, indexKeyPattern, alwaysUseRecord ); } @@ -239,7 +239,10 @@ namespace mongo { void init( bool alwaysUseRecord ); shared_ptr< Matcher > _docMatcher; Matcher _keyMatcher; - bool _needRecord; + + bool _needRecord; // if the key itself isn't good enough to determine a positive match + bool _needRecordReject; // if the key itself isn't good enough to determine a negative match + bool _useRecordOnly; }; - + } // namespace mongo diff --git a/db/matcher_covered.cpp b/db/matcher_covered.cpp index 5866505..18892be 100644 --- a/db/matcher_covered.cpp +++ b/db/matcher_covered.cpp @@ -33,48 +33,51 @@ namespace mongo { CoveredIndexMatcher::CoveredIndexMatcher( const BSONObj &jsobj, const BSONObj &indexKeyPattern, bool alwaysUseRecord) : _docMatcher( new Matcher( jsobj ) ), - _keyMatcher( *_docMatcher, indexKeyPattern ) - { + _keyMatcher( *_docMatcher, indexKeyPattern ) { init( alwaysUseRecord ); } - + CoveredIndexMatcher::CoveredIndexMatcher( const shared_ptr< Matcher > &docMatcher, const BSONObj &indexKeyPattern , bool alwaysUseRecord ) : _docMatcher( docMatcher ), - _keyMatcher( *_docMatcher, indexKeyPattern ) - { + _keyMatcher( *_docMatcher, indexKeyPattern ) { init( alwaysUseRecord ); } void CoveredIndexMatcher::init( bool alwaysUseRecord ) { - _needRecord = - alwaysUseRecord || - ! ( _docMatcher->keyMatch() && - _keyMatcher.sameCriteriaCount( *_docMatcher ) && - ! _keyMatcher.hasType( BSONObj::opEXISTS ) ); - ; + _needRecord = + alwaysUseRecord || + ! ( _docMatcher->keyMatch() && + _keyMatcher.sameCriteriaCount( *_docMatcher ) ); + + _needRecordReject = _keyMatcher.hasType( BSONObj::opEXISTS ); } - - bool CoveredIndexMatcher::matchesCurrent( Cursor * cursor , MatchDetails * details ){ - return matches( cursor->currKey() , cursor->currLoc() , details ); + + bool CoveredIndexMatcher::matchesCurrent( Cursor * cursor , MatchDetails * details ) { + // bool keyUsable = ! cursor->isMultiKey() && check for $orish like conditions in matcher SERVER-1264 + return matches( cursor->currKey() , cursor->currLoc() , details ); } - - bool CoveredIndexMatcher::matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details ) { + + bool CoveredIndexMatcher::matches(const BSONObj &key, const DiskLoc &recLoc , MatchDetails * details , bool keyUsable ) { if ( details ) details->reset(); - - if ( !_keyMatcher.matches(key, details ) ){ - return false; - } - - if ( ! _needRecord ){ - return true; + + if ( _needRecordReject == false && keyUsable ) { + + if ( !_keyMatcher.matches(key, details ) ) { + return false; + } + + if ( ! _needRecord ) { + return true; + } + } if ( details ) details->loadedObject = true; - return _docMatcher->matches(recLoc.rec() , details ); + return _docMatcher->matches(recLoc.obj() , details ); } - + } diff --git a/db/minilex.h b/db/minilex.h index ba8df26..677514a 100644 --- a/db/minilex.h +++ b/db/minilex.h @@ -17,37 +17,39 @@ * along with this program. If not, see . */ +#error does anything use this? 
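A condensed, illustrative restatement of the CoveredIndexMatcher::matches() flow shown in the matcher_covered.cpp hunk above: when the key is usable and there are no reject-needs-record cases, try the cheap key match first and only consult the full document when the key alone cannot decide. The parameters stand in for the checks the real method performs; this is not the server API.

    bool coveredMatch( bool keyUsable, bool needRecordReject,
                       bool keyMatches, bool needRecord, bool docMatches ) {
        if ( !needRecordReject && keyUsable ) {
            if ( !keyMatches )
                return false;          // the key alone can reject
            if ( !needRecord )
                return true;           // the key alone can accept
        }
        return docMatches;             // otherwise fall back to the full document
    }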
+ namespace mongo { #if defined(_WIN32) - + } // namespace mongo #include using namespace stdext; namespace mongo { - + typedef const char * MyStr; struct less_str { bool operator()(const MyStr & x, const MyStr & y) const { if ( strcmp(x, y) > 0) return true; - + return false; } }; - + typedef hash_map > strhashmap; - + #else - + } // namespace mongo #include namespace mongo { - + using namespace __gnu_cxx; typedef const char * MyStr; @@ -55,106 +57,108 @@ namespace mongo { bool operator()(const MyStr & x, const MyStr & y) const { if ( strcmp(x, y) == 0) return true; - + return false; } }; - + typedef hash_map, eq_str > strhashmap; - + #endif - - struct MiniLex { + + /* + struct MiniLexNotUsed { strhashmap reserved; bool ic[256]; // ic=Identifier Character bool starter[256]; // dm: very dumb about comments and escaped quotes -- but we are faster then at least, // albeit returning too much (which is ok for jsbobj current usage). - void grabVariables(char *code /*modified and must stay in scope*/, strhashmap& vars) { - char *p = code; - char last = 0; - while ( *p ) { - if ( starter[*p] ) { - char *q = p+1; - while ( *q && ic[*q] ) q++; - const char *identifier = p; - bool done = *q == 0; - *q = 0; - if ( !reserved.count(identifier) ) { - // we try to be smart about 'obj' but have to be careful as obj.obj - // can happen; this is so that nFields is right for simplistic where cases - // so we can stop scanning in jsobj when we find the field of interest. - if ( strcmp(identifier,"obj")==0 && p>code && p[-1] != '.' ) - ; - else - vars[identifier] = 1; - } - if ( done ) - break; - p = q + 1; - continue; - } - - if ( *p == '\'' ) { - p++; - while ( *p && *p != '\'' ) p++; - } - else if ( *p == '"' ) { - p++; - while ( *p && *p != '"' ) p++; - } - p++; + void grabVariables(char *code , strhashmap& vars) { // 'code' modified and must stay in scope*/ + char *p = code; + char last = 0; + while ( *p ) { + if ( starter[*p] ) { + char *q = p+1; + while ( *q && ic[*q] ) q++; + const char *identifier = p; + bool done = *q == 0; + *q = 0; + if ( !reserved.count(identifier) ) { + // we try to be smart about 'obj' but have to be careful as obj.obj + // can happen; this is so that nFields is right for simplistic where cases + // so we can stop scanning in jsobj when we find the field of interest. + if ( strcmp(identifier,"obj")==0 && p>code && p[-1] != '.' 
) + ; + else + vars[identifier] = 1; } + if ( done ) + break; + p = q + 1; + continue; } - MiniLex() { - strhashmap atest; - atest["foo"] = 3; - assert( atest.count("bar") == 0 ); - assert( atest.count("foo") == 1 ); - assert( atest["foo"] == 3 ); - - for ( int i = 0; i < 256; i++ ) { - ic[i] = starter[i] = false; - } - for ( int i = 'a'; i <= 'z'; i++ ) - ic[i] = starter[i] = true; - for ( int i = 'A'; i <= 'Z'; i++ ) - ic[i] = starter[i] = true; - for ( int i = '0'; i <= '9'; i++ ) - ic[i] = true; - for ( int i = 128; i < 256; i++ ) - ic[i] = starter[i] = true; - ic['$'] = starter['$'] = true; - ic['_'] = starter['_'] = true; - - reserved["break"] = true; - reserved["case"] = true; - reserved["catch"] = true; - reserved["continue"] = true; - reserved["default"] = true; - reserved["delete"] = true; - reserved["do"] = true; - reserved["else"] = true; - reserved["finally"] = true; - reserved["for"] = true; - reserved["function"] = true; - reserved["if"] = true; - reserved["in"] = true; - reserved["instanceof"] = true; - reserved["new"] = true; - reserved["return"] = true; - reserved["switch"] = true; - reserved["this"] = true; - reserved["throw"] = true; - reserved["try"] = true; - reserved["typeof"] = true; - reserved["var"] = true; - reserved["void"] = true; - reserved["while"] = true; - reserved["with "] = true; + if ( *p == '\'' ) { + p++; + while ( *p && *p != '\'' ) p++; } - }; + else if ( *p == '"' ) { + p++; + while ( *p && *p != '"' ) p++; + } + p++; + } +} + +MiniLex() { + strhashmap atest; + atest["foo"] = 3; + assert( atest.count("bar") == 0 ); + assert( atest.count("foo") == 1 ); + assert( atest["foo"] == 3 ); + + for ( int i = 0; i < 256; i++ ) { + ic[i] = starter[i] = false; + } + for ( int i = 'a'; i <= 'z'; i++ ) + ic[i] = starter[i] = true; + for ( int i = 'A'; i <= 'Z'; i++ ) + ic[i] = starter[i] = true; + for ( int i = '0'; i <= '9'; i++ ) + ic[i] = true; + for ( int i = 128; i < 256; i++ ) + ic[i] = starter[i] = true; + ic['$'] = starter['$'] = true; + ic['_'] = starter['_'] = true; + + reserved["break"] = true; + reserved["case"] = true; + reserved["catch"] = true; + reserved["continue"] = true; + reserved["default"] = true; + reserved["delete"] = true; + reserved["do"] = true; + reserved["else"] = true; + reserved["finally"] = true; + reserved["for"] = true; + reserved["function"] = true; + reserved["if"] = true; + reserved["in"] = true; + reserved["instanceof"] = true; + reserved["new"] = true; + reserved["return"] = true; + reserved["switch"] = true; + reserved["this"] = true; + reserved["throw"] = true; + reserved["try"] = true; + reserved["typeof"] = true; + reserved["var"] = true; + reserved["void"] = true; + reserved["while"] = true; + reserved["with "] = true; +} +}; +*/ } // namespace mongo diff --git a/db/module.cpp b/db/module.cpp index 1e4f511..6a182f2 100644 --- a/db/module.cpp +++ b/db/module.cpp @@ -24,29 +24,29 @@ namespace mongo { std::list * Module::_all; Module::Module( const string& name ) - : _name( name ) , _options( (string)"Module " + name + " options" ){ + : _name( name ) , _options( (string)"Module " + name + " options" ) { if ( ! _all ) _all = new list(); _all->push_back( this ); } - Module::~Module(){} + Module::~Module() {} - void Module::addOptions( program_options::options_description& options ){ + void Module::addOptions( program_options::options_description& options ) { if ( ! 
_all ) { return; } - for ( list::iterator i=_all->begin(); i!=_all->end(); i++ ){ + for ( list::iterator i=_all->begin(); i!=_all->end(); i++ ) { Module* m = *i; options.add( m->_options ); } } - void Module::configAll( program_options::variables_map& params ){ + void Module::configAll( program_options::variables_map& params ) { if ( ! _all ) { return; } - for ( list::iterator i=_all->begin(); i!=_all->end(); i++ ){ + for ( list::iterator i=_all->begin(); i!=_all->end(); i++ ) { Module* m = *i; m->config( params ); } @@ -54,11 +54,11 @@ namespace mongo { } - void Module::initAll(){ + void Module::initAll() { if ( ! _all ) { return; } - for ( list::iterator i=_all->begin(); i!=_all->end(); i++ ){ + for ( list::iterator i=_all->begin(); i!=_all->end(); i++ ) { Module* m = *i; m->init(); } diff --git a/db/module.h b/db/module.h index d4939dd..e90923a 100644 --- a/db/module.h +++ b/db/module.h @@ -34,8 +34,8 @@ namespace mongo { public: Module( const string& name ); virtual ~Module(); - - boost::program_options::options_description_easy_init add_options(){ + + boost::program_options::options_description_easy_init add_options() { return _options.add_options(); } @@ -54,10 +54,10 @@ namespace mongo { */ virtual void shutdown() = 0; - const string& getName(){ return _name; } - + const string& getName() { return _name; } + // --- static things - + static void addOptions( program_options::options_description& options ); static void configAll( program_options::variables_map& params ); static void initAll(); diff --git a/db/modules/mms.cpp b/db/modules/mms.cpp index 40e9001..b180262 100644 --- a/db/modules/mms.cpp +++ b/db/modules/mms.cpp @@ -37,54 +37,54 @@ namespace mongo { MMS() : Module( "mms" ) , _baseurl( "" ) , _secsToSleep(1) , _token( "" ) , _name( "" ) { - + add_options() - ( "mms-url" , po::value()->default_value("http://mms.10gen.com/ping") , "url for mongo monitoring server" ) - ( "mms-token" , po::value() , "account token for mongo monitoring server" ) - ( "mms-name" , po::value() , "server name for mongo monitoring server" ) - ( "mms-interval" , po::value()->default_value(30) , "ping interval (in seconds) for mongo monitoring server" ) - ; - } - - ~MMS(){} - - void config( program_options::variables_map& params ){ + ( "mms-url" , po::value()->default_value("http://mms.10gen.com/ping") , "url for mongo monitoring server" ) + ( "mms-token" , po::value() , "account token for mongo monitoring server" ) + ( "mms-name" , po::value() , "server name for mongo monitoring server" ) + ( "mms-interval" , po::value()->default_value(30) , "ping interval (in seconds) for mongo monitoring server" ) + ; + } + + ~MMS() {} + + void config( program_options::variables_map& params ) { _baseurl = params["mms-url"].as(); - if ( params.count( "mms-token" ) ){ + if ( params.count( "mms-token" ) ) { _token = params["mms-token"].as(); } - if ( params.count( "mms-name" ) ){ + if ( params.count( "mms-name" ) ) { _name = params["mms-name"].as(); } _secsToSleep = params["mms-interval"].as(); } - - void run(){ - if ( _token.size() == 0 && _name.size() == 0 ){ + + void run() { + if ( _token.size() == 0 && _name.size() == 0 ) { log(1) << "mms not configured" << endl; return; } - if ( _token.size() == 0 ){ + if ( _token.size() == 0 ) { log() << "no token for mms - not running" << endl; return; } - - if ( _name.size() == 0 ){ + + if ( _name.size() == 0 ) { log() << "no name for mms - not running" << endl; return; } - + log() << "mms monitor staring... 
token:" << _token << " name:" << _name << " interval: " << _secsToSleep << endl; Client::initThread( "mms" ); Client& c = cc(); - - + + // TODO: using direct client is bad, but easy for now - - while ( ! inShutdown() ){ + + while ( ! inShutdown() ) { sleepsecs( _secsToSleep ); - + try { stringstream url; url << _baseurl << "?" @@ -92,47 +92,47 @@ namespace mongo { << "name=" << _name << "&" << "ts=" << time(0) ; - + BSONObjBuilder bb; // duplicated so the post has everything bb.append( "token" , _token ); bb.append( "name" , _name ); bb.appendDate( "ts" , jsTime() ); - + // any commands _add( bb , "buildinfo" ); _add( bb , "serverStatus" ); - + BSONObj postData = bb.obj(); - + log(1) << "mms url: " << url.str() << "\n\t post: " << postData << endl;; - + HttpClient c; HttpClient::Result r; int rc = c.post( url.str() , postData.jsonString() , &r ); log(1) << "\t response code: " << rc << endl; - if ( rc != 200 ){ + if ( rc != 200 ) { log() << "mms error response code:" << rc << endl; log(1) << "mms error body:" << r.getEntireResponse() << endl; } } - catch ( std::exception& e ){ + catch ( std::exception& e ) { log() << "mms exception: " << e.what() << endl; } } - + c.shutdown(); } - - void _add( BSONObjBuilder& postData , const char* cmd ){ + + void _add( BSONObjBuilder& postData , const char* cmd ) { Command * c = Command::findCommand( cmd ); - if ( ! c ){ + if ( ! c ) { log() << "MMS can't find command: " << cmd << endl; postData.append( cmd , "can't find command" ); return; } - - if ( c->locktype() ){ + + if ( c->locktype() ) { log() << "MMS can only use noLocking commands not: " << cmd << endl; postData.append( cmd , "not noLocking" ); return; @@ -147,24 +147,24 @@ namespace mongo { else postData.append( cmd , sub.obj() ); } - - void init(){ go(); } - void shutdown(){ + void init() { go(); } + + void shutdown() { // TODO } private: string _baseurl; int _secsToSleep; - + string _token; string _name; - + } /*mms*/ ; } - + diff --git a/db/mongommf.cpp b/db/mongommf.cpp new file mode 100644 index 0000000..5ae573d --- /dev/null +++ b/db/mongommf.cpp @@ -0,0 +1,391 @@ +// @file mongommf.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +/* this module adds some of our layers atop memory mapped files - specifically our handling of private views & such + if you don't care about journaling/durability (temp sort files & such) use MemoryMappedFile class, not this. +*/ + +#include "pch.h" +#include "cmdline.h" +#include "mongommf.h" +#include "dur.h" +#include "dur_journalformat.h" +#include "../util/mongoutils/str.h" + +using namespace mongoutils; + +namespace mongo { + +#if defined(_WIN32) + extern mutex mapViewMutex; + + __declspec(noinline) void makeChunkWritable(size_t chunkno) { + scoped_lock lk(mapViewMutex); + + if( writable.get(chunkno) ) // double check lock + return; + + // remap all maps in this chunk. 
common case is a single map, but could have more than one with smallfiles or .ns files + size_t chunkStart = chunkno * MemoryMappedFile::ChunkSize; + size_t chunkNext = chunkStart + MemoryMappedFile::ChunkSize; + + scoped_lock lk2(privateViews._mutex()); + map::iterator i = privateViews.finditer_inlock((void*) (chunkNext-1)); + while( 1 ) { + const pair x = *(--i); + MongoMMF *mmf = x.second; + if( mmf == 0 ) + break; + + size_t viewStart = (size_t) x.first; + size_t viewEnd = viewStart + mmf->length(); + if( viewEnd <= chunkStart ) + break; + + size_t protectStart = max(viewStart, chunkStart); + dassert(protectStart0&&protectSize<=MemoryMappedFile::ChunkSize); + + DWORD old; + bool ok = VirtualProtect((void*)protectStart, protectSize, PAGE_WRITECOPY, &old); + if( !ok ) { + DWORD e = GetLastError(); + log() << "VirtualProtect failed " << chunkno << hex << protectStart << ' ' << protectSize << ' ' << errnoWithDescription(e) << endl; + assert(false); + } + } + + writable.set(chunkno); + } + + __declspec(noinline) void makeChunkWritableOld(size_t chunkno) { + scoped_lock lk(mapViewMutex); + + if( writable.get(chunkno) ) + return; + + size_t loc = chunkno * MemoryMappedFile::ChunkSize; + void *Loc = (void*) loc; + size_t ofs; + MongoMMF *mmf = privateViews.find( (void *) (loc), ofs ); + MemoryMappedFile *f = (MemoryMappedFile*) mmf; + assert(f); + + size_t len = MemoryMappedFile::ChunkSize; + assert( mmf->getView() <= Loc ); + if( ofs + len > f->length() ) { + // at the very end of the map + len = f->length() - ofs; + } + else { + ; + } + + // todo: check this goes away on remap + DWORD old; + bool ok = VirtualProtect(Loc, len, PAGE_WRITECOPY, &old); + if( !ok ) { + DWORD e = GetLastError(); + log() << "VirtualProtect failed " << Loc << ' ' << len << ' ' << errnoWithDescription(e) << endl; + assert(false); + } + + writable.set(chunkno); + } + + // align so that there is only one map per chunksize so our bitset works right + void* mapaligned(HANDLE h, unsigned long long _len) { + void *loc = 0; + int n = 0; + while( 1 ) { + n++; + void *m = MapViewOfFileEx(h, FILE_MAP_READ, 0, 0, 0, loc); + if( m == 0 ) { + DWORD e = GetLastError(); + if( n == 0 ) { + // if first fails, it isn't going to work + log() << "mapaligned errno: " << e << endl; + break; + } + if( debug && n == 1 ) { + log() << "mapaligned info e:" << e << " at n=1" << endl; + } + if( n > 98 ) { + log() << "couldn't align mapped view of file len:" << _len/1024.0/1024.0 << "MB errno:" << e << endl; + break; + } + loc = (void*) (((size_t)loc)+MemoryMappedFile::ChunkSize); + continue; + } + + size_t x = (size_t) m; + if( x % MemoryMappedFile::ChunkSize == 0 ) { + void *end = (void*) (x+_len); + DEV log() << "mapaligned " << m << '-' << end << " len:" << _len << endl; + return m; + } + + UnmapViewOfFile(m); + x = ((x+MemoryMappedFile::ChunkSize-1) / MemoryMappedFile::ChunkSize) * MemoryMappedFile::ChunkSize; + loc = (void*) x; + if( n % 20 == 0 ) { + log() << "warning mapaligned n=20" << endl; + } + if( n > 100 ) { + log() << "couldn't align mapped view of file len:" << _len/1024.0/1024.0 << "MB" << endl; + break; + } + } + return 0; + } + + void* MemoryMappedFile::createPrivateMap() { + assert( maphandle ); + scoped_lock lk(mapViewMutex); + //void *p = mapaligned(maphandle, len); + void *p = MapViewOfFile(maphandle, FILE_MAP_READ, 0, 0, 0); + if ( p == 0 ) { + DWORD e = GetLastError(); + log() << "createPrivateMap failed " << filename() << " " << errnoWithDescription(e) << endl; + } + else { + clearWritableBits(p); + views.push_back(p); + 
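The Windows path above lazily write-enables fixed-size chunks of the private view. A minimal sketch, assuming only that chunks are ChunkSize-aligned address ranges, of how a pointer about to be written would map to the chunk index that makeChunkWritable() flips:

    #include <cstddef>

    // illustrative only: chunk index for an address, given the fixed chunk size
    inline size_t chunkNumberFor( const void* p, size_t chunkSize ) {
        return reinterpret_cast<size_t>( p ) / chunkSize;
    }
    // makeChunkWritable( chunkNumberFor( p, MemoryMappedFile::ChunkSize ) ) would then
    // VirtualProtect every mapped view overlapping that chunk, as the code above does.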
} + return p; + } + + void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) { + dbMutex.assertWriteLocked(); // short window where we are unmapped so must be exclusive + + // the mapViewMutex is to assure we get the same address on the remap + scoped_lock lk(mapViewMutex); + + clearWritableBits(oldPrivateAddr); + + if( !UnmapViewOfFile(oldPrivateAddr) ) { + DWORD e = GetLastError(); + log() << "UnMapViewOfFile failed " << filename() << ' ' << errnoWithDescription(e) << endl; + assert(false); + } + + // we want the new address to be the same as the old address in case things keep pointers around (as namespaceindex does). + void *p = MapViewOfFileEx(maphandle, FILE_MAP_READ, 0, 0, + /*dwNumberOfBytesToMap 0 means to eof*/0 /*len*/, + oldPrivateAddr); + + if ( p == 0 ) { + DWORD e = GetLastError(); + log() << "MapViewOfFileEx failed " << filename() << " " << errnoWithDescription(e) << endl; + assert(p); + } + assert(p == oldPrivateAddr); + return p; + } +#endif + + void MongoMMF::remapThePrivateView() { + assert( cmdLine.dur ); + + // todo 1.9 : it turns out we require that we always remap to the same address. + // so the remove / add isn't necessary and can be removed + privateViews.remove(_view_private); + _view_private = remapPrivateView(_view_private); + privateViews.add(_view_private, this); + } + + /** register view. threadsafe */ + void PointerToMMF::add(void *view, MongoMMF *f) { + assert(view); + assert(f); + mutex::scoped_lock lk(_m); + _views.insert( pair(view,f) ); + } + + /** de-register view. threadsafe */ + void PointerToMMF::remove(void *view) { + if( view ) { + mutex::scoped_lock lk(_m); + _views.erase(view); + } + } + + PointerToMMF::PointerToMMF() : _m("PointerToMMF") { +#if defined(SIZE_MAX) + size_t max = SIZE_MAX; +#else + size_t max = ~((size_t)0); +#endif + assert( max > (size_t) this ); // just checking that no one redef'd SIZE_MAX and that it is sane + + // this way we don't need any boundary checking in _find() + _views.insert( pair((void*)0,(MongoMMF*)0) ); + _views.insert( pair((void*)max,(MongoMMF*)0) ); + } + + /** underscore version of find is for when you are already locked + @param ofs out return our offset in the view + @return the MongoMMF to which this pointer belongs + */ + MongoMMF* PointerToMMF::find_inlock(void *p, /*out*/ size_t& ofs) { + // + // .................memory.......................... + // v1 p v2 + // [--------------------] [-------] + // + // e.g., _find(p) == v1 + // + const pair x = *(--_views.upper_bound(p)); + MongoMMF *mmf = x.second; + if( mmf ) { + size_t o = ((char *)p) - ((char*)x.first); + if( o < mmf->length() ) { + ofs = o; + return mmf; + } + } + return 0; + } + + /** find associated MMF object for a given pointer. + threadsafe + @param ofs out returns offset into the view of the pointer, if found. + @return the MongoMMF to which this pointer belongs. null if not found. 
+ */ + MongoMMF* PointerToMMF::find(void *p, /*out*/ size_t& ofs) { + mutex::scoped_lock lk(_m); + return find_inlock(p, ofs); + } + + PointerToMMF privateViews; + + /* void* MongoMMF::switchToPrivateView(void *readonly_ptr) { + assert( cmdLine.dur ); + assert( testIntent ); + + void *p = readonly_ptr; + + { + size_t ofs=0; + MongoMMF *mmf = ourReadViews.find(p, ofs); + if( mmf ) { + void *res = ((char *)mmf->_view_private) + ofs; + return res; + } + } + + { + size_t ofs=0; + MongoMMF *mmf = privateViews.find(p, ofs); + if( mmf ) { + log() << "dur: perf warning p=" << p << " is already in the writable view of " << mmf->filename() << endl; + return p; + } + } + + // did you call writing() with a pointer that isn't into a datafile? + log() << "dur error switchToPrivateView " << p << endl; + return p; + }*/ + + /* switch to _view_write. normally, this is a bad idea since your changes will not + show up in _view_private if there have been changes there; thus the leading underscore + as a tad of a "warning". but useful when done with some care, such as during + initialization. + */ + void* MongoMMF::_switchToWritableView(void *p) { + size_t ofs; + MongoMMF *f = privateViews.find(p, ofs); + assert( f ); + return (((char *)f->_view_write)+ofs); + } + + extern string dbpath; + + // here so that it is precomputed... + void MongoMMF::setPath(string f) { + string suffix; + string prefix; + bool ok = str::rSplitOn(f, '.', prefix, suffix); + uassert(13520, str::stream() << "MongoMMF only supports filenames in a certain format " << f, ok); + if( suffix == "ns" ) + _fileSuffixNo = dur::JEntry::DotNsSuffix; + else + _fileSuffixNo = (int) str::toUnsigned(suffix); + + _p = RelativePath::fromFullPath(prefix); + } + + bool MongoMMF::open(string fname, bool sequentialHint) { + setPath(fname); + _view_write = mapWithOptions(fname.c_str(), sequentialHint ? SEQUENTIAL : 0); + return finishOpening(); + } + + bool MongoMMF::create(string fname, unsigned long long& len, bool sequentialHint) { + setPath(fname); + _view_write = map(fname.c_str(), len, sequentialHint ? SEQUENTIAL : 0); + return finishOpening(); + } + + bool MongoMMF::finishOpening() { + if( _view_write ) { + if( cmdLine.dur ) { + _view_private = createPrivateMap(); + if( _view_private == 0 ) { + massert( 13636 , "createPrivateMap failed (look in log for error)" , false ); + } + privateViews.add(_view_private, this); // note that testIntent builds use this, even though it points to view_write then... + } + else { + _view_private = _view_write; + } + return true; + } + return false; + } + + MongoMMF::MongoMMF() : _willNeedRemap(false) { + _view_write = _view_private = 0; + } + + MongoMMF::~MongoMMF() { + close(); + } + + namespace dur { + void closingFileNotification(); + } + + /*virtual*/ void MongoMMF::close() { + { + if( cmdLine.dur && _view_write/*actually was opened*/ ) { + if( debug ) + log() << "closingFileNotication:" << filename() << endl; + dur::closingFileNotification(); + } + privateViews.remove(_view_private); + } + _view_write = _view_private = 0; + MemoryMappedFile::close(); + } + +} diff --git a/db/mongommf.h b/db/mongommf.h new file mode 100644 index 0000000..5da46fc --- /dev/null +++ b/db/mongommf.h @@ -0,0 +1,140 @@ +/** @file mongommf.h +* +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../util/mmap.h" +#include "../util/paths.h" + +namespace mongo { + + /** MongoMMF adds some layers atop memory mapped files - specifically our handling of private views & such. + if you don't care about journaling/durability (temp sort files & such) use MemoryMappedFile class, + not this. + */ + class MongoMMF : private MemoryMappedFile { + public: + MongoMMF(); + virtual ~MongoMMF(); + virtual void close(); + + /** @return true if opened ok. */ + bool open(string fname, bool sequentialHint); + + /** @return file length */ + unsigned long long length() const { return MemoryMappedFile::length(); } + + string filename() const { return MemoryMappedFile::filename(); } + + void flush(bool sync) { MemoryMappedFile::flush(sync); } + + /* Creates with length if DNE, otherwise uses existing file length, + passed length. + @param sequentialHint if true will be sequentially accessed + @return true for ok + */ + bool create(string fname, unsigned long long& len, bool sequentialHint); + + /* Get the "standard" view (which is the private one). + @return the private view. + */ + void* getView() const { return _view_private; } + + /* Get the "write" view (which is required for writing). + @return the write view. + */ + void* view_write() const { return _view_write; } + + + /* switch to _view_write. normally, this is a bad idea since your changes will not + show up in _view_private if there have been changes there; thus the leading underscore + as a tad of a "warning". but useful when done with some care, such as during + initialization. + */ + static void* _switchToWritableView(void *private_ptr); + + /** for a filename a/b/c.3 + filePath() is "a/b/c" + fileSuffixNo() is 3 + if the suffix is "ns", fileSuffixNo -1 + */ + RelativePath relativePath() const { + DEV assert( !_p._p.empty() ); + return _p; + } + + int fileSuffixNo() const { return _fileSuffixNo; } + + /** true if we have written. + set in PREPLOGBUFFER, it is NOT set immediately on write intent declaration. + reset to false in REMAPPRIVATEVIEW + */ + bool& willNeedRemap() { return _willNeedRemap; } + + void remapThePrivateView(); + + virtual bool isMongoMMF() { return true; } + + private: + + void *_view_write; + void *_view_private; + bool _willNeedRemap; + RelativePath _p; // e.g. "somepath/dbname" + int _fileSuffixNo; // e.g. 3. -1="ns" + + void setPath(string pathAndFileName); + bool finishOpening(); + }; + + /** for durability support we want to be able to map pointers to specific MongoMMF objects. + */ + class PointerToMMF : boost::noncopyable { + public: + PointerToMMF(); + + /** register view. \ + threadsafe + */ + void add(void *view, MongoMMF *f); + + /** de-register view. + threadsafe + */ + void remove(void *view); + + /** find associated MMF object for a given pointer. + threadsafe + @param ofs out returns offset into the view of the pointer, if found. + @return the MongoMMF to which this pointer belongs. null if not found. 
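A minimal sketch of the lookup technique PointerToMMF describes above: keep view start addresses in an ordered map, take upper_bound of the pointer and step back one entry to find the candidate view, then range-check. Stand-in types, not the real classes; the real code avoids the begin() check by inserting sentinel entries at 0 and SIZE_MAX.

    #include <map>
    #include <cstddef>

    // views maps a view's start address to its length
    const char* owningViewStart( const std::map<char*, size_t>& views, char* p ) {
        std::map<char*, size_t>::const_iterator i = views.upper_bound( p );
        if ( i == views.begin() )
            return 0;                                   // below every registered view
        --i;                                            // greatest start address <= p
        return p < i->first + i->second ? i->first : 0; // inside this view, or in a gap
    }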
+ */ + MongoMMF* find(void *p, /*out*/ size_t& ofs); + + /** for doing many finds in a row with one lock operation */ + mutex& _mutex() { return _m; } + MongoMMF* find_inlock(void *p, /*out*/ size_t& ofs); + + map::iterator finditer_inlock(void *p) { return _views.upper_bound(p); } + + private: + mutex _m; + map _views; + }; + + // allows a pointer into any private view of a MongoMMF to be resolved to the MongoMMF object + extern PointerToMMF privateViews; +} diff --git a/db/mongomutex.h b/db/mongomutex.h new file mode 100644 index 0000000..fac4113 --- /dev/null +++ b/db/mongomutex.h @@ -0,0 +1,239 @@ +// @file mongomutex.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#pragma once + +// note: include concurrency.h, not this. + +namespace mongo { + + /** the 'big lock' we use for most operations. a read/write lock. + there is one of these, dbMutex. + + generally if you need to declare a mutex use the right primitive class, not this. + + use readlock and writelock classes for scoped locks on this rather than direct + manipulation. + */ + class MongoMutex { + public: + MongoMutex(const char * name); + + /** @return + * > 0 write lock + * = 0 no lock + * < 0 read lock + */ + int getState() const { return _state.get(); } + + bool atLeastReadLocked() const { return _state.get() != 0; } + void assertAtLeastReadLocked() const { assert(atLeastReadLocked()); } + bool isWriteLocked() const { return getState() > 0; } + void assertWriteLocked() const { + assert( getState() > 0 ); + DEV assert( !_releasedEarly.get() ); + } + + // write lock. use the writelock scoped lock class, not this directly. + void lock() { + if ( _writeLockedAlready() ) + return; + + _state.set(1); + + Client *c = curopWaitingForLock( 1 ); // stats + _m.lock(); + curopGotLock(c); + + _minfo.entered(); + + MongoFile::markAllWritable(); // for _DEBUG validation -- a no op for release build + + _acquiredWriteLock(); + } + + // try write lock + bool lock_try( int millis ) { + if ( _writeLockedAlready() ) + return true; + + Client *c = curopWaitingForLock( 1 ); + bool got = _m.lock_try( millis ); + + if ( got ) { + curopGotLock(c); + _minfo.entered(); + _state.set(1); + MongoFile::markAllWritable(); // for _DEBUG validation -- a no op for release build + _acquiredWriteLock(); + } + + return got; + } + + // un write lock + void unlock() { + int s = _state.get(); + if( s > 1 ) { + _state.set(s-1); // recursive lock case + return; + } + if( s != 1 ) { + if( _releasedEarly.get() ) { + _releasedEarly.set(false); + return; + } + massert( 12599, "internal error: attempt to unlock when wasn't in a write lock", false); + } + _releasingWriteLock(); + MongoFile::unmarkAllWritable(); // _DEBUG validation + _state.set(0); + _minfo.leaving(); + _m.unlock(); + } + + /* unlock (write lock), and when unlock() is called later, + be smart then and don't unlock it again. 
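A simplified illustration (write path only, stand-in type, not the real class) of the recursion counting MongoMutex describes above: the underlying lock is touched only on the 0 <-> 1 transitions, while deeper recursive acquisitions just move the counter.

    struct WriteRecursion {
        int depth;
        WriteRecursion() : depth( 0 ) {}
        bool lock()   { return ++depth == 1; }   // true => caller takes the real write lock
        bool unlock() { return --depth == 0; }   // true => caller releases the real write lock
    };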
+ */ + void releaseEarly() { + assert( getState() == 1 ); // must not be recursive + assert( !_releasedEarly.get() ); + _releasedEarly.set(true); + unlock(); + } + + // read lock. don't call directly, use readlock. + void lock_shared() { + int s = _state.get(); + if( s ) { + if( s > 0 ) { + // already in write lock - just be recursive and stay write locked + _state.set(s+1); + } + else { + // already in read lock - recurse + _state.set(s-1); + } + } + else { + _state.set(-1); + Client *c = curopWaitingForLock( -1 ); + _m.lock_shared(); + curopGotLock(c); + } + } + + // try read lock + bool lock_shared_try( int millis ) { + int s = _state.get(); + if ( s ) { + // we already have a lock, so no need to try + lock_shared(); + return true; + } + + /* [dm] should there be + Client *c = curopWaitingForLock( 1 ); + here? i think so. seems to be missing. + */ + bool got = _m.lock_shared_try( millis ); + if ( got ) + _state.set(-1); + return got; + } + + void unlock_shared() { + int s = _state.get(); + if( s > 0 ) { + assert( s > 1 ); /* we must have done a lock write first to have s > 1 */ + _state.set(s-1); + return; + } + if( s < -1 ) { + _state.set(s+1); + return; + } + assert( s == -1 ); + _state.set(0); + _m.unlock_shared(); + } + + MutexInfo& info() { return _minfo; } + + private: + void _acquiredWriteLock(); + void _releasingWriteLock(); + + /* @return true if was already write locked. increments recursive lock count. */ + bool _writeLockedAlready(); + + RWLock _m; + + /* > 0 write lock with recurse count + < 0 read lock + */ + ThreadLocalValue _state; + + MutexInfo _minfo; + + public: + // indicates we need to call dur::REMAPPRIVATEVIEW on the next write lock + bool _remapPrivateViewRequested; + + private: + /* See the releaseEarly() method. + we use a separate TLS value for releasedEarly - that is ok as + our normal/common code path, we never even touch it */ + ThreadLocalValue _releasedEarly; + + /* this is for fsyncAndLock command. otherwise write lock's greediness will + make us block on any attempted write lock the the fsync's lock. + */ + //volatile bool _blockWrites; + }; + + extern MongoMutex &dbMutex; + + namespace dur { + void REMAPPRIVATEVIEW(); + void releasingWriteLock(); // because it's hard to include dur.h here + } + + inline void MongoMutex::_releasingWriteLock() { + dur::releasingWriteLock(); + } + + inline void MongoMutex::_acquiredWriteLock() { + if( _remapPrivateViewRequested ) { + dur::REMAPPRIVATEVIEW(); + dassert( !_remapPrivateViewRequested ); + } + } + + /* @return true if was already write locked. increments recursive lock count. */ + inline bool MongoMutex::_writeLockedAlready() { + int s = _state.get(); + if( s > 0 ) { + _state.set(s+1); + return true; + } + massert( 10293 , string("internal error: locks are not upgradeable: ") + sayClientState() , s == 0 ); + return false; + } + +} diff --git a/db/mr.cpp b/db/mr.cpp deleted file mode 100644 index 7786c85..0000000 --- a/db/mr.cpp +++ /dev/null @@ -1,721 +0,0 @@ -// mr.cpp - -/** - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "pch.h" -#include "db.h" -#include "instance.h" -#include "commands.h" -#include "../scripting/engine.h" -#include "../client/dbclient.h" -#include "../client/connpool.h" -#include "../client/parallel.h" -#include "queryoptimizer.h" -#include "matcher.h" -#include "clientcursor.h" - -namespace mongo { - - namespace mr { - - typedef vector BSONList; - - class MyCmp { - public: - MyCmp(){} - bool operator()( const BSONObj &l, const BSONObj &r ) const { - return l.firstElement().woCompare( r.firstElement() ) < 0; - } - }; - - typedef pair Data; - //typedef list< Data > InMemory; - typedef map< BSONObj,BSONList,MyCmp > InMemory; - - BSONObj reduceValues( BSONList& values , Scope * s , ScriptingFunction reduce , bool final , ScriptingFunction finalize ){ - uassert( 10074 , "need values" , values.size() ); - - int sizeEstimate = ( values.size() * values.begin()->getField( "value" ).size() ) + 128; - BSONObj key; - - BSONObjBuilder reduceArgs( sizeEstimate ); - boost::scoped_ptr valueBuilder; - - int sizeSoFar = 0; - unsigned n = 0; - for ( ; n ( 4 * 1024 * 1024 ) ){ - assert( n > 1 ); // if not, inf. loop - break; - } - - valueBuilder->append( ee ); - sizeSoFar += ee.size(); - } - assert(valueBuilder); - valueBuilder->done(); - BSONObj args = reduceArgs.obj(); - - s->invokeSafe( reduce , args ); - if ( s->type( "return" ) == Array ){ - uassert( 10075 , "reduce -> multiple not supported yet",0); - return BSONObj(); - } - - int endSizeEstimate = key.objsize() + ( args.objsize() / values.size() ); - - if ( n < values.size() ){ - BSONList x; - for ( ; n < values.size(); n++ ){ - x.push_back( values[n] ); - } - BSONObjBuilder temp( endSizeEstimate ); - temp.append( key.firstElement() ); - s->append( temp , "1" , "return" ); - x.push_back( temp.obj() ); - return reduceValues( x , s , reduce , final , finalize ); - } - - - - if ( finalize ){ - BSONObjBuilder b(endSizeEstimate); - b.appendAs( key.firstElement() , "_id" ); - s->append( b , "value" , "return" ); - s->invokeSafe( finalize , b.obj() ); - } - - BSONObjBuilder b(endSizeEstimate); - b.appendAs( key.firstElement() , final ? "_id" : "0" ); - s->append( b , final ? "value" : "1" , "return" ); - return b.obj(); - } - - class MRSetup { - public: - MRSetup( const string& _dbname , const BSONObj& cmdObj , bool markAsTemp = true ){ - static int jobNumber = 1; - - dbname = _dbname; - ns = dbname + "." + cmdObj.firstElement().valuestr(); - - verbose = cmdObj["verbose"].trueValue(); - keeptemp = cmdObj["keeptemp"].trueValue(); - - { // setup names - stringstream ss; - if ( ! keeptemp ) - ss << "tmp."; - ss << "mr." << cmdObj.firstElement().fieldName() << "_" << time(0) << "_" << jobNumber++; - tempShort = ss.str(); - tempLong = dbname + "." + tempShort; - incLong = tempLong + "_inc"; - - if ( ! keeptemp && markAsTemp ) - cc().addTempCollection( tempLong ); - - replicate = keeptemp; - - if ( cmdObj["out"].type() == String ){ - finalShort = cmdObj["out"].valuestr(); - replicate = true; - } - else - finalShort = tempShort; - - finalLong = dbname + "." 
+ finalShort; - - } - - { // code - mapCode = cmdObj["map"]._asCode(); - reduceCode = cmdObj["reduce"]._asCode(); - if ( cmdObj["finalize"].type() ){ - finalizeCode = cmdObj["finalize"]._asCode(); - } - checkCodeWScope( "map" , cmdObj ); - checkCodeWScope( "reduce" , cmdObj ); - checkCodeWScope( "finalize" , cmdObj ); - - if ( cmdObj["mapparams"].type() == Array ){ - mapparams = cmdObj["mapparams"].embeddedObjectUserCheck(); - } - - if ( cmdObj["scope"].type() == Object ){ - scopeSetup = cmdObj["scope"].embeddedObjectUserCheck(); - } - - } - - { // query options - if ( cmdObj["query"].type() == Object ){ - filter = cmdObj["query"].embeddedObjectUserCheck(); - } - - if ( cmdObj["sort"].type() == Object ){ - sort = cmdObj["sort"].embeddedObjectUserCheck(); - } - - if ( cmdObj["limit"].isNumber() ) - limit = cmdObj["limit"].numberLong(); - else - limit = 0; - } - } - - void checkCodeWScope( const char * field , const BSONObj& o ){ - BSONElement e = o[field]; - if ( e.type() != CodeWScope ) - return; - BSONObj x = e.codeWScopeObject(); - uassert( 13035 , (string)"can't use CodeWScope with map/reduce function: " + field , x.isEmpty() ); - } - - /** - @return number objects in collection - */ - long long renameIfNeeded( DBDirectClient& db ){ - if ( finalLong != tempLong ){ - db.dropCollection( finalLong ); - if ( db.count( tempLong ) ){ - BSONObj info; - uassert( 10076 , "rename failed" , db.runCommand( "admin" , BSON( "renameCollection" << tempLong << "to" << finalLong ) , info ) ); - } - } - return db.count( finalLong ); - } - - string dbname; - string ns; - - // options - bool verbose; - bool keeptemp; - bool replicate; - - // query options - - BSONObj filter; - BSONObj sort; - long long limit; - - // functions - - string mapCode; - string reduceCode; - string finalizeCode; - - BSONObj mapparams; - BSONObj scopeSetup; - - // output tables - string incLong; - - string tempShort; - string tempLong; - - string finalShort; - string finalLong; - - }; // end MRsetup - - class MRState { - public: - MRState( MRSetup& s ) : setup(s){ - scope = globalScriptEngine->getPooledScope( setup.dbname ); - scope->localConnect( setup.dbname.c_str() ); - - map = scope->createFunction( setup.mapCode.c_str() ); - if ( ! map ) - throw UserException( 9012, (string)"map compile failed: " + scope->getError() ); - - reduce = scope->createFunction( setup.reduceCode.c_str() ); - if ( ! reduce ) - throw UserException( 9013, (string)"reduce compile failed: " + scope->getError() ); - - if ( setup.finalizeCode.size() ) - finalize = scope->createFunction( setup.finalizeCode.c_str() ); - else - finalize = 0; - - if ( ! 
setup.scopeSetup.isEmpty() ) - scope->init( &setup.scopeSetup ); - - db.dropCollection( setup.tempLong ); - db.dropCollection( setup.incLong ); - - writelock l( setup.incLong ); - Client::Context ctx( setup.incLong ); - string err; - assert( userCreateNS( setup.incLong.c_str() , BSON( "autoIndexId" << 0 ) , err , false ) ); - - } - - void finalReduce( BSONList& values ){ - if ( values.size() == 0 ) - return; - - BSONObj key = values.begin()->firstElement().wrap( "_id" ); - BSONObj res = reduceValues( values , scope.get() , reduce , 1 , finalize ); - - writelock l( setup.tempLong ); - Client::Context ctx( setup.incLong ); - if ( setup.replicate ) - theDataFileMgr.insertAndLog( setup.tempLong.c_str() , res , false ); - else - theDataFileMgr.insertWithObjMod( setup.tempLong.c_str() , res , false ); - } - - - MRSetup& setup; - auto_ptr scope; - DBDirectClient db; - - ScriptingFunction map; - ScriptingFunction reduce; - ScriptingFunction finalize; - - }; - - class MRTL { - public: - MRTL( MRState& state ) - : _state( state ) - , _temp(new InMemory()) - { - _size = 0; - numEmits = 0; - } - - void reduceInMemory(){ - boost::shared_ptr old = _temp; - _temp.reset(new InMemory()); - _size = 0; - - for ( InMemory::iterator i=old->begin(); i!=old->end(); i++ ){ - BSONObj key = i->first; - BSONList& all = i->second; - - if ( all.size() == 1 ){ - // this key has low cardinality, so just write to db - writelock l(_state.setup.incLong); - Client::Context ctx(_state.setup.incLong.c_str()); - write( *(all.begin()) ); - } - else if ( all.size() > 1 ){ - BSONObj res = reduceValues( all , _state.scope.get() , _state.reduce , false , 0 ); - insert( res ); - } - } - } - - void dump(){ - writelock l(_state.setup.incLong); - Client::Context ctx(_state.setup.incLong); - - for ( InMemory::iterator i=_temp->begin(); i!=_temp->end(); i++ ){ - BSONList& all = i->second; - if ( all.size() < 1 ) - continue; - - for ( BSONList::iterator j=all.begin(); j!=all.end(); j++ ) - write( *j ); - } - _temp->clear(); - _size = 0; - - } - - void insert( const BSONObj& a ){ - BSONList& all = (*_temp)[a]; - all.push_back( a ); - _size += a.objsize() + 16; - } - - void checkSize(){ - if ( _size < 1024 * 5 ) - return; - - long before = _size; - reduceInMemory(); - log(1) << " mr: did reduceInMemory " << before << " -->> " << _size << endl; - - if ( _size < 1024 * 15 ) - return; - - dump(); - log(1) << " mr: dumping to db" << endl; - } - - private: - void write( BSONObj& o ){ - theDataFileMgr.insertWithObjMod( _state.setup.incLong.c_str() , o , true ); - } - - MRState& _state; - - boost::shared_ptr _temp; - long _size; - - public: - long long numEmits; - }; - - boost::thread_specific_ptr _tlmr; - - BSONObj fast_emit( const BSONObj& args ){ - uassert( 10077 , "fast_emit takes 2 args" , args.nFields() == 2 ); - uassert( 13069 , "an emit can't be more than 2mb" , args.objsize() < ( 2 * 1024 * 1024 ) ); - _tlmr->insert( args ); - _tlmr->numEmits++; - return BSONObj(); - } - - class MapReduceCommand : public Command { - public: - MapReduceCommand() : Command("mapReduce", false, "mapreduce"){} - virtual bool slaveOk() const { return true; } - - virtual void help( stringstream &help ) const { - help << "Run a map/reduce operation on the server.\n"; - help << "Note this is used for aggregation, not querying, in MongoDB.\n"; - help << "http://www.mongodb.org/display/DOCS/MapReduce"; - } - virtual LockType locktype() const { return NONE; } - bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - 
Timer t; - Client::GodScope cg; - Client& client = cc(); - CurOp * op = client.curop(); - - MRSetup mr( dbname , cmd ); - - log(1) << "mr ns: " << mr.ns << endl; - - if ( ! db.exists( mr.ns ) ){ - errmsg = "ns doesn't exist"; - return false; - } - - bool shouldHaveData = false; - - long long num = 0; - long long inReduce = 0; - - BSONObjBuilder countsBuilder; - BSONObjBuilder timingBuilder; - try { - - MRState state( mr ); - state.scope->injectNative( "emit" , fast_emit ); - - MRTL * mrtl = new MRTL( state ); - _tlmr.reset( mrtl ); - - ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) ) ); - long long mapTime = 0; - { - readlock lock( mr.ns ); - Client::Context ctx( mr.ns ); - - shared_ptr temp = bestGuessCursor( mr.ns.c_str(), mr.filter, mr.sort ); - auto_ptr cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.ns.c_str() ) ); - - Timer mt; - while ( cursor->ok() ){ - - if ( ! cursor->currentMatches() ){ - cursor->advance(); - continue; - } - - BSONObj o = cursor->current(); - cursor->advance(); - - if ( mr.verbose ) mt.reset(); - - state.scope->setThis( &o ); - if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) ) - throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() ); - - if ( mr.verbose ) mapTime += mt.micros(); - - num++; - if ( num % 100 == 0 ){ - ClientCursor::YieldLock yield (cursor.get()); - Timer t; - mrtl->checkSize(); - inReduce += t.micros(); - - if ( ! yield.stillOk() ){ - cursor.release(); - break; - } - - killCurrentOp.checkForInterrupt(); - } - pm.hit(); - - if ( mr.limit && num >= mr.limit ) - break; - } - } - pm.finished(); - - killCurrentOp.checkForInterrupt(); - - countsBuilder.appendNumber( "input" , num ); - countsBuilder.appendNumber( "emit" , mrtl->numEmits ); - if ( mrtl->numEmits ) - shouldHaveData = true; - - timingBuilder.append( "mapTime" , mapTime / 1000 ); - timingBuilder.append( "emitLoop" , t.millis() ); - - // final reduce - op->setMessage( "m/r: (2/3) final reduce in memory" ); - mrtl->reduceInMemory(); - mrtl->dump(); - - BSONObj sortKey = BSON( "0" << 1 ); - db.ensureIndex( mr.incLong , sortKey ); - - { - writelock lock( mr.tempLong.c_str() ); - Client::Context ctx( mr.tempLong.c_str() ); - assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) ); - } - - - { - readlock rl(mr.incLong.c_str()); - Client::Context ctx( mr.incLong ); - - BSONObj prev; - BSONList all; - - assert( pm == op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) ) ); - - shared_ptr temp = bestGuessCursor( mr.incLong.c_str() , BSONObj() , sortKey ); - auto_ptr cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.incLong.c_str() ) ); - - while ( cursor->ok() ){ - BSONObj o = cursor->current().getOwned(); - cursor->advance(); - - pm.hit(); - - if ( o.woSortOrder( prev , sortKey ) == 0 ){ - all.push_back( o ); - if ( pm->hits() % 1000 == 0 ){ - if ( ! cursor->yield() ){ - cursor.release(); - break; - } - killCurrentOp.checkForInterrupt(); - } - continue; - } - - ClientCursor::YieldLock yield (cursor.get()); - state.finalReduce( all ); - - all.clear(); - prev = o; - all.push_back( o ); - - if ( ! yield.stillOk() ){ - cursor.release(); - break; - } - - killCurrentOp.checkForInterrupt(); - } - - { - dbtempreleasecond tl; - if ( ! tl.unlocked() ) - log( LL_WARNING ) << "map/reduce can't temp release" << endl; - state.finalReduce( all ); - } - - pm.finished(); - } - - _tlmr.reset( 0 ); - } - catch ( ... 
){ - log() << "mr failed, removing collection" << endl; - db.dropCollection( mr.tempLong ); - db.dropCollection( mr.incLong ); - throw; - } - - long long finalCount = 0; - { - dblock lock; - db.dropCollection( mr.incLong ); - - finalCount = mr.renameIfNeeded( db ); - } - - timingBuilder.append( "total" , t.millis() ); - - result.append( "result" , mr.finalShort ); - result.append( "timeMillis" , t.millis() ); - countsBuilder.appendNumber( "output" , finalCount ); - if ( mr.verbose ) result.append( "timing" , timingBuilder.obj() ); - result.append( "counts" , countsBuilder.obj() ); - - if ( finalCount == 0 && shouldHaveData ){ - result.append( "cmd" , cmd ); - errmsg = "there were emits but no data!"; - return false; - } - - return true; - } - - private: - DBDirectClient db; - - } mapReduceCommand; - - class MapReduceFinishCommand : public Command { - public: - MapReduceFinishCommand() : Command( "mapreduce.shardedfinish" ){} - virtual bool slaveOk() const { return true; } - - virtual LockType locktype() const { return NONE; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe(); - - MRSetup mr( dbname , cmdObj.firstElement().embeddedObjectUserCheck() , false ); - - set servers; - - BSONObjBuilder shardCounts; - map counts; - - BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck(); - vector< auto_ptr > shardCursors; - - { // parse per shard results - BSONObjIterator i( shards ); - while ( i.more() ){ - BSONElement e = i.next(); - string shard = e.fieldName(); - - BSONObj res = e.embeddedObjectUserCheck(); - - uassert( 10078 , "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() ); - servers.insert( shard ); - shardCounts.appendAs( res["counts"] , shard.c_str() ); - - BSONObjIterator j( res["counts"].embeddedObjectUserCheck() ); - while ( j.more() ){ - BSONElement temp = j.next(); - counts[temp.fieldName()] += temp.numberLong(); - } - - } - - } - - DBDirectClient db; - - { // reduce from each stream - - BSONObj sortKey = BSON( "_id" << 1 ); - - ParallelSortClusteredCursor cursor( servers , dbname + "." + shardedOutputCollection , - Query().sort( sortKey ) ); - cursor.init(); - - auto_ptr s = globalScriptEngine->getPooledScope( dbname ); - s->localConnect( dbname.c_str() ); - ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() ); - ScriptingFunction finalizeFunction = 0; - if ( mr.finalizeCode.size() ) - finalizeFunction = s->createFunction( mr.finalizeCode.c_str() ); - - BSONList values; - - result.append( "result" , mr.finalShort ); - - while ( cursor.more() ){ - BSONObj t = cursor.next().getOwned(); - - if ( values.size() == 0 ){ - values.push_back( t ); - continue; - } - - if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ){ - values.push_back( t ); - continue; - } - - - db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) ); - values.clear(); - values.push_back( t ); - } - - if ( values.size() ) - db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) ); - } - - long long finalCount = mr.renameIfNeeded( db ); - log(0) << " mapreducefinishcommand " << mr.finalLong << " " << finalCount << endl; - - for ( set::iterator i=servers.begin(); i!=servers.end(); i++ ){ - ScopedDbConnection conn( i->_server ); - conn->dropCollection( dbname + "." 
+ shardedOutputCollection ); - conn.done(); - } - - result.append( "shardCounts" , shardCounts.obj() ); - - { - BSONObjBuilder c; - for ( map::iterator i=counts.begin(); i!=counts.end(); i++ ){ - c.append( i->first , i->second ); - } - result.append( "counts" , c.obj() ); - } - - return 1; - } - } mapReduceFinishCommand; - - } - -} - diff --git a/db/namespace-inl.h b/db/namespace-inl.h new file mode 100644 index 0000000..a777ff8 --- /dev/null +++ b/db/namespace-inl.h @@ -0,0 +1,130 @@ +// @file namespace-inl.h + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "namespace.h" + +namespace mongo { + + inline Namespace& Namespace::operator=(const char *ns) { + // we fill the remaining space with all zeroes here. as the full Namespace struct is in + // the datafiles (the .ns files specifically), that is helpful as then they are deterministic + // in the bytes they have for a given sequence of operations. that makes testing and debugging + // the data files easier. + // + // if profiling indicates this method is a significant bottleneck, we could have a version we + // use for reads which does not fill with zeroes, and keep the zeroing behavior on writes. + // + unsigned len = strlen(ns); + uassert( 10080 , "ns name too long, max size is 128", len < MaxNsLen); + memset(buf, 0, MaxNsLen); + memcpy(buf, ns, len); + return *this; + } + + inline string Namespace::extraName(int i) const { + char ex[] = "$extra"; + ex[5] += i; + string s = string(buf) + ex; + massert( 10348 , "$extra: ns name too long", s.size() < MaxNsLen); + return s; + } + + inline bool Namespace::isExtra() const { + const char *p = strstr(buf, "$extr"); + return p && p[5] && p[6] == 0; //==0 important in case an index uses name "$extra_1" for example + } + + inline int Namespace::hash() const { + unsigned x = 0; + const char *p = buf; + while ( *p ) { + x = x * 131 + *p; + p++; + } + return (x & 0x7fffffff) | 0x8000000; // must be > 0 + } + + /* future : this doesn't need to be an inline. */ + inline string Namespace::getSisterNS( const char * local ) const { + assert( local && local[0] != '.' ); + string old(buf); + if ( old.find( "." ) != string::npos ) + old = old.substr( 0 , old.find( "." ) ); + return old + "." + local; + } + + inline IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected ) { + if( idxNo < NIndexesBase ) + return _indexes[idxNo]; + Extra *e = extra(); + if ( ! e ) { + if ( missingExpected ) + throw MsgAssertionException( 13283 , "Missing Extra" ); + massert(13282, "missing Extra", e); + } + int i = idxNo - NIndexesBase; + if( i >= NIndexesExtra ) { + e = e->next(this); + if ( ! 
e ) { + if ( missingExpected ) + throw MsgAssertionException( 13283 , "missing extra" ); + massert(13283, "missing Extra", e); + } + i -= NIndexesExtra; + } + return e->details[i]; + } + + inline int NamespaceDetails::idxNo(IndexDetails& idx) { + IndexIterator i = ii(); + while( i.more() ) { + if( &i.next() == &idx ) + return i.pos()-1; + } + massert( 10349 , "E12000 idxNo fails", false); + return -1; + } + + inline int NamespaceDetails::findIndexByKeyPattern(const BSONObj& keyPattern) { + IndexIterator i = ii(); + while( i.more() ) { + if( i.next().keyPattern() == keyPattern ) + return i.pos()-1; + } + return -1; + } + + // @return offset in indexes[] + inline int NamespaceDetails::findIndexByName(const char *name) { + IndexIterator i = ii(); + while( i.more() ) { + if ( strcmp(i.next().info.obj().getStringField("name"),name) == 0 ) + return i.pos()-1; + } + return -1; + } + + inline NamespaceDetails::IndexIterator::IndexIterator(NamespaceDetails *_d) { + d = _d; + i = 0; + n = d->nIndexes; + } + +} diff --git a/db/namespace.cpp b/db/namespace.cpp index 8a1ab6f..fcdaee2 100644 --- a/db/namespace.cpp +++ b/db/namespace.cpp @@ -19,7 +19,7 @@ #include "pch.h" #include "pdfile.h" #include "db.h" -#include "../util/mmap.h" +#include "mongommf.h" #include "../util/hashtab.h" #include "../scripting/engine.h" #include "btree.h" @@ -31,6 +31,8 @@ namespace mongo { + BOOST_STATIC_ASSERT( sizeof(Namespace) == 128 ); + BSONObj idKeyPattern = fromjson("{\"_id\":1}"); /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes @@ -45,7 +47,7 @@ namespace mongo { NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) { /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */ firstExtent = lastExtent = capExtent = loc; - datasize = nrecords = 0; + stats.datasize = stats.nrecords = 0; lastExtentSize = 0; nIndexes = 0; capped = _capped; @@ -58,20 +60,23 @@ namespace mongo { // For capped case, signal that we are doing initial extent allocation. 
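The namespace-inl.h hunk above pins sizeof(Namespace) at 128 bytes, zero-fills the buffer on assignment so the .ns files stay byte-for-byte deterministic, and hashes names with a base-131 rolling hash. The standalone sketch below restates that behaviour outside the MongoDB tree; FixedNs is a hypothetical stand-in, not the real class.

    // Illustrative sketch (not MongoDB source): a fixed-width, zero-padded name cell
    // like the Namespace struct above. Zero fill keeps the mapped .ns bytes
    // deterministic; the 131-based rolling hash mirrors Namespace::hash().
    #include <cassert>
    #include <cstring>
    #include <cstdio>

    struct FixedNs {                      // hypothetical stand-in for mongo::Namespace
        enum { MaxLen = 128 };
        char buf[MaxLen];

        FixedNs& operator=(const char* ns) {
            size_t len = strlen(ns);
            assert(len < MaxLen);         // mirrors uassert 10080 ("ns name too long")
            memset(buf, 0, MaxLen);       // deterministic bytes on disk
            memcpy(buf, ns, len);
            return *this;
        }
        int hash() const {                // same scheme as Namespace::hash()
            unsigned x = 0;
            for (const char* p = buf; *p; ++p)
                x = x * 131 + *p;
            return (x & 0x7fffffff) | 0x8000000;   // always > 0
        }
    };

    int main() {
        FixedNs ns;
        ns = "test.foo";
        printf("hash(test.foo) = %d\n", ns.hash());
        return 0;
    }

The final mask keeps the value positive and non-zero, matching the "must be > 0" comment in the hunk.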
if ( capped ) cappedLastDelRecLastExtent().setInvalid(); - assert( sizeof(dataFileVersion) == 2 ); - dataFileVersion = 0; - indexFileVersion = 0; + assert( sizeof(dataFileVersion) == 2 ); + dataFileVersion = 0; + indexFileVersion = 0; multiKeyIndexBits = 0; reservedA = 0; extraOffset = 0; - backgroundIndexBuildInProgress = 0; + indexBuildInProgress = 0; + reservedB = 0; + capped2.cc2_ptr = 0; + capped2.fileNumber = 0; memset(reserved, 0, sizeof(reserved)); } bool NamespaceIndex::exists() const { return !MMF::exists(path()); } - + boost::filesystem::path NamespaceIndex::path() const { boost::filesystem::path ret( dir_ ); if ( directoryperdb ) @@ -88,23 +93,56 @@ namespace mongo { if ( !boost::filesystem::exists( dir ) ) BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( dir ) ); } - - int lenForNewNsFiles = 16 * 1024 * 1024; - - void NamespaceDetails::onLoad(const Namespace& k) { - if( k.isExtra() ) { + + unsigned lenForNewNsFiles = 16 * 1024 * 1024; + +#if defined(_DEBUG) + void NamespaceDetails::dump(const Namespace& k) { + if( !cmdLine.dur ) + cout << "ns offsets which follow will not display correctly with --dur disabled" << endl; + + size_t ofs = 1; // 1 is sentinel that the find call below failed + privateViews.find(this, /*out*/ofs); + + cout << "ns" << hex << setw(8) << ofs << ' '; + cout << k.toString() << '\n'; + + if( k.isExtra() ) { + cout << "ns\t extra" << endl; + return; + } + + cout << "ns " << firstExtent.toString() << ' ' << lastExtent.toString() << " nidx:" << nIndexes << '\n'; + cout << "ns " << stats.datasize << ' ' << stats.nrecords << ' ' << nIndexes << '\n'; + cout << "ns " << capped << ' ' << paddingFactor << ' ' << flags << ' ' << dataFileVersion << '\n'; + cout << "ns " << multiKeyIndexBits << ' ' << indexBuildInProgress << '\n'; + cout << "ns " << (int) reserved[0] << ' ' << (int) reserved[59]; + cout << endl; + } +#endif + + void NamespaceDetails::onLoad(const Namespace& k) { + //dump(k); + + if( k.isExtra() ) { /* overflow storage for indexes - so don't treat as a NamespaceDetails object. */ return; } - assertInWriteLock(); - if( backgroundIndexBuildInProgress ) { - log() << "backgroundIndexBuildInProgress was " << backgroundIndexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl; - backgroundIndexBuildInProgress = 0; + DEV assertInWriteLock(); + + if( indexBuildInProgress || capped2.cc2_ptr ) { + assertInWriteLock(); + if( indexBuildInProgress ) { + log() << "indexBuildInProgress was " << indexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl; + getDur().writingInt( indexBuildInProgress ) = 0; + } + if( capped2.cc2_ptr ) + *getDur().writing(&capped2.cc2_ptr) = 0; } } - static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { + static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { v.onLoad(k); } @@ -117,105 +155,113 @@ namespace mongo { we need to be sure to clear any cached info for the database in local.*. 
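onLoad() above never pokes the memory-mapped NamespaceDetails directly: each field it repairs after an unclean shutdown is first declared through getDur().writing()/writingInt() so the durability layer can journal the change before the store lands. The sketch below mocks that write-intent idiom with a toy MockDur type; it is a simplified illustration, not the real dur.h interface.

    // Sketch of the write-intent idiom: declare the address to the durability layer,
    // then write through the pointer/reference it hands back.
    #include <cstdio>
    #include <cstddef>
    #include <vector>

    struct MockDur {                          // hypothetical stand-in for mongo::getDur()
        struct Intent { void* p; size_t len; };
        std::vector<Intent> intents;          // pretend journal of declared ranges

        template <typename T>
        T* writing(T* x) {                    // declare intent, return writable pointer
            intents.push_back(Intent{ x, sizeof(T) });
            return x;
        }
        int& writingInt(int& x) { return *writing(&x); }
    };

    static MockDur dur;

    struct Header {                           // pretend mapped structure, like NamespaceDetails
        int indexBuildInProgress;
        unsigned long long cc2_ptr;
    };

    int main() {
        Header h = { 1, 42 };
        // the pattern from onLoad(): reset flags left over from an abnormal shutdown
        dur.writingInt(h.indexBuildInProgress) = 0;
        *dur.writing(&h.cc2_ptr) = 0;
        printf("declared %zu write intents\n", dur.intents.size());
        return 0;
    }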
*/ - /* + /* if ( "local" != database_ ) { DBInfo i(database_.c_str()); i.dbDropped(); } - */ - int len = -1; + */ + + unsigned long long len = 0; boost::filesystem::path nsPath = path(); string pathString = nsPath.string(); - MMF::Pointer p; - if( MMF::exists(nsPath) ) { - p = f.map(pathString.c_str()); - if( !p.isNull() ) { + void *p = 0; + if( MMF::exists(nsPath) ) { + if( f.open(pathString, true) ) { len = f.length(); - if ( len % (1024*1024) != 0 ){ + if ( len % (1024*1024) != 0 ) { log() << "bad .ns file: " << pathString << endl; uassert( 10079 , "bad .ns file length, cannot open database", len % (1024*1024) == 0 ); } + p = f.getView(); } - } - else { - // use lenForNewNsFiles, we are making a new database - massert( 10343 , "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 ); + } + else { + // use lenForNewNsFiles, we are making a new database + massert( 10343, "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 ); maybeMkdir(); - long l = lenForNewNsFiles; - p = f.map(pathString.c_str(), l); - if( !p.isNull() ) { - len = (int) l; + unsigned long long l = lenForNewNsFiles; + if( f.create(pathString, l, true) ) { + getDur().createdFile(pathString, l); // always a new file + len = l; assert( len == lenForNewNsFiles ); + p = f.getView(); } - } + } - if ( p.isNull() ) { - problem() << "couldn't open file " << pathString << " terminating" << endl; + if ( p == 0 ) { + /** TODO: this shouldn't terminate? */ + log() << "error couldn't open file " << pathString << " terminating" << endl; dbexit( EXIT_FS ); } - ht = new HashTable(p, len, "namespace index"); + + assert( len <= 0x7fffffff ); + ht = new HashTable(p, (int) len, "namespace index"); if( checkNsFilesOnLoad ) ht->iterAll(namespaceOnLoadCallback); } - + static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , void * extra ) { list * l = (list*)extra; if ( ! k.hasDollarSign() ) l->push_back( (string)k ); } - void NamespaceIndex::getNamespaces( list& tofill , bool onlyCollections ) const { assert( onlyCollections ); // TODO: need to implement this // need boost::bind or something to make this less ugly - + if ( ht ) ht->iterAll( namespaceGetNamespacesCallback , (void*)&tofill ); } void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) { - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); + { + Record *r = (Record *) getDur().writingPtr(d, sizeof(Record)); + d = &r->asDeleted(); // defensive code: try to make us notice if we reference a deleted record - (unsigned&) (((Record *) d)->data) = 0xeeeeeeee; + (unsigned&) (r->data) = 0xeeeeeeee; } - dassert( dloc.drec() == d ); - DEBUGGING out() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; + DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; if ( capped ) { if ( !cappedLastDelRecLastExtent().isValid() ) { // Initial extent allocation. Insert at end. 
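NamespaceIndex::init() above opens the .ns file through MongoMMF, rejects any existing file whose length is not a whole number of megabytes, and sizes a brand-new file at lenForNewNsFiles (16MB by default). The helper below sketches only that size policy with ordinary fstreams; the mapping, path handling, and getDur().createdFile() bookkeeping of the real code are omitted, and openOrCreateNsFile is a made-up name.

    #include <cstdio>
    #include <fstream>
    #include <stdexcept>
    #include <string>

    static const unsigned long long kMB = 1024 * 1024;

    unsigned long long openOrCreateNsFile(const std::string& path,
                                          unsigned long long lenForNew = 16 * kMB) {
        std::ifstream in(path.c_str(), std::ios::binary | std::ios::ate);
        if (in) {                                    // existing file: validate its length
            unsigned long long len = static_cast<unsigned long long>(in.tellg());
            if (len % kMB != 0)
                throw std::runtime_error("bad .ns file length, cannot open database");
            return len;
        }
        // new database: pre-size the file so it can be mapped in full
        std::ofstream out(path.c_str(), std::ios::binary);
        out.seekp(static_cast<std::streamoff>(lenForNew - 1));
        out.put('\0');
        return lenForNew;
    }

    int main() {
        try {
            unsigned long long len = openOrCreateNsFile("example.ns");   // hypothetical path
            std::printf("example.ns length: %llu bytes\n", len);
        } catch (const std::exception& e) {
            std::printf("refusing to open: %s\n", e.what());
        }
        return 0;
    }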
d->nextDeleted = DiskLoc(); if ( cappedListOfAllDeletedRecords().isNull() ) - cappedListOfAllDeletedRecords() = dloc; + getDur().writingDiskLoc( cappedListOfAllDeletedRecords() ) = dloc; else { DiskLoc i = cappedListOfAllDeletedRecords(); - for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ); - i.drec()->nextDeleted = dloc; + for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ) + ; + i.drec()->nextDeleted.writing() = dloc; } - } else { + } + else { d->nextDeleted = cappedFirstDeletedInCurExtent(); - cappedFirstDeletedInCurExtent() = dloc; + getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = dloc; // always compact() after this so order doesn't matter } - } else { + } + else { int b = bucket(d->lengthWithHeaders); DiskLoc& list = deletedList[b]; DiskLoc oldHead = list; - list = dloc; + getDur().writingDiskLoc(list) = dloc; d->nextDeleted = oldHead; } } - /* - lenToAlloc is WITH header - */ + // lenToAlloc is WITH header DiskLoc NamespaceDetails::alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc) { lenToAlloc = (lenToAlloc + 3) & 0xfffffffc; DiskLoc loc = _alloc(ns, lenToAlloc); if ( loc.isNull() ) return loc; - DeletedRecord *r = loc.drec(); + const DeletedRecord *r = loc.drec(); + //r = getDur().writing(r); /* note we want to grab from the front so our next pointers on disk tend to go in a forward direction which is important for performance. */ @@ -229,20 +275,21 @@ namespace mongo { if ( capped == 0 ) { if ( left < 24 || left < (lenToAlloc >> 3) ) { // you get the whole thing. - DataFileMgr::grow(loc, regionlen); + //DataFileMgr::grow(loc, regionlen); return loc; } } /* split off some for further use. */ - r->lengthWithHeaders = lenToAlloc; - DataFileMgr::grow(loc, lenToAlloc); + getDur().writingInt(r->lengthWithHeaders) = lenToAlloc; + //DataFileMgr::grow(loc, lenToAlloc); DiskLoc newDelLoc = loc; newDelLoc.inc(lenToAlloc); DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left); - newDel->extentOfs = r->extentOfs; - newDel->lengthWithHeaders = left; - newDel->nextDeleted.Null(); + DeletedRecord *newDelW = getDur().writing(newDel); + newDelW->extentOfs = r->extentOfs; + newDelW->lengthWithHeaders = left; + newDelW->nextDeleted.Null(); addDeletedRec(newDel, newDelLoc); @@ -267,7 +314,7 @@ namespace mongo { int a = cur.a(); if ( a < -1 || a >= 100000 ) { problem() << "~~ Assertion - cur out of range in _alloc() " << cur.toString() << - " a:" << a << " b:" << b << " chain:" << chain << '\n'; + " a:" << a << " b:" << b << " chain:" << chain << '\n'; sayDbContext(); if ( cur == *prev ) prev->Null(); @@ -303,7 +350,7 @@ namespace mongo { cur.Null(); } else { - /*this defensive check only made sense for the mmap storage engine: + /*this defensive check only made sense for the mmap storage engine: if ( r->nextDeleted.getOfs() == 0 ) { problem() << "~~ Assertion - bad nextDeleted " << r->nextDeleted.toString() << " b:" << b << " chain:" << chain << ", fixing.\n"; @@ -316,9 +363,9 @@ namespace mongo { /* unlink ourself from the deleted list */ { - DeletedRecord *bmr = bestmatch.drec(); - *bestprev = bmr->nextDeleted; - bmr->nextDeleted.setInvalid(); // defensive. + const DeletedRecord *bmr = bestmatch.drec(); + *getDur().writing(bestprev) = bmr->nextDeleted; + bmr->nextDeleted.writing().setInvalid(); // defensive. 
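addDeletedRec() and alloc() above maintain bucketed free lists of deleted records: requests are rounded up to 4 bytes, satisfied from the front of a list, and a generous tail is split off and re-filed as a new deleted record. The toy allocator below keeps a single in-memory bucket to show the rounding and split rule; the real bucket sizing and the extent-allocation fallback are not modelled.

    #include <cstdio>
    #include <list>

    struct FreeRec { int len; };

    static std::list<FreeRec> bucket;             // single pretend free list

    int allocRecord(int lenToAlloc) {
        lenToAlloc = (lenToAlloc + 3) & ~3;       // same 4-byte rounding as alloc()
        for (std::list<FreeRec>::iterator it = bucket.begin(); it != bucket.end(); ++it) {
            if (it->len < lenToAlloc)
                continue;
            int left = it->len - lenToAlloc;
            if (left < 24 || left < (lenToAlloc >> 3)) {   // caller gets the whole thing
                int got = it->len;
                bucket.erase(it);
                return got;
            }
            it->len = left;                       // split: remainder stays on the free list
            return lenToAlloc;
        }
        return -1;                                // would fall through to growing an extent
    }

    int main() {
        FreeRec r = { 1000 };
        bucket.push_back(r);
        printf("asked 100, got %d; asked 900, got %d\n", allocRecord(100), allocRecord(900));
        return 0;
    }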
assert(bmr->extentOfs < bestmatch.getOfs()); } @@ -371,9 +418,9 @@ namespace mongo { if ( e == capExtent ) out() << " (capExtent)"; out() << '\n'; - out() << " magic: " << hex << e.ext()->magic << dec << " extent->ns: " << e.ext()->nsDiagnostic.buf << '\n'; + out() << " magic: " << hex << e.ext()->magic << dec << " extent->ns: " << e.ext()->nsDiagnostic.toString() << '\n'; out() << " fr: " << e.ext()->firstRecord.toString() << - " lr: " << e.ext()->lastRecord.toString() << " extent->len: " << e.ext()->length << '\n'; + " lr: " << e.ext()->lastRecord.toString() << " extent->len: " << e.ext()->length << '\n'; } assert( len * 5 > lastExtentSize ); // assume it is unusually large record; if not, something is broken } @@ -387,12 +434,27 @@ namespace mongo { return cappedAlloc(ns,len); } + void NamespaceIndex::kill_ns(const char *ns) { + if ( !ht ) + return; + Namespace n(ns); + ht->kill(n); + + for( int i = 0; i<=1; i++ ) { + try { + Namespace extra(n.extraName(i).c_str()); + ht->kill(extra); + } + catch(DBException&) { } + } + } + /* extra space for indexes when more than 10 */ NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) { assert( i >= 0 && i <= 1 ); Namespace n(ns); Namespace extra(n.extraName(i).c_str()); // throws userexception if ns name too long - + massert( 10350 , "allocExtra: base ns missing?", d ); massert( 10351 , "allocExtra: extra already exists", ht->get(extra) == 0 ); @@ -409,10 +471,10 @@ namespace mongo { long ofs = e->ofsFrom(this); if( i == 0 ) { assert( extraOffset == 0 ); - extraOffset = ofs; + *getDur().writing(&extraOffset) = ofs; assert( extra() == e ); } - else { + else { Extra *hd = extra(); assert( hd->next(this) == 0 ); hd->setNext(ofs); @@ -422,25 +484,23 @@ namespace mongo { /* you MUST call when adding an index. see pdfile.cpp */ IndexDetails& NamespaceDetails::addIndex(const char *thisns, bool resetTransient) { - assert( nsdetails(thisns) == this ); - IndexDetails *id; try { id = &idx(nIndexes,true); } - catch(DBException&) { + catch(DBException&) { allocExtra(thisns, nIndexes); id = &idx(nIndexes,false); } - nIndexes++; + (*getDur().writing(&nIndexes))++; if ( resetTransient ) NamespaceDetailsTransient::get_w(thisns).addedIndex(); return *id; } // must be called when renaming a NS to fix up extra - void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { + void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below. 
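newExtra() and addIndex() above extend a collection past its ten inline IndexDetails slots by chaining $extra blocks of thirty slots each. The small helper below only does the slot arithmetic implied by NIndexesBase and NIndexesExtra; the real idx() resolves the block by following Extra::next() offsets inside the mapped file.

    #include <cstdio>
    #include <initializer_list>

    enum { NIndexesBase = 10, NIndexesExtra = 30, NIndexesMax = 64 };

    void locateIndexSlot(int idxNo, int& blockOut, int& slotOut) {
        if (idxNo < NIndexesBase) {               // inline slot in NamespaceDetails itself
            blockOut = 0;
            slotOut  = idxNo;
            return;
        }
        int i = idxNo - NIndexesBase;
        blockOut = 1 + i / NIndexesExtra;          // 1-based $extra block in the chain
        slotOut  = i % NIndexesExtra;
    }

    int main() {
        for (int idx : { 3, 10, 39, 40, 63 }) {
            int block, slot;
            locateIndexSlot(idx, block, slot);
            printf("index %2d -> block %d, slot %2d\n", idx, block, slot);
        }
        return 0;
    }

Two chained blocks are enough for the 64-index cap, which is what the BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 ) in this hunk checks.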
Extra *se = src->extra(); int n = NIndexesBase; @@ -454,7 +514,7 @@ namespace mongo { Extra *nxt = allocExtra(thisns, n); e->setNext( nxt->ofsFrom(this) ); e = nxt; - } + } assert( extraOffset ); } } @@ -473,25 +533,39 @@ namespace mongo { }*/ return -1; } - - long long NamespaceDetails::storageSize( int * numExtents ){ + + long long NamespaceDetails::storageSize( int * numExtents , BSONArrayBuilder * extentInfo ) const { Extent * e = firstExtent.ext(); assert( e ); - + long long total = 0; int n = 0; - while ( e ){ + while ( e ) { total += e->length; - e = e->getNextExtent(); n++; + + if ( extentInfo ) { + extentInfo->append( BSON( "len" << e->length << "loc: " << e->myLoc.toBSONObj() ) ); + } + + e = e->getNextExtent(); } - + if ( numExtents ) *numExtents = n; - + return total; } - + + NamespaceDetails *NamespaceDetails::writingWithExtra() { + vector< pair< long long, unsigned > > writeRanges; + writeRanges.push_back( make_pair( 0, sizeof( NamespaceDetails ) ) ); + for( Extra *e = extra(); e; e = e->next( this ) ) { + writeRanges.push_back( make_pair( (char*)e - (char*)this, sizeof( Extra ) ) ); + } + return reinterpret_cast< NamespaceDetails* >( getDur().writingRangesAtOffsets( this, writeRanges ) ); + } + /* ------------------------------------------------------------------------- */ mongo::mutex NamespaceDetailsTransient::_qcMutex("qc"); @@ -505,14 +579,14 @@ namespace mongo { _keysComputed = false; _indexSpecs.clear(); } - -/* NamespaceDetailsTransient& NamespaceDetailsTransient::get(const char *ns) { - shared_ptr< NamespaceDetailsTransient > &t = map_[ ns ]; - if ( t.get() == 0 ) - t.reset( new NamespaceDetailsTransient(ns) ); - return *t; - } -*/ + + /* NamespaceDetailsTransient& NamespaceDetailsTransient::get(const char *ns) { + shared_ptr< NamespaceDetailsTransient > &t = map_[ ns ]; + if ( t.get() == 0 ) + t.reset( new NamespaceDetailsTransient(ns) ); + return *t; + } + */ void NamespaceDetailsTransient::clearForPrefix(const char *prefix) { assertInWriteLock(); vector< string > found; @@ -523,7 +597,7 @@ namespace mongo { _map[ *i ].reset(); } } - + void NamespaceDetailsTransient::computeIndexKeys() { _keysComputed = true; _indexKeys.clear(); @@ -565,92 +639,92 @@ namespace mongo { void renameNamespace( const char *from, const char *to ) { NamespaceIndex *ni = nsindex( from ); - assert( ni ); + assert( ni ); assert( ni->details( from ) ); assert( ! ni->details( to ) ); - - // Our namespace and index details will move to a different - // memory location. The only references to namespace and - // index details across commands are in cursors and nsd - // transient (including query cache) so clear these. - ClientCursor::invalidate( from ); - NamespaceDetailsTransient::clearForPrefix( from ); - - NamespaceDetails *details = ni->details( from ); - ni->add_ns( to, *details ); + + // Our namespace and index details will move to a different + // memory location. The only references to namespace and + // index details across commands are in cursors and nsd + // transient (including query cache) so clear these. 
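storageSize() above is just a walk of the extent chain, accumulating each extent's length and optionally counting extents. The sketch below restates that loop over a toy Extent type; the per-extent BSONArrayBuilder report of the real method is left out.

    #include <cstdio>

    struct Extent {                 // hypothetical stand-in for mongo::Extent
        int length;
        Extent* next;
        Extent* getNextExtent() const { return next; }
    };

    long long storageSize(const Extent* first, int* numExtents = 0) {
        long long total = 0;
        int n = 0;
        for (const Extent* e = first; e; e = e->getNextExtent()) {
            total += e->length;
            ++n;
        }
        if (numExtents)
            *numExtents = n;
        return total;
    }

    int main() {
        Extent c = { 4096, 0 }, b = { 8192, &c }, a = { 16384, &b };
        int n = 0;
        printf("storageSize = %lld over %d extents\n", storageSize(&a, &n), n);
        return 0;
    }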
+ ClientCursor::invalidate( from ); + NamespaceDetailsTransient::clearForPrefix( from ); + + NamespaceDetails *details = ni->details( from ); + ni->add_ns( to, *details ); NamespaceDetails *todetails = ni->details( to ); - try { + try { todetails->copyingFrom(to, details); // fixes extraOffset } - catch( DBException& ) { + catch( DBException& ) { // could end up here if .ns is full - if so try to clean up / roll back a little ni->kill_ns(to); throw; } - ni->kill_ns( from ); - details = todetails; - - BSONObj oldSpec; - char database[MaxDatabaseLen]; - nsToDatabase(from, database); - string s = database; - s += ".system.namespaces"; - assert( Helpers::findOne( s.c_str(), BSON( "name" << from ), oldSpec ) ); - - BSONObjBuilder newSpecB; - BSONObjIterator i( oldSpec.getObjectField( "options" ) ); - while( i.more() ) { - BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "create" ) != 0 ) - newSpecB.append( e ); - else - newSpecB << "create" << to; - } - BSONObj newSpec = newSpecB.done(); - addNewNamespaceToCatalog( to, newSpec.isEmpty() ? 0 : &newSpec ); - - deleteObjects( s.c_str(), BSON( "name" << from ), false, false, true ); - // oldSpec variable no longer valid memory - - BSONObj oldIndexSpec; - s = database; - s += ".system.indexes"; - while( Helpers::findOne( s.c_str(), BSON( "ns" << from ), oldIndexSpec ) ) { - BSONObjBuilder newIndexSpecB; - BSONObjIterator i( oldIndexSpec ); - while( i.more() ) { - BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "ns" ) != 0 ) - newIndexSpecB.append( e ); - else - newIndexSpecB << "ns" << to; - } - BSONObj newIndexSpec = newIndexSpecB.done(); - DiskLoc newIndexSpecLoc = theDataFileMgr.insert( s.c_str(), newIndexSpec.objdata(), newIndexSpec.objsize(), true, BSONElement(), false ); - int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) ); - IndexDetails &indexDetails = details->idx(indexI); - string oldIndexNs = indexDetails.indexNamespace(); - indexDetails.info = newIndexSpecLoc; - string newIndexNs = indexDetails.indexNamespace(); - - BtreeBucket::renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); - deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); - } - } - - bool legalClientSystemNS( const string& ns , bool write ){ + ni->kill_ns( from ); + details = todetails; + + BSONObj oldSpec; + char database[MaxDatabaseNameLen]; + nsToDatabase(from, database); + string s = database; + s += ".system.namespaces"; + assert( Helpers::findOne( s.c_str(), BSON( "name" << from ), oldSpec ) ); + + BSONObjBuilder newSpecB; + BSONObjIterator i( oldSpec.getObjectField( "options" ) ); + while( i.more() ) { + BSONElement e = i.next(); + if ( strcmp( e.fieldName(), "create" ) != 0 ) + newSpecB.append( e ); + else + newSpecB << "create" << to; + } + BSONObj newSpec = newSpecB.done(); + addNewNamespaceToCatalog( to, newSpec.isEmpty() ? 
0 : &newSpec ); + + deleteObjects( s.c_str(), BSON( "name" << from ), false, false, true ); + // oldSpec variable no longer valid memory + + BSONObj oldIndexSpec; + s = database; + s += ".system.indexes"; + while( Helpers::findOne( s.c_str(), BSON( "ns" << from ), oldIndexSpec ) ) { + BSONObjBuilder newIndexSpecB; + BSONObjIterator i( oldIndexSpec ); + while( i.more() ) { + BSONElement e = i.next(); + if ( strcmp( e.fieldName(), "ns" ) != 0 ) + newIndexSpecB.append( e ); + else + newIndexSpecB << "ns" << to; + } + BSONObj newIndexSpec = newIndexSpecB.done(); + DiskLoc newIndexSpecLoc = theDataFileMgr.insert( s.c_str(), newIndexSpec.objdata(), newIndexSpec.objsize(), true, BSONElement(), false ); + int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) ); + IndexDetails &indexDetails = details->idx(indexI); + string oldIndexNs = indexDetails.indexNamespace(); + indexDetails.info = newIndexSpecLoc; + string newIndexNs = indexDetails.indexNamespace(); + + BtreeBucket::renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); + deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); + } + } + + bool legalClientSystemNS( const string& ns , bool write ) { if( ns == "local.system.replset" ) return true; if ( ns.find( ".system.users" ) != string::npos ) return true; - if ( ns.find( ".system.js" ) != string::npos ){ + if ( ns.find( ".system.js" ) != string::npos ) { if ( write ) Scope::storedFuncMod(); return true; } - + return false; } - + } // namespace mongo diff --git a/db/namespace.h b/db/namespace.h index abc35bb..4ec1edd 100644 --- a/db/namespace.h +++ b/db/namespace.h @@ -23,130 +23,66 @@ #include "queryutil.h" #include "diskloc.h" #include "../util/hashtab.h" -#include "../util/mmap.h" +#include "mongommf.h" namespace mongo { - /* in the mongo source code, "client" means "database". */ + /* in the mongo source code, "client" means "database". */ - const int MaxDatabaseLen = 256; // max str len for the db name, including null char + const int MaxDatabaseNameLen = 256; // max str len for the db name, including null char - // "database.a.b.c" -> "database" - inline void nsToDatabase(const char *ns, char *database) { - const char *p = ns; - char *q = database; - while ( *p != '.' ) { - if ( *p == 0 ) - break; - *q++ = *p++; - } - *q = 0; - if (q-database>=MaxDatabaseLen) { - log() << "nsToDatabase: ns too long. terminating, buf overrun condition" << endl; - dbexit( EXIT_POSSIBLE_CORRUPTION ); - } - } - inline string nsToDatabase(const char *ns) { - char buf[MaxDatabaseLen]; - nsToDatabase(ns, buf); - return buf; - } - inline string nsToDatabase(const string& ns) { - size_t i = ns.find( '.' ); - if ( i == string::npos ) - return ns; - return ns.substr( 0 , i ); - } - - /* e.g. - NamespaceString ns("acme.orders"); - cout << ns.coll; // "orders" - */ + /* e.g. + NamespaceString ns("acme.orders"); + cout << ns.coll; // "orders" + */ class NamespaceString { public: string db; string coll; // note collection names can have periods in them for organizing purposes (e.g. "system.indexes") + + NamespaceString( const char * ns ) { init(ns); } + NamespaceString( const string& ns ) { init(ns.c_str()); } + string ns() const { return db + '.' 
+ coll; } + bool isSystem() const { return strncmp(coll.c_str(), "system.", 7) == 0; } private: - void init(const char *ns) { + void init(const char *ns) { const char *p = strchr(ns, '.'); if( p == 0 ) return; db = string(ns, p - ns); coll = p + 1; } - public: - NamespaceString( const char * ns ) { init(ns); } - NamespaceString( const string& ns ) { init(ns.c_str()); } - - string ns() const { - return db + '.' + coll; - } - - bool isSystem() { - return strncmp(coll.c_str(), "system.", 7) == 0; - } }; #pragma pack(1) - /* This helper class is used to make the HashMap below in NamespaceDetails */ + /* This helper class is used to make the HashMap below in NamespaceDetails e.g. see line: + HashTable *ht; + */ class Namespace { public: - enum MaxNsLenValue { MaxNsLen = 128 }; - Namespace(const char *ns) { - *this = ns; - } - Namespace& operator=(const char *ns) { - uassert( 10080 , "ns name too long, max size is 128", strlen(ns) < MaxNsLen); - //memset(buf, 0, MaxNsLen); /* this is just to keep stuff clean in the files for easy dumping and reading */ - strcpy_s(buf, MaxNsLen, ns); - return *this; - } + explicit Namespace(const char *ns) { *this = ns; } + Namespace& operator=(const char *ns); - /* for more than 10 indexes -- see NamespaceDetails::Extra */ - string extraName(int i) { - char ex[] = "$extra"; - ex[5] += i; - string s = string(buf) + ex; - massert( 10348 , "$extra: ns name too long", s.size() < MaxNsLen); - return s; - } - bool isExtra() const { - const char *p = strstr(buf, "$extr"); - return p && p[5] && p[6] == 0; //==0 important in case an index uses name "$extra_1" for example - } bool hasDollarSign() const { return strchr( buf , '$' ) > 0; } void kill() { buf[0] = 0x7f; } bool operator==(const char *r) const { return strcmp(buf, r) == 0; } bool operator==(const Namespace& r) const { return strcmp(buf, r.buf) == 0; } - int hash() const { - unsigned x = 0; - const char *p = buf; - while ( *p ) { - x = x * 131 + *p; - p++; - } - return (x & 0x7fffffff) | 0x8000000; // must be > 0 - } - - /** - ( foo.bar ).getSisterNS( "blah" ) == foo.blah - perhaps this should move to the NamespaceString helper? + int hash() const; // value returned is always > 0 + string toString() const { return (string) buf; } + operator string() const { return (string) buf; } + + /* NamespaceDetails::Extra was added after fact to allow chaining of data blocks to support more than 10 indexes + (more than 10 IndexDetails). It's a bit hacky because of this late addition with backward + file support. */ + string extraName(int i) const; + bool isExtra() const; /* ends with $extr... -- when true an extra block not a normal NamespaceDetails block */ + + /** ( foo.bar ).getSisterNS( "blah" ) == foo.blah + perhaps this should move to the NamespaceString helper? */ - string getSisterNS( const char * local ) { - assert( local && local[0] != '.' ); - string old(buf); - if ( old.find( "." ) != string::npos ) - old = old.substr( 0 , old.find( "." ) ); - return old + "." 
+ local; - } - - string toString() const { - return (string)buf; - } - - operator string() const { - return (string)buf; - } + string getSisterNS( const char * local ) const; + enum MaxNsLenValue { MaxNsLen = 128 }; + private: char buf[MaxNsLen]; }; #pragma pack() @@ -158,7 +94,9 @@ namespace mongo { namespace mongo { /** @return true if a client can modify this namespace - things like *.system.users */ + things like *.system.users + @param write used when .system.js + */ bool legalClientSystemNS( const string& ns , bool write ); /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes @@ -170,92 +108,106 @@ namespace mongo { extern int bucketSizes[]; #pragma pack(1) - /* this is the "header" for a collection that has all its details. in the .ns file. + /* NamespaceDetails : this is the "header" for a collection that has all its details. + It's in the .ns file and this is a memory mapped region (thus the pack pragma above). */ class NamespaceDetails { - friend class NamespaceIndex; - enum { NIndexesExtra = 30, - NIndexesBase = 10 - }; public: - struct ExtraOld { - // note we could use this field for more chaining later, so don't waste it: - unsigned long long reserved1; - IndexDetails details[NIndexesExtra]; - unsigned reserved2; - unsigned reserved3; - }; - class Extra { + enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 }; + + /*-------- data fields, as present on disk : */ + DiskLoc firstExtent; + DiskLoc lastExtent; + /* NOTE: capped collections v1 override the meaning of deletedList. + deletedList[0] points to a list of free records (DeletedRecord's) for all extents in + the capped namespace. + deletedList[1] points to the last record in the prev extent. When the "current extent" + changes, this value is updated. !deletedList[1].isValid() when this value is not + yet computed. + */ + DiskLoc deletedList[Buckets]; + // ofs 168 (8 byte aligned) + struct Stats { + // datasize and nrecords MUST Be adjacent code assumes! + long long datasize; // this includes padding, but not record headers + long long nrecords; + } stats; + int lastExtentSize; + int nIndexes; + private: + // ofs 192 + IndexDetails _indexes[NIndexesBase]; + public: + // ofs 352 (16 byte aligned) + int capped; + int max; // max # of objects for a capped table. TODO: should this be 64 bit? + double paddingFactor; // 1.0 = no padding. + // ofs 386 (16) + int flags; + DiskLoc capExtent; + DiskLoc capFirstNewRecord; + unsigned short dataFileVersion; // NamespaceDetails version. So we can do backward compatibility in the future. 
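The NamespaceDetails declaration above is a raw on-disk image: it is packed, its field offsets are documented inline (ofs 168, 192, 352, ...), and its total size is pinned at 496 bytes by a static assert later in this hunk. The toy header below shows the same discipline on a smaller scale; its fields and its 64-byte size are invented for illustration, not taken from the real struct.

    #include <cstdint>

    #pragma pack(1)
    struct ToyDiskHeader {
        int64_t  datasize;          // adjacent pair, like NamespaceDetails::Stats
        int64_t  nrecords;
        int32_t  lastExtentSize;
        int32_t  nIndexes;
        uint16_t dataFileVersion;   // room for backward compatibility later
        uint16_t indexFileVersion;
        char     reserved[36];      // pad to a deliberately fixed size
    };
    #pragma pack()

    static_assert(sizeof(ToyDiskHeader) == 64,
                  "on-disk header size changed -- bump the file version instead");

    int main() { return 0; }

A field added in the wrong place then breaks the build instead of silently shifting data already written to existing files.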
See filever.h + unsigned short indexFileVersion; + unsigned long long multiKeyIndexBits; + private: + // ofs 400 (16) + unsigned long long reservedA; + long long extraOffset; // where the $extra info is located (bytes relative to this) + public: + int indexBuildInProgress; // 1 if in prog + unsigned reservedB; + // ofs 424 (8) + struct Capped2 { + unsigned long long cc2_ptr; // see capped.cpp + unsigned fileNumber; + } capped2; + char reserved[60]; + /*-------- end data 496 bytes */ + + explicit NamespaceDetails( const DiskLoc &loc, bool _capped ); + + class Extra { long long _next; - public: + public: IndexDetails details[NIndexesExtra]; - private: + private: unsigned reserved2; unsigned reserved3; - Extra(const Extra&) { assert(false); } - Extra& operator=(const Extra& r) { assert(false); return *this; } + Extra(const Extra&) { assert(false); } + Extra& operator=(const Extra& r) { assert(false); return *this; } public: Extra() { } - long ofsFrom(NamespaceDetails *d) { + long ofsFrom(NamespaceDetails *d) { return ((char *) this) - ((char *) d); } void init() { memset(this, 0, sizeof(Extra)); } - Extra* next(NamespaceDetails *d) { + Extra* next(NamespaceDetails *d) { if( _next == 0 ) return 0; return (Extra*) (((char *) d) + _next); } - void setNext(long ofs) { _next = ofs; } - void copy(NamespaceDetails *d, const Extra& e) { + void setNext(long ofs) { *getDur().writing(&_next) = ofs; } + void copy(NamespaceDetails *d, const Extra& e) { memcpy(this, &e, sizeof(Extra)); _next = 0; } - }; // Extra - - Extra* extra() { + }; + Extra* extra() { if( extraOffset == 0 ) return 0; return (Extra *) (((char *) this) + extraOffset); } - - public: /* add extra space for indexes when more than 10 */ Extra* allocExtra(const char *ns, int nindexessofar); - void copyingFrom(const char *thisns, NamespaceDetails *src); // must be called when renaming a NS to fix up extra - enum { NIndexesMax = 64 }; - - BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 ); - BOOST_STATIC_ASSERT( NIndexesMax <= 64 ); // multiKey bits - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::ExtraOld) == 496 ); - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) == 496 ); - /* called when loaded from disk */ void onLoad(const Namespace& k); - NamespaceDetails( const DiskLoc &loc, bool _capped ); - - DiskLoc firstExtent; - DiskLoc lastExtent; - - /* NOTE: capped collections override the meaning of deleted list. - deletedList[0] points to a list of free records (DeletedRecord's) for all extents in - the capped namespace. - deletedList[1] points to the last record in the prev extent. When the "current extent" - changes, this value is updated. !deletedList[1].isValid() when this value is not - yet computed. - */ - DiskLoc deletedList[Buckets]; + /* dump info on this namespace. for debugging. */ + void dump(const Namespace& k); + /* dump info on all extents for this namespace. for debugging. 
*/ void dumpExtents(); - long long datasize; - long long nrecords; - int lastExtentSize; - int nIndexes; - - private: - IndexDetails _indexes[NIndexesBase]; - private: Extent *theCapExtent() const { return capExtent.ext(); } void advanceCapExtent( const char *ns ); @@ -263,6 +215,7 @@ namespace mongo { DiskLoc cappedAlloc(const char *ns, int len); DiskLoc &cappedFirstDeletedInCurExtent(); bool nextIsInCapExtent( const DiskLoc &dl ) const; + public: DiskLoc& cappedListOfAllDeletedRecords() { return deletedList[0]; } DiskLoc& cappedLastDelRecLastExtent() { return deletedList[1]; } @@ -270,122 +223,79 @@ namespace mongo { bool capLooped() const { return capped && capFirstNewRecord.isValid(); } bool inCapExtent( const DiskLoc &dl ) const; void cappedCheckMigrate(); - void cappedTruncateAfter(const char *ns, DiskLoc after, bool inclusive); /** remove rest of the capped collection from this point onward */ + /** + * Truncate documents newer than the document at 'end' from the capped + * collection. The collection cannot be completely emptied using this + * function. An assertion will be thrown if that is attempted. + * @param inclusive - Truncate 'end' as well iff true + */ + void cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive); + /** Remove all documents from the capped collection */ void emptyCappedCollection(const char *ns); - - int capped; - - int max; // max # of objects for a capped table. TODO: should this be 64 bit? - double paddingFactor; // 1.0 = no padding. - int flags; - - DiskLoc capExtent; - DiskLoc capFirstNewRecord; - - /* NamespaceDetails version. So we can do backward compatibility in the future. - See filever.h - */ - unsigned short dataFileVersion; - unsigned short indexFileVersion; - unsigned long long multiKeyIndexBits; - private: - unsigned long long reservedA; - long long extraOffset; // where the $extra info is located (bytes relative to this) - public: - int backgroundIndexBuildInProgress; // 1 if in prog - char reserved[76]; - - /* when a background index build is in progress, we don't count the index in nIndexes until + /* when a background index build is in progress, we don't count the index in nIndexes until complete, yet need to still use it in _indexRecord() - thus we use this function for that. */ - int nIndexesBeingBuilt() const { return nIndexes + backgroundIndexBuildInProgress; } + int nIndexesBeingBuilt() const { return nIndexes + indexBuildInProgress; } - /* NOTE: be careful with flags. are we manipulating them in read locks? if so, + /* NOTE: be careful with flags. are we manipulating them in read locks? if so, this isn't thread safe. TODO */ enum NamespaceFlags { Flag_HaveIdIndex = 1 << 0 // set when we have _id index (ONLY if ensureIdIndex was called -- 0 if that has never been called) }; - IndexDetails& idx(int idxNo, bool missingExpected = false ) { - if( idxNo < NIndexesBase ) - return _indexes[idxNo]; - Extra *e = extra(); - if ( ! e ){ - if ( missingExpected ) - throw MsgAssertionException( 13283 , "Missing Extra" ); - massert(13282, "missing Extra", e); - } - int i = idxNo - NIndexesBase; - if( i >= NIndexesExtra ) { - e = e->next(this); - if ( ! 
e ){ - if ( missingExpected ) - throw MsgAssertionException( 13283 , "missing extra" ); - massert(13283, "missing Extra", e); - } - i -= NIndexesExtra; - } - return e->details[i]; - } - IndexDetails& backgroundIdx() { - DEV assert(backgroundIndexBuildInProgress); + IndexDetails& idx(int idxNo, bool missingExpected = false ); + + /** get the IndexDetails for the index currently being built in the background. (there is at most one) */ + IndexDetails& inProgIdx() { + DEV assert(indexBuildInProgress); return idx(nIndexes); } - class IndexIterator { - friend class NamespaceDetails; - int i; - int n; - NamespaceDetails *d; - IndexIterator(NamespaceDetails *_d) { - d = _d; - i = 0; - n = d->nIndexes; - } + class IndexIterator { public: int pos() { return i; } // note this is the next one to come bool more() { return i < n; } IndexDetails& next() { return d->idx(i++); } - }; // IndexIterator + private: + friend class NamespaceDetails; + int i, n; + NamespaceDetails *d; + IndexIterator(NamespaceDetails *_d); + }; IndexIterator ii() { return IndexIterator(this); } - /* hackish - find our index # in the indexes array - */ - int idxNo(IndexDetails& idx) { - IndexIterator i = ii(); - while( i.more() ) { - if( &i.next() == &idx ) - return i.pos()-1; - } - massert( 10349 , "E12000 idxNo fails", false); - return -1; - } + /* hackish - find our index # in the indexes array */ + int idxNo(IndexDetails& idx); /* multikey indexes are indexes where there are more than one key in the index for a single document. see multikey in wiki. for these, we have to do some dedup work on queries. */ - bool isMultikey(int i) { - return (multiKeyIndexBits & (((unsigned long long) 1) << i)) != 0; - } - void setIndexIsMultikey(int i) { + bool isMultikey(int i) const { return (multiKeyIndexBits & (((unsigned long long) 1) << i)) != 0; } + void setIndexIsMultikey(int i) { dassert( i < NIndexesMax ); - multiKeyIndexBits |= (((unsigned long long) 1) << i); + unsigned long long x = ((unsigned long long) 1) << i; + if( multiKeyIndexBits & x ) return; + *getDur().writing(&multiKeyIndexBits) |= x; } - void clearIndexIsMultikey(int i) { + void clearIndexIsMultikey(int i) { dassert( i < NIndexesMax ); - multiKeyIndexBits &= ~(((unsigned long long) 1) << i); + unsigned long long x = ((unsigned long long) 1) << i; + if( (multiKeyIndexBits & x) == 0 ) return; + *getDur().writing(&multiKeyIndexBits) &= ~x; } /* add a new index. does not add to system.indexes etc. - just to NamespaceDetails. - caller must populate returned object. + caller must populate returned object. */ IndexDetails& addIndex(const char *thisns, bool resetTransient=true); - void aboutToDeleteAnIndex() { flags &= ~Flag_HaveIdIndex; } + void aboutToDeleteAnIndex() { + *getDur().writing(&flags) = flags & ~Flag_HaveIdIndex; + } /* returns index of the first index in which the field is present. -1 if not present. 
*/ int fieldIsIndexed(const char *fieldName); @@ -393,49 +303,35 @@ namespace mongo { void paddingFits() { double x = paddingFactor - 0.01; if ( x >= 1.0 ) - paddingFactor = x; + getDur().setNoJournal(&paddingFactor, &x, sizeof(x)); } void paddingTooSmall() { double x = paddingFactor + 0.6; if ( x <= 2.0 ) - paddingFactor = x; + getDur().setNoJournal(&paddingFactor, &x, sizeof(x)); } - //returns offset in indexes[] - int findIndexByName(const char *name) { - IndexIterator i = ii(); - while( i.more() ) { - if ( strcmp(i.next().info.obj().getStringField("name"),name) == 0 ) - return i.pos()-1; - } - return -1; - } + // @return offset in indexes[] + int findIndexByName(const char *name); + + // @return offset in indexes[] + int findIndexByKeyPattern(const BSONObj& keyPattern); - //returns offset in indexes[] - int findIndexByKeyPattern(const BSONObj& keyPattern) { - IndexIterator i = ii(); - while( i.more() ) { - if( i.next().keyPattern() == keyPattern ) - return i.pos()-1; - } - return -1; - } - void findIndexByType( const string& name , vector& matches ) { IndexIterator i = ii(); - while ( i.more() ){ + while ( i.more() ) { if ( i.next().getSpec().getTypeName() == name ) matches.push_back( i.pos() - 1 ); } } - /* @return -1 = not found + /* @return -1 = not found generally id is first index, so not that expensive an operation (assuming present). */ int findIdIndex() { IndexIterator i = ii(); while( i.more() ) { - if( i.next().isIdIndex() ) + if( i.next().isIdIndex() ) return i.pos()-1; } return -1; @@ -451,25 +347,46 @@ namespace mongo { /* allocate a new record. lenToAlloc includes headers. */ DiskLoc alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc); - /* add a given record to the deleted chains for this NS */ void addDeletedRec(DeletedRecord *d, DiskLoc dloc); - void dumpDeleted(set *extents = 0); - // Start from firstExtent by default. DiskLoc firstRecord( const DiskLoc &startExtent = DiskLoc() ) const; - // Start from lastExtent by default. 
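paddingFits() and paddingTooSmall() above implement the adaptive record padding: every update that fits in place nudges paddingFactor down by 0.01, every update that forces a document move bumps it up by 0.6, and the value stays clamped to [1.0, 2.0]. In the patch the store itself goes through getDur().setNoJournal(); the sketch below models just the arithmetic with plain assignments.

    #include <cstdio>

    struct PaddingModel {
        double paddingFactor = 1.0;        // 1.0 = no padding

        void paddingFits()     { double x = paddingFactor - 0.01; if (x >= 1.0) paddingFactor = x; }
        void paddingTooSmall() { double x = paddingFactor + 0.6;  if (x <= 2.0) paddingFactor = x; }
    };

    int main() {
        PaddingModel m;
        m.paddingTooSmall();               // a document had to be moved: pad future inserts more
        for (int i = 0; i < 20; ++i)
            m.paddingFits();               // a run of in-place updates slowly relaxes the padding
        printf("paddingFactor = %.2f\n", m.paddingFactor);   // 1.60 - 20 * 0.01 = 1.40
        return 0;
    }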
DiskLoc lastRecord( const DiskLoc &startExtent = DiskLoc() ) const; + long long storageSize( int * numExtents = 0 , BSONArrayBuilder * extentInfo = 0 ) const; + + int averageObjectSize() { + if ( stats.nrecords == 0 ) + return 5; + return (int) (stats.datasize / stats.nrecords); + } + + NamespaceDetails *writingWithoutExtra() { + return ( NamespaceDetails* ) getDur().writingPtr( this, sizeof( NamespaceDetails ) ); + } + /** Make all linked Extra objects writeable as well */ + NamespaceDetails *writingWithExtra(); - long long storageSize( int * numExtents = 0 ); - private: DiskLoc _alloc(const char *ns, int len); void maybeComplain( const char *ns, int len ) const; DiskLoc __stdAlloc(int len); void compact(); // combine adjacent deleted records + friend class NamespaceIndex; + struct ExtraOld { + // note we could use this field for more chaining later, so don't waste it: + unsigned long long reserved1; + IndexDetails details[NIndexesExtra]; + unsigned reserved2; + unsigned reserved3; + }; + /** Update cappedLastDelRecLastExtent() after capExtent changed in cappedTruncateAfter() */ + void cappedTruncateLastDelUpdate(); + BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 ); + BOOST_STATIC_ASSERT( NIndexesMax <= 64 ); // multiKey bits + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::ExtraOld) == 496 ); + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) == 496 ); }; // NamespaceDetails #pragma pack() @@ -486,7 +403,7 @@ namespace mongo { todo: cleanup code, need abstractions and separation */ class NamespaceDetailsTransient : boost::noncopyable { - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails) == 496 ); + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails) == 496 ); /* general ------------------------------------------------------------- */ private: @@ -494,18 +411,18 @@ namespace mongo { void reset(); static std::map< string, shared_ptr< NamespaceDetailsTransient > > _map; public: - NamespaceDetailsTransient(const char *ns) : _ns(ns), _keysComputed(false), _qcWriteCount(){ } + NamespaceDetailsTransient(const char *ns) : _ns(ns), _keysComputed(false), _qcWriteCount() { } /* _get() is not threadsafe -- see get_inlock() comments */ static NamespaceDetailsTransient& _get(const char *ns); /* use get_w() when doing write operations */ - static NamespaceDetailsTransient& get_w(const char *ns) { + static NamespaceDetailsTransient& get_w(const char *ns) { DEV assertInWriteLock(); return _get(ns); } void addedIndex() { reset(); } void deletedIndex() { reset(); } /* Drop cached information on all namespaces beginning with the specified prefix. - Can be useful as index namespaces share the same start as the regular collection. + Can be useful as index namespaces share the same start as the regular collection. SLOW - sequential scan of all NamespaceDetailsTransient objects */ static void clearForPrefix(const char *prefix); @@ -531,11 +448,11 @@ namespace mongo { map _indexSpecs; static mongo::mutex _isMutex; public: - const IndexSpec& getIndexSpec( const IndexDetails * details ){ + const IndexSpec& getIndexSpec( const IndexDetails * details ) { IndexSpec& spec = _indexSpecs[details]; - if ( ! spec._finishedInit ){ + if ( ! spec._finishedInit ) { scoped_lock lk(_isMutex); - if ( ! spec._finishedInit ){ + if ( ! 
spec._finishedInit ) { spec.reset( details ); assert( spec._finishedInit ); } @@ -591,7 +508,7 @@ namespace mongo { public: NamespaceIndex(const string &dir, const string &database) : - ht( 0 ), dir_( dir ), database_( database ) {} + ht( 0 ), dir_( dir ), database_( database ) {} /* returns true if new db will be created if we init lazily */ bool exists() const; @@ -600,13 +517,13 @@ namespace mongo { void add_ns(const char *ns, DiskLoc& loc, bool capped) { NamespaceDetails details( loc, capped ); - add_ns( ns, details ); + add_ns( ns, details ); } - void add_ns( const char *ns, const NamespaceDetails &details ) { + void add_ns( const char *ns, const NamespaceDetails &details ) { init(); Namespace n(ns); uassert( 10081 , "too many namespaces/collections", ht->put(n, details)); - } + } /* just for diagnostics */ /*size_t detailsOffset(NamespaceDetails *d) { @@ -625,20 +542,7 @@ namespace mongo { return d; } - void kill_ns(const char *ns) { - if ( !ht ) - return; - Namespace n(ns); - ht->kill(n); - - for( int i = 0; i<=1; i++ ) { - try { - Namespace extra(n.extraName(i).c_str()); - ht->kill(extra); - } - catch(DBException&) { } - } - } + void kill_ns(const char *ns); bool find(const char *ns, DiskLoc& loc) { NamespaceDetails *l = details(ns); @@ -658,12 +562,12 @@ namespace mongo { NamespaceDetails::Extra* newExtra(const char *ns, int n, NamespaceDetails *d); boost::filesystem::path path() const; - private: + private: void maybeMkdir() const; - - MMF f; - HashTable *ht; + + MongoMMF f; + HashTable *ht; string dir_; string database_; }; @@ -675,4 +579,31 @@ namespace mongo { // (Arguments should include db name) void renameNamespace( const char *from, const char *to ); + // "database.a.b.c" -> "database" + inline void nsToDatabase(const char *ns, char *database) { + const char *p = ns; + char *q = database; + while ( *p != '.' ) { + if ( *p == 0 ) + break; + *q++ = *p++; + } + *q = 0; + if (q-database>=MaxDatabaseNameLen) { + log() << "nsToDatabase: ns too long. terminating, buf overrun condition" << endl; + dbexit( EXIT_POSSIBLE_CORRUPTION ); + } + } + inline string nsToDatabase(const char *ns) { + char buf[MaxDatabaseNameLen]; + nsToDatabase(ns, buf); + return buf; + } + inline string nsToDatabase(const string& ns) { + size_t i = ns.find( '.' 
); + if ( i == string::npos ) + return ns; + return ns.substr( 0 , i ); + } + } // namespace mongo diff --git a/db/nonce.cpp b/db/nonce.cpp index 519cfaa..6f35c79 100644 --- a/db/nonce.cpp +++ b/db/nonce.cpp @@ -17,22 +17,25 @@ #include "pch.h" #include "nonce.h" +#include "../util/time_support.h" extern int do_md5_test(void); namespace mongo { - - Security::Security() { - static int n; - massert( 10352 , "Security is a singleton class", ++n == 1); - init(); - } - void Security::init(){ - if( _initialized ) return; - _initialized = true; + BOOST_STATIC_ASSERT( sizeof(nonce) == 8 ); -#if defined(__linux__) || defined(__sunos__) + Security::Security() { + static int n; + massert( 10352 , "Security is a singleton class", ++n == 1); + init(); + } + + void Security::init() { + if( _initialized ) return; + _initialized = true; + +#if defined(__linux__) || defined(__sunos__) || defined(__APPLE__) _devrandom = new ifstream("/dev/urandom", ios::binary|ios::in); massert( 10353 , "can't open dev/urandom", _devrandom->is_open() ); #elif defined(_WIN32) @@ -40,36 +43,41 @@ namespace mongo { #else srandomdev(); #endif - assert( sizeof(nonce) == 8 ); - + #ifndef NDEBUG if ( do_md5_test() ) - massert( 10354 , "md5 unit test fails", false); + massert( 10354 , "md5 unit test fails", false); #endif } - - nonce Security::getNonce(){ + + nonce Security::getNonce() { static mongo::mutex m("getNonce"); scoped_lock lk(m); + + if ( ! _initialized ) + init(); - /* question/todo: /dev/random works on OS X. is it better - to use that than random() / srandom()? - */ + /* question/todo: /dev/random works on OS X. is it better + to use that than random() / srandom()? + */ nonce n; -#if defined(__linux__) || defined(__sunos__) +#if defined(__linux__) || defined(__sunos__) || defined(__APPLE__) _devrandom->read((char*)&n, sizeof(n)); massert( 10355 , "devrandom failed", !_devrandom->fail()); #elif defined(_WIN32) - n = (((unsigned long long)rand())<<32) | rand(); + unsigned a=0, b=0; + assert( rand_s(&a) == 0 ); + assert( rand_s(&b) == 0 ); + n = (((unsigned long long)a)<<32) | b; #else n = (((unsigned long long)random())<<32) | random(); #endif return n; } unsigned getRandomNumber() { return (unsigned) security.getNonce(); } - - bool Security::_initialized; + + bool Security::_initialized; Security security; - + } // namespace mongo diff --git a/db/nonce.h b/db/nonce.h index 593931f..21592ab 100644 --- a/db/nonce.h +++ b/db/nonce.h @@ -20,23 +20,23 @@ namespace mongo { typedef unsigned long long nonce; - + struct Security { Security(); nonce getNonce(); - /** safe during global var initialization */ - nonce getNonceInitSafe() { - init(); - return getNonce(); - } - private: + /** safe during global var initialization */ + nonce getNonceInitSafe() { + init(); + return getNonce(); + } + private: ifstream *_devrandom; - static bool _initialized; - void init(); // can call more than once + static bool _initialized; + void init(); // can call more than once }; - + extern Security security; - + } // namespace mongo diff --git a/db/oplog.cpp b/db/oplog.cpp index 93800c7..1557cbd 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -22,18 +22,19 @@ #include "repl.h" #include "commands.h" #include "repl/rs.h" +#include "stats/counters.h" namespace mongo { void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ); - int __findingStartInitialTimeout = 5; // configurable for testing + int __findingStartInitialTimeout = 5; // configurable for testing // cached copies of these...so don't rename 
them, drop them, etc.!!! static NamespaceDetails *localOplogMainDetails = 0; static Database *localDB = 0; static NamespaceDetails *rsOplogDetails = 0; - void oplogCheckCloseDatabase( Database * db ){ + void oplogCheckCloseDatabase( Database * db ) { localDB = 0; localOplogMainDetails = 0; rsOplogDetails = 0; @@ -44,10 +45,10 @@ namespace mongo { uassert(13288, "replSet error write op to db before replSet initialized", str::startsWith(ns, "local.") || *opstr == 'n'); } - /** write an op to the oplog that is already built. + /** write an op to the oplog that is already built. todo : make _logOpRS() call this so we don't repeat ourself? */ - void _logOpObjRS(const BSONObj& op) { + void _logOpObjRS(const BSONObj& op) { DEV assertInWriteLock(); const OpTime ts = op["ts"]._opTime(); @@ -62,11 +63,11 @@ namespace mongo { rsOplogDetails = nsdetails(logns); massert(13389, "local.oplog.rs missing. did you drop it? if so restart server", rsOplogDetails); } - Client::Context ctx( "" , localDB, false ); + Client::Context ctx( logns , localDB, false ); { int len = op.objsize(); Record *r = theDataFileMgr.fast_oplog_insert(rsOplogDetails, logns, len); - memcpy(r->data, op.objdata(), len); + memcpy(getDur().writingPtr(r->data, len), op.objdata(), len); } /* todo: now() has code to handle clock skew. but if the skew server to server is large it will get unhappy. this code (or code in now() maybe) should be improved. @@ -82,11 +83,42 @@ namespace mongo { } } + /** given a BSON object, create a new one at dst which is the existing (partial) object + with a new object element appended at the end with fieldname "o". + + @param partial already build object with everything except the o member. e.g. something like: + { ts:..., ns:..., os2:... } + @param o a bson object to be added with fieldname "o" + @dst where to put the newly built combined object. e.g. ends up as something like: + { ts:..., ns:..., os2:..., o:... } + */ + void append_O_Obj(char *dst, const BSONObj& partial, const BSONObj& o) { + const int size1 = partial.objsize() - 1; // less the EOO char + const int oOfs = size1+3; // 3 = byte BSONOBJTYPE + byte 'o' + byte \0 + + void *p = getDur().writingPtr(dst, oOfs+o.objsize()+1); + + memcpy(p, partial.objdata(), size1); + + // adjust overall bson object size for the o: field + *(static_cast(p)) += o.objsize() + 1/*fieldtype byte*/ + 2/*"o" fieldname*/; + + char *b = static_cast(p); + b += size1; + *b++ = (char) Object; + *b++ = 'o'; // { o : ... 
} + *b++ = 0; // null terminate "o" fieldname + memcpy(b, o.objdata(), o.objsize()); + b += o.objsize(); + *b = EOO; + } + static void _logOpRS(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { DEV assertInWriteLock(); + // ^- static is safe as we are in write lock static BufBuilder bufbuilder(8*1024); - - if ( strncmp(ns, "local.", 6) == 0 ){ + + if ( strncmp(ns, "local.", 6) == 0 ) { if ( strncmp(ns, "local.slaves", 12) == 0 ) resetSlaveCache(); return; @@ -94,15 +126,15 @@ namespace mongo { const OpTime ts = OpTime::now(); - long long hNew; - if( theReplSet ) { + long long hashNew; + if( theReplSet ) { massert(13312, "replSet error : logOp() but not primary?", theReplSet->box.getState().primary()); - hNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId(); + hashNew = (theReplSet->lastH * 131 + ts.asLL()) * 17 + theReplSet->selfId(); } else { // must be initiation assert( *ns == 0 ); - hNew = 0; + hashNew = 0; } /* we jump through a bunch of hoops here to avoid copying the obj buffer twice -- @@ -113,7 +145,7 @@ namespace mongo { BSONObjBuilder b(bufbuilder); b.appendTimestamp("ts", ts.asDate()); - b.append("h", hNew); + b.append("h", hashNew); b.append("op", opstr); b.append("ns", ns); @@ -136,7 +168,7 @@ namespace mongo { rsOplogDetails = nsdetails(logns); massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", rsOplogDetails); } - Client::Context ctx( "" , localDB, false ); + Client::Context ctx( logns , localDB, false ); r = theDataFileMgr.fast_oplog_insert(rsOplogDetails, logns, len); /* todo: now() has code to handle clock skew. but if the skew server to server is large it will get unhappy. this code (or code in now() maybe) should be improved. @@ -147,22 +179,13 @@ namespace mongo { log() << "replSet " << theReplSet->isPrimary() << rsLog; } theReplSet->lastOpTimeWritten = ts; - theReplSet->lastH = hNew; + theReplSet->lastH = hashNew; ctx.getClient()->setLastOp( ts.asDate() ); } } - char *p = r->data; - memcpy(p, partial.objdata(), posz); - *((unsigned *)p) += obj.objsize() + 1 + 2; - p += posz - 1; - *p++ = (char) Object; - *p++ = 'o'; - *p++ = 0; - memcpy(p, obj.objdata(), obj.objsize()); - p += obj.objsize(); - *p = EOO; - + append_O_Obj(r->data, partial, obj); + if ( logLevel >= 6 ) { BSONObj temp(r); log( 6 ) << "logOp:" << temp << endl; @@ -192,9 +215,9 @@ namespace mongo { static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { DEV assertInWriteLock(); static BufBuilder bufbuilder(8*1024); - - if ( strncmp(ns, "local.", 6) == 0 ){ - if ( strncmp(ns, "local.slaves", 12) == 0 ){ + + if ( strncmp(ns, "local.", 6) == 0 ) { + if ( strncmp(ns, "local.slaves", 12) == 0 ) { resetSlaveCache(); } return; @@ -202,7 +225,7 @@ namespace mongo { const OpTime ts = OpTime::now(); Client::Context context; - + /* we jump through a bunch of hoops here to avoid copying the obj buffer twice -- instead we do a single copy to the destination position in the memory mapped file. */ @@ -216,9 +239,10 @@ namespace mongo { b.appendBool("b", *bb); if ( o2 ) b.append("o2", *o2); - BSONObj partial = b.done(); - int posz = partial.objsize(); - int len = posz + obj.objsize() + 1 + 2 /*o:*/; + BSONObj partial = b.done(); // partial is everything except the o:... part. 
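
For illustration, a minimal standalone sketch of the byte-level trick append_O_Obj() performs: copy the partial document minus its trailing EOO byte, emit the type byte, the "o" field name, and the sub-document, close with a new EOO, and patch the int32 length prefix. The std::vector buffer and the function name here are illustrative only; the real code writes straight into the memory-mapped record via getDur().writingPtr().

#include <cstring>
#include <vector>

// Sketch: append a field  o:<subdocument>  to an already-serialized BSON
// document, using the same layout as append_O_Obj(): keep everything except
// the old EOO byte, write type byte + "o\0" + sub-document bytes, add a new
// EOO, then patch the int32 length prefix (little-endian host assumed, as BSON is).
std::vector<char> appendObjectField(const std::vector<char>& partial,
                                    const std::vector<char>& sub) {
    const int size1 = (int)partial.size() - 1;            // partial minus its trailing EOO
    std::vector<char> out;
    out.reserve(size1 + 3 + sub.size() + 1);
    out.insert(out.end(), partial.begin(), partial.begin() + size1);
    out.push_back((char)0x03);                            // BSON type code: embedded document
    out.push_back('o');                                   // field name "o" ...
    out.push_back('\0');                                  // ... null terminated
    out.insert(out.end(), sub.begin(), sub.end());        // the sub-document bytes
    out.push_back((char)0x00);                            // new EOO for the outer document
    int newLen = (int)out.size();                         // = partial size + sub size + 3
    std::memcpy(&out[0], &newLen, sizeof(newLen));        // patch the length prefix
    return out;
}

The result is partial.objsize() + o.objsize() + 3 bytes long, matching the "+ 1 fieldtype byte + 2 fieldname" length adjustment in the code above.
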
+ + int po_sz = partial.objsize(); + int len = po_sz + obj.objsize() + 1 + 2 /*o:*/; Record *r; if( logNS == 0 ) { @@ -230,25 +254,18 @@ namespace mongo { localOplogMainDetails = nsdetails(logNS); assert( localOplogMainDetails ); } - Client::Context ctx( "" , localDB, false ); + Client::Context ctx( logNS , localDB, false ); r = theDataFileMgr.fast_oplog_insert(localOplogMainDetails, logNS, len); - } else { + } + else { Client::Context ctx( logNS, dbpath, 0, false ); assert( nsdetails( logNS ) ); + // first we allocate the space, then we fill it below. r = theDataFileMgr.fast_oplog_insert( nsdetails( logNS ), logNS, len); } - char *p = r->data; - memcpy(p, partial.objdata(), posz); - *((unsigned *)p) += obj.objsize() + 1 + 2; - p += posz - 1; - *p++ = (char) Object; - *p++ = 'o'; - *p++ = 0; - memcpy(p, obj.objdata(), obj.objsize()); - p += obj.objsize(); - *p = EOO; - + append_O_Obj(r->data, partial, obj); + context.getClient()->setLastOp( ts.asDate() ); if ( logLevel >= 6 ) { @@ -259,17 +276,17 @@ namespace mongo { } static void (*_logOp)(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) = _logOpOld; - void newReplUp() { + void newReplUp() { replSettings.master = true; - _logOp = _logOpRS; + _logOp = _logOpRS; } - void newRepl() { + void newRepl() { replSettings.master = true; - _logOp = _logOpUninitialized; + _logOp = _logOpUninitialized; } void oldRepl() { _logOp = _logOpOld; } - void logKeepalive() { + void logKeepalive() { _logOp("n", "", 0, BSONObj(), 0, 0); } void logOpComment(const BSONObj& obj) { @@ -289,13 +306,10 @@ namespace mongo { void logOp(const char *opstr, const char *ns, const BSONObj& obj, BSONObj *patt, bool *b) { if ( replSettings.master ) { _logOp(opstr, ns, 0, obj, patt, b); - // why? : - //char cl[ 256 ]; - //nsToDatabase( ns, cl ); } - + logOpForSharding( opstr , ns , obj , patt ); - } + } void createOplog() { dblock lk; @@ -307,15 +321,15 @@ namespace mongo { ns = rsoplog; Client::Context ctx(ns); - + NamespaceDetails * nsd = nsdetails( ns ); if ( nsd ) { - - if ( cmdLine.oplogSize != 0 ){ + + if ( cmdLine.oplogSize != 0 ) { int o = (int)(nsd->storageSize() / ( 1024 * 1024 ) ); int n = (int)(cmdLine.oplogSize / ( 1024 * 1024 ) ); - if ( n != o ){ + if ( n != o ) { stringstream ss; ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog"; log() << ss.str() << endl; @@ -332,19 +346,19 @@ namespace mongo { } return; } - + /* create an oplog collection, if it doesn't yet exist. */ BSONObjBuilder b; double sz; if ( cmdLine.oplogSize != 0 ) sz = (double)cmdLine.oplogSize; else { - /* not specified. pick a default size */ + /* not specified. pick a default size */ sz = 50.0 * 1000 * 1000; if ( sizeof(int *) >= 8 ) { #if defined(__APPLE__) - // typically these are desktops (dev machines), so keep it smallish - sz = (256-64) * 1000 * 1000; + // typically these are desktops (dev machines), so keep it smallish + sz = (256-64) * 1000 * 1000; #else sz = 990.0 * 1000 * 1000; boost::intmax_t free = freeSpace(); //-1 if call not supported. @@ -356,7 +370,7 @@ namespace mongo { } log() << "******" << endl; - log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB... (use --oplogSize to change)" << endl; + log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." 
<< endl; b.append("size", sz); b.appendBool("capped", 1); @@ -366,7 +380,7 @@ namespace mongo { BSONObj o = b.done(); userCreateNS(ns, o, err, false); if( !rs ) - logOp( "n", "dummy", BSONObj() ); + logOp( "n", "", BSONObj() ); /* sync here so we don't get any surprising lag later when we try to sync */ MemoryMappedFile::flushAll(true); @@ -394,8 +408,8 @@ namespace mongo { void pretouchN(vector& v, unsigned a, unsigned b) { DEV assert( !dbMutex.isWriteLocked() ); - Client *c = &cc(); - if( c == 0 ) { + Client *c = currentClient.get(); + if( c == 0 ) { Client::initThread("pretouchN"); c = &cc(); } @@ -413,7 +427,7 @@ namespace mongo { continue; /* todo : other operations */ - try { + try { BSONObj o = op.getObjectField(which); BSONElement _id; if( o.getObjectID(_id) ) { @@ -426,7 +440,7 @@ namespace mongo { _dummy_z += result.objsize(); // touch } } - catch( DBException& e ) { + catch( DBException& e ) { log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' ' << e.toString() << endl; } } @@ -447,7 +461,7 @@ namespace mongo { return; /* todo : other operations */ - try { + try { BSONObj o = op.getObjectField(which); BSONElement _id; if( o.getObjectID(_id) ) { @@ -461,15 +475,17 @@ namespace mongo { _dummy_z += result.objsize(); // touch } } - catch( DBException& ) { + catch( DBException& ) { log() << "ignoring assertion in pretouchOperation()" << endl; } } - void applyOperation_inlock(const BSONObj& op){ - if( logLevel >= 6 ) + void applyOperation_inlock(const BSONObj& op , bool fromRepl ) { + OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters; + + if( logLevel >= 6 ) log() << "applying op: " << op << endl; - + assertInWriteLock(); OpDebug debug; @@ -479,6 +495,8 @@ namespace mongo { const char *opType = op.getStringField("op"); if ( *opType == 'i' ) { + opCounters->gotInsert(); + const char *p = strchr(ns, '.'); if ( p && strcmp(p, ".system.indexes") == 0 ) { // updates aren't allowed for indexes -- so we will do a regular insert. if index already @@ -499,11 +517,11 @@ namespace mongo { else { BSONObjBuilder b; b.append(_id); - + /* erh 10/16/2009 - this is probably not relevant any more since its auto-created, but not worth removing */ - RARELY ensureHaveIdIndex(ns); // otherwise updates will be slow + RARELY ensureHaveIdIndex(ns); // otherwise updates will be slow - /* todo : it may be better to do an insert here, and then catch the dup key exception and do update + /* todo : it may be better to do an insert here, and then catch the dup key exception and do update then. very few upserts will not be inserts... 
*/ updateObjects(ns, o, b.done(), true, false, false , debug ); @@ -511,10 +529,14 @@ namespace mongo { } } else if ( *opType == 'u' ) { + opCounters->gotUpdate(); + RARELY ensureHaveIdIndex(ns); // otherwise updates will be super slow updateObjects(ns, o, op.getObjectField("o2"), /*upsert*/ op.getBoolField("b"), /*multi*/ false, /*logop*/ false , debug ); } else if ( *opType == 'd' ) { + opCounters->gotDelete(); + if ( opType[1] == 0 ) deleteObjects(ns, o, op.getBoolField("b")); else @@ -523,7 +545,9 @@ namespace mongo { else if ( *opType == 'n' ) { // no op } - else if ( *opType == 'c' ){ + else if ( *opType == 'c' ) { + opCounters->gotCommand(); + BufBuilder bb; BSONObjBuilder ob; _runCommands(ns, o, bb, ob, true, 0); @@ -533,9 +557,9 @@ namespace mongo { ss << "unknown opType [" << opType << "]"; throw MsgAssertionException( 13141 , ss.str() ); } - + } - + class ApplyOpsCmd : public Command { public: virtual bool slaveOk() const { return false; } @@ -545,17 +569,18 @@ namespace mongo { help << "examples: { applyOps : [ ] , preCondition : [ { ns : ... , q : ... , res : ... } ] }"; } virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - - if ( cmdObj.firstElement().type() != Array ){ + + if ( cmdObj.firstElement().type() != Array ) { errmsg = "ops has to be an array"; return false; } - + BSONObj ops = cmdObj.firstElement().Obj(); - - { // check input + + { + // check input BSONObjIterator i( ops ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); if ( e.type() == Object ) continue; @@ -564,16 +589,16 @@ namespace mongo { return false; } } - - if ( cmdObj["preCondition"].type() == Array ){ + + if ( cmdObj["preCondition"].type() == Array ) { BSONObjIterator i( cmdObj["preCondition"].Obj() ); - while ( i.more() ){ + while ( i.more() ) { BSONObj f = i.next().Obj(); - + BSONObj realres = db.findOne( f["ns"].String() , f["q"].Obj() ); - + Matcher m( f["res"].Obj() ); - if ( ! m.matches( realres ) ){ + if ( ! m.matches( realres ) ) { result.append( "got" , realres ); result.append( "whatFailed" , f ); errmsg = "pre-condition failed"; @@ -581,23 +606,32 @@ namespace mongo { } } } - + // apply int num = 0; BSONObjIterator i( ops ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); - applyOperation_inlock( e.Obj() ); + applyOperation_inlock( e.Obj() , false ); num++; } result.append( "applied" , num ); + if ( ! fromRepl ) { + // We want this applied atomically on slaves + // so we re-wrap without the pre-condition for speed + + string tempNS = str::stream() << dbname << ".$cmd"; + + logOp( "c" , tempNS.c_str() , cmdObj.firstElement().wrap() ); + } + return true; } DBDirectClient db; - + } applyOpsCmd; } diff --git a/db/oplog.h b/db/oplog.h index 34c345f..d9073ab 100644 --- a/db/oplog.h +++ b/db/oplog.h @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -/* +/* local.oplog.$main is the default */ @@ -30,6 +30,7 @@ #include "queryoptimizer.h" #include "../client/dbclient.h" #include "../util/optime.h" +#include "../util/timer.h" namespace mongo { @@ -38,7 +39,7 @@ namespace mongo { void _logOpObjRS(const BSONObj& op); /** Write operation to the log (local.oplog.$main) - + @param opstr "i" insert "u" update @@ -47,89 +48,88 @@ namespace mongo { "n" no-op "db" declares presence of a database (ns is set to the db name + '.') - See _logOp() in oplog.cpp for more details. + See _logOp() in oplog.cpp for more details. 
*/ void logOp(const char *opstr, const char *ns, const BSONObj& obj, BSONObj *patt = 0, bool *b = 0); void logKeepalive(); - /** puts obj in the oplog as a comment (a no-op). Just for diags. - convention is + /** puts obj in the oplog as a comment (a no-op). Just for diags. + convention is { msg : "text", ... } */ void logOpComment(const BSONObj& obj); void oplogCheckCloseDatabase( Database * db ); - - extern int __findingStartInitialTimeout; // configurable for testing + + extern int __findingStartInitialTimeout; // configurable for testing class FindingStartCursor { public: - FindingStartCursor( const QueryPlan & qp ) : - _qp( qp ), - _findingStart( true ), - _findingStartMode(), - _findingStartTimer( 0 ), - _findingStartCursor( 0 ) + FindingStartCursor( const QueryPlan & qp ) : + _qp( qp ), + _findingStart( true ), + _findingStartMode(), + _findingStartTimer( 0 ) { init(); } bool done() const { return !_findingStart; } shared_ptr cRelease() { return _c; } void next() { - if ( !_findingStartCursor || !_findingStartCursor->c->ok() ) { + if ( !_findingStartCursor || !_findingStartCursor->ok() ) { _findingStart = false; _c = _qp.newCursor(); // on error, start from beginning destroyClientCursor(); return; } switch( _findingStartMode ) { - case Initial: { - if ( !_matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { - _findingStart = false; // found first record out of query range, so scan normally - _c = _qp.newCursor( _findingStartCursor->c->currLoc() ); - destroyClientCursor(); - return; - } - _findingStartCursor->c->advance(); - RARELY { - if ( _findingStartTimer.seconds() >= __findingStartInitialTimeout ) { - createClientCursor( startLoc( _findingStartCursor->c->currLoc() ) ); - _findingStartMode = FindExtent; - return; - } - } + case Initial: { + if ( !_matcher->matches( _findingStartCursor->currKey(), _findingStartCursor->currLoc() ) ) { + _findingStart = false; // found first record out of query range, so scan normally + _c = _qp.newCursor( _findingStartCursor->currLoc() ); + destroyClientCursor(); return; } - case FindExtent: { - if ( !_matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { - _findingStartMode = InExtent; - return; - } - DiskLoc prev = prevLoc( _findingStartCursor->c->currLoc() ); - if ( prev.isNull() ) { // hit beginning, so start scanning from here - createClientCursor(); - _findingStartMode = InExtent; + _findingStartCursor->advance(); + RARELY { + if ( _findingStartTimer.seconds() >= __findingStartInitialTimeout ) { + createClientCursor( startLoc( _findingStartCursor->currLoc() ) ); + _findingStartMode = FindExtent; return; } - // There might be a more efficient implementation than creating new cursor & client cursor each time, - // not worrying about that for now - createClientCursor( prev ); + } + return; + } + case FindExtent: { + if ( !_matcher->matches( _findingStartCursor->currKey(), _findingStartCursor->currLoc() ) ) { + _findingStartMode = InExtent; return; } - case InExtent: { - if ( _matcher->matches( _findingStartCursor->c->currKey(), _findingStartCursor->c->currLoc() ) ) { - _findingStart = false; // found first record in query range, so scan normally - _c = _qp.newCursor( _findingStartCursor->c->currLoc() ); - destroyClientCursor(); - return; - } - _findingStartCursor->c->advance(); + DiskLoc prev = prevLoc( _findingStartCursor->currLoc() ); + if ( prev.isNull() ) { // hit beginning, so start scanning from here + createClientCursor(); + _findingStartMode = InExtent; 
return; } - default: { - massert( 12600, "invalid _findingStartMode", false ); + // There might be a more efficient implementation than creating new cursor & client cursor each time, + // not worrying about that for now + createClientCursor( prev ); + return; + } + case InExtent: { + if ( _matcher->matches( _findingStartCursor->currKey(), _findingStartCursor->currLoc() ) ) { + _findingStart = false; // found first record in query range, so scan normally + _c = _qp.newCursor( _findingStartCursor->currLoc() ); + destroyClientCursor(); + return; } - } - } + _findingStartCursor->advance(); + return; + } + default: { + massert( 12600, "invalid _findingStartMode", false ); + } + } + } bool prepareToYield() { if ( _findingStartCursor ) { return _findingStartCursor->prepareToYield( _yieldData ); @@ -139,10 +139,10 @@ namespace mongo { void recoverFromYield() { if ( _findingStartCursor ) { if ( !ClientCursor::recoverFromYield( _yieldData ) ) { - _findingStartCursor = 0; + _findingStartCursor.reset( 0 ); } } - } + } private: enum FindingStartMode { Initial, FindExtent, InExtent }; const QueryPlan &_qp; @@ -150,7 +150,7 @@ namespace mongo { FindingStartMode _findingStartMode; auto_ptr< CoveredIndexMatcher > _matcher; Timer _findingStartTimer; - ClientCursor * _findingStartCursor; + ClientCursor::CleanupPointer _findingStartCursor; shared_ptr _c; ClientCursor::YieldData _yieldData; DiskLoc startLoc( const DiskLoc &rec ) { @@ -162,7 +162,7 @@ namespace mongo { // doesn't matter if we start the extent scan with capFirstNewRecord. return _qp.nsd()->capFirstNewRecord; } - + // should never have an empty extent in the oplog, so don't worry about that case DiskLoc prevLoc( const DiskLoc &rec ) { Extent *e = rec.rec()->myExtent( rec ); @@ -173,7 +173,8 @@ namespace mongo { e = e->xprev.ext(); if ( e->myLoc != _qp.nsd()->capExtent ) return e->firstRecord; - } else { + } + else { if ( !e->xprev.isNull() ) { e = e->xprev.ext(); return e->firstRecord; @@ -183,19 +184,16 @@ namespace mongo { } void createClientCursor( const DiskLoc &startLoc = DiskLoc() ) { shared_ptr c = _qp.newCursor( startLoc ); - _findingStartCursor = new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()); + _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()) ); } void destroyClientCursor() { - if ( _findingStartCursor ) { - ClientCursor::erase( _findingStartCursor->cursorid ); - _findingStartCursor = 0; - } + _findingStartCursor.reset( 0 ); } void init() { // Use a ClientCursor here so we can release db mutex while scanning // oplog (can take quite a while with large oplogs). shared_ptr c = _qp.newReverseCursor(); - _findingStartCursor = new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()); + _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) ); _findingStartTimer.reset(); _findingStartMode = Initial; BSONElement tsElt = _qp.originalQuery()[ "ts" ]; @@ -210,5 +208,10 @@ namespace mongo { void pretouchOperation(const BSONObj& op); void pretouchN(vector&, unsigned a, unsigned b); - void applyOperation_inlock(const BSONObj& op); + /** + * take an op and apply locally + * used for applying from an oplog + * @param fromRepl really from replication or for testing/internal/command/etc... 
+ */ + void applyOperation_inlock(const BSONObj& op , bool fromRepl = true ); } diff --git a/db/oplogreader.h b/db/oplogreader.h index 5c2881b..54c90d9 100644 --- a/db/oplogreader.h +++ b/db/oplogreader.h @@ -8,7 +8,7 @@ namespace mongo { - /* started abstracting out the querying of the primary/master's oplog + /* started abstracting out the querying of the primary/master's oplog still fairly awkward but a start. */ class OplogReader { @@ -16,28 +16,24 @@ namespace mongo { auto_ptr cursor; public: - OplogReader() { - DEV log() << "TEMP *** OplogReader()" << endl; + OplogReader() { } - ~OplogReader() { - DEV log() << "TEMP *** ~OplogReader()" << endl; + ~OplogReader() { } void resetCursor() { - DEV log() << "TEMP *** OplogReader::resetCursor" << endl; cursor.reset(); } void resetConnection() { - DEV log() << "TEMP *** OplogReader::resetConnection" << endl; cursor.reset(); _conn.reset(); } DBClientConnection* conn() { return _conn.get(); } - BSONObj findOne(const char *ns, const Query& q) { - return conn()->findOne(ns, q); + BSONObj findOne(const char *ns, const Query& q) { + return conn()->findOne(ns, q, 0, QueryOption_SlaveOk); } - BSONObj getLastOp(const char *ns) { + BSONObj getLastOp(const char *ns) { return findOne(ns, Query().sort(reverseNaturalObj)); } @@ -45,7 +41,7 @@ namespace mongo { bool connect(string hostname); void tailCheck() { - if( cursor.get() && cursor->isDead() ) { + if( cursor.get() && cursor->isDead() ) { log() << "repl: old cursor isDead, will initiate a new one" << endl; resetCursor(); } @@ -53,19 +49,19 @@ namespace mongo { bool haveCursor() { return cursor.get() != 0; } - void query(const char *ns, const BSONObj& query) { + void query(const char *ns, const BSONObj& query) { assert( !haveCursor() ); cursor = _conn->query(ns, query, 0, 0, 0, QueryOption_SlaveOk); } - void tailingQuery(const char *ns, const BSONObj& query) { + void tailingQuery(const char *ns, const BSONObj& query) { assert( !haveCursor() ); log(2) << "repl: " << ns << ".find(" << query.toString() << ')' << endl; - cursor = _conn->query( ns, query, 0, 0, 0, - QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | - /* TODO: slaveok maybe shouldn't use? */ - QueryOption_AwaitData - ); + cursor = _conn->query( ns, query, 0, 0, 0, + QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | + /* TODO: slaveok maybe shouldn't use? */ + QueryOption_AwaitData + ); } void tailingQueryGTE(const char *ns, OpTime t) { @@ -76,34 +72,34 @@ namespace mongo { tailingQuery(ns, query.done()); } - bool more() { + bool more() { assert( cursor.get() ); return cursor->more(); } - bool moreInCurrentBatch() { + bool moreInCurrentBatch() { assert( cursor.get() ); return cursor->moreInCurrentBatch(); } /* old mongod's can't do the await flag... */ - bool awaitCapable() { + bool awaitCapable() { return cursor->hasResultFlag(ResultFlag_AwaitCapable); } - void peek(vector& v, int n) { + void peek(vector& v, int n) { if( cursor.get() ) cursor->peek(v,n); } BSONObj nextSafe() { return cursor->nextSafe(); } - BSONObj next() { + BSONObj next() { return cursor->next(); } - void putBack(BSONObj op) { + void putBack(BSONObj op) { cursor->putBack(op); } }; - + } diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 216f21a..20a7423 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -20,7 +20,6 @@ todo: _ table scans must be sequential, not next/prev pointers _ coalesce deleted - _ disallow system* manipulations from the database. 
*/ @@ -37,21 +36,21 @@ _ disallow system* manipulations from the database. #include "query.h" #include "repl.h" #include "dbhelpers.h" -#include "namespace.h" +#include "namespace-inl.h" #include "queryutil.h" #include "extsort.h" -#include "curop.h" +#include "curop-inl.h" #include "background.h" namespace mongo { bool inDBRepair = false; struct doingRepair { - doingRepair(){ + doingRepair() { assert( ! inDBRepair ); inDBRepair = true; } - ~doingRepair(){ + ~doingRepair() { inDBRepair = false; } }; @@ -64,42 +63,42 @@ namespace mongo { return dbsInProg[db] != 0; } - bool BackgroundOperation::inProgForNs(const char *ns) { + bool BackgroundOperation::inProgForNs(const char *ns) { assertInWriteLock(); return nsInProg.count(ns) != 0; } - void BackgroundOperation::assertNoBgOpInProgForDb(const char *db) { + void BackgroundOperation::assertNoBgOpInProgForDb(const char *db) { uassert(12586, "cannot perform operation: a background operation is currently running for this database", - !inProgForDb(db)); + !inProgForDb(db)); } - void BackgroundOperation::assertNoBgOpInProgForNs(const char *ns) { + void BackgroundOperation::assertNoBgOpInProgForNs(const char *ns) { uassert(12587, "cannot perform operation: a background operation is currently running for this collection", - !inProgForNs(ns)); - } + !inProgForNs(ns)); + } - BackgroundOperation::BackgroundOperation(const char *ns) : _ns(ns) { + BackgroundOperation::BackgroundOperation(const char *ns) : _ns(ns) { assertInWriteLock(); dbsInProg[_ns.db]++; assert( nsInProg.count(_ns.ns()) == 0 ); nsInProg.insert(_ns.ns()); } - BackgroundOperation::~BackgroundOperation() { + BackgroundOperation::~BackgroundOperation() { assertInWriteLock(); dbsInProg[_ns.db]--; nsInProg.erase(_ns.ns()); } void BackgroundOperation::dump(stringstream& ss) { - if( nsInProg.size() ) { + if( nsInProg.size() ) { ss << "\nBackground Jobs in Progress\n"; for( set::iterator i = nsInProg.begin(); i != nsInProg.end(); i++ ) ss << " " << *i << '\n'; } - for( map::iterator i = dbsInProg.begin(); i != dbsInProg.end(); i++ ) { - if( i->second ) + for( map::iterator i = dbsInProg.begin(); i != dbsInProg.end(); i++ ) { + if( i->second ) ss << "database " << i->first << ": " << i->second << '\n'; } } @@ -114,24 +113,23 @@ namespace mongo { DataFileMgr theDataFileMgr; DatabaseHolder dbHolder; int MAGIC = 0x1000; -// int curOp = -2; extern int otherTraceLevel; void addNewNamespaceToCatalog(const char *ns, const BSONObj *options = 0); void ensureIdIndexForNewNs(const char *ns) { if ( ( strstr( ns, ".system." 
) == 0 || legalClientSystemNS( ns , false ) ) && - strstr( ns, ".$freelist" ) == 0 ){ + strstr( ns, ".$freelist" ) == 0 ) { log( 1 ) << "adding _id index for collection " << ns << endl; ensureHaveIdIndex( ns ); - } + } } string getDbContext() { stringstream ss; Client * c = currentClient.get(); - if ( c ){ + if ( c ) { Client::Context * cx = c->getContext(); - if ( cx ){ + if ( cx ) { Database *database = cx->db(); if ( database ) { ss << database->name << ' '; @@ -142,20 +140,44 @@ namespace mongo { return ss.str(); } - BSONObj::BSONObj(const Record *r) { - init(r->data, false); - } - /*---------------------------------------------------------------------*/ - int initialExtentSize(int len) { + // inheritable class to implement an operation that may be applied to all + // files in a database using _applyOpToDataFiles() + class FileOp { + public: + virtual ~FileOp() {} + // Return true if file exists and operation successful + virtual bool apply( const boost::filesystem::path &p ) = 0; + virtual const char * op() const = 0; + }; + + void _applyOpToDataFiles( const char *database, FileOp &fo, bool afterAllocator = false, const string& path = dbpath ); + + void _deleteDataFiles(const char *database) { + if ( directoryperdb ) { + FileAllocator::get()->waitUntilFinished(); + BOOST_CHECK_EXCEPTION( boost::filesystem::remove_all( boost::filesystem::path( dbpath ) / database ) ); + return; + } + class : public FileOp { + virtual bool apply( const boost::filesystem::path &p ) { + return boost::filesystem::remove( p ); + } + virtual const char * op() const { + return "remove"; + } + } deleter; + _applyOpToDataFiles( database, deleter, true ); + } + + int Extent::initialSize(int len) { long long sz = len * 16; if ( len < 1000 ) sz = len * 64; if ( sz > 1000000000 ) sz = 1000000000; int z = ((int)sz) & 0xffffff00; assert( z > len ); - //DEV tlog() << "initialExtentSize(" << len << ") returns " << z << endl; return z; } @@ -165,7 +187,7 @@ namespace mongo { return false; } - log(1) << "create collection " << ns << ' ' << options << '\n'; + log(1) << "create collection " << ns << ' ' << options << endl; /* todo: do this only when we have allocated space successfully? or we could insert with a { ok: 0 } field and then go back and set to ok : 1 after we are done. @@ -174,33 +196,48 @@ namespace mongo { if( !isFreeList ) addNewNamespaceToCatalog(ns, options.isEmpty() ? 0 : &options); - long long size = initialExtentSize(128); - BSONElement e = options.getField("size"); - if ( e.isNumber() ) { - size = e.numberLong(); - size += 256; - size &= 0xffffffffffffff00LL; + long long size = Extent::initialSize(128); + { + BSONElement e = options.getField("size"); + if ( e.isNumber() ) { + size = e.numberLong(); + size += 256; + size &= 0xffffffffffffff00LL; + } } - + uassert( 10083 , "invalid size spec", size > 0 ); bool newCapped = false; int mx = 0; - e = options.getField("capped"); - if ( e.type() == Bool && e.boolean() ) { + if( options.getBoolField("capped") ) { newCapped = true; - e = options.getField("max"); + BSONElement e = options.getField("max"); if ( e.isNumber() ) { mx = e.numberInt(); } } - // $nExtents just for debug/testing. We create '$nExtents' extents, - // each of size 'size'. - e = options.getField( "$nExtents" ); - int nExtents = int( e.number() ); + // $nExtents just for debug/testing. 
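
As a minimal standalone restatement of the sizing rule in Extent::initialSize() above (the free-function form and its name are just for illustration):

#include <cassert>

// Small records get a larger multiplier, the result is capped, and the final
// size is rounded down to a 256-byte boundary (the & 0xffffff00 mask).
int initialExtentSizeFor(int recordLen) {
    long long sz = (long long)recordLen * 16;
    if (recordLen < 1000)
        sz = (long long)recordLen * 64;      // small objects: over-allocate more aggressively
    if (sz > 1000000000)                     // cap a single extent at ~1GB
        sz = 1000000000;
    int aligned = ((int)sz) & 0xffffff00;    // round down to a multiple of 256 bytes
    assert(aligned > recordLen);
    return aligned;
}

Extent::followupSize() further down applies the same cap and alignment while growing relative to the previous extent's length.
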
+ BSONElement e = options.getField( "$nExtents" ); Database *database = cc().database(); - if ( nExtents > 0 ) { + if ( e.type() == Array ) { + // We create one extent per array entry, with size specified + // by the array value. + BSONObjIterator i( e.embeddedObject() ); + while( i.more() ) { + BSONElement e = i.next(); + int size = int( e.number() ); + assert( size <= 0x7fffffff ); + // $nExtents is just for testing - always allocate new extents + // rather than reuse existing extents so we have some predictibility + // in the extent size used by our tests + database->suitableFile( (int) size, false )->createExtent( ns, (int) size, newCapped ); + } + } + else if ( int( e.number() ) > 0 ) { + // We create '$nExtents' extents, each of size 'size'. + int nExtents = int( e.number() ); assert( size <= 0x7fffffff ); for ( int i = 0; i < nExtents; ++i ) { assert( size <= 0x7fffffff ); @@ -209,10 +246,16 @@ namespace mongo { // in the extent size used by our tests database->suitableFile( (int) size, false )->createExtent( ns, (int) size, newCapped ); } - } else { + } + else { + // This is the non test case, where we don't have a $nExtents spec. while ( size > 0 ) { int max = MongoDataFile::maxSize() - DataFileHeader::HeaderSize; int desiredExtentSize = (int) (size > max ? max : size); + if ( desiredExtentSize < Extent::minSize() ) { + desiredExtentSize = Extent::minSize(); + } + desiredExtentSize &= 0xffffff00; Extent *e = database->allocExtent( ns, desiredExtentSize, newCapped ); size -= e->length; } @@ -223,15 +266,16 @@ namespace mongo { bool ensure = false; if ( options.getField( "autoIndexId" ).type() ) { - if ( options["autoIndexId"].trueValue() ){ + if ( options["autoIndexId"].trueValue() ) { ensure = true; } - } else { + } + else { if ( !newCapped ) { ensure=true; } } - if( ensure ) { + if( ensure ) { if( deferIdIndex ) *deferIdIndex = true; else @@ -239,7 +283,7 @@ namespace mongo { } if ( mx > 0 ) - d->max = mx; + getDur().writingInt( d->max ) = mx; return true; } @@ -250,7 +294,7 @@ namespace mongo { */ bool userCreateNS(const char *ns, BSONObj options, string& err, bool logForReplication, bool *deferIdIndex) { const char *coll = strchr( ns, '.' 
) + 1; - massert( 10356 , "invalid ns", coll && *coll ); + massert( 10356 , str::stream() << "invalid ns: " << ns , coll && *coll ); char cl[ 256 ]; nsToDatabase( ns, cl ); bool ok = _userCreateNS(ns, options, err, deferIdIndex); @@ -272,14 +316,22 @@ namespace mongo { int MongoDataFile::maxSize() { if ( sizeof( int* ) == 4 ) { return 512 * 1024 * 1024; - } else if ( cmdLine.smallfiles ) { + } + else if ( cmdLine.smallfiles ) { return 0x7ff00000 >> 2; - } else { + } + else { return 0x7ff00000; } } - void MongoDataFile::badOfs(int ofs) const { + void MongoDataFile::badOfs2(int ofs) const { + stringstream ss; + ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; + uasserted(13441, ss.str()); + } + + void MongoDataFile::badOfs(int ofs) const { stringstream ss; ss << "bad offset:" << ofs << " accessing file: " << mmf.filename() << " - consider repairing database"; uasserted(13440, ss.str()); @@ -293,26 +345,18 @@ namespace mongo { else size = 0x7ff00000; - if ( strstr(filename, "_hudsonSmall") ) { - int mult = 1; - if ( fileNo > 1 && fileNo < 1000 ) - mult = fileNo; - size = 1024 * 512 * mult; - log() << "Warning : using small files for _hudsonSmall" << endl; - } - else if ( cmdLine.smallfiles ){ + if ( cmdLine.smallfiles ) { size = size >> 2; } - - + + return size; } void MongoDataFile::open( const char *filename, int minSize, bool preallocateOnly ) { { /* check quotas - very simple temporary implementation - we will in future look up - the quota from the grid database + very simple temporary implementation for now */ if ( cmdLine.quota && fileNo > cmdLine.quotaFiles && !MMF::exists(filename) ) { /* todo: if we were adding / changing keys in an index did we do some @@ -340,58 +384,66 @@ namespace mongo { if ( size > maxSize() ) size = maxSize(); - assert( ( size >= 64*1024*1024 ) || cmdLine.smallfiles || ( strstr( filename, "_hudsonSmall" ) ) ); + assert( size >= 64*1024*1024 || cmdLine.smallfiles ); assert( size % 4096 == 0 ); if ( preallocateOnly ) { if ( cmdLine.prealloc ) { - theFileAllocator().requestAllocation( filename, size ); + FileAllocator::get()->requestAllocation( filename, size ); } return; } - - _p = mmf.map(filename, size); - header = (DataFileHeader *) _p.at(0, DataFileHeader::HeaderSize); - if( sizeof(char *) == 4 ) - uassert( 10084 , "can't map file memory - mongo requires 64 bit build for larger datasets", header); + + { + assert( _mb == 0 ); + unsigned long long sz = size; + if( mmf.create(filename, sz, false) ) + _mb = mmf.getView(); + assert( sz <= 0x7fffffff ); + size = (int) sz; + } + //header = (DataFileHeader *) _p; + if( sizeof(char *) == 4 ) + uassert( 10084 , "can't map file memory - mongo requires 64 bit build for larger datasets", _mb != 0); else - uassert( 10085 , "can't map file memory", header); - header->init(fileNo, size); + uassert( 10085 , "can't map file memory", _mb != 0); + header()->init(fileNo, size, filename); } - void MongoDataFile::flush( bool sync ){ + void MongoDataFile::flush( bool sync ) { mmf.flush( sync ); } - void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped) { - DiskLoc oldExtentLoc; + void addNewExtentToNamespace(const char *ns, Extent *e, DiskLoc eloc, DiskLoc emptyLoc, bool capped) { NamespaceIndex *ni = nsindex(ns); NamespaceDetails *details = ni->details(ns); if ( details ) { assert( !details->lastExtent.isNull() ); assert( !details->firstExtent.isNull() ); - e->xprev = details->lastExtent; - details->lastExtent.ext()->xnext = eloc; 
+ getDur().writingDiskLoc(e->xprev) = details->lastExtent; + getDur().writingDiskLoc(details->lastExtent.ext()->xnext) = eloc; assert( !eloc.isNull() ); - details->lastExtent = eloc; + getDur().writingDiskLoc(details->lastExtent) = eloc; } else { ni->add_ns(ns, eloc, capped); details = ni->details(ns); } - details->lastExtentSize = e->length; - DEBUGGING out() << "temp: newextent adddelrec " << ns << endl; + { + NamespaceDetails *dw = details->writingWithoutExtra(); + dw->lastExtentSize = e->length; + } details->addDeletedRec(emptyLoc.drec(), emptyLoc); } Extent* MongoDataFile::createExtent(const char *ns, int approxSize, bool newCapped, int loops) { - massert( 10357 , "shutdown in progress", !goingAway ); - massert( 10358 , "bad new extent size", approxSize >= 0 && approxSize <= Extent::maxSize() ); - massert( 10359 , "header==0 on new extent: 32 bit mmap space exceeded?", header ); // null if file open failed - int ExtentSize = approxSize <= header->unusedLength ? approxSize : header->unusedLength; + massert( 10357 , "shutdown in progress", ! inShutdown() ); + massert( 10358 , "bad new extent size", approxSize >= Extent::minSize() && approxSize <= Extent::maxSize() ); + massert( 10359 , "header==0 on new extent: 32 bit mmap space exceeded?", header() ); // null if file open failed + int ExtentSize = approxSize <= header()->unusedLength ? approxSize : header()->unusedLength; DiskLoc loc; - if ( ExtentSize <= 0 ) { + if ( ExtentSize < Extent::minSize() ) { /* not there could be a lot of looping here is db just started and no files are open yet. we might want to do something about that. */ if ( loops > 8 ) { @@ -401,12 +453,14 @@ namespace mongo { log() << "newExtent: " << ns << " file " << fileNo << " full, adding a new file\n"; return cc().database()->addAFile( 0, true )->createExtent(ns, approxSize, newCapped, loops+1); } - int offset = header->unused.getOfs(); - header->unused.setOfs( fileNo, offset + ExtentSize ); - header->unusedLength -= ExtentSize; - loc.setOfs(fileNo, offset); + int offset = header()->unused.getOfs(); + + DataFileHeader *h = getDur().writing(header()); + h->unused.set( fileNo, offset + ExtentSize ); + h->unusedLength -= ExtentSize; + loc.set(fileNo, offset); Extent *e = _getExtent(loc); - DiskLoc emptyLoc = e->init(ns, ExtentSize, fileNo, offset); + DiskLoc emptyLoc = getDur().writing(e)->init(ns, ExtentSize, fileNo, offset); addNewExtentToNamespace(ns, e, loc, emptyLoc, newCapped); @@ -415,7 +469,7 @@ namespace mongo { return e; } - Extent* DataFileMgr::allocFromFreeList(const char *ns, int approxSize, bool capped) { + Extent* DataFileMgr::allocFromFreeList(const char *ns, int approxSize, bool capped) { string s = cc().database()->name + ".$freelist"; NamespaceDetails *f = nsdetails(s.c_str()); if( f ) { @@ -426,7 +480,7 @@ namespace mongo { if( low > 2048 ) low -= 256; high = (int) (approxSize * 1.05) + 256; } - else { + else { low = (int) (approxSize * 0.8); high = (int) (approxSize * 1.4); } @@ -436,20 +490,20 @@ namespace mongo { int bestDiff = 0x7fffffff; { DiskLoc L = f->firstExtent; - while( !L.isNull() ) { + while( !L.isNull() ) { Extent * e = L.ext(); - if( e->length >= low && e->length <= high ) { + if( e->length >= low && e->length <= high ) { int diff = abs(e->length - approxSize); - if( diff < bestDiff ) { + if( diff < bestDiff ) { bestDiff = diff; best = e; - if( diff == 0 ) + if( diff == 0 ) break; } } L = e->xnext; ++n; - + } } OCCASIONALLY if( n > 512 ) log() << "warning: newExtent " << n << " scanned\n"; @@ -457,13 +511,13 @@ namespace mongo { 
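
Many hunks in this file replace plain stores with getDur().writingPtr() / writing<T>() / writingDiskLoc() calls that declare a memory region as about-to-be-written before it is touched. A minimal sketch of that intent-declaration pattern, assuming nothing about MongoDB's actual durability implementation (the WriteIntentLog class below is purely illustrative):

#include <cstddef>
#include <utility>
#include <vector>

// Illustrative sketch: a region is registered with the journal before it is
// modified, and only then handed back as a writable pointer.
class WriteIntentLog {
public:
    void* writingPtr(void* p, size_t len) {               // like getDur().writingPtr(p, len)
        _intents.push_back(std::make_pair(p, len));        // remember what is about to change
        return p;
    }
    template <class T>
    T* writing(T* t) {                                      // like getDur().writing(obj)
        return static_cast<T*>(writingPtr(t, sizeof(T)));
    }
    size_t pending() const { return _intents.size(); }
private:
    std::vector< std::pair<void*, size_t> > _intents;       // regions to journal before the data files
};
// In this sketch, a converted call site such as
//     getDur().writingInt( d->max ) = mx;
// corresponds to
//     *intentLog.writing(&someInt) = mx;
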
Extent *e = best; // remove from the free list if( !e->xprev.isNull() ) - e->xprev.ext()->xnext = e->xnext; + e->xprev.ext()->xnext.writing() = e->xnext; if( !e->xnext.isNull() ) - e->xnext.ext()->xprev = e->xprev; + e->xnext.ext()->xprev.writing() = e->xprev; if( f->firstExtent == e->myLoc ) - f->firstExtent = e->xnext; + f->firstExtent.writing() = e->xnext; if( f->lastExtent == e->myLoc ) - f->lastExtent = e->xprev; + f->lastExtent.writing() = e->xprev; // use it OCCASIONALLY if( n > 512 ) log() << "warning: newExtent " << n << " scanned\n"; @@ -479,9 +533,11 @@ namespace mongo { /*---------------------------------------------------------------------*/ - DiskLoc Extent::reuse(const char *nsname) { - /*TODOMMF - work to do when extent is freed. */ - log(3) << "reset extent was:" << nsDiagnostic.buf << " now:" << nsname << '\n'; + DiskLoc Extent::reuse(const char *nsname) { + return getDur().writing(this)->_reuse(nsname); + } + DiskLoc Extent::_reuse(const char *nsname) { + log(3) << "reset extent was:" << nsDiagnostic.toString() << " now:" << nsname << '\n'; massert( 10360 , "Extent::reset bad magic value", magic == 0x41424344 ); xnext.Null(); xprev.Null(); @@ -493,12 +549,9 @@ namespace mongo { emptyLoc.inc( (int) (_extentData-(char*)this) ); int delRecLength = length - (_extentData - (char *) this); - //DeletedRecord *empty1 = (DeletedRecord *) extentData; - DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, delRecLength);//(DeletedRecord *) getRecord(emptyLoc); - //assert( empty == empty1 ); - - // do we want to zero the record? memset(empty, ...) + DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, delRecLength);//(DeletedRecord *) getRecord(emptyLoc); + empty = getDur().writing(empty); empty->lengthWithHeaders = delRecLength; empty->extentOfs = myLoc.getOfs(); empty->nextDeleted.Null(); @@ -509,7 +562,7 @@ namespace mongo { /* assumes already zeroed -- insufficient for block 'reuse' perhaps */ DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset) { magic = 0x41424344; - myLoc.setOfs(_fileNo, _offset); + myLoc.set(_fileNo, _offset); xnext.Null(); xprev.Null(); nsDiagnostic = nsname; @@ -521,9 +574,7 @@ namespace mongo { emptyLoc.inc( (int) (_extentData-(char*)this) ); int l = _length - (_extentData - (char *) this); - //DeletedRecord *empty1 = (DeletedRecord *) extentData; - DeletedRecord *empty = DataFileMgr::makeDeletedRecord(emptyLoc, l); - //assert( empty == empty1 ); + DeletedRecord *empty = getDur().writing( DataFileMgr::makeDeletedRecord(emptyLoc, l) ); empty->lengthWithHeaders = l; empty->extentOfs = myLoc.getOfs(); return emptyLoc; @@ -582,7 +633,7 @@ namespace mongo { } return maxExtentSize; } - + /*---------------------------------------------------------------------*/ shared_ptr DataFileMgr::findAll(const char *ns, const DiskLoc &startLoc) { @@ -612,12 +663,12 @@ namespace mongo { d->dumpDeleted(&extents); } - if ( d->capped ) + if ( d->capped ) return shared_ptr( new ForwardCappedCursor( d , startLoc ) ); - + if ( !startLoc.isNull() ) - return shared_ptr(new BasicCursor( startLoc )); - + return shared_ptr(new BasicCursor( startLoc )); + while ( e->firstRecord.isNull() && !e->xnext.isNull() ) { /* todo: if extent is empty, free it for reuse elsewhere. that is a bit complicated have to clean up the freelists. 
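
The free-list scan in allocFromFreeList() above reduces to a bounded best-fit search; a standalone sketch, with a std::vector standing in for the on-disk $freelist extent chain:

#include <cstdlib>
#include <vector>

struct FreeExtent { int length; };

// Only extents whose length falls in a band around the requested size are
// considered; among those the closest match wins, and an exact match ends
// the scan early.
FreeExtent* pickBestFit(std::vector<FreeExtent>& freeList, int approxSize, bool capped) {
    int low, high;
    if (capped) {
        low = approxSize;
        if (low > 2048) low -= 256;                 // small tolerance below the requested size
        high = (int)(approxSize * 1.05) + 256;
    }
    else {
        low  = (int)(approxSize * 0.8);
        high = (int)(approxSize * 1.4);
    }
    FreeExtent* best = 0;
    int bestDiff = 0x7fffffff;
    for (size_t i = 0; i < freeList.size(); i++) {
        int len = freeList[i].length;
        if (len < low || len > high)
            continue;
        int diff = std::abs(len - approxSize);
        if (diff < bestDiff) {
            bestDiff = diff;
            best = &freeList[i];
            if (diff == 0)
                break;                              // exact fit: stop scanning
        }
    }
    return best;                                    // 0 if nothing suitable was found
}
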
@@ -638,37 +689,38 @@ namespace mongo { if ( el.number() >= 0 ) return DataFileMgr::findAll(ns, startLoc); - + // "reverse natural order" NamespaceDetails *d = nsdetails(ns); - + if ( !d ) return shared_ptr(new BasicCursor(DiskLoc())); - + if ( !d->capped ) { if ( !startLoc.isNull() ) - return shared_ptr(new ReverseCursor( startLoc )); + return shared_ptr(new ReverseCursor( startLoc )); Extent *e = d->lastExtent.ext(); while ( e->lastRecord.isNull() && !e->xprev.isNull() ) { OCCASIONALLY out() << " findTableScan: extent empty, skipping ahead" << endl; e = e->getPrevExtent(); } return shared_ptr(new ReverseCursor( e->lastRecord )); - } else { + } + else { return shared_ptr( new ReverseCappedCursor( d, startLoc ) ); } } - void printFreeList() { + void printFreeList() { string s = cc().database()->name + ".$freelist"; log() << "dump freelist " << s << '\n'; NamespaceDetails *freeExtents = nsdetails(s.c_str()); - if( freeExtents == 0 ) { + if( freeExtents == 0 ) { log() << " freeExtents==0" << endl; return; } DiskLoc a = freeExtents->firstExtent; - while( !a.isNull() ) { + while( !a.isNull() ) { Extent *e = a.ext(); log() << " " << a.toString() << " len:" << e->length << " prev:" << e->xprev.toString() << '\n'; a = e->xnext; @@ -687,7 +739,7 @@ namespace mongo { NamespaceString s(nsToDrop); assert( s.db == cc().database()->name ); if( s.isSystem() ) { - if( s.coll == "system.profile" ) + if( s.coll == "system.profile" ) uassert( 10087 , "turn off profiling before dropping system.profile collection", cc().database()->profile == 0 ); else uasserted( 12502, "can't drop system ns" ); @@ -698,32 +750,31 @@ namespace mongo { BSONObj cond = BSON( "name" << nsToDrop ); // { name: "colltodropname" } string system_namespaces = cc().database()->name + ".system.namespaces"; /*int n = */ deleteObjects(system_namespaces.c_str(), cond, false, false, true); - // no check of return code as this ns won't exist for some of the new storage engines + // no check of return code as this ns won't exist for some of the new storage engines } // free extents if( !d->firstExtent.isNull() ) { string s = cc().database()->name + ".$freelist"; NamespaceDetails *freeExtents = nsdetails(s.c_str()); - if( freeExtents == 0 ) { + if( freeExtents == 0 ) { string err; _userCreateNS(s.c_str(), BSONObj(), err, 0); freeExtents = nsdetails(s.c_str()); massert( 10361 , "can't create .$freelist", freeExtents); } - if( freeExtents->firstExtent.isNull() ) { - freeExtents->firstExtent = d->firstExtent; - freeExtents->lastExtent = d->lastExtent; + if( freeExtents->firstExtent.isNull() ) { + freeExtents->firstExtent.writing() = d->firstExtent; + freeExtents->lastExtent.writing() = d->lastExtent; } - else { + else { DiskLoc a = freeExtents->firstExtent; assert( a.ext()->xprev.isNull() ); - a.ext()->xprev = d->lastExtent; - d->lastExtent.ext()->xnext = a; - freeExtents->firstExtent = d->firstExtent; - - d->firstExtent.setInvalid(); - d->lastExtent.setInvalid(); + getDur().writingDiskLoc( a.ext()->xprev ) = d->lastExtent; + getDur().writingDiskLoc( d->lastExtent.ext()->xnext ) = a; + getDur().writingDiskLoc( freeExtents->firstExtent ) = d->firstExtent; + getDur().writingDiskLoc( d->firstExtent ).setInvalid(); + getDur().writingDiskLoc( d->lastExtent ).setInvalid(); } } @@ -740,7 +791,7 @@ namespace mongo { BackgroundOperation::assertNoBgOpInProgForNs(name.c_str()); if ( d->nIndexes != 0 ) { - try { + try { assert( dropIndexes(d, name.c_str(), "*", errmsg, result, true) ); } catch( DBException& e ) { @@ -754,11 +805,10 @@ namespace mongo { log(1) 
<< "\t dropIndexes done" << endl; result.append("ns", name.c_str()); ClientCursor::invalidate(name.c_str()); - Client::invalidateNS( name ); Top::global.collectionDropped( name ); - dropNS(name); + dropNS(name); } - + int nUnindexes = 0; /* unindex all keys in index for this record. */ @@ -797,63 +847,69 @@ namespace mongo { int n = d->nIndexes; for ( int i = 0; i < n; i++ ) _unindexRecord(d->idx(i), obj, dl, !noWarn); - if( d->backgroundIndexBuildInProgress ) { + if( d->indexBuildInProgress ) { // background index // always pass nowarn here, as this one may be missing for valid reasons as we are concurrently building it - _unindexRecord(d->idx(n), obj, dl, false); + _unindexRecord(d->idx(n), obj, dl, false); } } - /* deletes a record, just the pdfile portion -- no index cleanup, no cursor cleanup, etc. + /* deletes a record, just the pdfile portion -- no index cleanup, no cursor cleanup, etc. caller must check if capped */ - void DataFileMgr::_deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl) - { + void DataFileMgr::_deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl) { /* remove ourself from the record next/prev chain */ { if ( todelete->prevOfs != DiskLoc::NullOfs ) - todelete->getPrev(dl).rec()->nextOfs = todelete->nextOfs; + getDur().writingInt( todelete->getPrev(dl).rec()->nextOfs ) = todelete->nextOfs; if ( todelete->nextOfs != DiskLoc::NullOfs ) - todelete->getNext(dl).rec()->prevOfs = todelete->prevOfs; + getDur().writingInt( todelete->getNext(dl).rec()->prevOfs ) = todelete->prevOfs; } /* remove ourself from extent pointers */ { - Extent *e = todelete->myExtent(dl); + Extent *e = getDur().writing( todelete->myExtent(dl) ); if ( e->firstRecord == dl ) { if ( todelete->nextOfs == DiskLoc::NullOfs ) e->firstRecord.Null(); else - e->firstRecord.setOfs(dl.a(), todelete->nextOfs); + e->firstRecord.set(dl.a(), todelete->nextOfs); } if ( e->lastRecord == dl ) { if ( todelete->prevOfs == DiskLoc::NullOfs ) e->lastRecord.Null(); else - e->lastRecord.setOfs(dl.a(), todelete->prevOfs); + e->lastRecord.set(dl.a(), todelete->prevOfs); } } /* add to the free list */ { - d->nrecords--; - d->datasize -= todelete->netLength(); - /* temp: if in system.indexes, don't reuse, and zero out: we want to be - careful until validated more, as IndexDetails has pointers - to this disk location. so an incorrectly done remove would cause - a lot of problems. - */ + { + NamespaceDetails::Stats *s = getDur().writing(&d->stats); + s->datasize -= todelete->netLength(); + s->nrecords--; + } + if ( strstr(ns, ".system.indexes") ) { - memset(todelete, 0, todelete->lengthWithHeaders); + /* temp: if in system.indexes, don't reuse, and zero out: we want to be + careful until validated more, as IndexDetails has pointers + to this disk location. so an incorrectly done remove would cause + a lot of problems. + */ + memset(getDur().writingPtr(todelete, todelete->lengthWithHeaders), 0, todelete->lengthWithHeaders); } else { - DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse. + DEV { + unsigned long long *p = (unsigned long long *) todelete->data; + *getDur().writing(p) = 0; + //DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse. 
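
The first half of _deleteRecord() above is a plain doubly-linked-list splice expressed through record offsets; a compact sketch, with vector indices standing in for DiskLoc offsets:

#include <vector>

// Records form a doubly linked chain through integer offsets with a sentinel
// for "no neighbour", and the extent tracks its first and last record.
const int NullOfs = -1;                        // stand-in for DiskLoc::NullOfs

struct Rec     { int prevOfs; int nextOfs; };
struct ExtEnds { int firstRecord; int lastRecord; };

void unlinkRecord(std::vector<Rec>& records, ExtEnds& extent, int dl) {
    Rec& r = records[dl];
    if (r.prevOfs != NullOfs)                  // splice out of the prev/next chain
        records[r.prevOfs].nextOfs = r.nextOfs;
    if (r.nextOfs != NullOfs)
        records[r.nextOfs].prevOfs = r.prevOfs;
    if (extent.firstRecord == dl)              // fix extent endpoints if we were first/last
        extent.firstRecord = r.nextOfs;        // NullOfs if the extent is now empty
    if (extent.lastRecord == dl)
        extent.lastRecord = r.prevOfs;
}
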
+ } d->addDeletedRec((DeletedRecord*)todelete, dl); } } } - void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn) - { + void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn) { dassert( todelete == dl.rec() ); NamespaceDetails* d = nsdetails(ns); @@ -880,8 +936,7 @@ namespace mongo { NamespaceDetails *d, NamespaceDetailsTransient *nsdt, Record *toupdate, const DiskLoc& dl, - const char *_buf, int _len, OpDebug& debug, bool &changedId, bool god) - { + const char *_buf, int _len, OpDebug& debug, bool god) { StringBuilder& ss = debug.str; dassert( toupdate == dl.rec() ); @@ -891,7 +946,7 @@ namespace mongo { DEV assert( objNew.objdata() == _buf ); if( !objNew.hasElement("_id") && objOld.hasElement("_id") ) { - /* add back the old _id value if the update removes it. Note this implementation is slow + /* add back the old _id value if the update removes it. Note this implementation is slow (copies entire object multiple times), but this shouldn't happen often, so going for simple code, not speed. */ @@ -903,11 +958,13 @@ namespace mongo { objNew = b.obj(); } - /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further + /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ vector changes; + bool changedId = false; getIndexChanges(changes, *d, objNew, objOld, changedId); + uassert( 13596 , str::stream() << "cannot change _id of a document old:" << objOld << " new:" << objNew , ! changedId ); dupCheck(changes, *d, dl); if ( toupdate->netLength() < objNew.objsize() ) { @@ -946,8 +1003,8 @@ namespace mongo { try { /* we did the dupCheck() above. so we don't have to worry about it here. */ idx.head.btree()->bt_insert( - idx.head, - dl, *changes[x].added[i], ordering, /*dupsAllowed*/true, idx); + idx.head, + dl, *changes[x].added[i], ordering, /*dupsAllowed*/true, idx); } catch (AssertionException& e) { ss << " exception update index "; @@ -959,25 +1016,30 @@ namespace mongo { ss << '\n' << keyUpdates << " key updates "; } - // update in place - memcpy(toupdate->data, objNew.objdata(), objNew.objsize()); + // update in place + int sz = objNew.objsize(); + memcpy(getDur().writingPtr(toupdate->data, sz), objNew.objdata(), sz); return dl; } - int followupExtentSize(int len, int lastExtentLen) { + int Extent::followupSize(int len, int lastExtentLen) { assert( len < Extent::maxSize() ); - int x = initialExtentSize(len); + int x = initialSize(len); int y = (int) (lastExtentLen < 4000000 ? lastExtentLen * 4.0 : lastExtentLen * 1.2); int sz = y > x ? 
y : x; - if ( sz < lastExtentLen ) - sz = lastExtentLen; - else if ( sz > Extent::maxSize() ) + if ( sz < lastExtentLen ) { + // this means there was an int overflow + // so we should turn it into maxSize + sz = Extent::maxSize(); + } + else if ( sz > Extent::maxSize() ) { sz = Extent::maxSize(); - + } + sz = ((int)sz) & 0xffffff00; assert( sz > len ); - + return sz; } @@ -990,7 +1052,7 @@ namespace mongo { Ordering ordering = Ordering::make(order); int n = 0; for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { - if( ++n == 2 ) { + if( ++n == 2 ) { d->setIndexIsMultikey(idxNo); } assert( !recordLoc.isNull() ); @@ -999,7 +1061,7 @@ namespace mongo { *i, ordering, dupsAllowed, idx); } catch (AssertionException& e) { - if( e.getCode() == 10287 && idxNo == d->nIndexes ) { + if( e.getCode() == 10287 && idxNo == d->nIndexes ) { DEV log() << "info: caught key already in index on bg indexing (ok)" << endl; continue; } @@ -1012,8 +1074,7 @@ namespace mongo { } } - void testSorting() - { + void testSorting() { BSONObjBuilder b; b.appendNull(""); BSONObj x = b.obj(); @@ -1027,9 +1088,9 @@ namespace mongo { sorter.add(x, DiskLoc(3,77)); sorter.sort(); - + auto_ptr i = sorter.iterator(); - while( i->more() ) { + while( i->more() ) { BSONObjExternalSorter::Data d = i->next(); /*cout << d.second.toString() << endl; cout << d.first.objsize() << endl; @@ -1039,7 +1100,6 @@ namespace mongo { // throws DBException unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) { - assert( d->backgroundIndexBuildInProgress == 0 ); CurOp * op = cc().curop(); Timer t; @@ -1050,17 +1110,17 @@ namespace mongo { bool dropDups = idx.dropDups() || inDBRepair; BSONObj order = idx.keyPattern(); - idx.head.Null(); - + getDur().writingDiskLoc(idx.head).Null(); + if ( logLevel > 1 ) printMemInfo( "before index start" ); /* get and sort all the keys ----- */ unsigned long long n = 0; shared_ptr c = theDataFileMgr.findAll(ns); BSONObjExternalSorter sorter(order); - sorter.hintNumObjects( d->nrecords ); + sorter.hintNumObjects( d->stats.nrecords ); unsigned long long nkeys = 0; - ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 ) ); + ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort" , d->stats.nrecords , 10 ) ); while ( c->ok() ) { BSONObj o = c->current(); DiskLoc loc = c->currLoc(); @@ -1069,17 +1129,17 @@ namespace mongo { idx.getKeysFromObject(o, keys); int k = 0; for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { - if( ++k == 2 ) + if( ++k == 2 ) { d->setIndexIsMultikey(idxNo); - //cout<<"SORTER ADD " << i->toString() << ' ' << loc.toString() << endl; + } sorter.add(*i, loc); nkeys++; } - + c->advance(); n++; pm.hit(); - if ( logLevel > 1 && n % 10000 == 0 ){ + if ( logLevel > 1 && n % 10000 == 0 ) { printMemInfo( "\t iterating objects" ); } @@ -1089,37 +1149,37 @@ namespace mongo { if ( logLevel > 1 ) printMemInfo( "before final sort" ); sorter.sort(); if ( logLevel > 1 ) printMemInfo( "after final sort" ); - + log(t.seconds() > 5 ? 
0 : 1) << "\t external sort used : " << sorter.numFiles() << " files " << " in " << t.seconds() << " secs" << endl; list dupsToDrop; - /* build index --- */ + /* build index --- */ { BtreeBuilder btBuilder(dupsAllowed, idx); BSONObj keyLast; auto_ptr i = sorter.iterator(); assert( pm == op->setMessage( "index: (2/3) btree bottom up" , nkeys , 10 ) ); - while( i->more() ) { + while( i->more() ) { RARELY killCurrentOp.checkForInterrupt(); BSONObjExternalSorter::Data d = i->next(); - try { + try { btBuilder.addKey(d.first, d.second); } - catch( AssertionException& e ) { - if ( dupsAllowed ){ + catch( AssertionException& e ) { + if ( dupsAllowed ) { // unknow exception?? throw; } - + if( e.interrupted() ) throw; if ( ! dropDups ) throw; - /* we could queue these on disk, but normally there are very few dups, so instead we + /* we could queue these on disk, but normally there are very few dups, so instead we keep in ram and have a limit. */ dupsToDrop.push_back(d.second); @@ -1131,9 +1191,11 @@ namespace mongo { op->setMessage( "index: (3/3) btree-middle" ); log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl; btBuilder.commit(); - wassert( btBuilder.getn() == nkeys || dropDups ); + if ( btBuilder.getn() != nkeys && ! dropDups ) { + warning() << "not all entries were added to the index, probably some keys were too large" << endl; + } } - + log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl; for( list::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ ) @@ -1142,13 +1204,13 @@ namespace mongo { return n; } - class BackgroundIndexBuildJob : public BackgroundOperation { + class BackgroundIndexBuildJob : public BackgroundOperation { unsigned long long addExistingToIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) { bool dupsAllowed = !idx.unique(); bool dropDups = idx.dropDups(); - ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->nrecords ); + ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->stats.nrecords ); unsigned long long n = 0; auto_ptr cc; @@ -1156,25 +1218,26 @@ namespace mongo { shared_ptr c = theDataFileMgr.findAll(ns); cc.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, ns) ); } - CursorId id = cc->cursorid; + CursorId id = cc->cursorid(); - while ( cc->c->ok() ) { - BSONObj js = cc->c->current(); - try { - _indexRecord(d, idxNo, js, cc->c->currLoc(), dupsAllowed); - cc->c->advance(); - } catch( AssertionException& e ) { + while ( cc->ok() ) { + BSONObj js = cc->current(); + try { + _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + cc->advance(); + } + catch( AssertionException& e ) { if( e.interrupted() ) throw; if ( dropDups ) { - DiskLoc toDelete = cc->c->currLoc(); - bool ok = cc->c->advance(); + DiskLoc toDelete = cc->currLoc(); + bool ok = cc->advance(); cc->updateLocation(); theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true ); if( ClientCursor::find(id, false) == 0 ) { cc.release(); - if( !ok ) { + if( !ok ) { /* we were already at the end. normal. 
*/ } else { @@ -1182,7 +1245,8 @@ namespace mongo { } break; } - } else { + } + else { log() << "background addExistingToIndex exception " << e.what() << endl; throw; } @@ -1200,7 +1264,7 @@ namespace mongo { return n; } - /* we do set a flag in the namespace for quick checking, but this is our authoritative info - + /* we do set a flag in the namespace for quick checking, but this is our authoritative info - that way on a crash/restart, we don't think we are still building one. */ set bgJobsInProgress; @@ -1208,12 +1272,8 @@ namespace mongo { assertInWriteLock(); uassert( 13130 , "can't start bg index b/c in recursive lock (db.eval?)" , dbMutex.getState() == 1 ); bgJobsInProgress.insert(d); - d->backgroundIndexBuildInProgress = 1; - d->nIndexes--; } void done(const char *ns, NamespaceDetails *d) { - d->nIndexes++; - d->backgroundIndexBuildInProgress = 0; NamespaceDetailsTransient::get_w(ns).addedIndex(); // clear query optimizer cache assertInWriteLock(); } @@ -1221,16 +1281,16 @@ namespace mongo { public: BackgroundIndexBuildJob(const char *ns) : BackgroundOperation(ns) { } - unsigned long long go(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) { + unsigned long long go(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) { unsigned long long n = 0; prep(ns.c_str(), d); assert( idxNo == d->nIndexes ); - try { + try { idx.head = BtreeBucket::addBucket(idx); n = addExistingToIndex(ns.c_str(), d, idx, idxNo); } - catch(...) { + catch(...) { if( cc().database() && nsdetails(ns.c_str()) == d ) { assert( idxNo == d->nIndexes ); done(ns.c_str(), d); @@ -1246,25 +1306,51 @@ namespace mongo { } }; + /** + * For the lifetime of this object, an index build is indicated on the specified + * namespace and the newest index is marked as absent. This simplifies + * the cleanup required on recovery. + */ + class RecoverableIndexState { + public: + RecoverableIndexState( NamespaceDetails *d ) : _d( d ) { + indexBuildInProgress() = 1; + nIndexes()--; + } + ~RecoverableIndexState() { + DESTRUCTOR_GUARD ( + nIndexes()++; + indexBuildInProgress() = 0; + ) + } + private: + int &nIndexes() { return getDur().writingInt( _d->nIndexes ); } + int &indexBuildInProgress() { return getDur().writingInt( _d->indexBuildInProgress ); } + NamespaceDetails *_d; + }; + // throws DBException - static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo, bool background) { + static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo, bool background) { tlog() << "building new index on " << idx.keyPattern() << " for " << ns << ( background ? " background" : "" ) << endl; Timer t; - unsigned long long n; + unsigned long long n; if( background ) { log(2) << "buildAnIndex: background=true\n"; } assert( !BackgroundOperation::inProgForNs(ns.c_str()) ); // should have been checked earlier, better not be... 
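
The RecoverableIndexState class above combines two conventions that recur throughout this patch: every write into a memory-mapped structure is first declared to the durability layer (getDur().writingInt() and friends return a reference that may then be modified), and any state that must be undone on an exception is wrapped in an RAII guard. A minimal sketch of that pattern; CountGuard and its counter are hypothetical, only the getDur()/DESTRUCTOR_GUARD usage mirrors the class above:

    class CountGuard {
        int *_p;
    public:
        CountGuard( int *p ) : _p( p ) {
            getDur().writingInt( *_p )++;      // declare write intent, then bump
        }
        ~CountGuard() {
            DESTRUCTOR_GUARD(
                getDur().writingInt( *_p )--;  // undone even if the protected scope throws
            )
        }
    };
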
+ assert( d->indexBuildInProgress == 0 ); + assertInWriteLock(); + RecoverableIndexState recoverable( d ); if( inDBRepair || !background ) { - n = fastBuildIndex(ns.c_str(), d, idx, idxNo); - assert( !idx.head.isNull() ); - } - else { + n = fastBuildIndex(ns.c_str(), d, idx, idxNo); + assert( !idx.head.isNull() ); + } + else { BackgroundIndexBuildJob j(ns.c_str()); n = j.go(ns, d, idx, idxNo); - } + } tlog() << "done for " << n << " records " << t.millis() / 1000.0 << "secs" << endl; } @@ -1272,20 +1358,20 @@ namespace mongo { static void indexRecord(NamespaceDetails *d, BSONObj obj, DiskLoc loc) { int n = d->nIndexesBeingBuilt(); for ( int i = 0; i < n; i++ ) { - try { + try { bool unique = d->idx(i).unique(); _indexRecord(d, i, obj, loc, /*dupsAllowed*/!unique); } - catch( DBException& ) { + catch( DBException& ) { /* try to roll back previously added index entries note <= i (not < i) is important here as the index we were just attempted may be multikey and require some cleanup. */ - for( int j = 0; j <= i; j++ ) { + for( int j = 0; j <= i; j++ ) { try { _unindexRecord(d->idx(j), obj, loc, false); } - catch(...) { + catch(...) { log(3) << "unindex fails on rollback after unique failure\n"; } } @@ -1301,7 +1387,7 @@ namespace mongo { if ( d == 0 || (d->flags & NamespaceDetails::Flag_HaveIdIndex) ) return; - d->flags |= NamespaceDetails::Flag_HaveIdIndex; + *getDur().writing(&d->flags) |= NamespaceDetails::Flag_HaveIdIndex; { NamespaceDetails::IndexIterator i = d->ii(); @@ -1324,7 +1410,7 @@ namespace mongo { } #pragma pack(1) - struct IDToInsert_ { + struct IDToInsert_ { char type; char _id[4]; OID oid; @@ -1338,13 +1424,13 @@ namespace mongo { IDToInsert() : BSONElement( ( char * )( &idToInsert_ ) ) {} } idToInsert; #pragma pack() - + void DataFileMgr::insertAndLog( const char *ns, const BSONObj &o, bool god ) { BSONObj tmp = o; insertWithObjMod( ns, tmp, god ); logOp( "i", ns, tmp ); } - + DiskLoc DataFileMgr::insertWithObjMod(const char *ns, BSONObj &o, bool god) { DiskLoc loc = insert( ns, o.objdata(), o.objsize(), god ); if ( !loc.isNull() ) @@ -1356,12 +1442,12 @@ namespace mongo { insert( ns, o.objdata(), o.objsize(), god ); } - bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection); + bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection, BSONObj& fixedIndexObject ); // We are now doing two btree scans for all unique indexes (one here, and one when we've // written the record to the collection. This could be made more efficient inserting // dummy data here, keeping pointers to the btree nodes holding the dummy data and then - // updating the dummy data with the DiskLoc of the real record. + // updating the dummy data with the DiskLoc of the real record. 
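
The packed IDToInsert_ struct above hand-assembles a single BSON element -- one type byte (0x07, ObjectId), the field name "_id" with its NUL terminator, and a 12-byte ObjectId -- so that insert() can splice an _id into a document that lacks one without rebuilding the whole object. A rough illustration of that byte layout; the struct and field names here are made up, only the layout follows the BSON spec and the addID branch further down:

    #pragma pack(1)
    struct IdElementBytes {              // 17 bytes, same shape as IDToInsert_ above
        char type;                       // 0x07 = BSON ObjectId
        char name[4];                    // '_', 'i', 'd', '\0'
        unsigned char oid[12];           // the ObjectId payload
    };
    #pragma pack()
    // spliced in as: [4-byte total length + 17][IdElementBytes][rest of the original body]
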
void checkNoIndexConflicts( NamespaceDetails *d, const BSONObj &obj ) { for ( int idxNo = 0; idxNo < d->nIndexes; idxNo++ ) { if( d->idx(idxNo).unique() ) { @@ -1371,19 +1457,19 @@ namespace mongo { BSONObj order = idx.keyPattern(); for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) { uassert( 12582, "duplicate key insert for unique index of capped collection", - idx.head.btree()->findSingle(idx, idx.head, *i ).isNull() ); + idx.head.btree()->findSingle(idx, idx.head, *i ).isNull() ); } } - } + } } - /* note: if god==true, you may pass in obuf of NULL and then populate the returned DiskLoc + /* note: if god==true, you may pass in obuf of NULL and then populate the returned DiskLoc after the call -- that will prevent a double buffer copy in some cases (btree.cpp). */ DiskLoc DataFileMgr::insert(const char *ns, const void *obuf, int len, bool god, const BSONElement &writeId, bool mayAddIndex) { bool wouldAddIndex = false; - massert( 10093 , "cannot insert into reserved $ collection", god || nsDollarCheck( ns ) ); - uassert( 10094 , "invalid ns", strchr( ns , '.' ) > 0 ); + massert( 10093 , "cannot insert into reserved $ collection", god || isANormalNSName( ns ) ); + uassert( 10094 , str::stream() << "invalid ns: " << ns , isValidNS( ns ) ); const char *sys = strstr(ns, "system."); if ( sys ) { uassert( 10095 , "attempt to insert in reserved database name 'system'", sys != ns); @@ -1411,7 +1497,7 @@ namespace mongo { also if this is an addIndex, those checks should happen before this! */ // This may create first file in the database. - cc().database()->allocExtent(ns, initialExtentSize(len), false); + cc().database()->allocExtent(ns, Extent::initialSize(len), false); d = nsdetails(ns); if ( !god ) ensureIdIndexForNewNs(ns); @@ -1421,17 +1507,24 @@ namespace mongo { NamespaceDetails *tableToIndex = 0; string tabletoidxns; + BSONObj fixedIndexObject; if ( addIndex ) { assert( obuf ); BSONObj io((const char *) obuf); - if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex) ) + if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex, fixedIndexObject ) ) return DiskLoc(); + + if ( ! fixedIndexObject.isEmpty() ) { + obuf = fixedIndexObject.objdata(); + len = fixedIndexObject.objsize(); + } + } const BSONElement *newId = &writeId; int addID = 0; if( !god ) { - /* Check if we have an _id field. If we don't, we'll add it. + /* Check if we have an _id field. If we don't, we'll add it. Note that btree buckets which we insert aren't BSONObj's, but in that case god==true. */ BSONObj io((const char *) obuf); @@ -1446,7 +1539,7 @@ namespace mongo { } len += newId->size(); } - + BSONElementManipulator::lookForTimestamps( io ); } @@ -1456,28 +1549,28 @@ namespace mongo { if ( lenWHdr == 0 ) { // old datafiles, backward compatible here. assert( d->paddingFactor == 0 ); - d->paddingFactor = 1.0; + *getDur().writing(&d->paddingFactor) = 1.0; lenWHdr = len + Record::HeaderSize; } - + // If the collection is capped, check if the new object will violate a unique index // constraint before allocating space. 
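
When the first allocation attempt below fails, the code asks Extent::followupSize() (reworked earlier in this patch) for the next extent size: roughly 4x the previous extent while extents are under 4MB, 1.2x afterwards, never smaller than the record needs, clamped to Extent::maxSize() on overflow, and rounded down to a 256-byte boundary. A standalone sketch of that policy; initialSize and maxSize are passed in here as stand-ins for Extent::initialSize(len) and Extent::maxSize(), which are defined elsewhere:

    #include <cassert>

    int followupSizeSketch( int len, int lastExtentLen, int initialSize, int maxSize ) {
        int x = initialSize;
        int y = (int)( lastExtentLen < 4000000 ? lastExtentLen * 4.0
                                               : lastExtentLen * 1.2 );
        int sz = y > x ? y : x;
        if ( sz < lastExtentLen )            // the multiply overflowed int
            sz = maxSize;
        else if ( sz > maxSize )
            sz = maxSize;
        sz = sz & 0xffffff00;                // keep extents 256-byte aligned
        assert( sz > len );
        return sz;
    }
    // e.g. lastExtentLen = 1,000,000 -> 4,000,000; lastExtentLen = 8,000,000 -> 9,600,000
    // (assuming initialSize is smaller and maxSize larger than those results)
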
if ( d->nIndexes && d->capped && !god ) { checkNoIndexConflicts( d, BSONObj( reinterpret_cast( obuf ) ) ); } - + DiskLoc loc = d->alloc(ns, lenWHdr, extentLoc); if ( loc.isNull() ) { // out of space if ( d->capped == 0 ) { // size capped doesn't grow log(1) << "allocating new extent for " << ns << " padding:" << d->paddingFactor << " lenWHdr: " << lenWHdr << endl; - cc().database()->allocExtent(ns, followupExtentSize(lenWHdr, d->lastExtentSize), false); + cc().database()->allocExtent(ns, Extent::followupSize(lenWHdr, d->lastExtentSize), false); loc = d->alloc(ns, lenWHdr, extentLoc); - if ( loc.isNull() ){ + if ( loc.isNull() ) { log() << "WARNING: alloc() failed after allocating new extent. lenWHdr: " << lenWHdr << " last extent size:" << d->lastExtentSize << "; trying again\n"; - for ( int zzz=0; zzz<10 && lenWHdr > d->lastExtentSize; zzz++ ){ + for ( int zzz=0; zzz<10 && lenWHdr > d->lastExtentSize; zzz++ ) { log() << "try #" << zzz << endl; - cc().database()->allocExtent(ns, followupExtentSize(len, d->lastExtentSize), false); + cc().database()->allocExtent(ns, Extent::followupSize(len, d->lastExtentSize), false); loc = d->alloc(ns, lenWHdr, extentLoc); if ( ! loc.isNull() ) break; @@ -1492,45 +1585,55 @@ namespace mongo { } Record *r = loc.rec(); - assert( r->lengthWithHeaders >= lenWHdr ); - if( addID ) { - /* a little effort was made here to avoid a double copy when we add an ID */ - ((int&)*r->data) = *((int*) obuf) + newId->size(); - memcpy(r->data+4, newId->rawdata(), newId->size()); - memcpy(r->data+4+newId->size(), ((char *)obuf)+4, addID-4); - } - else { - if( obuf ) - memcpy(r->data, obuf, len); - } - Extent *e = r->myExtent(loc); - if ( e->lastRecord.isNull() ) { - e->firstRecord = e->lastRecord = loc; - r->prevOfs = r->nextOfs = DiskLoc::NullOfs; + { + assert( r->lengthWithHeaders >= lenWHdr ); + r = (Record*) getDur().writingPtr(r, lenWHdr); + if( addID ) { + /* a little effort was made here to avoid a double copy when we add an ID */ + ((int&)*r->data) = *((int*) obuf) + newId->size(); + memcpy(r->data+4, newId->rawdata(), newId->size()); + memcpy(r->data+4+newId->size(), ((char *)obuf)+4, addID-4); + } + else { + if( obuf ) + memcpy(r->data, obuf, len); + } } - else { - Record *oldlast = e->lastRecord.rec(); - r->prevOfs = e->lastRecord.getOfs(); - r->nextOfs = DiskLoc::NullOfs; - oldlast->nextOfs = loc.getOfs(); - e->lastRecord = loc; + { + Extent *e = r->myExtent(loc); + if ( e->lastRecord.isNull() ) { + Extent::FL *fl = getDur().writing(e->fl()); + fl->firstRecord = fl->lastRecord = loc; + r->prevOfs = r->nextOfs = DiskLoc::NullOfs; + } + else { + Record *oldlast = e->lastRecord.rec(); + r->prevOfs = e->lastRecord.getOfs(); + r->nextOfs = DiskLoc::NullOfs; + getDur().writingInt(oldlast->nextOfs) = loc.getOfs(); + getDur().writingDiskLoc(e->lastRecord) = loc; + } } - d->nrecords++; - d->datasize += r->netLength(); + /* durability todo : this could be a bit annoying / slow to record constantly */ + { + NamespaceDetails::Stats *s = getDur().writing(&d->stats); + s->datasize += r->netLength(); + s->nrecords++; + } // we don't bother clearing those stats for the god tables - also god is true when adidng a btree bucket if ( !god ) NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp(); - + if ( tableToIndex ) { uassert( 13143 , "can't create index on system.indexes" , tabletoidxns.find( ".system.indexes" ) == string::npos ); BSONObj info = loc.obj(); bool background = info["background"].trueValue(); - if( background && cc().isSyncThread() ) { - /* don't do background 
indexing on slaves. there are nuances. this could be added later + if( background && cc().isSyncThread() ) { + /* don't do background indexing on slaves. there are nuances. this could be added later but requires more code. */ log() << "info: indexing in foreground on this replica; was a background index build on the primary" << endl; @@ -1539,10 +1642,11 @@ namespace mongo { int idxNo = tableToIndex->nIndexes; IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes - idx.info = loc; + getDur().writingDiskLoc(idx.info) = loc; try { buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background); - } catch( DBException& e ) { + } + catch( DBException& e ) { // save our error msg string as an exception or dropIndexes will overwrite our message LastError *le = lastError.get(); int savecode = 0; @@ -1564,7 +1668,7 @@ namespace mongo { if( !ok ) { log() << "failed to drop index after a unique key error building it: " << errmsg << ' ' << tabletoidxns << ' ' << name << endl; } - + assert( le && !saveerrmsg.empty() ); raiseError(savecode,saveerrmsg.c_str()); throw; @@ -1573,20 +1677,20 @@ namespace mongo { /* add this record to our indexes */ if ( d->nIndexes ) { - try { + try { BSONObj obj(r->data); indexRecord(d, obj, loc); - } - catch( AssertionException& e ) { + } + catch( AssertionException& e ) { // should be a dup key error on _id index if( tableToIndex || d->capped ) { massert( 12583, "unexpected index insertion failure on capped collection", !d->capped ); string s = e.toString(); s += " : on addIndex/capped - collection and its index will not match"; uassert_nothrow(s.c_str()); - log() << s << '\n'; + error() << s << endl; } - else { + else { // normal case -- we can roll back _deleteRecord(d, ns, r, loc); throw; @@ -1594,7 +1698,7 @@ namespace mongo { } } - // out() << " inserted at loc:" << hex << loc.getOfs() << " lenwhdr:" << hex << lenWHdr << dec << ' ' << ns << endl; + // out() << " inserted at loc:" << hex << loc.getOfs() << " lenwhdr:" << hex << lenWHdr << dec << ' ' << ns << endl; return loc; } @@ -1619,18 +1723,27 @@ namespace mongo { Extent *e = r->myExtent(loc); if ( e->lastRecord.isNull() ) { - e->firstRecord = e->lastRecord = loc; - r->prevOfs = r->nextOfs = DiskLoc::NullOfs; + Extent::FL *fl = getDur().writing( e->fl() ); + fl->firstRecord = fl->lastRecord = loc; + + Record::NP *np = getDur().writing(r->np()); + np->nextOfs = np->prevOfs = DiskLoc::NullOfs; } else { Record *oldlast = e->lastRecord.rec(); - r->prevOfs = e->lastRecord.getOfs(); - r->nextOfs = DiskLoc::NullOfs; - oldlast->nextOfs = loc.getOfs(); - e->lastRecord = loc; + Record::NP *np = getDur().writing(r->np()); + np->prevOfs = e->lastRecord.getOfs(); + np->nextOfs = DiskLoc::NullOfs; + getDur().writingInt( oldlast->nextOfs ) = loc.getOfs(); + e->lastRecord.writing() = loc; } - d->nrecords++; + /* todo: don't update for oplog? seems wasteful. 
*/ + { + NamespaceDetails::Stats *s = getDur().writing(&d->stats); + s->datasize += r->netLength(); + s->nrecords++; + } return r; } @@ -1641,7 +1754,7 @@ namespace mongo { namespace mongo { - void dropAllDatabasesExceptLocal() { + void dropAllDatabasesExceptLocal() { writelock lk(""); vector n; @@ -1658,14 +1771,17 @@ namespace mongo { void dropDatabase(string db) { log(1) << "dropDatabase " << db << endl; - assert( cc().database() ); - assert( cc().database()->name == db ); + Database *d = cc().database(); + assert( d ); + assert( d->name == db ); - BackgroundOperation::assertNoBgOpInProgForDb(db.c_str()); + BackgroundOperation::assertNoBgOpInProgForDb(d->name.c_str()); - Client::invalidateDB( db ); + getDur().syncDataAndTruncateJournal(); + + Database::closeDatabase( d->name.c_str(), d->path ); + d = 0; // d is now deleted - closeDatabase( db.c_str() ); _deleteDataFiles( db.c_str() ); } @@ -1674,13 +1790,14 @@ namespace mongo { void boostRenameWrapper( const Path &from, const Path &to ) { try { boost::filesystem::rename( from, to ); - } catch ( const boost::filesystem::filesystem_error & ) { + } + catch ( const boost::filesystem::filesystem_error & ) { // boost rename doesn't work across partitions boost::filesystem::copy_file( from, to); boost::filesystem::remove( from ); } } - + // back up original database files to 'temp' dir void _renameForBackup( const char *database, const Path &reservedPath ) { Path newPath( reservedPath ); @@ -1738,7 +1855,8 @@ namespace mongo { ss << prefix << "_repairDatabase_" << i++; reservedPath = repairPath / ss.str(); BOOST_CHECK_EXCEPTION( exists = boost::filesystem::exists( reservedPath ) ); - } while ( exists ); + } + while ( exists ); return reservedPath; } @@ -1790,12 +1908,15 @@ namespace mongo { stringstream ss; ss << "localhost:" << cmdLine.port; string localhost = ss.str(); - + problem() << "repairDatabase " << dbName << endl; assert( cc().database()->name == dbName ); + assert( cc().database()->path == dbpath ); BackgroundOperation::assertNoBgOpInProgForDb(dbName); + getDur().syncDataAndTruncateJournal(); // Must be done before and after repair + boost::intmax_t totalSize = dbSize( dbName ); boost::intmax_t freeSize = freeSpace( repairpath ); if ( freeSize > -1 && freeSize < totalSize ) { @@ -1812,30 +1933,37 @@ namespace mongo { "backup" : "$tmp" ); BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.native_directory_string(); - + bool res; - { // clone to temp location, which effectively does repair + { + // clone to temp location, which effectively does repair Client::Context ctx( dbName, reservedPathString ); assert( ctx.justCreated() ); - - res = cloneFrom(localhost.c_str(), errmsg, dbName, - /*logForReplication=*/false, /*slaveok*/false, /*replauth*/false, /*snapshot*/false); - closeDatabase( dbName, reservedPathString.c_str() ); + + res = cloneFrom(localhost.c_str(), errmsg, dbName, + /*logForReplication=*/false, /*slaveok*/false, /*replauth*/false, /*snapshot*/false); + Database::closeDatabase( dbName, reservedPathString.c_str() ); } if ( !res ) { problem() << "clone failed for " << dbName << " with error: " << errmsg << endl; if ( !preserveClonedFilesOnFailure ) BOOST_CHECK_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); + + getDur().syncDataAndTruncateJournal(); // Must be done before and after repair + return false; } + MongoFile::flushAll(true); + Client::Context ctx( dbName ); - closeDatabase( dbName ); + Database::closeDatabase( dbName, dbpath ); if ( 
backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); - } else { + } + else { _deleteDataFiles( dbName ); BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( Path( dbpath ) / dbName ) ); } @@ -1845,12 +1973,14 @@ namespace mongo { if ( !backupOriginalFiles ) BOOST_CHECK_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); + getDur().syncDataAndTruncateJournal(); // Must be done before and after repair + return true; } void _applyOpToDataFiles( const char *database, FileOp &fo, bool afterAllocator, const string& path ) { if ( afterAllocator ) - theFileAllocator().waitUntilFinished(); + FileAllocator::get()->waitUntilFinished(); string c = database; c += '.'; boost::filesystem::path p(path); @@ -1871,8 +2001,8 @@ namespace mongo { q = p / ss.str(); BOOST_CHECK_EXCEPTION( ok = fo.apply(q) ); if ( ok ) { - if ( extra != 10 ){ - log(1) << fo.op() << " file " << q.string() << '\n'; + if ( extra != 10 ) { + log(1) << fo.op() << " file " << q.string() << endl; log() << " _applyOpToDataFiles() warning: extra == " << extra << endl; } } @@ -1883,19 +2013,20 @@ namespace mongo { } NamespaceDetails* nsdetails_notinline(const char *ns) { return nsdetails(ns); } - - bool DatabaseHolder::closeAll( const string& path , BSONObjBuilder& result , bool force ){ + + bool DatabaseHolder::closeAll( const string& path , BSONObjBuilder& result , bool force ) { log() << "DatabaseHolder::closeAll path:" << path << endl; dbMutex.assertWriteLocked(); - + map& m = _paths[path]; _size -= m.size(); - + set< string > dbs; for ( map::iterator i = m.begin(); i != m.end(); i++ ) { + wassert( i->second->path == path ); dbs.insert( i->first ); } - + currentClient.get()->getContext()->clear(); BSONObjBuilder bb( result.subarrayStart( "dbs" ) ); @@ -1910,7 +2041,7 @@ namespace mongo { nNotClosed++; } else { - closeDatabase( name.c_str() , path ); + Database::closeDatabase( name.c_str() , path ); bb.append( bb.numStr( n++ ) , name ); } } @@ -1923,6 +2054,17 @@ namespace mongo { return true; } - + + bool isValidNS( const StringData& ns ) { + // TODO: should check for invalid characters + + const char * x = strchr( ns.data() , '.' ); + if ( ! x ) + return false; + + x++; + return *x > 0; + } + } // namespace mongo diff --git a/db/pdfile.h b/db/pdfile.h index d268aac..91f4877 100644 --- a/db/pdfile.h +++ b/db/pdfile.h @@ -29,8 +29,9 @@ #include "../util/mmap.h" #include "diskloc.h" #include "jsobjmanipulator.h" -#include "namespace.h" +#include "namespace-inl.h" #include "client.h" +#include "mongommf.h" namespace mongo { @@ -45,53 +46,60 @@ namespace mongo { /* low level - only drops this ns */ void dropNS(const string& dropNs); - + /* deletes this ns, indexes and cursors */ - void dropCollection( const string &name, string &errmsg, BSONObjBuilder &result ); + void dropCollection( const string &name, string &errmsg, BSONObjBuilder &result ); bool userCreateNS(const char *ns, BSONObj j, string& err, bool logForReplication, bool *deferIdIndex = 0); shared_ptr findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc=DiskLoc()); -// -1 if library unavailable. + // -1 if library unavailable. 
boost::intmax_t freeSpace( const string &path = dbpath ); + bool isValidNS( const StringData& ns ); + /*---------------------------------------------------------------------*/ class MongoDataFile { friend class DataFileMgr; friend class BasicCursor; public: - MongoDataFile(int fn) : fileNo(fn) { } + MongoDataFile(int fn) : _mb(0), fileNo(fn) { } void open(const char *filename, int requestedDataSize = 0, bool preallocateOnly = false); - /* allocate a new extent from this datafile. + /* allocate a new extent from this datafile. @param capped - true if capped collection @param loops is our recursion check variable - you want to pass in zero */ Extent* createExtent(const char *ns, int approxSize, bool capped = false, int loops = 0); - DataFileHeader *getHeader() { - return header; - } + DataFileHeader *getHeader() { return header(); } + + unsigned long long length() const { return mmf.length(); } /* return max size an extent may be */ static int maxSize(); - + + /** fsync */ void flush( bool sync ); - + + /** only use fore debugging */ + Extent* debug_getExtent(DiskLoc loc) { return _getExtent( loc ); } private: void badOfs(int) const; - + void badOfs2(int) const; int defaultSize( const char *filename ) const; - Extent* getExtent(DiskLoc loc); - Extent* _getExtent(DiskLoc loc); + Extent* getExtent(DiskLoc loc) const; + Extent* _getExtent(DiskLoc loc) const; Record* recordAt(DiskLoc dl); Record* makeRecord(DiskLoc dl, int size); - void grow(DiskLoc dl, int size); + void grow(DiskLoc dl, int size); - MMF mmf; - MMF::Pointer _p; - DataFileHeader *header; + char* p() const { return (char *) _mb; } + DataFileHeader* header() { return (DataFileHeader*) _mb; } + + MongoMMF mmf; + void *_mb; // the memory mapped view int fileNo; }; @@ -110,9 +118,9 @@ namespace mongo { NamespaceDetails *d, NamespaceDetailsTransient *nsdt, Record *toupdate, const DiskLoc& dl, - const char *buf, int len, OpDebug& debug, bool &changedId, bool god=false); + const char *buf, int len, OpDebug& debug, bool god=false); - // The object o may be updated if modified on insert. + // The object o may be updated if modified on insert. void insertAndLog( const char *ns, const BSONObj &o, bool god = false ); /** @param obj both and in and out param -- insert can sometimes modify an object (such as add _id). */ @@ -122,7 +130,6 @@ namespace mongo { void insertNoReturnVal(const char *ns, BSONObj o, bool god = false); DiskLoc insert(const char *ns, const void *buf, int len, bool god = false, const BSONElement &writeId = BSONElement(), bool mayAddIndex = true); - void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false); static shared_ptr findAll(const char *ns, const DiskLoc &startLoc = DiskLoc()); /* special version of insert for transaction logging -- streamlined a bit. @@ -134,9 +141,10 @@ namespace mongo { static Extent* getExtent(const DiskLoc& dl); static Record* getRecord(const DiskLoc& dl); static DeletedRecord* makeDeletedRecord(const DiskLoc& dl, int len); - static void grow(const DiskLoc& dl, int len); - /* does not clean up indexes, etc. : just deletes the record in the pdfile. */ + void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false); + + /* does not clean up indexes, etc. : just deletes the record in the pdfile. 
use deleteRecord() to unindex */ void _deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl); private: @@ -175,7 +183,10 @@ namespace mongo { int extentOfs; int nextOfs; int prevOfs; + + /** be careful when referencing this that your write intent was correct */ char data[4]; + int netLength() { return lengthWithHeaders - HeaderSize; } @@ -192,6 +203,12 @@ namespace mongo { /* get the next record in the namespace, traversing extents as necessary */ DiskLoc getNext(const DiskLoc& myLoc); DiskLoc getPrev(const DiskLoc& myLoc); + + struct NP { + int nextOfs; + int prevOfs; + }; + NP* np() { return (NP*) &nextOfs; } }; /* extents are datafile regions where all the records within the region @@ -206,13 +223,14 @@ namespace mongo { DiskLoc myLoc; DiskLoc xnext, xprev; /* next/prev extent for this namespace */ - /* which namespace this extent is for. this is just for troubleshooting really + /* which namespace this extent is for. this is just for troubleshooting really and won't even be correct if the collection were renamed! */ - Namespace nsDiagnostic; + Namespace nsDiagnostic; int length; /* size of the extent, including these fields */ - DiskLoc firstRecord, lastRecord; + DiskLoc firstRecord; + DiskLoc lastRecord; char _extentData[4]; static int HeaderSize() { return sizeof(Extent)-4; } @@ -224,7 +242,7 @@ namespace mongo { void dump(iostream& s) { s << " loc:" << myLoc.toString() << " xnext:" << xnext.toString() << " xprev:" << xprev.toString() << '\n'; - s << " nsdiag:" << nsDiagnostic.buf << '\n'; + s << " nsdiag:" << nsDiagnostic.toString() << '\n'; s << " size:" << length << " firstRecord:" << firstRecord.toString() << " lastRecord:" << lastRecord.toString() << '\n'; } @@ -237,9 +255,8 @@ namespace mongo { /* like init(), but for a reuse case */ DiskLoc reuse(const char *nsname); - void assertOk() { - assert(magic == 0x41424344); - } + bool isOk() const { return magic == 0x41424344; } + void assertOk() const { assert(isOk()); } Record* newRecord(int len); @@ -251,19 +268,38 @@ namespace mongo { return (Record *) (((char *) this) + x); } - Extent* getNextExtent() { - return xnext.isNull() ? 0 : DataFileMgr::getExtent(xnext); - } - Extent* getPrevExtent() { - return xprev.isNull() ? 0 : DataFileMgr::getExtent(xprev); - } - + Extent* getNextExtent() { return xnext.isNull() ? 0 : DataFileMgr::getExtent(xnext); } + Extent* getPrevExtent() { return xprev.isNull() ? 0 : DataFileMgr::getExtent(xprev); } + static int maxSize(); + static int minSize() { return 0x100; } + /** + * @param len lengt of record we need + * @param lastRecord size of last extent which is a factor in next extent size + */ + static int followupSize(int len, int lastExtentLen); + + /** + * @param len lengt of record we need + */ + static int initialSize(int len); + + struct FL { + DiskLoc firstRecord; + DiskLoc lastRecord; + }; + /** often we want to update just the firstRecord and lastRecord fields. + this helper is for that -- for use with getDur().writing() method + */ + FL* fl() { return (FL*) &firstRecord; } + private: + DiskLoc _reuse(const char *nsname); }; - /* + /* a datafile - i.e. the "dbname.<#>" files : + ---------------------- - Header + DataFileHeader ---------------------- Extent (for a particular namespace) Record @@ -273,7 +309,6 @@ namespace mongo { more Extents... 
---------------------- */ - class DataFileHeader { public: int version; @@ -287,35 +322,27 @@ namespace mongo { enum { HeaderSize = 8192 }; - bool currentVersion() const { - return ( version == VERSION ) && ( versionMinor == VERSION_MINOR ); - } - - bool uninitialized() const { - if ( version == 0 ) return true; - return false; - } + bool isCurrentVersion() const { return ( version == VERSION ) && ( versionMinor == VERSION_MINOR ); } - /*Record* __getRecord(DiskLoc dl) { - int ofs = dl.getOfs(); - assert( ofs >= HeaderSize ); - return (Record*) (((char *) this) + ofs); - }*/ + bool uninitialized() const { return version == 0; } - void init(int fileno, int filelength) { + void init(int fileno, int filelength, const char* filename) { if ( uninitialized() ) { - assert(filelength > 32768 ); + if( !(filelength > 32768 ) ) { + massert(13640, str::stream() << "DataFileHeader looks corrupt at file open filelength:" << filelength << " fileno:" << fileno, false); + } + getDur().createdFile(filename, filelength); assert( HeaderSize == 8192 ); - fileLength = filelength; - version = VERSION; - versionMinor = VERSION_MINOR; - unused.setOfs( fileno, HeaderSize ); + DataFileHeader *h = getDur().writing(this); + h->fileLength = filelength; + h->version = VERSION; + h->versionMinor = VERSION_MINOR; + h->unused.set( fileno, HeaderSize ); assert( (data-(char*)this) == HeaderSize ); - unusedLength = fileLength - HeaderSize - 16; - //memcpy(data+unusedLength, " \nthe end\n", 16); + h->unusedLength = fileLength - HeaderSize - 16; } } - + bool isEmpty() const { return uninitialized() || ( unusedLength == fileLength - HeaderSize - 16 ); } @@ -323,13 +350,13 @@ namespace mongo { #pragma pack() - inline Extent* MongoDataFile::_getExtent(DiskLoc loc) { + inline Extent* MongoDataFile::_getExtent(DiskLoc loc) const { loc.assertOk(); - Extent *e = (Extent *) _p.at(loc.getOfs(), Extent::HeaderSize()); + Extent *e = (Extent *) (p()+loc.getOfs()); return e; } - inline Extent* MongoDataFile::getExtent(DiskLoc loc) { + inline Extent* MongoDataFile::getExtent(DiskLoc loc) const { Extent *e = _getExtent(loc); e->assertOk(); return e; @@ -344,18 +371,13 @@ namespace mongo { inline Record* MongoDataFile::recordAt(DiskLoc dl) { int ofs = dl.getOfs(); if( ofs < DataFileHeader::HeaderSize ) badOfs(ofs); // will uassert - external call to keep out of the normal code path - return (Record*) _p.at(ofs, -1); + return (Record*) (p()+ofs); } - inline void MongoDataFile::grow(DiskLoc dl, int size) { - int ofs = dl.getOfs(); - _p.grow(ofs, size); - } - - inline Record* MongoDataFile::makeRecord(DiskLoc dl, int size) { + inline Record* MongoDataFile::makeRecord(DiskLoc dl, int size) { int ofs = dl.getOfs(); - assert( ofs >= DataFileHeader::HeaderSize ); - return (Record*) _p.at(ofs, size); + if( ofs < DataFileHeader::HeaderSize ) badOfs(ofs); // will uassert - external call to keep out of the normal code path + return (Record*) (p()+ofs); } inline DiskLoc Record::getNext(const DiskLoc& myLoc) { @@ -395,50 +417,23 @@ namespace mongo { return BSONObj(rec()); } inline DeletedRecord* DiskLoc::drec() const { - assert( fileNo != -1 ); + assert( _a != -1 ); return (DeletedRecord*) rec(); } inline Extent* DiskLoc::ext() const { return DataFileMgr::getExtent(*this); } - - /*---------------------------------------------------------------------*/ + inline const BtreeBucket* DiskLoc::btree() const { + assert( _a != -1 ); + return (const BtreeBucket *) rec()->data; + } } // namespace mongo -#include "rec.h" #include "database.h" namespace mongo { - // 
Heritable class to implement an operation that may be applied to all - // files in a database using _applyOpToDataFiles() - class FileOp { - public: - virtual ~FileOp() {} - // Return true if file exists and operation successful - virtual bool apply( const boost::filesystem::path &p ) = 0; - virtual const char * op() const = 0; - }; - - void _applyOpToDataFiles( const char *database, FileOp &fo, bool afterAllocator = false, const string& path = dbpath ); - - inline void _deleteDataFiles(const char *database) { - if ( directoryperdb ) { - BOOST_CHECK_EXCEPTION( boost::filesystem::remove_all( boost::filesystem::path( dbpath ) / database ) ); - return; - } - class : public FileOp { - virtual bool apply( const boost::filesystem::path &p ) { - return boost::filesystem::remove( p ); - } - virtual const char * op() const { - return "remove"; - } - } deleter; - _applyOpToDataFiles( database, deleter, true ); - } - boost::intmax_t dbSize( const char *database ); inline NamespaceIndex* nsindex(const char *ns) { @@ -462,11 +457,6 @@ namespace mongo { return nsindex(ns)->details(ns); } - inline MongoDataFile& DiskLoc::pdf() const { - assert( fileNo != -1 ); - return *cc().database()->getFile(fileNo); - } - inline Extent* DataFileMgr::getExtent(const DiskLoc& dl) { assert( dl.a() != -1 ); return cc().database()->getFile(dl.a())->getExtent(dl); @@ -477,30 +467,30 @@ namespace mongo { return cc().database()->getFile(dl.a())->recordAt(dl); } - BOOST_STATIC_ASSERT( 16 == sizeof(DeletedRecord) ); - - inline void DataFileMgr::grow(const DiskLoc& dl, int len) { - assert( dl.a() != -1 ); - cc().database()->getFile(dl.a())->grow(dl, len); - } + BOOST_STATIC_ASSERT( 16 == sizeof(DeletedRecord) ); - inline DeletedRecord* DataFileMgr::makeDeletedRecord(const DiskLoc& dl, int len) { + inline DeletedRecord* DataFileMgr::makeDeletedRecord(const DiskLoc& dl, int len) { assert( dl.a() != -1 ); return (DeletedRecord*) cc().database()->getFile(dl.a())->makeRecord(dl, sizeof(DeletedRecord)); } - + void ensureHaveIdIndex(const char *ns); - + bool dropIndexes( NamespaceDetails *d, const char *ns, const char *name, string &errmsg, BSONObjBuilder &anObjBuilder, bool maydeleteIdIndex ); /** - * @return true if ns is ok + * @return true if ns is 'normal'. $ used for collections holding index data, which do not contain BSON objects in their records. + * special case for the local.oplog.$main ns -- naming it as such was a mistake. */ - inline bool nsDollarCheck( const char* ns ){ + inline bool isANormalNSName( const char* ns ) { if ( strchr( ns , '$' ) == 0 ) return true; - return strcmp( ns, "local.oplog.$main" ) == 0; } + + inline BSONObj::BSONObj(const Record *r) { + init(r->data, false); + } + } // namespace mongo diff --git a/db/projection.cpp b/db/projection.cpp new file mode 100644 index 0000000..3dcfef7 --- /dev/null +++ b/db/projection.cpp @@ -0,0 +1,301 @@ +// projection.cpp + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "pch.h" +#include "projection.h" +#include "../util/mongoutils/str.h" + +namespace mongo { + + void Projection::init( const BSONObj& o ) { + massert( 10371 , "can only add to Projection once", _source.isEmpty()); + _source = o; + + BSONObjIterator i( o ); + int true_false = -1; + while ( i.more() ) { + BSONElement e = i.next(); + + if ( ! e.isNumber() ) + _hasNonSimple = true; + + if (e.type() == Object) { + BSONObj obj = e.embeddedObject(); + BSONElement e2 = obj.firstElement(); + if ( strcmp(e2.fieldName(), "$slice") == 0 ) { + if (e2.isNumber()) { + int i = e2.numberInt(); + if (i < 0) + add(e.fieldName(), i, -i); // limit is now positive + else + add(e.fieldName(), 0, i); + + } + else if (e2.type() == Array) { + BSONObj arr = e2.embeddedObject(); + uassert(13099, "$slice array wrong size", arr.nFields() == 2 ); + + BSONObjIterator it(arr); + int skip = it.next().numberInt(); + int limit = it.next().numberInt(); + uassert(13100, "$slice limit must be positive", limit > 0 ); + add(e.fieldName(), skip, limit); + + } + else { + uassert(13098, "$slice only supports numbers and [skip, limit] arrays", false); + } + } + else { + uassert(13097, string("Unsupported projection option: ") + obj.firstElement().fieldName(), false); + } + + } + else if (!strcmp(e.fieldName(), "_id") && !e.trueValue()) { + _includeID = false; + + } + else { + + add (e.fieldName(), e.trueValue()); + + // validate input + if (true_false == -1) { + true_false = e.trueValue(); + _include = !e.trueValue(); + } + else { + uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." , + (bool)true_false == e.trueValue() ); + } + } + } + } + + void Projection::add(const string& field, bool include) { + if (field.empty()) { // this is the field the user referred to + _include = include; + } + else { + _include = !include; + + const size_t dot = field.find('.'); + const string subfield = field.substr(0,dot); + const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); + + boost::shared_ptr& fm = _fields[subfield]; + if (!fm) + fm.reset(new Projection()); + + fm->add(rest, include); + } + } + + void Projection::add(const string& field, int skip, int limit) { + _special = true; // can't include or exclude whole object + + if (field.empty()) { // this is the field the user referred to + _skip = skip; + _limit = limit; + } + else { + const size_t dot = field.find('.'); + const string subfield = field.substr(0,dot); + const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); + + boost::shared_ptr& fm = _fields[subfield]; + if (!fm) + fm.reset(new Projection()); + + fm->add(rest, skip, limit); + } + } + + void Projection::transform( const BSONObj& in , BSONObjBuilder& b ) const { + BSONObjIterator i(in); + while ( i.more() ) { + BSONElement e = i.next(); + if ( mongoutils::str::equals( "_id" , e.fieldName() ) ) { + if ( _includeID ) + b.append( e ); + } + else { + append( b , e ); + } + } + } + + BSONObj Projection::transform( const BSONObj& in ) const { + BSONObjBuilder b; + transform( in , b ); + return b.obj(); + } + + + //b will be the value part of an array-typed BSONElement + void Projection::appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested) const { + int skip = nested ? 0 : _skip; + int limit = nested ? 
-1 : _limit; + + if (skip < 0) { + skip = max(0, skip + a.nFields()); + } + + int i=0; + BSONObjIterator it(a); + while (it.more()) { + BSONElement e = it.next(); + + if (skip) { + skip--; + continue; + } + + if (limit != -1 && (limit-- == 0)) { + break; + } + + switch(e.type()) { + case Array: { + BSONObjBuilder subb; + appendArray(subb , e.embeddedObject(), true); + b.appendArray(b.numStr(i++), subb.obj()); + break; + } + case Object: { + BSONObjBuilder subb; + BSONObjIterator jt(e.embeddedObject()); + while (jt.more()) { + append(subb , jt.next()); + } + b.append(b.numStr(i++), subb.obj()); + break; + } + default: + if (_include) + b.appendAs(e, b.numStr(i++)); + } + } + } + + void Projection::append( BSONObjBuilder& b , const BSONElement& e ) const { + FieldMap::const_iterator field = _fields.find( e.fieldName() ); + + if (field == _fields.end()) { + if (_include) + b.append(e); + } + else { + Projection& subfm = *field->second; + + if ((subfm._fields.empty() && !subfm._special) || !(e.type()==Object || e.type()==Array) ) { + if (subfm._include) + b.append(e); + } + else if (e.type() == Object) { + BSONObjBuilder subb; + BSONObjIterator it(e.embeddedObject()); + while (it.more()) { + subfm.append(subb, it.next()); + } + b.append(e.fieldName(), subb.obj()); + + } + else { //Array + BSONObjBuilder subb; + subfm.appendArray(subb, e.embeddedObject()); + b.appendArray(e.fieldName(), subb.obj()); + } + } + } + + Projection::KeyOnly* Projection::checkKey( const BSONObj& keyPattern ) const { + if ( _include ) { + // if we default to including then we can't + // use an index because we don't know what we're missing + return 0; + } + + if ( _hasNonSimple ) + return 0; + + if ( _includeID && keyPattern["_id"].eoo() ) + return 0; + + // at this point we know its all { x : 1 } style + + auto_ptr p( new KeyOnly() ); + + int got = 0; + BSONObjIterator i( keyPattern ); + while ( i.more() ) { + BSONElement k = i.next(); + + if ( _source[k.fieldName()].type() ) { + + if ( strchr( k.fieldName() , '.' ) ) { + // TODO we currently don't support dotted fields + // SERVER-2104 + return 0; + } + + if ( ! _includeID && mongoutils::str::equals( k.fieldName() , "_id" ) ) { + p->addNo(); + } + else { + p->addYes( k.fieldName() ); + got++; + } + } + else if ( mongoutils::str::equals( "_id" , k.fieldName() ) && _includeID ) { + p->addYes( "_id" ); + } + else { + p->addNo(); + } + + } + + int need = _source.nFields(); + if ( ! _includeID ) + need--; + + if ( got == need ) + return p.release(); + + return 0; + } + + BSONObj Projection::KeyOnly::hydrate( const BSONObj& key ) const { + assert( _include.size() == _names.size() ); + + BSONObjBuilder b( key.objsize() + _stringSize + 16 ); + + BSONObjIterator i(key); + unsigned n=0; + while ( i.more() ) { + assert( n < _include.size() ); + BSONElement e = i.next(); + if ( _include[n] ) { + b.appendAs( e , _names[n] ); + } + n++; + } + + return b.obj(); + } +} diff --git a/db/projection.h b/db/projection.h new file mode 100644 index 0000000..fd3b856 --- /dev/null +++ b/db/projection.h @@ -0,0 +1,127 @@ +// projection.h + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "pch.h" +#include "jsobj.h" + +namespace mongo { + + /** + * given a document and a projection specification + * can transform the document + * currently supports specifying which fields and $slice + */ + class Projection { + public: + + class KeyOnly { + public: + + KeyOnly() : _stringSize(0) {} + + BSONObj hydrate( const BSONObj& key ) const; + + void addNo() { _add( false , "" ); } + void addYes( const string& name ) { _add( true , name ); } + + private: + + void _add( bool b , const string& name ) { + _include.push_back( b ); + _names.push_back( name ); + _stringSize += name.size(); + } + + vector _include; // one entry per field in key. true iff should be in output + vector _names; // name of field since key doesn't have names + + int _stringSize; + }; + + Projection() : + _include(true) , + _special(false) , + _includeID(true) , + _skip(0) , + _limit(-1) , + _hasNonSimple(false) { + } + + /** + * called once per lifetime + * e.g. { "x" : 1 , "a.y" : 1 } + */ + void init( const BSONObj& spec ); + + /** + * @return the spec init was called with + */ + BSONObj getSpec() const { return _source; } + + /** + * transforms in according to spec + */ + BSONObj transform( const BSONObj& in ) const; + + + /** + * transforms in according to spec + */ + void transform( const BSONObj& in , BSONObjBuilder& b ) const; + + + /** + * @return if the keyPattern has all the information needed to return then + * return a new KeyOnly otherwise null + * NOTE: a key may have modified the actual data + * which has to be handled above this (arrays, geo) + */ + KeyOnly* checkKey( const BSONObj& keyPattern ) const; + + private: + + /** + * appends e to b if user wants it + * will descend into e if needed + */ + void append( BSONObjBuilder& b , const BSONElement& e ) const; + + + void add( const string& field, bool include ); + void add( const string& field, int skip, int limit ); + void appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested=false) const; + + bool _include; // true if default at this level is to include + bool _special; // true if this level can't be skipped or included without recursing + + //TODO: benchmark vector vs map + typedef map > FieldMap; + FieldMap _fields; + BSONObj _source; + bool _includeID; + + // used for $slice operator + int _skip; + int _limit; + + bool _hasNonSimple; + }; + + +} diff --git a/db/query.cpp b/db/query.cpp index 154fd15..df09fce 100644 --- a/db/query.cpp +++ b/db/query.cpp @@ -30,7 +30,7 @@ #include "replpair.h" #include "scanandorder.h" #include "security.h" -#include "curop.h" +#include "curop-inl.h" #include "commands.h" #include "queryoptimizer.h" #include "lasterror.h" @@ -67,7 +67,7 @@ namespace mongo { _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , qp().ns() ) ); } return _cc->prepareToYield( _yieldData ); - } + } virtual void recoverFromYield() { if ( !ClientCursor::recoverFromYield( _yieldData ) ) { _cc.reset(); @@ -75,24 +75,28 @@ namespace mongo { massert( 13340, "cursor dropped during delete", false ); } } + virtual long long nscanned() { + assert( c_.get() ); + return 
c_->nscanned(); + } virtual void next() { if ( !c_->ok() ) { setComplete(); return; } - + DiskLoc rloc = c_->currLoc(); - + if ( matcher()->matches(c_->currKey(), rloc ) ) { if ( !c_->getsetdup(rloc) ) ++count_; } c_->advance(); - ++_nscanned; + _nscanned = c_->nscanned(); if ( count_ > bestCount_ ) bestCount_ = count_; - + if ( count_ > 0 ) { if ( justOne_ ) setComplete(); @@ -115,7 +119,7 @@ namespace mongo { ClientCursor::CleanupPointer _cc; ClientCursor::YieldData _yieldData; }; - + /* ns: namespace, e.g. . pattern: the "where" clause / criteria justOne: stop after 1 match @@ -124,13 +128,13 @@ namespace mongo { long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) { if( !god ) { if ( strstr(ns, ".system.") ) { - /* note a delete from system.indexes would corrupt the db - if done here, as there are pointers into those objects in + /* note a delete from system.indexes would corrupt the db + if done here, as there are pointers into those objects in NamespaceDetails. */ uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) ); } - if ( strchr( ns , '$' ) ){ + if ( strchr( ns , '$' ) ) { log() << "cannot delete from collection with reserved $ in name: " << ns << endl; uassert( 10100 , "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 ); } @@ -145,55 +149,56 @@ namespace mongo { int best = 0; shared_ptr< MultiCursor::CursorOp > opPtr( new DeleteOp( justOneOrig, best ) ); - shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, true ) ); - + shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, !god ) ); + if( !creal->ok() ) return nDeleted; - + shared_ptr< Cursor > cPtr = creal; auto_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) ); cc->setDoingDeletes( true ); - - CursorId id = cc->cursorid; - + + CursorId id = cc->cursorid(); + bool justOne = justOneOrig; bool canYield = !god && !creal->matcher()->docMatcher().atomic(); + do { - if ( canYield && ! cc->yieldSometimes() ){ + if ( canYield && ! cc->yieldSometimes() ) { cc.release(); // has already been deleted elsewhere // TODO should we assert or something? break; } - if ( !cc->c->ok() ) { + if ( !cc->ok() ) { break; // if we yielded, could have hit the end } - + // this way we can avoid calling updateLocation() every time (expensive) // as well as some other nuances handled cc->setDoingDeletes( true ); - - DiskLoc rloc = cc->c->currLoc(); - BSONObj key = cc->c->currKey(); - // NOTE Calling advance() may change the matcher, so it's important + DiskLoc rloc = cc->currLoc(); + BSONObj key = cc->currKey(); + + // NOTE Calling advance() may change the matcher, so it's important // to try to match first. bool match = creal->matcher()->matches( key , rloc ); - - if ( ! cc->c->advance() ) + + if ( ! cc->advance() ) justOne = true; - + if ( ! match ) continue; - - assert( !cc->c->getsetdup(rloc) ); // can't be a dup, we deleted it! - + + assert( !cc->c()->getsetdup(rloc) ); // can't be a dup, we deleted it! + if ( !justOne ) { /* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore blocks. here we might call millions of times which would be bad. 
*/ - cc->c->noteLocation(); + cc->c()->noteLocation(); } - + if ( logop ) { BSONElement e; if( BSONObj( rloc.rec() ).getObjectID( e ) ) { @@ -201,7 +206,8 @@ namespace mongo { b.append( e ); bool replJustOne = true; logOp( "d", ns, b.done(), 0, &replJustOne ); - } else { + } + else { problem() << "deleted object without id, not logging" << endl; } } @@ -214,14 +220,20 @@ namespace mongo { if ( justOne ) { break; } - cc->c->checkLocation(); - - } while ( cc->c->ok() ); + cc->c()->checkLocation(); + + if( !god ) + getDur().commitIfNeeded(); - if ( cc.get() && ClientCursor::find( id , false ) == 0 ){ + if( debug && god && nDeleted == 100 ) + log() << "warning high number of deletes with god=true which could use significant memory" << endl; + } + while ( cc->ok() ); + + if ( cc.get() && ClientCursor::find( id , false ) == 0 ) { cc.release(); } - + return nDeleted; } @@ -246,16 +258,6 @@ namespace mongo { int nCaught = 0; - void killCursors(int n, long long *ids) { - int k = 0; - for ( int i = 0; i < n; i++ ) { - if ( ClientCursor::erase(ids[i]) ) - k++; - } - if ( logLevel > 0 || k != n ){ - log( k == n ) << "killcursors: found " << k << " of " << n << endl; - } - } BSONObj id_obj = fromjson("{\"_id\":1}"); BSONObj empty_obj = fromjson("{}"); @@ -278,21 +280,20 @@ namespace mongo { } QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) { -// log() << "TEMP GETMORE " << ns << ' ' << cursorid << ' ' << pass << endl; exhaust = false; ClientCursor::Pointer p(cursorid); - ClientCursor *cc = p._c; - + ClientCursor *cc = p.c(); + int bufSize = 512; - if ( cc ){ + if ( cc ) { bufSize += sizeof( QueryResult ); - bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024; + bufSize += MaxBytesToReturnToClientAtOnce; } BufBuilder b( bufSize ); b.skip(sizeof(QueryResult)); - + int resultFlags = ResultFlag_AwaitCapable; int start = 0; int n = 0; @@ -306,23 +307,27 @@ namespace mongo { if ( pass == 0 ) cc->updateSlaveLocation( curop ); - int queryOptions = cc->_queryOptions; + int queryOptions = cc->queryOptions(); if( pass == 0 ) { StringBuilder& ss = curop.debug().str; - ss << " getMore: " << cc->query.toString() << " "; + ss << " getMore: " << cc->query().toString() << " "; } - - start = cc->pos; - Cursor *c = cc->c.get(); + + start = cc->pos(); + Cursor *c = cc->c(); c->checkLocation(); DiskLoc last; + scoped_ptr keyFieldsOnly; + if ( cc->modifiedKeys() == false && cc->isMultiKey() == false && cc->fields ) + keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) ); + while ( 1 ) { if ( !c->ok() ) { if ( c->tailable() ) { - /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however - advance() can still be retries as a reactivation attempt. when there is new data, it will + /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however + advance() can still be retries as a reactivation attempt. when there is new data, it will return true. that's what we are doing here. */ if ( c->advance() ) @@ -356,27 +361,40 @@ namespace mongo { } else { last = c->currLoc(); - BSONObj js = c->current(); - - // show disk loc should be part of the main query, not in an $or clause, so this should be ok - fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? 
&last : 0)); n++; - if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) || - (ntoreturn==0 && b.len()>1*1024*1024) ) { + + if ( keyFieldsOnly ) { + fillQueryResultFromObj(b, 0, keyFieldsOnly->hydrate( c->currKey() ) ); + } + else { + BSONObj js = c->current(); + // show disk loc should be part of the main query, not in an $or clause, so this should be ok + fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0)); + } + + if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) { c->advance(); - cc->pos += n; + cc->incPos( n ); break; } } } c->advance(); + + if ( ! cc->yieldSometimes() ) { + ClientCursor::erase(cursorid); + cursorid = 0; + cc = 0; + p.deleted(); + break; + } } if ( cc ) { cc->updateLocation(); cc->mayUpgradeStorage(); cc->storeOpForSlave( last ); - exhaust = cc->_queryOptions & QueryOption_Exhaust; + exhaust = cc->queryOptions() & QueryOption_Exhaust; } } @@ -395,104 +413,120 @@ namespace mongo { class CountOp : public QueryOp { public: CountOp( const string& ns , const BSONObj &spec ) : - _ns(ns), count_(), - skip_( spec["skip"].numberLong() ), - limit_( spec["limit"].numberLong() ), - bc_(){ + _ns(ns), _capped(false), _count(), _myCount(), + _skip( spec["skip"].numberLong() ), + _limit( spec["limit"].numberLong() ), + _bc() { } - + virtual void _init() { - c_ = qp().newCursor(); - + _c = qp().newCursor(); + _capped = _c->capped(); if ( qp().exactKeyMatch() && ! matcher()->needRecord() ) { - query_ = qp().simplifiedQuery( qp().indexKey() ); - bc_ = dynamic_cast< BtreeCursor* >( c_.get() ); - bc_->forgetEndKey(); + _query = qp().simplifiedQuery( qp().indexKey() ); + _bc = dynamic_cast< BtreeCursor* >( _c.get() ); + _bc->forgetEndKey(); } } + virtual long long nscanned() { + assert( _c.get() ); + return _c->nscanned(); + } + virtual bool prepareToYield() { if ( ! _cc ) { - _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , _ns.c_str() ) ); + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _ns.c_str() ) ); } return _cc->prepareToYield( _yieldData ); } - + virtual void recoverFromYield() { if ( !ClientCursor::recoverFromYield( _yieldData ) ) { - c_.reset(); + _c.reset(); _cc.reset(); - massert( 13337, "cursor dropped during count", false ); - // TODO maybe we want to prevent recording the winning plan as well? + + if ( _capped ) { + msgassertedNoTrace( 13337, str::stream() << "capped cursor overrun during count: " << _ns ); + } + else { + // we don't fail query since we're fine with returning partial data if collection dropped + } } } - + virtual void next() { - if ( !c_->ok() ) { + if ( ! _c || !_c->ok() ) { setComplete(); return; } - if ( bc_ ) { - if ( firstMatch_.isEmpty() ) { - firstMatch_ = bc_->currKeyNode().key; + if ( _bc ) { + if ( _firstMatch.isEmpty() ) { + _firstMatch = _bc->currKeyNode().key.copy(); // if not match - if ( query_.woCompare( firstMatch_, BSONObj(), false ) ) { + if ( _query.woCompare( _firstMatch, BSONObj(), false ) ) { setComplete(); return; } _gotOne(); - } else { - if ( !firstMatch_.woEqual( bc_->currKeyNode().key ) ) { + } + else { + if ( ! 
_firstMatch.woEqual( _bc->currKeyNode().key ) ) { setComplete(); return; } _gotOne(); } - } + } else { - if ( !matcher()->matches(c_->currKey(), c_->currLoc() ) ) { + if ( !matcher()->matches(_c->currKey(), _c->currLoc() ) ) { } - else if( !c_->getsetdup(c_->currLoc()) ) { + else if( !_c->getsetdup(_c->currLoc()) ) { _gotOne(); - } + } } - c_->advance(); + _c->advance(); } virtual QueryOp *_createChild() const { CountOp *ret = new CountOp( _ns , BSONObj() ); - ret->count_ = count_; - ret->skip_ = skip_; - ret->limit_ = limit_; + ret->_count = _count; + ret->_skip = _skip; + ret->_limit = _limit; return ret; } - long long count() const { return count_; } - virtual bool mayRecordPlan() const { return true; } + long long count() const { return _count; } + virtual bool mayRecordPlan() const { + return ( _myCount > _limit / 2 ) || ( complete() && !stopRequested() ); + } private: - - void _gotOne(){ - if ( skip_ ){ - skip_--; + + void _gotOne() { + if ( _skip ) { + _skip--; return; } - - if ( limit_ > 0 && count_ >= limit_ ){ + + if ( _limit > 0 && _count >= _limit ) { setStop(); return; } - count_++; + _count++; + _myCount++; } string _ns; - - long long count_; - long long skip_; - long long limit_; - shared_ptr c_; - BSONObj query_; - BtreeCursor *bc_; - BSONObj firstMatch_; + bool _capped; + + long long _count; + long long _myCount; + long long _skip; + long long _limit; + shared_ptr _c; + BSONObj _query; + BtreeCursor * _bc; + BSONObj _firstMatch; ClientCursor::CleanupPointer _cc; ClientCursor::YieldData _yieldData; @@ -500,7 +534,7 @@ namespace mongo { /* { count: "collectionname"[, query: ] } returns -1 on ns does not exist error. - */ + */ long long runCount( const char *ns, const BSONObj &cmd, string &err ) { Client::Context cx(ns); NamespaceDetails *d = nsdetails( ns ); @@ -509,10 +543,10 @@ namespace mongo { return -1; } BSONObj query = cmd.getObjectField("query"); - + // count of all objects - if ( query.isEmpty() ){ - return applySkipLimit( d->nrecords , cmd ); + if ( query.isEmpty() ) { + return applySkipLimit( d->stats.nrecords , cmd ); } MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true ); CountOp original( ns , cmd ); @@ -525,8 +559,11 @@ namespace mongo { } return res->count(); } - + class ExplainBuilder { + // Note: by default we filter out allPlans and oldPlan in the shell's + // explain() function. If you add any recursive structures, make sure to + // edit the JS to make sure everything gets filtered. 
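The count path above consumes the skip amount before anything is counted and stops once a positive limit is reached; a minimal free-standing sketch of that accounting (helper name is hypothetical, not the server's own applySkipLimit):

    // Sketch only: mirrors the skip/limit handling in CountOp::_gotOne and the
    // empty-query fast path; a limit of 0 means "no limit".
    long long countWithSkipLimit( long long totalMatches, long long skip, long long limit ) {
        long long n = totalMatches - skip;   // skip is charged first
        if ( n < 0 )
            n = 0;
        if ( limit > 0 && n > limit )        // a positive limit caps the result
            n = limit;
        return n;
    }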
public: ExplainBuilder() : _i() {} void ensureStartScan() { @@ -539,14 +576,16 @@ namespace mongo { b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds(); b.done(); } - void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, int millis, bool hint ) { + void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, + int millis, bool hint, int nYields , int nChunkSkips , bool indexOnly ) { if ( _i == 1 ) { _c.reset( new BSONArrayBuilder() ); *_c << _b->obj(); } if ( _i == 0 ) { _b.reset( new BSONObjBuilder() ); - } else { + } + else { _b.reset( new BSONObjBuilder( _c->subobjStart() ) ); } *_b << "cursor" << c->toString(); @@ -559,6 +598,11 @@ namespace mongo { *_b << "millis" << millis; + *_b << "nYields" << nYields; + *_b << "nChunkSkips" << nChunkSkips; + *_b << "isMultiKey" << c->isMultiKey(); + *_b << "indexOnly" << indexOnly; + *_b << "indexBounds" << c->prettyIndexBounds(); if ( !hint ) { @@ -570,19 +614,20 @@ namespace mongo { _a.reset( 0 ); ++_i; } - BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) { + BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) { if ( _i > 1 ) { BSONObjBuilder b; b << "clauses" << _c->arr(); b.appendNumber( "nscanned", nscanned ); - b.appendNumber( "nscanneObjects", nscannedObjects ); + b.appendNumber( "nscannedObjects", nscannedObjects ); b << "n" << n; b << "millis" << millis; b.appendElements( suffix ); return b.obj(); - } else { + } + else { _b->appendElements( suffix ); - return _b->obj(); + return _b->obj(); } } private: @@ -591,11 +636,11 @@ namespace mongo { auto_ptr< BSONArrayBuilder > _c; int _i; }; - + // Implements database 'query' requests using the query optimizer's QueryOp interface class UserQueryOp : public QueryOp { public: - + UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) : _buf( 32768 ) , // TODO be smarter here _pq( pq ) , @@ -603,8 +648,12 @@ namespace mongo { _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0), _n(0), _oldN(0), - _chunkMatcher(shardingState.getChunkMatcher(pq.ns())), + _nYields(), + _nChunkSkips(), + _chunkManager( shardingState.needShardChunkManager(pq.ns()) ? + shardingState.getShardChunkManager(pq.ns()) : ShardChunkManagerPtr() ), _inMemSort(false), + _capped(false), _saveClientCursor(false), _wouldSaveClientCursor(false), _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ), @@ -612,82 +661,111 @@ namespace mongo { _eb( eb ), _curop( curop ) {} - + virtual void _init() { // only need to put the QueryResult fields there if we're building the first buffer in the message. 
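For each plan, the builder above emits an explain document roughly of this shape (values are purely illustrative; nYields, nChunkSkips, isMultiKey and indexOnly are the newly reported fields):

    {
        "cursor" : "BtreeCursor a_1",
        "nscanned" : 120,
        "nscannedObjects" : 100,
        "n" : 50,
        "millis" : 3,
        "nYields" : 0,
        "nChunkSkips" : 0,
        "isMultiKey" : false,
        "indexOnly" : false,
        "indexBounds" : { "a" : [ [ 1, 10 ] ] }
    }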
if ( _response.empty() ) { _buf.skip( sizeof( QueryResult ) ); } - + if ( _oplogReplay ) { _findingStartCursor.reset( new FindingStartCursor( qp() ) ); - } else { + _capped = true; + } + else { _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() ); + _capped = _c->capped(); + + // setup check for if we can only use index to extract + if ( _c->modifiedKeys() == false && _c->isMultiKey() == false && _pq.getFields() ) { + _keyFieldsOnly.reset( _pq.getFields()->checkKey( _c->indexKeyPattern() ) ); + } } if ( qp().scanAndOrderRequired() ) { _inMemSort = true; _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder() ) ); } - + if ( _pq.isExplain() ) { _eb.noteCursor( _c.get() ); } + } - + virtual bool prepareToYield() { if ( _findingStartCursor.get() ) { return _findingStartCursor->prepareToYield(); - } else { + } + else { if ( ! _cc ) { _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) ); } return _cc->prepareToYield( _yieldData ); } } - + virtual void recoverFromYield() { + _nYields++; + if ( _findingStartCursor.get() ) { _findingStartCursor->recoverFromYield(); - } else { - if ( !ClientCursor::recoverFromYield( _yieldData ) ) { - _c.reset(); - _cc.reset(); - _so.reset(); - massert( 13338, "cursor dropped during query", false ); - // TODO maybe we want to prevent recording the winning plan as well? - } + } + else if ( ! ClientCursor::recoverFromYield( _yieldData ) ) { + _c.reset(); + _cc.reset(); + _so.reset(); + + if ( _capped ) { + msgassertedNoTrace( 13338, str::stream() << "capped cursor overrun during query: " << _pq.ns() ); + } + else { + // we don't fail query since we're fine with returning partial data if collection dropped + + // todo: this is wrong. the cursor could be gone if closeAllDatabases command just ran + } + } } - + + virtual long long nscanned() { + if ( _findingStartCursor.get() ) { + return 0; // should only be one query plan, so value doesn't really matter. + } + assert( _c.get() ); + return _c->nscanned(); + } + virtual void next() { if ( _findingStartCursor.get() ) { if ( _findingStartCursor->done() ) { _c = _findingStartCursor->cRelease(); _findingStartCursor.reset( 0 ); - } else { + } + else { _findingStartCursor->next(); } + _capped = true; return; } - - if ( !_c->ok() ) { + + if ( !_c || !_c->ok() ) { finish( false ); return; } bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors; - - if( 0 ) { + + if( 0 ) { cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl; } - - if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ){ + + if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ) { finish( true ); //? return; } - _nscanned++; + _nscanned = _c->nscanned(); if ( !matcher()->matches(_c->currKey(), _c->currLoc() , &_details ) ) { // not a match, continue onward if ( _details.loadedObject ) @@ -696,22 +774,23 @@ namespace mongo { else { _nscannedObjects++; DiskLoc cl = _c->currLoc(); - if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( _c->currKey(), _c->currLoc() ) ){ - // cout << "TEMP skipping un-owned chunk: " << _c->current() << endl; + if ( _chunkManager && ! _chunkManager->belongsToMe( cl.obj() ) ) { + _nChunkSkips++; + // log() << "TEMP skipping un-owned chunk: " << _c->current() << endl; } - else if( _c->getsetdup(cl) ) { + else if( _c->getsetdup(cl) ) { // dup } else { // got a match. - + if ( _inMemSort ) { // note: no cursors for non-indexed, ordered results. results must be fairly small. 
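When no index supplies the requested order, matching documents are buffered and sorted in memory as noted above, which is why such result sets must stay small; a rough self-contained sketch of that bounded buffer, simplified to integer sort keys and a positive limit (names hypothetical; ScanAndOrder itself works on BSON keys and documents):

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Keep at most skip + limit candidates ordered by sort key, then emit the
    // window that starts after 'skip'.
    std::vector<std::string> sortInMemory( const std::vector< std::pair<int, std::string> > &matches,
                                           size_t skip, size_t limit ) {
        std::multimap<int, std::string> best;
        for ( size_t i = 0; i < matches.size(); ++i ) {
            best.insert( std::make_pair( matches[i].first, matches[i].second ) );
            if ( best.size() > skip + limit )
                best.erase( --best.end() );          // drop the current worst candidate
        }
        std::vector<std::string> out;
        size_t n = 0;
        for ( std::multimap<int, std::string>::iterator it = best.begin(); it != best.end(); ++it, ++n ) {
            if ( n >= skip )
                out.push_back( it->second );
        }
        return out;
    }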
_so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 ); } else if ( _ntoskip > 0 ) { _ntoskip--; - } + } else { if ( _pq.isExplain() ) { _n++; @@ -723,16 +802,19 @@ namespace mongo { } else { - if ( _pq.returnKey() ){ + if ( _pq.returnKey() ) { BSONObjBuilder bb( _buf ); bb.appendKeys( _c->indexKeyPattern() , _c->currKey() ); bb.done(); } + else if ( _keyFieldsOnly ) { + fillQueryResultFromObj( _buf , 0 , _keyFieldsOnly->hydrate( _c->currKey() ) ); + } else { BSONObj js = _c->current(); assert( js.isValid() ); - if ( _oplogReplay ){ + if ( _oplogReplay ) { BSONElement e = js["ts"]; if ( e.type() == Date || e.type() == Timestamp ) _slaveReadTill = e._opTime(); @@ -741,13 +823,13 @@ namespace mongo { fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0)); } _n++; - if ( ! _c->supportGetMore() ){ - if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){ + if ( ! _c->supportGetMore() ) { + if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ) { finish( true ); return; } } - else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ){ + else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ) { /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */ if ( mayCreateCursor1 ) { _wouldSaveClientCursor = true; @@ -763,60 +845,73 @@ namespace mongo { } } } - _c->advance(); + _c->advance(); } // this plan won, so set data for response broadly void finish( bool stop ) { + if ( _pq.isExplain() ) { _n = _inMemSort ? _so->size() : _n; - } + } else if ( _inMemSort ) { if( _so.get() ) _so->fill( _buf, _pq.getFields() , _n ); } - - if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 ) - _c->setTailable(); - - // If the tailing request succeeded. - if ( _c->tailable() ) - _saveClientCursor = true; - - if ( _pq.isExplain()) { - _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), _curop.elapsedMillis(), useHints && !_pq.getHint().eoo() ); - } else { - if (_buf.len()) { + + if ( _c.get() ) { + _nscanned = _c->nscanned(); + + if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 ) + _c->setTailable(); + + // If the tailing request succeeded. 
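The batch cutoffs applied above depend on whether the client supplied an explicit ntoreturn; a minimal sketch of the first-batch rule (threshold values are illustrative, not the server's constants):

    // With ntoreturn == 0 the first reply stops after about a hundred documents or
    // roughly a megabyte of data, whichever comes first; with ntoreturn set, the
    // requested count applies unless the size cap is hit.  Later getMore batches
    // are bounded by size only.
    bool firstBatchIsFull( int ntoreturn, int numDocs, int bufLen ) {
        if ( ntoreturn == 0 )
            return numDocs >= 101 || bufLen > 1024 * 1024;
        return numDocs >= ntoreturn || bufLen > 4 * 1024 * 1024;
    }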
+ if ( _c->tailable() ) + _saveClientCursor = true; + } + + if ( _pq.isExplain() ) { + massert( 13638, "client cursor dropped during explain query yield", _c.get() ); + _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), + _curop.elapsedMillis(), useHints && !_pq.getHint().eoo(), _nYields , + _nChunkSkips, _keyFieldsOnly.get() > 0 ); + } + else { + if ( _buf.len() ) { _response.appendData( _buf.buf(), _buf.len() ); _buf.decouple(); } } + if ( stop ) { setStop(); - } else { + } + else { setComplete(); } } - + void finishExplain( const BSONObj &suffix ) { - BSONObj obj = _eb.finishWithSuffix( nscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix); + BSONObj obj = _eb.finishWithSuffix( totalNscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix); fillQueryResultFromObj(_buf, 0, obj); _n = 1; _oldN = 0; _response.appendData( _buf.buf(), _buf.len() ); _buf.decouple(); } - - virtual bool mayRecordPlan() const { return _pq.getNumToReturn() != 1; } - + + virtual bool mayRecordPlan() const { + return ( _pq.getNumToReturn() != 1 ) && ( ( _n > _pq.getNumToReturn() / 2 ) || ( complete() && !stopRequested() ) ); + } + virtual QueryOp *_createChild() const { if ( _pq.isExplain() ) { _eb.ensureStartScan(); } UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop ); ret->_oldN = n(); - ret->_oldNscanned = nscanned(); + ret->_oldNscanned = totalNscanned(); ret->_oldNscannedObjects = nscannedObjects(); ret->_ntoskip = _ntoskip; return ret; @@ -825,19 +920,20 @@ namespace mongo { bool scanAndOrderRequired() const { return _inMemSort; } shared_ptr cursor() { return _c; } int n() const { return _oldN + _n; } - long long nscanned() const { return _nscanned + _oldNscanned; } + long long totalNscanned() const { return _nscanned + _oldNscanned; } long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; } bool saveClientCursor() const { return _saveClientCursor; } bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; } - - void finishForOplogReplay( ClientCursor * cc ){ + + void finishForOplogReplay( ClientCursor * cc ) { if ( _oplogReplay && ! _slaveReadTill.isNull() ) - cc->_slaveReadTill = _slaveReadTill; + cc->slaveReadTill( _slaveReadTill ); } private: BufBuilder _buf; const ParsedQuery& _pq; + scoped_ptr _keyFieldsOnly; long long _ntoskip; long long _nscanned; @@ -846,30 +942,36 @@ namespace mongo { long long _oldNscannedObjects; int _n; // found so far int _oldN; - + + int _nYields; + int _nChunkSkips; + MatchDetails _details; - ChunkMatcherPtr _chunkMatcher; - + ShardChunkManagerPtr _chunkManager; + bool _inMemSort; auto_ptr< ScanAndOrder > _so; - + shared_ptr _c; ClientCursor::CleanupPointer _cc; ClientCursor::YieldData _yieldData; + bool _capped; bool _saveClientCursor; bool _wouldSaveClientCursor; bool _oplogReplay; auto_ptr< FindingStartCursor > _findingStartCursor; - + Message &_response; ExplainBuilder &_eb; CurOp &_curop; OpTime _slaveReadTill; }; - - /* run a query -- includes checking for and running a Command */ + + /* run a query -- includes checking for and running a Command \ + @return points to ns if exhaust mode. 
0=normal mode + */ const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) { StringBuilder& ss = curop.debug().str; shared_ptr pq_shared( new ParsedQuery(q) ); @@ -878,25 +980,26 @@ namespace mongo { BSONObj jsobj = q.query; int queryOptions = q.queryOptions; const char *ns = q.ns; - + if( logLevel >= 2 ) log() << "query: " << ns << jsobj << endl; - + ss << ns; { - // only say ntoreturn if nonzero. + // only say ntoreturn if nonzero. int n = pq.getNumToReturn(); - if( n ) + if( n ) ss << " ntoreturn:" << n; } curop.setQuery(jsobj); - + if ( pq.couldBeCommand() ) { BufBuilder bb; bb.skip(sizeof(QueryResult)); BSONObjBuilder cmdResBuf; if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) { - ss << " command: " << jsobj.toString(); + ss << " command: "; + jsobj.toString( ss ); curop.markCommand(); auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); @@ -910,9 +1013,12 @@ namespace mongo { qr->nReturned = 1; result.setData( qr.release(), true ); } - return false; + else { + uasserted(13530, "bad or malformed command request?"); + } + return 0; } - + /* --- regular query --- */ int n = 0; @@ -932,7 +1038,7 @@ namespace mongo { out() << query.toString() << endl; uassert( 10110 , "bad query object", false); } - + /* --- read lock --- */ mongolock lk(false); @@ -947,17 +1053,18 @@ namespace mongo { const BSONObj nat1 = BSON( "$natural" << 1 ); if ( order.isEmpty() ) { order = nat1; - } else { + } + else { uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 ); } } - + BSONObj snapshotHint; // put here to keep the data in scope - if( snapshot ) { + if( snapshot ) { NamespaceDetails *d = nsdetails(ns); - if ( d ){ + if ( d ) { int i = d->findIdIndex(); - if( i < 0 ) { + if( i < 0 ) { if ( strstr( ns , ".system." ) == 0 ) log() << "warning: no _id index on $snapshot query, ns:" << ns << endl; } @@ -973,7 +1080,7 @@ namespace mongo { } } } - + if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { bool nsFound = false; bool indexFound = false; @@ -981,12 +1088,12 @@ namespace mongo { BSONObj resObject; Client& c = cc(); bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound ); - if ( nsFound == false || indexFound == true ){ + if ( nsFound == false || indexFound == true ) { BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32); bb.skip(sizeof(QueryResult)); - + ss << " idhack "; - if ( found ){ + if ( found ) { n = 1; fillQueryResultFromObj( bb , pq.getFields() , resObject ); } @@ -999,16 +1106,16 @@ namespace mongo { qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; - qr->nReturned = n; + qr->nReturned = n; result.setData( qr.release(), true ); return false; - } + } } - + // regular, not QO bypass query - + BSONObj oldPlan; - if ( explain && ! pq.hasIndexSpecifier() ){ + if ( explain && ! 
pq.hasIndexSpecifier() ) { MultiPlanScanner mps( ns, query, order ); if ( mps.usingPrerecordedPlan() ) oldPlan = mps.oldExplain(); @@ -1031,7 +1138,7 @@ namespace mongo { dqo.finishExplain( explainSuffix ); } n = dqo.n(); - long long nscanned = dqo.nscanned(); + long long nscanned = dqo.totalNscanned(); if ( dqo.scanAndOrderRequired() ) ss << " scanAndOrder "; shared_ptr cursor = dqo.cursor(); @@ -1046,18 +1153,19 @@ namespace mongo { // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher(), dqo ) ); cc = new ClientCursor(queryOptions, multi, ns, jsobj.getOwned()); - } else { + } + else { cursor->setMatcher( dqo.matcher() ); cc = new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() ); } - cursorid = cc->cursorid; + cursorid = cc->cursorid(); DEV tlog(2) << "query has more, cursorid: " << cursorid << endl; - cc->pos = n; + cc->setPos( n ); cc->pq = pq_shared; cc->fields = pq.getFieldPtr(); cc->originalMessage = m; cc->updateLocation(); - if ( !cc->c->ok() && cc->c->tailable() ) + if ( !cc->ok() && cc->c()->tailable() ) DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl; if( queryOptions & QueryOption_Exhaust ) { exhaust = ns; @@ -1087,6 +1195,6 @@ namespace mongo { } ss << " nreturned:" << n; return exhaust; - } - + } + } // namespace mongo diff --git a/db/query.h b/db/query.h index cc88e5c..5de7ced 100644 --- a/db/query.h +++ b/db/query.h @@ -23,6 +23,7 @@ #include "dbmessage.h" #include "jsobj.h" #include "diskloc.h" +#include "projection.h" /* db request message format @@ -37,29 +38,29 @@ a series of JSObjects dbDelete: string collection; - int flags=0; // 1=DeleteSingle + int flags=0; // 1=DeleteSingle JSObject query; dbUpdate: string collection; - int flags; // 1=upsert + int flags; // 1=upsert JSObject query; - JSObject objectToUpdate; + JSObject objectToUpdate; objectToUpdate may include { $inc: } or { $set: ... }, see struct Mod. dbQuery: string collection; - int nToSkip; - int nToReturn; // how many you want back as the beginning of the cursor data (0=no limit) + int nToSkip; + int nToReturn; // how many you want back as the beginning of the cursor data (0=no limit) // greater than zero is simply a hint on how many objects to send back per "cursor batch". // a negative number indicates a hard limit. JSObject query; - [JSObject fieldsToReturn] + [JSObject fieldsToReturn] dbGetMore: - string collection; // redundant, might use for security. + string collection; // redundant, might use for security. int nToReturn; int64 cursorID; dbKillCursors=2007: int n; - int64 cursorIDs[n]; + int64 cursorIDs[n]; Note that on Update, there is only one object, which is different from insert where you can pass a list of objects to insert in the db. @@ -77,7 +78,7 @@ namespace mongo { struct GetMoreWaitException { }; QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& op, int pass, bool& exhaust); - + struct UpdateResult { bool existing; // if existing objects were modified bool mod; // was this a $ mod @@ -85,25 +86,25 @@ namespace mongo { OID upserted; // if something was upserted, the new _id of the object UpdateResult( bool e, bool m, unsigned long long n , const BSONObj& upsertedObject = BSONObj() ) - : existing(e) , mod(m), num(n){ + : existing(e) , mod(m), num(n) { upserted.clear(); BSONElement id = upsertedObject["_id"]; - if ( ! e && n == 1 && id.type() == jstOID ){ + if ( ! 
e && n == 1 && id.type() == jstOID ) { upserted = id.OID(); } } - + }; class RemoveSaver; - + /* returns true if an existing object was updated, false if no existing object was found. multi - update multiple objects - mostly useful with things like $set god - allow access to system namespaces */ UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj pattern, bool upsert, bool multi , bool logop , OpDebug& debug ); - UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj pattern, + UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj pattern, bool upsert, bool multi , bool logop , OpDebug& debug , RemoveSaver * rs = 0 ); // If justOne is true, deletedId is set to the id of the deleted object. @@ -112,7 +113,7 @@ namespace mongo { long long runCount(const char *ns, const BSONObj& cmd, string& err); const char * runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result); - + /* This is for languages whose "objects" are not well ordered (JSON is well ordered). [ { a : ... } , { b : ... } ] -> { a : ..., b : ... } */ @@ -144,24 +145,24 @@ namespace mongo { class ParsedQuery { public: ParsedQuery( QueryMessage& qm ) - : _ns( qm.ns ) , _ntoskip( qm.ntoskip ) , _ntoreturn( qm.ntoreturn ) , _options( qm.queryOptions ){ + : _ns( qm.ns ) , _ntoskip( qm.ntoskip ) , _ntoreturn( qm.ntoreturn ) , _options( qm.queryOptions ) { init( qm.query ); initFields( qm.fields ); } ParsedQuery( const char* ns , int ntoskip , int ntoreturn , int queryoptions , const BSONObj& query , const BSONObj& fields ) - : _ns( ns ) , _ntoskip( ntoskip ) , _ntoreturn( ntoreturn ) , _options( queryoptions ){ + : _ns( ns ) , _ntoskip( ntoskip ) , _ntoreturn( ntoreturn ) , _options( queryoptions ) { init( query ); initFields( fields ); } - - ~ParsedQuery(){} + + ~ParsedQuery() {} const char * ns() const { return _ns; } bool isLocalDB() const { return strncmp(_ns, "local.", 6) == 0; } const BSONObj& getFilter() const { return _filter; } - FieldMatcher* getFields() const { return _fields.get(); } - shared_ptr getFieldPtr() const { return _fields; } + Projection* getFields() const { return _fields.get(); } + shared_ptr getFieldPtr() const { return _fields; } int getSkip() const { return _ntoskip; } int getNumToReturn() const { return _ntoreturn; } @@ -169,7 +170,7 @@ namespace mongo { int getOptions() const { return _options; } bool hasOption( int x ) const { return x & _options; } - + bool isExplain() const { return _explain; } bool isSnapshot() const { return _snapshot; } bool returnKey() const { return _returnKey; } @@ -180,7 +181,7 @@ namespace mongo { const BSONObj& getOrder() const { return _order; } const BSONElement& getHint() const { return _hint; } int getMaxScan() const { return _maxScan; } - + bool couldBeCommand() const { /* we assume you are using findOne() for running a cmd... */ return _ntoreturn == 1 && strstr( _ns , ".$cmd" ); @@ -193,7 +194,7 @@ namespace mongo { /* if ntoreturn is zero, we return up to 101 objects. on the subsequent getmore, there is only a size limit. The idea is that on a find() where one doesn't use much results, we don't return much, but once getmore kicks in, we start pushing significant quantities. - + The n limit (vs. size) is important when someone fetches only one small field from big objects, which causes massive scanning server-side. 
*/ @@ -208,14 +209,14 @@ namespace mongo { return false; return n >= _ntoreturn; } - + private: - void init( const BSONObj& q ){ + void init( const BSONObj& q ) { _reset(); uassert( 10105 , "bad skip value in query", _ntoskip >= 0); - - if ( _ntoreturn < 0 ){ - /* _ntoreturn greater than zero is simply a hint on how many objects to send back per + + if ( _ntoreturn < 0 ) { + /* _ntoreturn greater than zero is simply a hint on how many objects to send back per "cursor batch". A negative number indicates a hard limit. */ @@ -223,12 +224,12 @@ namespace mongo { _ntoreturn = -_ntoreturn; } - + BSONElement e = q["query"]; if ( ! e.isABSONObj() ) e = q["$query"]; - - if ( e.isABSONObj() ){ + + if ( e.isABSONObj() ) { _filter = e.embeddedObject(); _initTop( q ); } @@ -237,7 +238,7 @@ namespace mongo { } } - void _reset(){ + void _reset() { _wantMore = true; _explain = false; _snapshot = false; @@ -246,20 +247,23 @@ namespace mongo { _maxScan = 0; } - void _initTop( const BSONObj& top ){ + void _initTop( const BSONObj& top ) { BSONObjIterator i( top ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); const char * name = e.fieldName(); if ( strcmp( "$orderby" , name ) == 0 || - strcmp( "orderby" , name ) == 0 ){ - if ( e.type() == Object ) + strcmp( "orderby" , name ) == 0 ) { + if ( e.type() == Object ) { _order = e.embeddedObject(); - else if ( e.type() == Array ) + } + else if ( e.type() == Array ) { _order = transformOrderFromArrayFormat( _order ); - else - assert( 0 ); + } + else { + uassert(13513, "sort must be an object or array", 0); + } } else if ( strcmp( "$explain" , name ) == 0 ) _explain = e.trueValue(); @@ -277,25 +281,25 @@ namespace mongo { _maxScan = e.numberInt(); else if ( strcmp( "$showDiskLoc" , name ) == 0 ) _showDiskLoc = e.trueValue(); - + } - if ( _snapshot ){ + if ( _snapshot ) { uassert( 12001 , "E12001 can't sort with $snapshot", _order.isEmpty() ); uassert( 12002 , "E12002 can't use hint with $snapshot", _hint.eoo() ); } - + } - void initFields( const BSONObj& fields ){ + void initFields( const BSONObj& fields ) { if ( fields.isEmpty() ) return; - _fields.reset( new FieldMatcher() ); - _fields->add( fields ); + _fields.reset( new Projection() ); + _fields->init( fields ); } - ParsedQuery( const ParsedQuery& other ){ + ParsedQuery( const ParsedQuery& other ) { assert(0); } @@ -303,10 +307,10 @@ namespace mongo { int _ntoskip; int _ntoreturn; int _options; - + BSONObj _filter; - shared_ptr< FieldMatcher > _fields; - + shared_ptr< Projection > _fields; + bool _wantMore; bool _explain; @@ -319,7 +323,7 @@ namespace mongo { BSONObj _order; int _maxScan; }; - + } // namespace mongo diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index e7068c2..0b9dce7 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -24,24 +24,25 @@ #include "queryoptimizer.h" #include "cmdline.h" #include "clientcursor.h" +#include //#define DEBUGQO(x) cout << x << endl; #define DEBUGQO(x) namespace mongo { - void checkTableScanAllowed( const char * ns ){ - if ( ! cmdLine.notablescan ) + void checkTableScanAllowed( const char * ns ) { + if ( ! cmdLine.noTableScan ) return; - + if ( strstr( ns , ".system." ) || - strstr( ns , "local." ) ) + strstr( ns , "local." ) ) return; - + if ( ! nsdetails( ns ) ) return; - uassert( 10111 , (string)"table scans not allowed:" + ns , ! cmdLine.notablescan ); + uassert( 10111 , (string)"table scans not allowed:" + ns , ! 
cmdLine.noTableScan ); } double elementDirection( const BSONElement &e ) { @@ -49,58 +50,59 @@ namespace mongo { return e.number(); return 1; } - - QueryPlan::QueryPlan( - NamespaceDetails *_d, int _idxNo, - const FieldRangeSet &fbs, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : - d(_d), idxNo(_idxNo), - fbs_( fbs ), - _originalQuery( originalQuery ), - order_( order ), - index_( 0 ), - optimal_( false ), - scanAndOrderRequired_( true ), - exactKeyMatch_( false ), - direction_( 0 ), - endKeyInclusive_( endKey.isEmpty() ), - unhelpful_( false ), - _special( special ), - _type(0), - _startOrEndSpec( !startKey.isEmpty() || !endKey.isEmpty() ){ - - if ( !fbs_.matchPossible() ) { - unhelpful_ = true; - scanAndOrderRequired_ = false; + + QueryPlan::QueryPlan( + NamespaceDetails *d, int idxNo, + const FieldRangeSet &fbs, const FieldRangeSet &originalFrs, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : + _d(d), _idxNo(idxNo), + _fbs( fbs ), + _originalQuery( originalQuery ), + _order( order ), + _index( 0 ), + _optimal( false ), + _scanAndOrderRequired( true ), + _exactKeyMatch( false ), + _direction( 0 ), + _endKeyInclusive( endKey.isEmpty() ), + _unhelpful( false ), + _special( special ), + _type(0), + _startOrEndSpec( !startKey.isEmpty() || !endKey.isEmpty() ) { + + if ( !_fbs.matchPossible() ) { + _unhelpful = true; + _scanAndOrderRequired = false; return; } - if( idxNo >= 0 ) { - index_ = &d->idx(idxNo); - } else { + if( _idxNo >= 0 ) { + _index = &d->idx(_idxNo); + } + else { // full table scan case - if ( order_.isEmpty() || !strcmp( order_.firstElement().fieldName(), "$natural" ) ) - scanAndOrderRequired_ = false; + if ( _order.isEmpty() || !strcmp( _order.firstElement().fieldName(), "$natural" ) ) + _scanAndOrderRequired = false; return; } - if ( _special.size() ){ - optimal_ = true; - _type = index_->getSpec().getType(); + if ( _special.size() ) { + _optimal = true; + _type = _index->getSpec().getType(); massert( 13040 , (string)"no type for special: " + _special , _type ); // hopefully safe to use original query in these contexts - don't think we can mix special with $or clause separation yet - scanAndOrderRequired_ = _type->scanAndOrderRequired( _originalQuery , order ); + _scanAndOrderRequired = _type->scanAndOrderRequired( _originalQuery , order ); return; } - BSONObj idxKey = index_->keyPattern(); + BSONObj idxKey = _index->keyPattern(); BSONObjIterator o( order ); BSONObjIterator k( idxKey ); if ( !o.moreWithEOO() ) - scanAndOrderRequired_ = false; + _scanAndOrderRequired = false; while( o.moreWithEOO() ) { BSONElement oe = o.next(); if ( oe.eoo() ) { - scanAndOrderRequired_ = false; + _scanAndOrderRequired = false; break; } if ( !k.moreWithEOO() ) @@ -116,14 +118,14 @@ namespace mongo { goto doneCheckOrder; } int d = elementDirection( oe ) == elementDirection( ke ) ? 
1 : -1; - if ( direction_ == 0 ) - direction_ = d; - else if ( direction_ != d ) + if ( _direction == 0 ) + _direction = d; + else if ( _direction != d ) break; } - doneCheckOrder: - if ( scanAndOrderRequired_ ) - direction_ = 0; +doneCheckOrder: + if ( _scanAndOrderRequired ) + _direction = 0; BSONObjIterator i( idxKey ); int exactIndexedQueryCount = 0; int optimalIndexedQueryCount = 0; @@ -140,7 +142,8 @@ namespace mongo { ++optimalIndexedQueryCount; if ( !fb.equality() ) stillOptimalIndexedQueryCount = false; - } else { + } + else { if ( fb.nontrivial() ) optimalIndexedQueryCount = -1; } @@ -151,16 +154,17 @@ namespace mongo { } orderFieldsUnindexed.erase( e.fieldName() ); } - if ( !scanAndOrderRequired_ && - ( optimalIndexedQueryCount == fbs.nNontrivialRanges() ) ) - optimal_ = true; + if ( !_scanAndOrderRequired && + ( optimalIndexedQueryCount == fbs.nNontrivialRanges() ) ) + _optimal = true; if ( exactIndexedQueryCount == fbs.nNontrivialRanges() && - orderFieldsUnindexed.size() == 0 && - exactIndexedQueryCount == index_->keyPattern().nFields() && - exactIndexedQueryCount == _originalQuery.nFields() ) { - exactKeyMatch_ = true; + orderFieldsUnindexed.size() == 0 && + exactIndexedQueryCount == _index->keyPattern().nFields() && + exactIndexedQueryCount == _originalQuery.nFields() ) { + _exactKeyMatch = true; } - _frv.reset( new FieldRangeVector( fbs, idxKey, direction_ ) ); + _frv.reset( new FieldRangeVector( fbs, idxKey, _direction ) ); + _originalFrv.reset( new FieldRangeVector( originalFrs, idxKey, _direction ) ); if ( _startOrEndSpec ) { BSONObj newStart, newEnd; if ( !startKey.isEmpty() ) @@ -173,100 +177,124 @@ namespace mongo { _endKey = _frv->endKey(); } - if ( ( scanAndOrderRequired_ || order_.isEmpty() ) && - !fbs.range( idxKey.firstElement().fieldName() ).nontrivial() ) { - unhelpful_ = true; + if ( ( _scanAndOrderRequired || _order.isEmpty() ) && + !fbs.range( idxKey.firstElement().fieldName() ).nontrivial() ) { + _unhelpful = true; } } - + shared_ptr QueryPlan::newCursor( const DiskLoc &startLoc , int numWanted ) const { if ( _type ) { - // hopefully safe to use original query in these contexts - don't think we can mix type with $or clause separation yet - return _type->newCursor( _originalQuery , order_ , numWanted ); + // hopefully safe to use original query in these contexts - don't think we can mix type with $or clause separation yet + return _type->newCursor( _originalQuery , _order , numWanted ); } - - if ( !fbs_.matchPossible() ){ - if ( fbs_.nNontrivialRanges() ) - checkTableScanAllowed( fbs_.ns() ); + + if ( !_fbs.matchPossible() ) { + if ( _fbs.nNontrivialRanges() ) + checkTableScanAllowed( _fbs.ns() ); return shared_ptr( new BasicCursor( DiskLoc() ) ); } - if ( !index_ ){ - if ( fbs_.nNontrivialRanges() ) - checkTableScanAllowed( fbs_.ns() ); - return findTableScan( fbs_.ns(), order_, startLoc ); + if ( !_index ) { + if ( _fbs.nNontrivialRanges() ) + checkTableScanAllowed( _fbs.ns() ); + return findTableScan( _fbs.ns(), _order, startLoc ); } massert( 10363 , "newCursor() with start location not implemented for indexed plans", startLoc.isNull() ); - + if ( _startOrEndSpec ) { - // we are sure to spec endKeyInclusive_ - return shared_ptr( new BtreeCursor( d, idxNo, *index_, _startKey, _endKey, endKeyInclusive_, direction_ >= 0 ? 1 : -1 ) ); - } else if ( index_->getSpec().getType() ) { - return shared_ptr( new BtreeCursor( d, idxNo, *index_, _frv->startKey(), _frv->endKey(), true, direction_ >= 0 ? 
1 : -1 ) ); - } else { - return shared_ptr( new BtreeCursor( d, idxNo, *index_, _frv, direction_ >= 0 ? 1 : -1 ) ); + // we are sure to spec _endKeyInclusive + return shared_ptr( new BtreeCursor( _d, _idxNo, *_index, _startKey, _endKey, _endKeyInclusive, _direction >= 0 ? 1 : -1 ) ); + } + else if ( _index->getSpec().getType() ) { + return shared_ptr( new BtreeCursor( _d, _idxNo, *_index, _frv->startKey(), _frv->endKey(), true, _direction >= 0 ? 1 : -1 ) ); + } + else { + return shared_ptr( new BtreeCursor( _d, _idxNo, *_index, _frv, _direction >= 0 ? 1 : -1 ) ); } } - + shared_ptr QueryPlan::newReverseCursor() const { - if ( !fbs_.matchPossible() ) + if ( !_fbs.matchPossible() ) return shared_ptr( new BasicCursor( DiskLoc() ) ); - if ( !index_ ) { - int orderSpec = order_.getIntField( "$natural" ); + if ( !_index ) { + int orderSpec = _order.getIntField( "$natural" ); if ( orderSpec == INT_MIN ) orderSpec = 1; - return findTableScan( fbs_.ns(), BSON( "$natural" << -orderSpec ) ); + return findTableScan( _fbs.ns(), BSON( "$natural" << -orderSpec ) ); } massert( 10364 , "newReverseCursor() not implemented for indexed plans", false ); return shared_ptr(); } - + BSONObj QueryPlan::indexKey() const { - if ( !index_ ) + if ( !_index ) return BSON( "$natural" << 1 ); - return index_->keyPattern(); + return _index->keyPattern(); } - + void QueryPlan::registerSelf( long long nScanned ) const { - if ( fbs_.matchPossible() ) { + if ( _fbs.matchPossible() ) { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); - NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( fbs_.pattern( order_ ), indexKey(), nScanned ); - } - } - - QueryPlanSet::QueryPlanSet( const char *_ns, auto_ptr< FieldRangeSet > frs, const BSONObj &originalQuery, const BSONObj &order, const BSONElement *hint, bool honorRecordedPlan, const BSONObj &min, const BSONObj &max, bool bestGuessOnly, bool mayYield ) : - ns(_ns), - _originalQuery( originalQuery ), - fbs_( frs ), - mayRecordPlan_( true ), - usingPrerecordedPlan_( false ), - hint_( BSONObj() ), - order_( order.getOwned() ), - oldNScanned_( 0 ), - honorRecordedPlan_( honorRecordedPlan ), - min_( min.getOwned() ), - max_( max.getOwned() ), - _bestGuessOnly( bestGuessOnly ), - _mayYield( mayYield ), - _yieldSometimesTracker( 256, 20 ){ + NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( _fbs.pattern( _order ), indexKey(), nScanned ); + } + } + + bool QueryPlan::isMultiKey() const { + if ( _idxNo < 0 ) + return false; + return _d->isMultikey( _idxNo ); + } + + QueryPlanSet::QueryPlanSet( const char *ns, auto_ptr< FieldRangeSet > frs, auto_ptr< FieldRangeSet > originalFrs, const BSONObj &originalQuery, const BSONObj &order, const BSONElement *hint, bool honorRecordedPlan, const BSONObj &min, const BSONObj &max, bool bestGuessOnly, bool mayYield ) : + _ns(ns), + _originalQuery( originalQuery ), + _fbs( frs ), + _originalFrs( originalFrs ), + _mayRecordPlan( true ), + _usingPrerecordedPlan( false ), + _hint( BSONObj() ), + _order( order.getOwned() ), + _oldNScanned( 0 ), + _honorRecordedPlan( honorRecordedPlan ), + _min( min.getOwned() ), + _max( max.getOwned() ), + _bestGuessOnly( bestGuessOnly ), + _mayYield( mayYield ), + _yieldSometimesTracker( 256, 20 ) { if ( hint && !hint->eoo() ) { - hint_ = hint->wrap(); + _hint = hint->wrap(); } init(); } - + + bool QueryPlanSet::modifiedKeys() const { + for( PlanSet::const_iterator i = _plans.begin(); i != _plans.end(); ++i ) + if ( (*i)->isMultiKey() ) + return true; + return false; + } + + bool 
QueryPlanSet::hasMultiKey() const { + for( PlanSet::const_iterator i = _plans.begin(); i != _plans.end(); ++i ) + if ( (*i)->isMultiKey() ) + return true; + return false; + } + + void QueryPlanSet::addHint( IndexDetails &id ) { - if ( !min_.isEmpty() || !max_.isEmpty() ) { + if ( !_min.isEmpty() || !_max.isEmpty() ) { string errmsg; BSONObj keyPattern = id.keyPattern(); - // This reformats min_ and max_ to be used for index lookup. - massert( 10365 , errmsg, indexDetailsForRange( fbs_->ns(), errmsg, min_, max_, keyPattern ) ); + // This reformats _min and _max to be used for index lookup. + massert( 10365 , errmsg, indexDetailsForRange( _fbs->ns(), errmsg, _min, _max, keyPattern ) ); } - NamespaceDetails *d = nsdetails(ns); - plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(id), *fbs_, _originalQuery, order_, min_, max_ ) ) ); + NamespaceDetails *d = nsdetails(_ns); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_fbs, *_originalFrs, _originalQuery, _order, _min, _max ) ) ); } - + // returns an IndexDetails * for a hint, 0 if hint is $natural. // hint must not be eoo() IndexDetails *parseHint( const BSONElement &hint, NamespaceDetails *d ) { @@ -281,7 +309,7 @@ namespace mongo { } } } - else if( hint.type() == Object ) { + else if( hint.type() == Object ) { BSONObj hintobj = hint.embeddedObject(); uassert( 10112 , "bad hint", !hintobj.isEmpty() ); if ( !strcmp( hintobj.firstElement().fieldName(), "$natural" ) ) { @@ -294,92 +322,93 @@ namespace mongo { return ⅈ } } - } + } uassert( 10113 , "bad hint", false ); return 0; } - + void QueryPlanSet::init() { DEBUGQO( "QueryPlanSet::init " << ns << "\t" << _originalQuery ); - plans_.clear(); - mayRecordPlan_ = true; - usingPrerecordedPlan_ = false; - - const char *ns = fbs_->ns(); + _plans.clear(); + _mayRecordPlan = true; + _usingPrerecordedPlan = false; + + const char *ns = _fbs->ns(); NamespaceDetails *d = nsdetails( ns ); - if ( !d || !fbs_->matchPossible() ) { + if ( !d || !_fbs->matchPossible() ) { // Table scan plan, when no matches are possible - plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ) ); return; } - - BSONElement hint = hint_.firstElement(); + + BSONElement hint = _hint.firstElement(); if ( !hint.eoo() ) { - mayRecordPlan_ = false; + _mayRecordPlan = false; IndexDetails *id = parseHint( hint, d ); if ( id ) { addHint( *id ); - } else { - massert( 10366 , "natural order cannot be specified with $min/$max", min_.isEmpty() && max_.isEmpty() ); + } + else { + massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() ); // Table scan plan - plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ) ); } return; } - - if ( !min_.isEmpty() || !max_.isEmpty() ) { + + if ( !_min.isEmpty() || !_max.isEmpty() ) { string errmsg; BSONObj keyPattern; - IndexDetails *idx = indexDetailsForRange( ns, errmsg, min_, max_, keyPattern ); + IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern ); massert( 10367 , errmsg, idx ); - plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(*idx), *fbs_, _originalQuery, order_, min_, max_ ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_fbs, *_originalFrs, _originalQuery, _order, _min, _max ) ) ); return; } - if ( 
isSimpleIdQuery( _originalQuery ) ){ + if ( isSimpleIdQuery( _originalQuery ) ) { int idx = d->findIdIndex(); - if ( idx >= 0 ){ - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - plans_.push_back( PlanPtr( new QueryPlan( d , idx , *fbs_ , _originalQuery, order_ ) ) ); + if ( idx >= 0 ) { + _usingPrerecordedPlan = true; + _mayRecordPlan = false; + _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_fbs , *_fbs , _originalQuery, _order ) ) ); return; } } - if ( _originalQuery.isEmpty() && order_.isEmpty() ){ - plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); + if ( _originalQuery.isEmpty() && _order.isEmpty() ) { + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ) ); return; } - DEBUGQO( "\t special : " << fbs_->getSpecial() ); - if ( fbs_->getSpecial().size() ){ - _special = fbs_->getSpecial(); + DEBUGQO( "\t special : " << _fbs->getSpecial() ); + if ( _fbs->getSpecial().size() ) { + _special = _fbs->getSpecial(); NamespaceDetails::IndexIterator i = d->ii(); while( i.more() ) { int j = i.pos(); IndexDetails& ii = i.next(); const IndexSpec& spec = ii.getSpec(); - if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , order_ ) ){ - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - plans_.push_back( PlanPtr( new QueryPlan( d , j , *fbs_ , _originalQuery, order_ , - BSONObj() , BSONObj() , _special ) ) ); + if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) { + _usingPrerecordedPlan = true; + _mayRecordPlan = false; + _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_fbs , *_fbs , _originalQuery, _order , + BSONObj() , BSONObj() , _special ) ) ); return; } } uassert( 13038 , (string)"can't find special index: " + _special + " for: " + _originalQuery.toString() , 0 ); } - if ( honorRecordedPlan_ ) { + if ( _honorRecordedPlan ) { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); NamespaceDetailsTransient& nsd = NamespaceDetailsTransient::get_inlock( ns ); - BSONObj bestIndex = nsd.indexForPattern( fbs_->pattern( order_ ) ); + BSONObj bestIndex = nsd.indexForPattern( _fbs->pattern( _order ) ); if ( !bestIndex.isEmpty() ) { - PlanPtr p; - oldNScanned_ = nsd.nScannedForPattern( fbs_->pattern( order_ ) ); + QueryPlanPtr p; + _oldNScanned = nsd.nScannedForPattern( _fbs->pattern( _order ) ); if ( !strcmp( bestIndex.firstElement().fieldName(), "$natural" ) ) { // Table scan plan - p.reset( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ); + p.reset( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ); } NamespaceDetails::IndexIterator i = d->ii(); @@ -387,55 +416,56 @@ namespace mongo { int j = i.pos(); IndexDetails& ii = i.next(); if( ii.keyPattern().woCompare(bestIndex) == 0 ) { - p.reset( new QueryPlan( d, j, *fbs_, _originalQuery, order_ ) ); + p.reset( new QueryPlan( d, j, *_fbs, *_originalFrs, _originalQuery, _order ) ); } } massert( 10368 , "Unable to locate previously recorded index", p.get() ); if ( !( _bestGuessOnly && p->scanAndOrderRequired() ) ) { - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - plans_.push_back( p ); + _usingPrerecordedPlan = true; + _mayRecordPlan = false; + _plans.push_back( p ); return; } } } - + addOtherPlans( false ); } - + void QueryPlanSet::addOtherPlans( bool checkFirst ) { - const char *ns = fbs_->ns(); + const char *ns = _fbs->ns(); NamespaceDetails *d = nsdetails( ns ); if ( !d ) return; // If table scan is optimal or natural order requested or 
tailable cursor requested - if ( !fbs_->matchPossible() || ( fbs_->nNontrivialRanges() == 0 && order_.isEmpty() ) || - ( !order_.isEmpty() && !strcmp( order_.firstElement().fieldName(), "$natural" ) ) ) { + if ( !_fbs->matchPossible() || ( _fbs->nNontrivialRanges() == 0 && _order.isEmpty() ) || + ( !_order.isEmpty() && !strcmp( _order.firstElement().fieldName(), "$natural" ) ) ) { // Table scan plan - addPlan( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ), checkFirst ); return; } - - bool normalQuery = hint_.isEmpty() && min_.isEmpty() && max_.isEmpty(); + + bool normalQuery = _hint.isEmpty() && _min.isEmpty() && _max.isEmpty(); PlanSet plans; for( int i = 0; i < d->nIndexes; ++i ) { IndexDetails& id = d->idx(i); const IndexSpec& spec = id.getSpec(); IndexSuitability suitability = HELPFUL; - if ( normalQuery ){ - suitability = spec.suitability( fbs_->simplifiedQuery() , order_ ); + if ( normalQuery ) { + suitability = spec.suitability( _fbs->simplifiedQuery() , _order ); if ( suitability == USELESS ) continue; } - PlanPtr p( new QueryPlan( d, i, *fbs_, _originalQuery, order_ ) ); + QueryPlanPtr p( new QueryPlan( d, i, *_fbs, *_originalFrs, _originalQuery, _order ) ); if ( p->optimal() ) { addPlan( p, checkFirst ); return; - } else if ( !p->unhelpful() ) { + } + else if ( !p->unhelpful() ) { plans.push_back( p ); } } @@ -443,29 +473,29 @@ namespace mongo { addPlan( *i, checkFirst ); // Table scan plan - addPlan( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ), checkFirst ); } - + shared_ptr< QueryOp > QueryPlanSet::runOp( QueryOp &op ) { - if ( usingPrerecordedPlan_ ) { + if ( _usingPrerecordedPlan ) { Runner r( *this, op ); shared_ptr< QueryOp > res = r.run(); - // plans_.size() > 1 if addOtherPlans was called in Runner::run(). - if ( _bestGuessOnly || res->complete() || plans_.size() > 1 ) + // _plans.size() > 1 if addOtherPlans was called in Runner::run(). + if ( _bestGuessOnly || res->complete() || _plans.size() > 1 ) return res; { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); - NamespaceDetailsTransient::get_inlock( fbs_->ns() ).registerIndexForPattern( fbs_->pattern( order_ ), BSONObj(), 0 ); + NamespaceDetailsTransient::get_inlock( _fbs->ns() ).registerIndexForPattern( _fbs->pattern( _order ), BSONObj(), 0 ); } init(); } Runner r( *this, op ); return r.run(); } - + BSONObj QueryPlanSet::explain() const { vector< BSONObj > arr; - for( PlanSet::const_iterator i = plans_.begin(); i != plans_.end(); ++i ) { + for( PlanSet::const_iterator i = _plans.begin(); i != _plans.end(); ++i ) { shared_ptr c = (*i)->newCursor(); BSONObjBuilder explain; explain.append( "cursor", c->toString() ); @@ -477,37 +507,37 @@ namespace mongo { return b.obj(); } - QueryPlanSet::PlanPtr QueryPlanSet::getBestGuess() const { - assert( plans_.size() ); - if ( plans_[ 0 ]->scanAndOrderRequired() ){ - for ( unsigned i=1; iscanAndOrderRequired() ) - return plans_[i]; + QueryPlanSet::QueryPlanPtr QueryPlanSet::getBestGuess() const { + assert( _plans.size() ); + if ( _plans[ 0 ]->scanAndOrderRequired() ) { + for ( unsigned i=1; i<_plans.size(); i++ ) { + if ( ! 
_plans[i]->scanAndOrderRequired() ) + return _plans[i]; } - + stringstream ss; ss << "best guess plan requested, but scan and order required:"; - ss << " query: " << fbs_->simplifiedQuery(); - ss << " order: " << order_; + ss << " query: " << _fbs->simplifiedQuery(); + ss << " order: " << _order; ss << " choices: "; - for ( unsigned i=0; iindexKey() << " "; + for ( unsigned i=0; i<_plans.size(); i++ ) { + ss << _plans[i]->indexKey() << " "; } string s = ss.str(); msgassertedNoTrace( 13284, s.c_str() ); } - return plans_[0]; + return _plans[0]; } - + QueryPlanSet::Runner::Runner( QueryPlanSet &plans, QueryOp &op ) : - op_( op ), - plans_( plans ) { + _op( op ), + _plans( plans ) { } - + void QueryPlanSet::Runner::mayYield( const vector< shared_ptr< QueryOp > > &ops ) { - if ( plans_._mayYield ) { - if ( plans_._yieldSometimesTracker.ping() ) { + if ( _plans._mayYield ) { + if ( _plans._yieldSometimesTracker.ping() ) { int micros = ClientCursor::yieldSuggest(); if ( micros > 0 ) { for( vector< shared_ptr< QueryOp > >::const_iterator i = ops.begin(); i != ops.end(); ++i ) { @@ -515,28 +545,38 @@ namespace mongo { return; } } - ClientCursor::staticYield( micros ); + ClientCursor::staticYield( micros , _plans._ns ); for( vector< shared_ptr< QueryOp > >::const_iterator i = ops.begin(); i != ops.end(); ++i ) { recoverFromYield( **i ); - } + } } } - } + } } - + + struct OpHolder { + OpHolder( const shared_ptr< QueryOp > &op ) : _op( op ), _offset() {} + shared_ptr< QueryOp > _op; + long long _offset; + bool operator<( const OpHolder &other ) const { + return _op->nscanned() + _offset > other._op->nscanned() + other._offset; + } + }; + shared_ptr< QueryOp > QueryPlanSet::Runner::run() { - massert( 10369 , "no plans", plans_.plans_.size() > 0 ); - + massert( 10369 , "no plans", _plans._plans.size() > 0 ); + vector< shared_ptr< QueryOp > > ops; - if ( plans_._bestGuessOnly ) { - shared_ptr< QueryOp > op( op_.createChild() ); - op->setQueryPlan( plans_.getBestGuess().get() ); - ops.push_back( op ); - } else { - if ( plans_.plans_.size() > 1 ) - log(1) << " running multiple plans" << endl; - for( PlanSet::iterator i = plans_.plans_.begin(); i != plans_.plans_.end(); ++i ) { - shared_ptr< QueryOp > op( op_.createChild() ); + if ( _plans._bestGuessOnly ) { + shared_ptr< QueryOp > op( _op.createChild() ); + op->setQueryPlan( _plans.getBestGuess().get() ); + ops.push_back( op ); + } + else { + if ( _plans._plans.size() > 1 ) + log(1) << " running multiple plans" << endl; + for( PlanSet::iterator i = _plans._plans.begin(); i != _plans._plans.end(); ++i ) { + shared_ptr< QueryOp > op( _op.createChild() ); op->setQueryPlan( i->get() ); ops.push_back( op ); } @@ -547,53 +587,51 @@ namespace mongo { if ( (*i)->complete() ) return *i; } - - long long nScanned = 0; - long long nScannedBackup = 0; - while( 1 ) { - ++nScanned; - unsigned errCount = 0; - bool first = true; - for( vector< shared_ptr< QueryOp > >::iterator i = ops.begin(); i != ops.end(); ++i ) { - mayYield( ops ); - QueryOp &op = **i; - nextOp( op ); - if ( op.complete() ) { - if ( first ) { - nScanned += nScannedBackup; - } - if ( plans_.mayRecordPlan_ && op.mayRecordPlan() ) { - op.qp().registerSelf( nScanned ); - } - return *i; + + std::priority_queue< OpHolder > queue; + for( vector< shared_ptr< QueryOp > >::iterator i = ops.begin(); i != ops.end(); ++i ) { + if ( !(*i)->error() ) { + queue.push( *i ); + } + } + + while( !queue.empty() ) { + mayYield( ops ); + OpHolder holder = queue.top(); + queue.pop(); + QueryOp &op = *holder._op; + nextOp( op 
); + if ( op.complete() ) { + if ( _plans._mayRecordPlan && op.mayRecordPlan() ) { + op.qp().registerSelf( op.nscanned() ); } - if ( op.error() ) - ++errCount; - first = false; + return holder._op; } - if ( errCount == ops.size() ) - break; - if ( !plans_._bestGuessOnly && plans_.usingPrerecordedPlan_ && nScanned > plans_.oldNScanned_ * 10 && plans_._special.empty() ) { - plans_.addOtherPlans( true ); - PlanSet::iterator i = plans_.plans_.begin(); + if ( op.error() ) { + continue; + } + queue.push( holder ); + if ( !_plans._bestGuessOnly && _plans._usingPrerecordedPlan && op.nscanned() > _plans._oldNScanned * 10 && _plans._special.empty() ) { + holder._offset = -op.nscanned(); + _plans.addOtherPlans( true ); + PlanSet::iterator i = _plans._plans.begin(); ++i; - for( ; i != plans_.plans_.end(); ++i ) { - shared_ptr< QueryOp > op( op_.createChild() ); + for( ; i != _plans._plans.end(); ++i ) { + shared_ptr< QueryOp > op( _op.createChild() ); op->setQueryPlan( i->get() ); ops.push_back( op ); initOp( *op ); if ( op->complete() ) return op; - } - plans_.mayRecordPlan_ = true; - plans_.usingPrerecordedPlan_ = false; - nScannedBackup = nScanned; - nScanned = 0; + queue.push( op ); + } + _plans._mayRecordPlan = true; + _plans._usingPrerecordedPlan = false; } } return ops[ 0 ]; } - + #define GUARD_OP_EXCEPTION( op, expression ) \ try { \ expression; \ @@ -607,8 +645,8 @@ namespace mongo { catch ( ... ) { \ op.setException( ExceptionInfo( "Caught unknown exception" , 0 ) ); \ } - - + + void QueryPlanSet::Runner::initOp( QueryOp &op ) { GUARD_OP_EXCEPTION( op, op.init() ); } @@ -619,39 +657,39 @@ namespace mongo { bool QueryPlanSet::Runner::prepareToYield( QueryOp &op ) { GUARD_OP_EXCEPTION( op, - if ( op.error() ) { - return true; - } else { - return op.prepareToYield(); - } ); + if ( op.error() ) { + return true; + } + else { + return op.prepareToYield(); + } ); return true; } void QueryPlanSet::Runner::recoverFromYield( QueryOp &op ) { GUARD_OP_EXCEPTION( op, if ( !op.error() ) { op.recoverFromYield(); } ); } - - + + MultiPlanScanner::MultiPlanScanner( const char *ns, - const BSONObj &query, - const BSONObj &order, - const BSONElement *hint, - bool honorRecordedPlan, - const BSONObj &min, - const BSONObj &max, - bool bestGuessOnly, - bool mayYield ) : - _ns( ns ), - _or( !query.getField( "$or" ).eoo() ), - _query( query.getOwned() ), - _fros( ns, _query ), - _i(), - _honorRecordedPlan( honorRecordedPlan ), - _bestGuessOnly( bestGuessOnly ), - _hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ), - _mayYield( mayYield ), - _tableScanned() - { + const BSONObj &query, + const BSONObj &order, + const BSONElement *hint, + bool honorRecordedPlan, + const BSONObj &min, + const BSONObj &max, + bool bestGuessOnly, + bool mayYield ) : + _ns( ns ), + _or( !query.getField( "$or" ).eoo() ), + _query( query.getOwned() ), + _fros( ns, _query ), + _i(), + _honorRecordedPlan( honorRecordedPlan ), + _bestGuessOnly( bestGuessOnly ), + _hint( ( hint && !hint->eoo() ) ? 
hint->wrap() : BSONObj() ), + _mayYield( mayYield ), + _tableScanned() { if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_fros.getSpecial().empty() ) { _or = false; } @@ -661,8 +699,10 @@ namespace mongo { // if _or == false, don't use or clauses for index selection if ( !_or ) { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns, _query ) ); - _currentQps.reset( new QueryPlanSet( ns, frs, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); - } else { + auto_ptr< FieldRangeSet > oldFrs( new FieldRangeSet( *frs ) ); + _currentQps.reset( new QueryPlanSet( ns, frs, oldFrs, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); + } + else { BSONElement e = _query.getField( "$or" ); massert( 13268, "invalid $or spec", e.type() == Array && e.embeddedObject().nFields() > 0 ); } @@ -676,16 +716,17 @@ namespace mongo { } ++_i; auto_ptr< FieldRangeSet > frs( _fros.topFrs() ); + auto_ptr< FieldRangeSet > originalFrs( _fros.topFrsOriginal() ); BSONElement hintElt = _hint.firstElement(); - _currentQps.reset( new QueryPlanSet( _ns, frs, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); + _currentQps.reset( new QueryPlanSet( _ns, frs, originalFrs, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); shared_ptr< QueryOp > ret( _currentQps->runOp( op ) ); if ( ret->qp().willScanTable() ) { _tableScanned = true; } - _fros.popOrClause(); + _fros.popOrClause( ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() ); return ret; } - + shared_ptr< QueryOp > MultiPlanScanner::runOp( QueryOp &op ) { shared_ptr< QueryOp > ret = runOpOnce( op ); while( !ret->stopRequested() && mayRunMore() ) { @@ -693,7 +734,7 @@ namespace mongo { } return ret; } - + bool MultiPlanScanner::uselessOr( const BSONElement &hint ) const { NamespaceDetails *nsd = nsdetails( _ns ); if ( !nsd ) { @@ -713,7 +754,8 @@ namespace mongo { if ( id->getSpec().suitability( *i, BSONObj() ) == USELESS ) { return true; } - } else { + } + else { bool useful = false; NamespaceDetails::IndexIterator j = nsd->ii(); while( j.more() ) { @@ -725,12 +767,12 @@ namespace mongo { } if ( !useful ) { return true; - } + } } } return false; } - + bool indexWorks( const BSONObj &idxPattern, const BSONObj &sampleKey, int direction, int firstSignificantField ) { BSONObjIterator p( idxPattern ); BSONObjIterator k( sampleKey ); @@ -761,19 +803,19 @@ namespace mongo { int idxDirection = e.number() >= 0 ? 1 : -1; int direction = idxDirection * baseDirection; switch( direction ) { - case 1: - b.appendMaxKey( e.fieldName() ); - break; - case -1: - b.appendMinKey( e.fieldName() ); - break; - default: - assert( false ); + case 1: + b.appendMaxKey( e.fieldName() ); + break; + case -1: + b.appendMinKey( e.fieldName() ); + break; + default: + assert( false ); } } - return b.obj(); + return b.obj(); } - + pair< int, int > keyAudit( const BSONObj &min, const BSONObj &max ) { int direction = 0; int firstSignificantField = 0; @@ -802,18 +844,19 @@ namespace mongo { pair< int, int > flexibleKeyAudit( const BSONObj &min, const BSONObj &max ) { if ( min.isEmpty() || max.isEmpty() ) { return make_pair( 1, -1 ); - } else { + } + else { return keyAudit( min, max ); } } - + // NOTE min, max, and keyPattern will be updated to be consistent with the selected index. 
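extremeKeyForIndex above synthesizes the largest or smallest possible key for an index pattern by appending MaxKey or MinKey per field, with the sign chosen from the product of the field's direction and the requested direction. A simplified standalone sketch of that sign rule, using +/-infinity doubles in place of MinKey/MaxKey, might look like this:

// Standalone sketch of extremeKeyForIndex's direction rule (illustrative only).
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>

std::map<std::string, double> extremeKeyForIndex(
        const std::vector<std::pair<std::string, int>>& idxPattern,
        int baseDirection) {
    std::map<std::string, double> key;
    for (const auto& field : idxPattern) {
        int idxDirection = field.second >= 0 ? 1 : -1;
        int direction = idxDirection * baseDirection;
        key[field.first] = (direction == 1)
            ? std::numeric_limits<double>::infinity()     // stands in for MaxKey
            : -std::numeric_limits<double>::infinity();   // stands in for MinKey
    }
    return key;
}

int main() {
    // Index { a: 1, b: -1 }, asking for the maximal key (direction +1).
    auto key = extremeKeyForIndex({{"a", 1}, {"b", -1}}, 1);
    for (const auto& kv : key) std::cout << kv.first << " -> " << kv.second << "\n";
}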
IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ) { if ( min.isEmpty() && max.isEmpty() ) { errmsg = "one of min or max must be specified"; return 0; } - + Client::Context ctx( ns ); IndexDetails *id = 0; NamespaceDetails *d = nsdetails( ns ); @@ -821,7 +864,7 @@ namespace mongo { errmsg = "ns not found"; return 0; } - + pair< int, int > ret = flexibleKeyAudit( min, max ); if ( ret == make_pair( -1, -1 ) ) { errmsg = "min and max keys do not share pattern"; @@ -832,15 +875,16 @@ namespace mongo { while( i.more() ) { IndexDetails& ii = i.next(); if ( indexWorks( ii.keyPattern(), min.isEmpty() ? max : min, ret.first, ret.second ) ) { - if ( ii.getSpec().getType() == 0 ){ + if ( ii.getSpec().getType() == 0 ) { id = ⅈ keyPattern = ii.keyPattern(); break; } } } - - } else { + + } + else { if ( !indexWorks( keyPattern, min.isEmpty() ? max : min, ret.first, ret.second ) ) { errmsg = "requested keyPattern does not match specified keys"; return 0; @@ -853,30 +897,31 @@ namespace mongo { break; } if ( keyPattern.nFields() == 1 && ii.keyPattern().nFields() == 1 && - IndexDetails::isIdIndexPattern( keyPattern ) && - ii.isIdIndex() ){ + IndexDetails::isIdIndexPattern( keyPattern ) && + ii.isIdIndex() ) { id = ⅈ break; } - + } } if ( min.isEmpty() ) { min = extremeKeyForIndex( keyPattern, -1 ); - } else if ( max.isEmpty() ) { + } + else if ( max.isEmpty() ) { max = extremeKeyForIndex( keyPattern, 1 ); } - + if ( !id ) { errmsg = (string)"no index found for specified keyPattern: " + keyPattern.toString(); return 0; } - + min = min.extractFieldsUnDotted( keyPattern ); max = max.extractFieldsUnDotted( keyPattern ); return id; } - + } // namespace mongo diff --git a/db/queryoptimizer.h b/db/queryoptimizer.h index 8314bfa..cf3180a 100644 --- a/db/queryoptimizer.h +++ b/db/queryoptimizer.h @@ -25,15 +25,17 @@ #include "../util/message.h" namespace mongo { - + class IndexDetails; class IndexType; class QueryPlan : boost::noncopyable { public: - QueryPlan(NamespaceDetails *_d, - int _idxNo, // -1 = no index + + QueryPlan(NamespaceDetails *d, + int idxNo, // -1 = no index const FieldRangeSet &fbs, + const FieldRangeSet &originalFrs, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey = BSONObj(), @@ -41,44 +43,50 @@ namespace mongo { string special="" ); /* If true, no other index can do better. */ - bool optimal() const { return optimal_; } + bool optimal() const { return _optimal; } /* ScanAndOrder processing will be required if true */ - bool scanAndOrderRequired() const { return scanAndOrderRequired_; } + bool scanAndOrderRequired() const { return _scanAndOrderRequired; } /* When true, the index we are using has keys such that it can completely resolve the query expression to match by itself without ever checking the main object. 
*/ - bool exactKeyMatch() const { return exactKeyMatch_; } - /* If true, the startKey and endKey are unhelpful and the index order doesn't match the + bool exactKeyMatch() const { return _exactKeyMatch; } + /* If true, the startKey and endKey are unhelpful and the index order doesn't match the requested sort order */ - bool unhelpful() const { return unhelpful_; } - int direction() const { return direction_; } + bool unhelpful() const { return _unhelpful; } + int direction() const { return _direction; } shared_ptr newCursor( const DiskLoc &startLoc = DiskLoc() , int numWanted=0 ) const; shared_ptr newReverseCursor() const; BSONObj indexKey() const; - bool willScanTable() const { return !index_ && fbs_.matchPossible(); } - const char *ns() const { return fbs_.ns(); } - NamespaceDetails *nsd() const { return d; } + bool indexed() const { return _index; } + bool willScanTable() const { return !_index && _fbs.matchPossible(); } + const char *ns() const { return _fbs.ns(); } + NamespaceDetails *nsd() const { return _d; } BSONObj originalQuery() const { return _originalQuery; } - BSONObj simplifiedQuery( const BSONObj& fields = BSONObj() ) const { return fbs_.simplifiedQuery( fields ); } - const FieldRange &range( const char *fieldName ) const { return fbs_.range( fieldName ); } + BSONObj simplifiedQuery( const BSONObj& fields = BSONObj() ) const { return _fbs.simplifiedQuery( fields ); } + const FieldRange &range( const char *fieldName ) const { return _fbs.range( fieldName ); } void registerSelf( long long nScanned ) const; + shared_ptr< FieldRangeVector > originalFrv() const { return _originalFrv; } + // just for testing shared_ptr< FieldRangeVector > frv() const { return _frv; } + bool isMultiKey() const; + private: - NamespaceDetails *d; - int idxNo; - const FieldRangeSet &fbs_; + NamespaceDetails * _d; + int _idxNo; + const FieldRangeSet &_fbs; const BSONObj &_originalQuery; - const BSONObj &order_; - const IndexDetails *index_; - bool optimal_; - bool scanAndOrderRequired_; - bool exactKeyMatch_; - int direction_; + const BSONObj &_order; + const IndexDetails * _index; + bool _optimal; + bool _scanAndOrderRequired; + bool _exactKeyMatch; + int _direction; shared_ptr< FieldRangeVector > _frv; + shared_ptr< FieldRangeVector > _originalFrv; BSONObj _startKey; BSONObj _endKey; - bool endKeyInclusive_; - bool unhelpful_; + bool _endKeyInclusive; + bool _unhelpful; string _special; IndexType * _type; bool _startOrEndSpec; @@ -93,16 +101,17 @@ namespace mongo { // Used when handing off from one QueryOp type to another QueryOp( const QueryOp &other ) : - _complete(), _stopRequested(), _qp(), _error(), _matcher( other._matcher ), - _orConstraint( other._orConstraint ) {} - + _complete(), _stopRequested(), _qp(), _error(), _matcher( other._matcher ), + _orConstraint( other._orConstraint ) {} + virtual ~QueryOp() {} - + /** these gets called after a query plan is set */ - void init() { + void init() { if ( _oldMatcher.get() ) { _matcher.reset( _oldMatcher->nextClauseMatcher( qp().indexKey() ) ); - } else { + } + else { _matcher.reset( new CoveredIndexMatcher( qp().originalQuery(), qp().indexKey(), alwaysUseRecord() ) ); } _init(); @@ -110,10 +119,12 @@ namespace mongo { virtual void next() = 0; virtual bool mayRecordPlan() const = 0; - + virtual bool prepareToYield() { massert( 13335, "yield not supported", false ); return false; } virtual void recoverFromYield() { massert( 13336, "yield not supported", false ); } - + + virtual long long nscanned() = 0; + /** @return a copy of the inheriting class, 
which will be run with its own query plan. If multiple plan sets are required for an $or query, the QueryOp of the winning plan from a given set will be cloned @@ -143,17 +154,17 @@ namespace mongo { shared_ptr< CoveredIndexMatcher > matcher() const { return _matcher; } protected: void setComplete() { - _orConstraint = qp().frv(); + _orConstraint = qp().originalFrv(); _complete = true; } void setStop() { setComplete(); _stopRequested = true; } virtual void _init() = 0; - + virtual QueryOp *_createChild() const = 0; - + virtual bool alwaysUseRecord() const { return false; } - + private: bool _complete; bool _stopRequested; @@ -164,42 +175,47 @@ namespace mongo { shared_ptr< CoveredIndexMatcher > _oldMatcher; shared_ptr< FieldRangeVector > _orConstraint; }; - + // Set of candidate query plans for a particular query. Used for running // a QueryOp on these plans. class QueryPlanSet { public: - typedef boost::shared_ptr< QueryPlan > PlanPtr; - typedef vector< PlanPtr > PlanSet; + typedef boost::shared_ptr< QueryPlan > QueryPlanPtr; + typedef vector< QueryPlanPtr > PlanSet; QueryPlanSet( const char *ns, - auto_ptr< FieldRangeSet > frs, - const BSONObj &originalQuery, - const BSONObj &order, - const BSONElement *hint = 0, - bool honorRecordedPlan = true, - const BSONObj &min = BSONObj(), - const BSONObj &max = BSONObj(), - bool bestGuessOnly = false, - bool mayYield = false); - int nPlans() const { return plans_.size(); } + auto_ptr< FieldRangeSet > frs, + auto_ptr< FieldRangeSet > originalFrs, + const BSONObj &originalQuery, + const BSONObj &order, + const BSONElement *hint = 0, + bool honorRecordedPlan = true, + const BSONObj &min = BSONObj(), + const BSONObj &max = BSONObj(), + bool bestGuessOnly = false, + bool mayYield = false); + int nPlans() const { return _plans.size(); } shared_ptr< QueryOp > runOp( QueryOp &op ); template< class T > shared_ptr< T > runOp( T &op ) { return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) ); } BSONObj explain() const; - bool usingPrerecordedPlan() const { return usingPrerecordedPlan_; } - PlanPtr getBestGuess() const; + bool usingPrerecordedPlan() const { return _usingPrerecordedPlan; } + QueryPlanPtr getBestGuess() const; //for testing - const FieldRangeSet &fbs() const { return *fbs_; } + const FieldRangeSet &fbs() const { return *_fbs; } + const FieldRangeSet &originalFrs() const { return *_originalFrs; } + bool modifiedKeys() const; + bool hasMultiKey() const; + private: void addOtherPlans( bool checkFirst ); - void addPlan( PlanPtr plan, bool checkFirst ) { - if ( checkFirst && plan->indexKey().woCompare( plans_[ 0 ]->indexKey() ) == 0 ) + void addPlan( QueryPlanPtr plan, bool checkFirst ) { + if ( checkFirst && plan->indexKey().woCompare( _plans[ 0 ]->indexKey() ) == 0 ) return; - plans_.push_back( plan ); + _plans.push_back( plan ); } void init(); void addHint( IndexDetails &id ); @@ -207,25 +223,27 @@ namespace mongo { Runner( QueryPlanSet &plans, QueryOp &op ); shared_ptr< QueryOp > run(); void mayYield( const vector< shared_ptr< QueryOp > > &ops ); - QueryOp &op_; - QueryPlanSet &plans_; + QueryOp &_op; + QueryPlanSet &_plans; static void initOp( QueryOp &op ); static void nextOp( QueryOp &op ); static bool prepareToYield( QueryOp &op ); static void recoverFromYield( QueryOp &op ); }; - const char *ns; + + const char *_ns; BSONObj _originalQuery; - auto_ptr< FieldRangeSet > fbs_; - PlanSet plans_; - bool mayRecordPlan_; - bool usingPrerecordedPlan_; - BSONObj hint_; - BSONObj order_; - long long oldNScanned_; - bool 
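A QueryOp now has to report nscanned() so the runner can rank competing plans, alongside the existing init()/next()/complete() lifecycle. The following self-contained sketch of that lifecycle uses invented stand-in classes (PlanOp, CountingScanOp) rather than the real QueryOp hierarchy:

// Self-contained sketch of the plan-op lifecycle: init once per plan, call
// next() until complete, and expose nscanned for ranking (illustrative names).
#include <iostream>
#include <memory>

class PlanOp {
public:
    virtual ~PlanOp() {}
    virtual void init() = 0;                 // called after a plan is assigned
    virtual void next() = 0;                 // advance by one document
    virtual long long nscanned() const = 0;  // used to order competing plans
    bool complete() const { return _complete; }
protected:
    void setComplete() { _complete = true; }
private:
    bool _complete = false;
};

class CountingScanOp : public PlanOp {
public:
    void init() override { _n = 0; }
    void next() override { if (++_n >= 3) setComplete(); }  // pretend result found
    long long nscanned() const override { return _n; }
private:
    long long _n = 0;
};

int main() {
    std::unique_ptr<PlanOp> op(new CountingScanOp());
    op->init();
    while (!op->complete()) op->next();
    std::cout << "scanned " << op->nscanned() << " docs\n";
}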
honorRecordedPlan_; - BSONObj min_; - BSONObj max_; + auto_ptr< FieldRangeSet > _fbs; + auto_ptr< FieldRangeSet > _originalFrs; + PlanSet _plans; + bool _mayRecordPlan; + bool _usingPrerecordedPlan; + BSONObj _hint; + BSONObj _order; + long long _oldNScanned; + bool _honorRecordedPlan; + BSONObj _min; + BSONObj _max; string _special; bool _bestGuessOnly; bool _mayYield; @@ -258,24 +276,24 @@ namespace mongo { class MultiPlanScanner { public: MultiPlanScanner( const char *ns, - const BSONObj &query, - const BSONObj &order, - const BSONElement *hint = 0, - bool honorRecordedPlan = true, - const BSONObj &min = BSONObj(), - const BSONObj &max = BSONObj(), - bool bestGuessOnly = false, - bool mayYield = false); + const BSONObj &query, + const BSONObj &order, + const BSONElement *hint = 0, + bool honorRecordedPlan = true, + const BSONObj &min = BSONObj(), + const BSONObj &max = BSONObj(), + bool bestGuessOnly = false, + bool mayYield = false); shared_ptr< QueryOp > runOp( QueryOp &op ); template< class T > shared_ptr< T > runOp( T &op ) { return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) ); - } + } shared_ptr< QueryOp > runOpOnce( QueryOp &op ); template< class T > shared_ptr< T > runOpOnce( T &op ) { return dynamic_pointer_cast< T >( runOpOnce( static_cast< QueryOp& >( op ) ) ); - } + } bool mayRunMore() const { return _or ? ( !_tableScanned && !_fros.orFinished() ) : _i == 0; } BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); } // just report this when only one query op @@ -284,6 +302,9 @@ namespace mongo { } void setBestGuessOnly() { _bestGuessOnly = true; } void mayYield( bool val ) { _mayYield = val; } + bool modifiedKeys() const { return _currentQps->modifiedKeys(); } + bool hasMultiKey() const { return _currentQps->hasMultiKey(); } + private: void assertNotOr() const { massert( 13266, "not implemented for $or query", !_or ); @@ -301,21 +322,22 @@ namespace mongo { bool _mayYield; bool _tableScanned; }; - + class MultiCursor : public Cursor { public: class CursorOp : public QueryOp { public: CursorOp() {} CursorOp( const QueryOp &other ) : QueryOp( other ) {} - virtual shared_ptr< Cursor > newCursor() const = 0; + virtual shared_ptr< Cursor > newCursor() const = 0; }; // takes ownership of 'op' MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr< CursorOp > op = shared_ptr< CursorOp >(), bool mayYield = false ) - : _mps( new MultiPlanScanner( ns, pattern, order, 0, true, BSONObj(), BSONObj(), !op.get(), mayYield ) ) { + : _mps( new MultiPlanScanner( ns, pattern, order, 0, true, BSONObj(), BSONObj(), !op.get(), mayYield ) ), _nscanned() { if ( op.get() ) { _op = op; - } else { + } + else { _op.reset( new NoOp() ); } if ( _mps->mayRunMore() ) { @@ -323,13 +345,14 @@ namespace mongo { if ( !ok() ) { advance(); } - } else { + } + else { _c.reset( new BasicCursor( DiskLoc() ) ); } } // used to handoff a query to a getMore() MultiCursor( auto_ptr< MultiPlanScanner > mps, const shared_ptr< Cursor > &c, const shared_ptr< CoveredIndexMatcher > &matcher, const QueryOp &op ) - : _op( new NoOp( op ) ), _c( c ), _mps( mps ), _matcher( matcher ) { + : _op( new NoOp( op ) ), _c( c ), _mps( mps ), _matcher( matcher ), _nscanned( -1 ) { _mps->setBestGuessOnly(); _mps->mayYield( false ); // with a NoOp, there's no need to yield in QueryPlanSet if ( !ok() ) { @@ -355,16 +378,24 @@ namespace mongo { } virtual void checkLocation() { _c->checkLocation(); - } + } virtual bool supportGetMore() { return true; } virtual bool 
supportYields() { return _c->supportYields(); } + // with update we could potentially get the same document on multiple // indexes, but update appears to already handle this with seenObjects // so we don't have to do anything special here. virtual bool getsetdup(DiskLoc loc) { - return _c->getsetdup( loc ); + return _c->getsetdup( loc ); } + + virtual bool modifiedKeys() const { return _mps->modifiedKeys(); } + + virtual bool isMultiKey() const { return _mps->hasMultiKey(); } + virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); } + // return -1 if we're a getmore handoff + virtual long long nscanned() { return _nscanned >= 0 ? _nscanned + _c->nscanned() : _nscanned; } // just for testing shared_ptr< Cursor > sub_c() const { return _c; } private: @@ -377,8 +408,12 @@ namespace mongo { virtual bool mayRecordPlan() const { return false; } virtual QueryOp *_createChild() const { return new NoOp(); } virtual shared_ptr< Cursor > newCursor() const { return qp().newCursor(); } + virtual long long nscanned() { assert( false ); return 0; } }; void nextClause() { + if ( _nscanned >= 0 && _c.get() ) { + _nscanned += _c->nscanned(); + } shared_ptr< CursorOp > best = _mps->runOpOnce( *_op ); if ( ! best->complete() ) throw MsgAssertionException( best->exception() ); @@ -390,12 +425,13 @@ namespace mongo { shared_ptr< Cursor > _c; auto_ptr< MultiPlanScanner > _mps; shared_ptr< CoveredIndexMatcher > _matcher; + long long _nscanned; }; - + // NOTE min, max, and keyPattern will be updated to be consistent with the selected index. IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ); - inline bool isSimpleIdQuery( const BSONObj& query ){ + inline bool isSimpleIdQuery( const BSONObj& query ) { BSONObjIterator i(query); if( !i.more() ) return false; BSONElement e = i.next(); @@ -403,14 +439,16 @@ namespace mongo { if( strcmp("_id", e.fieldName()) != 0 ) return false; return e.isSimpleType(); // e.g. not something like { _id : { $gt : ... 
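MultiCursor::nscanned() above folds a banked per-clause total into the live cursor's count, with -1 reserved for a getMore handoff where the earlier total is unknown. A hedged standalone sketch of that accumulation follows; MultiCursorCounter and ClauseCursor are invented names for illustration.

// Sketch of accumulating a scan counter across $or clause handoffs, with -1
// as a sentinel for "unknown" (illustrative stand-in types only).
#include <iostream>

struct ClauseCursor { long long scanned; };

class MultiCursorCounter {
public:
    explicit MultiCursorCounter(bool getMoreHandoff) : _nscanned(getMoreHandoff ? -1 : 0) {}
    // Called when moving to the next clause: bank the finished clause's count.
    void nextClause(long long finishedClauseScanned) {
        if (_nscanned >= 0)
            _nscanned += finishedClauseScanned;
    }
    // Total = banked clauses plus the live cursor, unless we are a handoff.
    long long nscanned(const ClauseCursor& current) const {
        return _nscanned >= 0 ? _nscanned + current.scanned : _nscanned;
    }
private:
    long long _nscanned;
};

int main() {
    MultiCursorCounter counter(false);
    counter.nextClause(10);                       // first $or clause scanned 10
    ClauseCursor live{4};                         // current clause so far
    std::cout << counter.nscanned(live) << "\n";  // 14
}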
} - + // matcher() will always work on the returned cursor inline shared_ptr< Cursor > bestGuessCursor( const char *ns, const BSONObj &query, const BSONObj &sort ) { if( !query.getField( "$or" ).eoo() ) { return shared_ptr< Cursor >( new MultiCursor( ns, query, sort ) ); - } else { + } + else { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns, query ) ); - shared_ptr< Cursor > ret = QueryPlanSet( ns, frs, query, sort ).getBestGuess()->newCursor(); + auto_ptr< FieldRangeSet > origFrs( new FieldRangeSet( *frs ) ); + shared_ptr< Cursor > ret = QueryPlanSet( ns, frs, origFrs, query, sort ).getBestGuess()->newCursor(); if ( !query.isEmpty() ) { shared_ptr< CoveredIndexMatcher > matcher( new CoveredIndexMatcher( query, ret->indexKeyPattern() ) ); ret->setMatcher( matcher ); @@ -418,5 +456,5 @@ namespace mongo { return ret; } } - + } // namespace mongo diff --git a/db/queryutil.cpp b/db/queryutil.cpp index 2153046..1cd750b 100644 --- a/db/queryutil.cpp +++ b/db/queryutil.cpp @@ -23,111 +23,119 @@ #include "queryoptimizer.h" #include "../util/unittest.h" #include "dbmessage.h" +#include "indexkey.h" namespace mongo { extern BSONObj staticNull; - + /** returns a string that when used as a matcher, would match a super set of regex() returns "" for complex regular expressions used to optimize queries in some simple regex cases that start with '^' if purePrefix != NULL, sets it to whether the regex can be converted to a range query */ - string simpleRegex(const char* regex, const char* flags, bool* purePrefix){ + string simpleRegex(const char* regex, const char* flags, bool* purePrefix) { string r = ""; if (purePrefix) *purePrefix = false; bool multilineOK; - if ( regex[0] == '\\' && regex[1] == 'A'){ + if ( regex[0] == '\\' && regex[1] == 'A') { multilineOK = true; regex += 2; - } else if (regex[0] == '^') { + } + else if (regex[0] == '^') { multilineOK = false; regex += 1; - } else { + } + else { return r; } bool extended = false; - while (*flags){ - switch (*(flags++)){ - case 'm': // multiline - if (multilineOK) - continue; - else - return r; - case 'x': // extended - extended = true; - break; - default: - return r; // cant use index + while (*flags) { + switch (*(flags++)) { + case 'm': // multiline + if (multilineOK) + continue; + else + return r; + case 'x': // extended + extended = true; + break; + default: + return r; // cant use index } } stringstream ss; - while(*regex){ + while(*regex) { char c = *(regex++); - if ( c == '*' || c == '?' ){ + if ( c == '*' || c == '?' 
) { // These are the only two symbols that make the last char optional r = ss.str(); r = r.substr( 0 , r.size() - 1 ); return r; //breaking here fails with /^a?/ - } else if (c == '\\'){ + } + else if (c == '\\') { // slash followed by non-alphanumeric represents the following char c = *(regex++); if ((c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z') || - (c >= '0' && c <= '0') || - (c == '\0')) - { + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '0') || + (c == '\0')) { r = ss.str(); break; - } else { + } + else { ss << c; } - } else if (strchr("^$.[|()+{", c)){ + } + else if (strchr("^$.[|()+{", c)) { // list of "metacharacters" from man pcrepattern r = ss.str(); break; - } else if (extended && c == '#'){ + } + else if (extended && c == '#') { // comment r = ss.str(); break; - } else if (extended && isspace(c)){ + } + else if (extended && isspace(c)) { continue; - } else { + } + else { // self-matching char ss << c; } } - if ( r.empty() && *regex == 0 ){ + if ( r.empty() && *regex == 0 ) { r = ss.str(); if (purePrefix) *purePrefix = !r.empty(); } return r; } - inline string simpleRegex(const BSONElement& e){ - switch(e.type()){ - case RegEx: - return simpleRegex(e.regex(), e.regexFlags()); - case Object:{ - BSONObj o = e.embeddedObject(); - return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe()); - } - default: assert(false); return ""; //return squashes compiler warning + inline string simpleRegex(const BSONElement& e) { + switch(e.type()) { + case RegEx: + return simpleRegex(e.regex(), e.regexFlags()); + case Object: { + BSONObj o = e.embeddedObject(); + return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe()); + } + default: assert(false); return ""; //return squashes compiler warning } } string simpleRegexEnd( string regex ) { ++regex[ regex.length() - 1 ]; return regex; - } - - + } + + FieldRange::FieldRange( const BSONElement &e, bool isNot, bool optimize ) { // NOTE with $not, we could potentially form a complementary set of intervals. if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) { @@ -139,7 +147,8 @@ namespace mongo { BSONElement ie = i.next(); if ( ie.type() == RegEx ) { regexes.push_back( FieldRange( ie, false, optimize ) ); - } else { + } + else { vals.insert( ie ); } } @@ -149,22 +158,22 @@ namespace mongo { for( vector< FieldRange >::const_iterator i = regexes.begin(); i != regexes.end(); ++i ) *this |= *i; - + return; } - - if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ){ - + + if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) { + _intervals.push_back( FieldInterval(e) ); - + const BSONElement& temp = e.embeddedObject().firstElement(); - if ( ! temp.eoo() ){ + if ( ! 
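simpleRegex and simpleRegexEnd above convert an anchored pattern such as /^foo/ into the index range ["foo", "fop"), where the exclusive upper bound is the prefix with its last byte incremented. A minimal sketch of that prefix-to-range conversion, assuming a non-empty ASCII prefix:

// Sketch of turning an anchored regex prefix into an index range; the real
// code also validates flags and escape sequences before trusting the prefix.
#include <iostream>
#include <string>
#include <utility>

std::pair<std::string, std::string> prefixRange(std::string prefix) {
    // Assumes prefix is non-empty; "foo" -> exclusive upper bound "fop".
    std::string upper = prefix;
    ++upper[upper.size() - 1];
    return { std::move(prefix), std::move(upper) };
}

int main() {
    auto range = prefixRange("foo");
    std::cout << "[" << range.first << ", " << range.second << ")\n";
}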
temp.eoo() ) { if ( temp < e ) _intervals.insert( _intervals.begin() , temp ); else _intervals.push_back( FieldInterval(temp) ); } - + return; } @@ -181,17 +190,19 @@ namespace mongo { if ( e.eoo() ) return; + int op = e.getGtLtOp(); if ( e.type() == RegEx - || (e.type() == Object && !e.embeddedObject()["$regex"].eoo()) - ) - { + || (e.type() == Object && !e.embeddedObject()["$regex"].eoo()) + ) { + uassert( 13454, "invalid regular expression operator", op == BSONObj::Equality || op == BSONObj::opREGEX ); if ( !isNot ) { // no optimization for negated regex - we could consider creating 2 intervals comprising all nonmatching prefixes const string r = simpleRegex(e); if ( r.size() ) { lower = addObj( BSON( "" << r ) ).firstElement(); upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement(); upperInclusive = false; - } else { + } + else { BSONObjBuilder b1(32), b2(32); b1.appendMinForType( "" , String ); lower = addObj( b1.obj() ).firstElement(); @@ -202,10 +213,11 @@ namespace mongo { } // regex matches self - regex type > string type - if (e.type() == RegEx){ + if (e.type() == RegEx) { BSONElement re = addObj( BSON( "" << e ) ).firstElement(); _intervals.push_back( FieldInterval(re) ); - } else { + } + else { BSONObj orig = e.embeddedObject(); BSONObjBuilder b; b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe()); @@ -216,38 +228,53 @@ namespace mongo { } return; } - int op = e.getGtLtOp(); if ( isNot ) { switch( op ) { - case BSONObj::Equality: - case BSONObj::opALL: - case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in) - case BSONObj::opTYPE: - op = BSONObj::NE; // no bound calculation - break; - case BSONObj::NE: - op = BSONObj::Equality; - break; - case BSONObj::LT: - op = BSONObj::GTE; - break; - case BSONObj::LTE: - op = BSONObj::GT; - break; - case BSONObj::GT: - op = BSONObj::LTE; - break; - case BSONObj::GTE: - op = BSONObj::LT; - break; - default: // otherwise doesn't matter - break; + case BSONObj::Equality: + return; +// op = BSONObj::NE; +// break; + case BSONObj::opALL: + case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in) + case BSONObj::opTYPE: + // no bound calculation + return; + case BSONObj::NE: + op = BSONObj::Equality; + break; + case BSONObj::LT: + op = BSONObj::GTE; + break; + case BSONObj::LTE: + op = BSONObj::GT; + break; + case BSONObj::GT: + op = BSONObj::LTE; + break; + case BSONObj::GTE: + op = BSONObj::LT; + break; + default: // otherwise doesn't matter + break; } } switch( op ) { case BSONObj::Equality: lower = upper = e; break; + case BSONObj::NE: { + // this will invalidate the upper/lower references above + _intervals.push_back( FieldInterval() ); + // optimize doesn't make sense for negative ranges + _intervals[ 0 ]._upper._bound = e; + _intervals[ 0 ]._upper._inclusive = false; + _intervals[ 1 ]._lower._bound = e; + _intervals[ 1 ]._lower._inclusive = false; + _intervals[ 1 ]._upper._bound = maxKey.firstElement(); + _intervals[ 1 ]._upper._inclusive = true; + optimize = false; // don't run optimize code below + break; + } case BSONObj::LT: upperInclusive = false; case BSONObj::LTE: @@ -262,9 +289,9 @@ namespace mongo { massert( 10370 , "$all requires array", e.type() == Array ); BSONObjIterator i( e.embeddedObject() ); bool bound = false; - while ( i.more() ){ + while ( i.more() ) { 
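The new $ne handling above builds two open-ended intervals around the excluded value, roughly (MinKey, v) and (v, MaxKey), instead of leaving the bound calculation to a full scan. The standalone illustration below uses doubles with +/-infinity in place of BSON values and MinKey/MaxKey:

// Standalone illustration of the $ne bound construction: two intervals that
// exclude only the value itself (doubles are stand-ins for BSON values).
#include <iostream>
#include <limits>
#include <vector>

struct Interval {
    double lower, upper;
    bool lowerInclusive, upperInclusive;
};

std::vector<Interval> neIntervals(double v) {
    const double kMin = -std::numeric_limits<double>::infinity();  // ~ MinKey
    const double kMax =  std::numeric_limits<double>::infinity();  // ~ MaxKey
    return {
        { kMin, v, true,  false },   // everything strictly below v
        { v, kMax, false, true  },   // everything strictly above v
    };
}

int main() {
    for (const auto& i : neIntervals(7)) {
        std::cout << (i.lowerInclusive ? "[" : "(") << i.lower << ", "
                  << i.upper << (i.upperInclusive ? "]" : ")") << "\n";
    }
}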
BSONElement x = i.next(); - if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ){ + if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) { // taken care of elsewhere } else if ( x.type() != RegEx ) { @@ -299,7 +326,7 @@ namespace mongo { BSONObjBuilder b; b.appendMaxForType( "" , NumberDouble ); upper = addObj( b.obj() ).firstElement(); - } + } break; } case BSONObj::opTYPE: { @@ -314,7 +341,7 @@ namespace mongo { b.appendMaxForType( "" , t ); upper = addObj( b.obj() ).firstElement(); } - + break; } case BSONObj::opREGEX: @@ -332,14 +359,14 @@ namespace mongo { default: break; } - - if ( optimize ){ - if ( lower.type() != MinKey && upper.type() == MaxKey && lower.isSimpleType() ){ // TODO: get rid of isSimpleType + + if ( optimize ) { + if ( lower.type() != MinKey && upper.type() == MaxKey && lower.isSimpleType() ) { // TODO: get rid of isSimpleType BSONObjBuilder b; b.appendMaxForType( lower.fieldName() , lower.type() ); upper = addObj( b.obj() ).firstElement(); } - else if ( lower.type() == MinKey && upper.type() != MaxKey && upper.isSimpleType() ){ // TODO: get rid of isSimpleType + else if ( lower.type() == MinKey && upper.type() != MaxKey && upper.isSimpleType() ) { // TODO: get rid of isSimpleType BSONObjBuilder b; b.appendMinForType( upper.fieldName() , upper.type() ); lower = addObj( b.obj() ).firstElement(); @@ -355,7 +382,7 @@ namespace mongo { if ( _special.size() == 0 && other._special.size() ) _special = other._special; } - + // as called, these functions find the max/min of a bound in the // opposite direction, so inclusive bounds are considered less // superlative @@ -378,41 +405,46 @@ namespace mongo { result._upper = minFieldBound( one._upper, two._upper ); return result.strictValid(); } - - // NOTE Not yet tested for complex $or bounds, just for simple bounds generated by $in + const FieldRange &FieldRange::operator&=( const FieldRange &other ) { vector< FieldInterval > newIntervals; vector< FieldInterval >::const_iterator i = _intervals.begin(); vector< FieldInterval >::const_iterator j = other._intervals.begin(); while( i != _intervals.end() && j != other._intervals.end() ) { FieldInterval overlap; - if ( fieldIntervalOverlap( *i, *j, overlap ) ) + if ( fieldIntervalOverlap( *i, *j, overlap ) ) { newIntervals.push_back( overlap ); - if ( i->_upper == minFieldBound( i->_upper, j->_upper ) ) + } + if ( i->_upper == minFieldBound( i->_upper, j->_upper ) ) { ++i; - else - ++j; + } + else { + ++j; + } } finishOperation( newIntervals, other ); return *this; } - + void handleInterval( const FieldInterval &lower, FieldBound &low, FieldBound &high, vector< FieldInterval > &newIntervals ) { if ( low._bound.eoo() ) { low = lower._lower; high = lower._upper; - } else { - if ( high._bound.woCompare( lower._lower._bound, false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way + } + else { + int cmp = high._bound.woCompare( lower._lower._bound, false ); + if ( ( cmp < 0 ) || ( cmp == 0 && !high._inclusive && !lower._lower._inclusive ) ) { FieldInterval tmp; tmp._lower = low; tmp._upper = high; newIntervals.push_back( tmp ); - low = lower._lower; high = lower._upper; - } else { + low = lower._lower; high = lower._upper; + } + else { high = lower._upper; } - } + } } - + const FieldRange &FieldRange::operator|=( const FieldRange &other ) { vector< FieldInterval > newIntervals; FieldBound low; @@ -424,90 
+456,107 @@ namespace mongo { if ( ( cmp == 0 && i->_lower._inclusive ) || cmp < 0 ) { handleInterval( *i, low, high, newIntervals ); ++i; - } else { + } + else { handleInterval( *j, low, high, newIntervals ); ++j; - } + } } while( i != _intervals.end() ) { handleInterval( *i, low, high, newIntervals ); - ++i; + ++i; } while( j != other._intervals.end() ) { handleInterval( *j, low, high, newIntervals ); - ++j; + ++j; } FieldInterval tmp; tmp._lower = low; tmp._upper = high; - newIntervals.push_back( tmp ); + newIntervals.push_back( tmp ); finishOperation( newIntervals, other ); - return *this; + return *this; } - + const FieldRange &FieldRange::operator-=( const FieldRange &other ) { + vector< FieldInterval > newIntervals; vector< FieldInterval >::iterator i = _intervals.begin(); vector< FieldInterval >::const_iterator j = other._intervals.begin(); while( i != _intervals.end() && j != other._intervals.end() ) { int cmp = i->_lower._bound.woCompare( j->_lower._bound, false ); if ( cmp < 0 || - ( cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive ) ) { + ( cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive ) ) { int cmp2 = i->_upper._bound.woCompare( j->_lower._bound, false ); if ( cmp2 < 0 ) { + newIntervals.push_back( *i ); ++i; - } else if ( cmp2 == 0 ) { - if ( i->_upper._inclusive && j->_lower._inclusive ) { - i->_upper._inclusive = false; + } + else if ( cmp2 == 0 ) { + newIntervals.push_back( *i ); + if ( newIntervals.back()._upper._inclusive && j->_lower._inclusive ) { + newIntervals.back()._upper._inclusive = false; } ++i; - } else { + } + else { + newIntervals.push_back( *i ); + newIntervals.back()._upper = j->_lower; + newIntervals.back()._upper.flipInclusive(); int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false ); if ( cmp3 < 0 || - ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) { - i->_upper = j->_lower; - i->_upper.flipInclusive(); + ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) { ++i; - } else { + } + else { + i->_lower = j->_upper; + i->_lower.flipInclusive(); ++j; } } - } else { + } + else { int cmp2 = i->_lower._bound.woCompare( j->_upper._bound, false ); if ( cmp2 > 0 || - ( cmp2 == 0 && ( !i->_lower._inclusive || !j->_lower._inclusive ) ) ) { + ( cmp2 == 0 && ( !i->_lower._inclusive || !j->_upper._inclusive ) ) ) { ++j; - } else { + } + else { int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false ); if ( cmp3 < 0 || - ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) { - i = _intervals.erase( i ); - } else { + ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) { + ++i; + } + else { i->_lower = j->_upper; - i->_lower.flipInclusive(); + i->_lower.flipInclusive(); ++j; } - } + } } } - finishOperation( _intervals, other ); - return *this; + while( i != _intervals.end() ) { + newIntervals.push_back( *i ); + ++i; + } + finishOperation( newIntervals, other ); + return *this; } - + // TODO write a proper implementation that doesn't do a full copy bool FieldRange::operator<=( const FieldRange &other ) { FieldRange temp = *this; temp -= other; return temp.empty(); } - + BSONObj FieldRange::addObj( const BSONObj &o ) { _objData.push_back( o ); return o; } - + string FieldRangeSet::getSpecial() const { string s = ""; - for ( map::iterator i=_ranges.begin(); i!=_ranges.end(); i++ ){ + for ( map::iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) { if ( i->second.getSpecial().size() == 0 ) continue; uassert( 13033 , "can't have 2 special fields" , s.size() == 0 ); @@ 
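FieldRange::operator-= above now rebuilds the interval list, clipping or splitting each interval of *this against the intervals of other rather than editing in place. The sketch below shows the same subtraction over sorted, disjoint half-open [lo, hi) intervals on doubles; the real code additionally tracks inclusive/exclusive flags on each bound.

// Simplified sketch of subtracting one sorted list of disjoint intervals
// from another, using half-open [lo, hi) intervals to sidestep inclusivity.
#include <algorithm>
#include <iostream>
#include <vector>

struct Interval { double lo, hi; };  // represents [lo, hi)

std::vector<Interval> subtract(const std::vector<Interval>& a,
                               const std::vector<Interval>& b) {
    std::vector<Interval> out;
    for (Interval cur : a) {
        for (const Interval& cut : b) {           // b assumed sorted, disjoint
            if (cut.hi <= cur.lo || cut.lo >= cur.hi)
                continue;                          // no overlap with this piece
            if (cut.lo > cur.lo)
                out.push_back({cur.lo, cut.lo});   // keep the part before the cut
            cur.lo = std::max(cur.lo, cut.hi);     // drop the covered part
            if (cur.lo >= cur.hi)
                break;                             // nothing of cur remains
        }
        if (cur.lo < cur.hi)
            out.push_back(cur);
    }
    return out;
}

int main() {
    // [1,10) minus { [3,4), [6,12) }  ->  [1,3) and [4,6)
    for (const auto& i : subtract({{1, 10}}, {{3, 4}, {6, 12}}))
        std::cout << "[" << i.lo << ", " << i.hi << ") ";
    std::cout << "\n";
}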
-533,34 +582,35 @@ namespace mongo { } if ( op2 == BSONObj::opELEM_MATCH ) { BSONObjIterator k( g.embeddedObjectUserCheck() ); - while ( k.more() ){ + while ( k.more() ) { BSONElement h = k.next(); StringBuilder buf(32); buf << fieldName << "." << h.fieldName(); string fullname = buf.str(); - + int op3 = getGtLtOp( h ); - if ( op3 == BSONObj::Equality ){ + if ( op3 == BSONObj::Equality ) { _ranges[ fullname ] &= FieldRange( h , isNot , optimize ); } else { BSONObjIterator l( h.embeddedObject() ); - while ( l.more() ){ + while ( l.more() ) { _ranges[ fullname ] &= FieldRange( l.next() , isNot , optimize ); } } - } - } else { + } + } + else { _ranges[ fieldName ] &= FieldRange( f , isNot , optimize ); - } + } } - + void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) { bool equality = ( getGtLtOp( e ) == BSONObj::Equality ); if ( equality && e.type() == Object ) { equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 ); } - + if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) { _ranges[ e.fieldName() ] &= FieldRange( e , false , optimize ); } @@ -570,67 +620,69 @@ namespace mongo { BSONElement f = j.next(); if ( strcmp( f.fieldName(), "$not" ) == 0 ) { switch( f.type() ) { - case Object: { - BSONObjIterator k( f.embeddedObject() ); - while( k.more() ) { - BSONElement g = k.next(); - uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality ); - processOpElement( e.fieldName(), g, true, optimize ); - } - break; + case Object: { + BSONObjIterator k( f.embeddedObject() ); + while( k.more() ) { + BSONElement g = k.next(); + uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality ); + processOpElement( e.fieldName(), g, true, optimize ); } - case RegEx: - processOpElement( e.fieldName(), f, true, optimize ); - break; - default: - uassert( 13041, "invalid use of $not", false ); + break; } - } else { + case RegEx: + processOpElement( e.fieldName(), f, true, optimize ); + break; + default: + uassert( 13041, "invalid use of $not", false ); + } + } + else { processOpElement( e.fieldName(), f, false, optimize ); } - } - } + } + } } - + FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query , bool optimize ) : _ns( ns ), _queries( 1, query.getOwned() ) { - BSONObjIterator i( _queries[ 0 ] ); - - while( i.more() ) { - BSONElement e = i.next(); - // e could be x:1 or x:{$gt:1} - - if ( strcmp( e.fieldName(), "$where" ) == 0 ) { - continue; - } - - if ( strcmp( e.fieldName(), "$or" ) == 0 ) { - continue; - } - - if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { - continue; - } - - processQueryField( e, optimize ); - } + BSONObjIterator i( _queries[ 0 ] ); + + while( i.more() ) { + BSONElement e = i.next(); + // e could be x:1 or x:{$gt:1} + + if ( strcmp( e.fieldName(), "$where" ) == 0 ) { + continue; + } + + if ( strcmp( e.fieldName(), "$or" ) == 0 ) { + continue; + } + + if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { + continue; + } + + processQueryField( e, optimize ); } + } FieldRangeOrSet::FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize ) : _baseSet( ns, query, optimize ), _orFound() { BSONObjIterator i( _baseSet._queries[ 0 ] ); - + while( i.more() ) { BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "$or" ) == 0 ) { - massert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 ); - BSONObjIterator j( e.embeddedObject() ); - while( j.more() ) { - BSONElement f = j.next(); - massert( 13263, "$or array must 
contain objects", f.type() == Object ); + if ( strcmp( e.fieldName(), "$or" ) == 0 ) { + massert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 ); + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + BSONElement f = j.next(); + massert( 13263, "$or array must contain objects", f.type() == Object ); _orSets.push_back( FieldRangeSet( ns, f.embeddedObject(), optimize ) ); massert( 13291, "$or may not contain 'special' query", _orSets.back().getSpecial().empty() ); + _originalOrSets.push_back( _orSets.back() ); } _orFound = true; continue; @@ -638,13 +690,41 @@ namespace mongo { } } + void FieldRangeOrSet::popOrClause( const BSONObj &indexSpec ) { + massert( 13274, "no or clause to pop", !orFinished() ); + auto_ptr< FieldRangeSet > holder; + FieldRangeSet *toDiff = &_originalOrSets.front(); + if ( toDiff->matchPossible() && !indexSpec.isEmpty() ) { + holder.reset( toDiff->subset( indexSpec ) ); + toDiff = holder.get(); + } + list< FieldRangeSet >::iterator i = _orSets.begin(); + list< FieldRangeSet >::iterator j = _originalOrSets.begin(); + ++i; + ++j; + while( i != _orSets.end() ) { + *i -= *toDiff; + if( !i->matchPossible() ) { + i = _orSets.erase( i ); + j = _originalOrSets.erase( j ); + } + else { + ++i; + ++j; + } + } + _oldOrSets.push_front( _orSets.front() ); + _orSets.pop_front(); + _originalOrSets.pop_front(); + } + FieldRange *FieldRangeSet::trivialRange_ = 0; FieldRange &FieldRangeSet::trivialRange() { if ( trivialRange_ == 0 ) trivialRange_ = new FieldRange(); return *trivialRange_; } - + BSONObj FieldRangeSet::simplifiedQuery( const BSONObj &_fields ) const { BSONObj fields = _fields; if ( fields.isEmpty() ) { @@ -676,14 +756,15 @@ namespace mongo { } return b.obj(); } - + QueryPattern FieldRangeSet::pattern( const BSONObj &sort ) const { QueryPattern qp; for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { assert( !i->second.empty() ); if ( i->second.equality() ) { qp._fieldTypes[ i->first ] = QueryPattern::Equality; - } else if ( i->second.nontrivial() ) { + } + else if ( i->second.nontrivial() ) { bool upper = i->second.max().type() != MaxKey; bool lower = i->second.min().type() != MinKey; if ( upper && lower ) @@ -691,18 +772,18 @@ namespace mongo { else if ( upper ) qp._fieldTypes[ i->first ] = QueryPattern::UpperBound; else if ( lower ) - qp._fieldTypes[ i->first ] = QueryPattern::LowerBound; + qp._fieldTypes[ i->first ] = QueryPattern::LowerBound; } } qp.setSort( sort ); return qp; } - + // TODO get rid of this BoundList FieldRangeSet::indexBounds( const BSONObj &keyPattern, int direction ) const { typedef vector< pair< shared_ptr< BSONObjBuilder >, shared_ptr< BSONObjBuilder > > > BoundBuilders; BoundBuilders builders; - builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); + builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); BSONObjIterator i( keyPattern ); bool ineq = false; // until ineq is true, we are just dealing with equality and $in bounds while( i.more() ) { @@ -716,7 +797,8 @@ namespace mongo { j->first->appendAs( fr.min(), "" ); j->second->appendAs( fr.min(), "" ); } - } else { + } + else { if ( !fr.inQuery() ) { ineq = true; } @@ -725,18 +807,21 @@ namespace mongo { for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) { BSONObj first = i->first->obj(); 
BSONObj second = i->second->obj(); + + const unsigned maxCombinations = 4000000; if ( forward ) { for( vector< FieldInterval >::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) { - uassert( 13303, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < 1000000 ); + uassert( 13303, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < maxCombinations ); newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); newBuilders.back().first->appendElements( first ); newBuilders.back().second->appendElements( second ); newBuilders.back().first->appendAs( j->_lower._bound, "" ); newBuilders.back().second->appendAs( j->_upper._bound, "" ); } - } else { + } + else { for( vector< FieldInterval >::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) { - uassert( 13304, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < 1000000 ); + uassert( 13304, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < maxCombinations ); newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) ); newBuilders.back().first->appendElements( first ); newBuilders.back().second->appendElements( second ); @@ -747,7 +832,8 @@ namespace mongo { } builders = newBuilders; } - } else { + } + else { for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) { j->first->appendAs( forward ? fr.min() : fr.max(), "" ); j->second->appendAs( forward ? fr.max() : fr.min(), "" ); @@ -758,204 +844,45 @@ namespace mongo { for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) ret.push_back( make_pair( i->first->obj(), i->second->obj() ) ); return ret; - } - - /////////////////// - // FieldMatcher // - /////////////////// - - void FieldMatcher::add( const BSONObj& o ){ - massert( 10371 , "can only add to FieldMatcher once", _source.isEmpty()); - _source = o; - - BSONObjIterator i( o ); - int true_false = -1; - while ( i.more() ){ - BSONElement e = i.next(); - - if (e.type() == Object){ - BSONObj obj = e.embeddedObject(); - BSONElement e2 = obj.firstElement(); - if ( strcmp(e2.fieldName(), "$slice") == 0 ){ - if (e2.isNumber()){ - int i = e2.numberInt(); - if (i < 0) - add(e.fieldName(), i, -i); // limit is now positive - else - add(e.fieldName(), 0, i); - - } else if (e2.type() == Array) { - BSONObj arr = e2.embeddedObject(); - uassert(13099, "$slice array wrong size", arr.nFields() == 2 ); - - BSONObjIterator it(arr); - int skip = it.next().numberInt(); - int limit = it.next().numberInt(); - uassert(13100, "$slice limit must be positive", limit > 0 ); - add(e.fieldName(), skip, limit); - - } else { - uassert(13098, "$slice only supports numbers and [skip, limit] arrays", false); - } - } else { - uassert(13097, string("Unsupported projection option: ") + obj.firstElement().fieldName(), false); - } - - } else if (!strcmp(e.fieldName(), "_id") && !e.trueValue()){ - _includeID = false; - - } else { - - add (e.fieldName(), e.trueValue()); - - // validate input - if (true_false == -1){ - true_false = e.trueValue(); - _include = !e.trueValue(); - } - else{ - uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." 
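Each $in list on a compound index multiplies the number of start/end bound pairs, which is why the partitioning cap above is now a named maxCombinations of 4,000,000 rather than the old hard-coded 1,000,000. A toy illustration of that cartesian growth and the guard:

// Toy illustration of how $in values on a compound index multiply into bound
// pairs, and of the combinatorial cap (values and strings are made up).
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

int main() {
    const std::vector<int> aVals = {1, 2, 3};      // { a: { $in: [1,2,3] } }
    const std::vector<int> bVals = {10, 20};       // { b: { $in: [10,20] } }
    const size_t maxCombinations = 4000000;        // cap from the patch

    std::vector<std::pair<std::string, std::string>> bounds;
    for (int a : aVals) {
        for (int b : bVals) {
            if (bounds.size() >= maxCombinations)
                throw std::runtime_error(
                    "combinatorial limit of $in partitioning exceeded");
            std::string point = "{a:" + std::to_string(a) +
                                ",b:" + std::to_string(b) + "}";
            bounds.emplace_back(point, point);     // point interval per combination
        }
    }
    std::cout << bounds.size() << " bound pairs\n"; // 6
}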
, - (bool)true_false == e.trueValue() ); - } - } - } - } - - void FieldMatcher::add(const string& field, bool include){ - if (field.empty()){ // this is the field the user referred to - _include = include; - } else { - _include = !include; - - const size_t dot = field.find('.'); - const string subfield = field.substr(0,dot); - const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); - - boost::shared_ptr& fm = _fields[subfield]; - if (!fm) - fm.reset(new FieldMatcher()); - - fm->add(rest, include); - } - } - - void FieldMatcher::add(const string& field, int skip, int limit){ - _special = true; // can't include or exclude whole object - - if (field.empty()){ // this is the field the user referred to - _skip = skip; - _limit = limit; - } else { - const size_t dot = field.find('.'); - const string subfield = field.substr(0,dot); - const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); - - boost::shared_ptr& fm = _fields[subfield]; - if (!fm) - fm.reset(new FieldMatcher()); - - fm->add(rest, skip, limit); - } } - BSONObj FieldMatcher::getSpec() const{ - return _source; - } - - //b will be the value part of an array-typed BSONElement - void FieldMatcher::appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested) const { - int skip = nested ? 0 : _skip; - int limit = nested ? -1 : _limit; - - if (skip < 0){ - skip = max(0, skip + a.nFields()); - } - - int i=0; - BSONObjIterator it(a); - while (it.more()){ - BSONElement e = it.next(); - - if (skip){ - skip--; - continue; - } - - if (limit != -1 && (limit-- == 0)){ - break; - } - - switch(e.type()){ - case Array:{ - BSONObjBuilder subb; - appendArray(subb , e.embeddedObject(), true); - b.appendArray(b.numStr(i++), subb.obj()); - break; - } - case Object:{ - BSONObjBuilder subb; - BSONObjIterator jt(e.embeddedObject()); - while (jt.more()){ - append(subb , jt.next()); - } - b.append(b.numStr(i++), subb.obj()); - break; - } - default: - if (_include) - b.appendAs(e, b.numStr(i++)); + FieldRangeSet *FieldRangeSet::subset( const BSONObj &fields ) const { + FieldRangeSet *ret = new FieldRangeSet( _ns, BSONObj() ); + BSONObjIterator i( fields ); + while( i.more() ) { + BSONElement e = i.next(); + if ( _ranges[ e.fieldName() ].nontrivial() ) { + ret->_ranges[ e.fieldName() ] = _ranges[ e.fieldName() ]; } } + ret->_queries = _queries; + return ret; } - void FieldMatcher::append( BSONObjBuilder& b , const BSONElement& e ) const { - FieldMap::const_iterator field = _fields.find( e.fieldName() ); - - if (field == _fields.end()){ - if (_include) - b.append(e); - } - else { - FieldMatcher& subfm = *field->second; - - if ((subfm._fields.empty() && !subfm._special) || !(e.type()==Object || e.type()==Array) ){ - if (subfm._include) - b.append(e); - } - else if (e.type() == Object){ - BSONObjBuilder subb; - BSONObjIterator it(e.embeddedObject()); - while (it.more()){ - subfm.append(subb, it.next()); - } - b.append(e.fieldName(), subb.obj()); - - } - else { //Array - BSONObjBuilder subb; - subfm.appendArray(subb, e.embeddedObject()); - b.appendArray(e.fieldName(), subb.obj()); - } - } - } - bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const { - int l = matchingLowElement( e, i, forward ); - return ( l % 2 == 0 ); // if we're inside an interval + bool eq; + int l = matchingLowElement( e, i, forward, eq ); + return ( l % 2 == 0 ); // if we're inside an interval } - + // binary search for interval containing the specified element // an even return value indicates that the 
element is contained within a valid interval - int FieldRangeVector::matchingLowElement( const BSONElement &e, int i, bool forward ) const { + int FieldRangeVector::matchingLowElement( const BSONElement &e, int i, bool forward, bool &lowEquality ) const { + lowEquality = false; int l = -1; int h = _ranges[ i ].intervals().size() * 2; while( l + 1 < h ) { int m = ( l + h ) / 2; BSONElement toCmp; + bool toCmpInclusive; + const FieldInterval &interval = _ranges[ i ].intervals()[ m / 2 ]; if ( m % 2 == 0 ) { - toCmp = _ranges[ i ].intervals()[ m / 2 ]._lower._bound; - } else { - toCmp = _ranges[ i ].intervals()[ m / 2 ]._upper._bound; + toCmp = interval._lower._bound; + toCmpInclusive = interval._lower._inclusive; + } + else { + toCmp = interval._upper._bound; + toCmpInclusive = interval._upper._inclusive; } int cmp = toCmp.woCompare( e, false ); if ( !forward ) { @@ -963,41 +890,60 @@ namespace mongo { } if ( cmp < 0 ) { l = m; - } else if ( cmp > 0 ) { + } + else if ( cmp > 0 ) { h = m; - } else { - return ( m % 2 == 0 ) ? m : m - 1; + } + else { + if ( m % 2 == 0 ) { + lowEquality = true; + } + int ret = m; + // if left match and inclusive, all good + // if left match and not inclusive, return right before left bound + // if right match and inclusive, return left bound + // if right match and not inclusive, return right bound + if ( ( m % 2 == 0 && !toCmpInclusive ) || ( m % 2 == 1 && toCmpInclusive ) ) { + --ret; + } + return ret; } } assert( l + 1 == h ); return l; } - + bool FieldRangeVector::matches( const BSONObj &obj ) const { - BSONObjIterator k( _keyPattern ); - for( int i = 0; i < (int)_ranges.size(); ++i ) { - if ( _ranges[ i ].empty() ) { - return false; - } - BSONElement kk = k.next(); - int number = (int) kk.number(); - bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0; - BSONElementSet keys; - obj.getFieldsDotted( kk.fieldName(), keys ); - bool match = false; - for( BSONElementSet::const_iterator j = keys.begin(); j != keys.end(); ++j ) { - if ( matchesElement( *j, i, forward ) ) { - match = true; + if ( !_indexSpec.get() ) { + _indexSpec.reset( new IndexSpec( _keyPattern ) ); + } + // TODO The representation of matching keys could potentially be optimized + // more for the case at hand. (For example, we can potentially consider + // fields individually instead of constructing several bson objects using + // multikey arrays.) But getKeys() canonically defines the key set for a + // given object and for now we are using it as is. + BSONObjSetDefaultOrder keys; + _indexSpec->getKeys( obj, keys ); + for( BSONObjSetDefaultOrder::const_iterator i = keys.begin(); i != keys.end(); ++i ) { + BSONObjIterator j( *i ); + BSONObjIterator k( _keyPattern ); + bool match = true; + for( int l = 0; l < (int)_ranges.size(); ++l ) { + int number = (int) k.next().number(); + bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0; + if ( !matchesElement( j.next(), l, forward ) ) { + match = false; break; } } - if ( !match ) { - return false; + if ( match ) { + // The *i key matched a valid range for every element. 
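FieldRangeVector::matches above now asks IndexSpec::getKeys for every key the document would generate (which handles multikey arrays) and accepts the document if any one generated key lies inside every field's interval list. A simplified standalone version of that check over integer keys and inclusive ranges:

// Simplified "does any generated key satisfy every field's intervals" check;
// keys are vectors of ints, one entry per index field (illustrative only).
#include <iostream>
#include <vector>

struct Range { int lo, hi; };  // inclusive

bool inRanges(int v, const std::vector<Range>& ranges) {
    for (const Range& r : ranges)
        if (v >= r.lo && v <= r.hi) return true;
    return false;
}

bool matches(const std::vector<std::vector<int>>& generatedKeys,
             const std::vector<std::vector<Range>>& fieldRanges) {
    for (const auto& key : generatedKeys) {        // e.g. one key per array element
        bool ok = true;
        for (size_t f = 0; f < fieldRanges.size(); ++f)
            if (!inRanges(key[f], fieldRanges[f])) { ok = false; break; }
        if (ok) return true;                       // one fully matching key suffices
    }
    return false;
}

int main() {
    // Index { a: 1, b: 1 }, document { a: [1, 9], b: 5 } -> keys {1,5} and {9,5}.
    std::vector<std::vector<int>> keys = {{1, 5}, {9, 5}};
    std::vector<std::vector<Range>> ranges = {{{8, 10}}, {{0, 6}}};
    std::cout << (matches(keys, ranges) ? "matches" : "no match") << "\n";
}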
+ return true; } } - return true; + return false; } - + // TODO optimize more int FieldRangeVector::Iterator::advance( const BSONObj &curr ) { BSONObjIterator j( curr ); @@ -1009,7 +955,8 @@ namespace mongo { for( int i = 0; i < (int)_i.size(); ++i ) { if ( i > 0 && !_v._ranges[ i - 1 ].intervals()[ _i[ i - 1 ] ].equality() ) { // if last bound was inequality, we don't know anything about where we are for this field - // TODO if possible avoid this certain cases when field in prev key is the same + // TODO if possible avoid this certain cases when value in previous field of the previous + // key is the same as value of previous field in current key setMinus( i ); } bool eq = false; @@ -1017,20 +964,23 @@ namespace mongo { bool reverse = ( ( oo.number() < 0 ) ^ ( _v._direction < 0 ) ); BSONElement jj = j.next(); if ( _i[ i ] == -1 ) { // unknown position for this field, do binary search - int l = _v.matchingLowElement( jj, i, !reverse ); + bool lowEquality; + int l = _v.matchingLowElement( jj, i, !reverse, lowEquality ); if ( l % 2 == 0 ) { // we are in a valid range for this field _i[ i ] = l / 2; int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ]; if ( diff > 1 ) { latestNonEndpoint = i; - } else if ( diff == 1 ) { + } + else if ( diff == 1 ) { int x = _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._bound.woCompare( jj, false ); if ( x != 0 ) { latestNonEndpoint = i; } } continue; - } else { // not in a valid range for this field - determine if and how to advance + } + else { // not in a valid range for this field - determine if and how to advance // check if we're after the last interval for this field if ( l == (int)_v._ranges[ i ].intervals().size() * 2 - 1 ) { if ( latestNonEndpoint == -1 ) { @@ -1038,18 +988,24 @@ namespace mongo { } setZero( latestNonEndpoint + 1 ); // skip to curr / latestNonEndpoint + 1 / superlative - for( int j = latestNonEndpoint + 1; j < (int)_i.size(); ++j ) { - _cmp[ j ] = _superlative[ j ]; - } - return latestNonEndpoint + 1; + _after = true; + return latestNonEndpoint + 1; } _i[ i ] = ( l + 1 ) / 2; + if ( lowEquality ) { + // skip to curr / i + 1 / superlative + _after = true; + return i + 1; + } // skip to curr / i / nextbounds _cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound; + _inc[ i ] = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive; for( int j = i + 1; j < (int)_i.size(); ++j ) { _cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound; + _inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive; } - return i; + _after = false; + return i; } } bool first = true; @@ -1062,7 +1018,7 @@ namespace mongo { if ( reverse ) { x = -x; } - if ( x == 0 ) { + if ( x == 0 && _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._inclusive ) { eq = true; break; } @@ -1081,16 +1037,27 @@ namespace mongo { x = -x; } } + // if we're equal to and not inclusive the lower bound, advance + if ( ( x == 0 && !_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive ) ) { + setZero( i + 1 ); + // skip to curr / i + 1 / superlative + _after = true; + return i + 1; + } // if we're less than the lower bound, advance if ( x > 0 ) { setZero( i + 1 ); // skip to curr / i / nextbounds _cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound; + _inc[ i ] = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive; for( int j = i + 1; j < (int)_i.size(); ++j ) { _cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound; + _inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive; } + _after = false; return i; - } else { 
+ } + else { break; } } @@ -1101,26 +1068,32 @@ namespace mongo { } int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ]; if ( diff > 1 || ( !eq && diff == 1 ) ) { - // check if we're not at the end of valid values for this field + // check if we're not at the end of valid values for this field latestNonEndpoint = i; - } else if ( diff == 0 ) { // check if we're past the last interval for this field + } + else if ( diff == 0 ) { // check if we're past the last interval for this field if ( latestNonEndpoint == -1 ) { return -2; } // more values possible, skip... setZero( latestNonEndpoint + 1 ); // skip to curr / latestNonEndpoint + 1 / superlative - for( int j = latestNonEndpoint + 1; j < (int)_i.size(); ++j ) { - _cmp[ j ] = _superlative[ j ]; - } + _after = true; return latestNonEndpoint + 1; } } - return -1; + return -1; } - + + void FieldRangeVector::Iterator::prepDive() { + for( int j = 0; j < (int)_i.size(); ++j ) { + _cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound; + _inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive; + } + } + struct SimpleRegexUnitTest : UnitTest { - void run(){ + void run() { { BSONObjBuilder b; b.appendRegex("r", "^foo"); @@ -1179,38 +1152,39 @@ namespace mongo { } simple_regex_unittest; - long long applySkipLimit( long long num , const BSONObj& cmd ){ + long long applySkipLimit( long long num , const BSONObj& cmd ) { BSONElement s = cmd["skip"]; BSONElement l = cmd["limit"]; - - if ( s.isNumber() ){ + + if ( s.isNumber() ) { num = num - s.numberLong(); if ( num < 0 ) { num = 0; } } - - if ( l.isNumber() ){ + + if ( l.isNumber() ) { long long limit = l.numberLong(); - if ( limit < num ){ + if ( limit < num ) { num = limit; } } - return num; + return num; } - string debugString( Message& m ){ + string debugString( Message& m ) { stringstream ss; ss << "op: " << opToString( m.operation() ) << " len: " << m.size(); - if ( m.operation() >= 2000 && m.operation() < 2100 ){ + if ( m.operation() >= 2000 && m.operation() < 2100 ) { DbMessage d(m); ss << " ns: " << d.getns(); - switch ( m.operation() ){ + switch ( m.operation() ) { case dbUpdate: { int flags = d.pullInt(); BSONObj q = d.nextJsObj(); - ss << " flags: " << flags << " query: " << q; + BSONObj o = d.nextJsObj(); + ss << " flags: " << flags << " query: " << q << " update: " << o; break; } case dbInsert: @@ -1225,10 +1199,10 @@ namespace mongo { default: ss << " CANNOT HANDLE YET"; } - - + + } return ss.str(); - } + } } // namespace mongo diff --git a/db/queryutil.h b/db/queryutil.h index 37dfa2a..2746695 100644 --- a/db/queryutil.h +++ b/db/queryutil.h @@ -26,7 +26,7 @@ namespace mongo { bool _inclusive; bool operator==( const FieldBound &other ) const { return _bound.woCompare( other._bound ) == 0 && - _inclusive == other._inclusive; + _inclusive == other._inclusive; } void flipInclusive() { _inclusive = !_inclusive; } }; @@ -59,8 +59,6 @@ namespace mongo { FieldRange( const BSONElement &e = BSONObj().firstElement() , bool isNot=false , bool optimize=true ); const FieldRange &operator&=( const FieldRange &other ); const FieldRange &operator|=( const FieldRange &other ); - // does not remove fully contained ranges (eg [1,3] - [2,2] doesn't remove anything) - // in future we can change so that an or on $in:[3] combined with $in:{$gt:2} doesn't scan 3 a second time const FieldRange &operator-=( const FieldRange &other ); // true iff other includes this bool operator<=( const FieldRange &other ); @@ -79,7 +77,7 @@ namespace mongo { if ( equality() ) { return true; } - for( 
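applySkipLimit above adjusts a count result for the skip and limit options: subtract skip (clamped at zero), then cap at limit. A small standalone restatement of that arithmetic, with explicit has-flags standing in for the BSONElement presence checks:

// Standalone restatement of the skip/limit adjustment on a count result.
#include <algorithm>
#include <iostream>

long long applySkipLimit(long long num, long long skip, long long limit,
                         bool hasSkip, bool hasLimit) {
    if (hasSkip)
        num = std::max<long long>(0, num - skip);   // skipped docs don't count
    if (hasLimit && limit < num)
        num = limit;                                // cap at the requested limit
    return num;
}

int main() {
    std::cout << applySkipLimit(100, 30, 50, true, true) << "\n";  // 50
    std::cout << applySkipLimit(100, 90, 50, true, true) << "\n";  // 10
}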
vector< FieldInterval >::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + for( vector< FieldInterval >::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { if ( !i->equality() ) { return false; } @@ -88,13 +86,14 @@ namespace mongo { } bool nontrivial() const { return - ! empty() && - ( minKey.firstElement().woCompare( min(), false ) != 0 || + ! empty() && + ( _intervals.size() != 1 || + minKey.firstElement().woCompare( min(), false ) != 0 || maxKey.firstElement().woCompare( max(), false ) != 0 ); } bool empty() const { return _intervals.empty(); } void makeEmpty() { _intervals.clear(); } - const vector< FieldInterval > &intervals() const { return _intervals; } + const vector< FieldInterval > &intervals() const { return _intervals; } string getSpecial() const { return _special; } void setExclusiveBounds() { for( vector< FieldInterval >::iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { @@ -122,7 +121,7 @@ namespace mongo { vector< BSONObj > _objData; string _special; }; - + // implements query pattern matching, used to determine if a query is // similar to an earlier query and should use the same plan class QueryPattern { @@ -193,8 +192,8 @@ namespace mongo { // the specified direction of traversal. For example, given a simple index {i:1} // and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList // would be valid for index {i:-1} with direction -1. - typedef vector< pair< BSONObj, BSONObj > > BoundList; - + typedef vector< pair< BSONObj, BSONObj > > BoundList; + // ranges of fields' value that may be determined from query -- used to // determine index limits class FieldRangeSet { @@ -210,19 +209,20 @@ namespace mongo { map< string, FieldRange >::const_iterator f = _ranges.find( fieldName ); if ( f == _ranges.end() ) return trivialRange(); - return f->second; + return f->second; } FieldRange &range( const char *fieldName ) { map< string, FieldRange >::iterator f = _ranges.find( fieldName ); if ( f == _ranges.end() ) return trivialRange(); - return f->second; + return f->second; } int nNontrivialRanges() const { int count = 0; - for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { if ( i->second.nontrivial() ) ++count; + } return count; } const char *ns() const { return _ns; } @@ -236,6 +236,18 @@ namespace mongo { } QueryPattern pattern( const BSONObj &sort = BSONObj() ) const; string getSpecial() const; + // Btree scanning for a multidimentional key range will yield a + // multidimensional box. The idea here is that if an 'other' + // multidimensional box contains the current box we don't have to scan + // the current box. If the 'other' box contains the current box in + // all dimensions but one, we can safely subtract the values of 'other' + // along that one dimension from the values for the current box on the + // same dimension. In other situations, subtracting the 'other' + // box from the current box yields a result that is not a box (but + // rather can be expressed as a union of boxes). We don't support + // such splitting currently in calculating index ranges. Note that + // where I have said 'box' above, I actually mean sets of boxes because + // a field range can consist of multiple intervals. 
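// Aside on the comment above (illustrative standalone sketch, not part of this
// patch): within a single field the subtraction stays representable because
// removing one interval from another leaves at most two intervals, whereas the
// difference of multi-field boxes is generally not a box.  Closed integer
// intervals stand in for FieldInterval here.
#include <iostream>
#include <vector>

struct Interval { int lo, hi; };   // closed interval [lo, hi], empty if lo > hi

// Subtract b from a in one dimension; the result is a union of at most two
// intervals, so a per-field interval list can still hold it.
std::vector<Interval> subtract(const Interval& a, const Interval& b) {
    std::vector<Interval> out;
    if (b.hi < a.lo || b.lo > a.hi) { out.push_back(a); return out; }  // no overlap
    if (a.lo < b.lo) out.push_back(Interval{a.lo, b.lo - 1});          // piece left of b
    if (b.hi < a.hi) out.push_back(Interval{b.hi + 1, a.hi});          // piece right of b
    return out;                                    // empty when b covers a entirely
}

int main() {
    for (const Interval& i : subtract(Interval{1, 10}, Interval{4, 6}))
        std::cout << '[' << i.lo << ',' << i.hi << "] ";               // [1,3] [7,10]
    std::cout << '\n';
    return 0;
}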
const FieldRangeSet &operator-=( const FieldRangeSet &other ) { int nUnincluded = 0; string unincludedKey; @@ -246,22 +258,25 @@ namespace mongo { if ( cmp == 0 ) { if ( i->second <= j->second ) { // nothing - } else { + } + else { ++nUnincluded; unincludedKey = i->first; } ++i; ++j; - } else if ( cmp < 0 ) { + } + else if ( cmp < 0 ) { ++i; - } else { + } + else { // other has a bound we don't, nothing can be done return *this; } } if ( j != other._ranges.end() ) { // other has a bound we don't, nothing can be done - return *this; + return *this; } if ( nUnincluded > 1 ) { return *this; @@ -284,27 +299,37 @@ namespace mongo { i->second &= j->second; ++i; ++j; - } else if ( cmp < 0 ) { + } + else if ( cmp < 0 ) { ++i; - } else { + } + else { _ranges[ j->first ] = j->second; ++j; } } while( j != other._ranges.end() ) { _ranges[ j->first ] = j->second; - ++j; + ++j; } appendQueries( other ); return *this; } // TODO get rid of this BoundList indexBounds( const BSONObj &keyPattern, int direction ) const; + + /** + * @param return - A new FieldRangeSet based on this FieldRangeSet, but with only + * a subset of the fields. + * @param fields - Only fields which are represented as field names in this object + * will be included in the returned FieldRangeSet. + */ + FieldRangeSet *subset( const BSONObj &fields ) const; private: void appendQueries( const FieldRangeSet &other ) { for( vector< BSONObj >::const_iterator i = other._queries.begin(); i != other._queries.end(); ++i ) { - _queries.push_back( *i ); - } + _queries.push_back( *i ); + } } void makeEmpty() { for( map< string, FieldRange >::iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { @@ -321,11 +346,21 @@ namespace mongo { vector< BSONObj > _queries; }; + class IndexSpec; + + /** + * This class manages the ranges of valid element values for each field in + * an ordered list of signed fields corresponding to an index specification. + */ class FieldRangeVector { public: + /** + * @param frs The valid ranges for all fields, as defined by the query spec + * @prarm keyPattern The index key pattern + * @param direction The direction of index traversal + */ FieldRangeVector( const FieldRangeSet &frs, const BSONObj &keyPattern, int direction ) - :_keyPattern( keyPattern ), _direction( direction >= 0 ? 1 : -1 ) - { + :_keyPattern( keyPattern ), _direction( direction >= 0 ? 1 : -1 ) { _queries = frs._queries; BSONObjIterator i( _keyPattern ); while( i.more() ) { @@ -334,7 +369,8 @@ namespace mongo { bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 
1 : -1 ) > 0 ); if ( forward ) { _ranges.push_back( frs.range( e.fieldName() ) ); - } else { + } + else { _ranges.push_back( FieldRange() ); frs.range( e.fieldName() ).reverse( _ranges.back() ); } @@ -348,14 +384,14 @@ namespace mongo { ret *= i->intervals().size(); } return ret; - } + } BSONObj startKey() const { BSONObjBuilder b; for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { const FieldInterval &fi = i->intervals().front(); b.appendAs( fi._lower._bound, "" ); } - return b.obj(); + return b.obj(); } BSONObj endKey() const { BSONObjBuilder b; @@ -363,7 +399,7 @@ namespace mongo { const FieldInterval &fi = i->intervals().back(); b.appendAs( fi._upper._bound, "" ); } - return b.obj(); + return b.obj(); } BSONObj obj() const { BSONObjBuilder b; @@ -371,27 +407,23 @@ namespace mongo { for( int i = 0; i < (int)_ranges.size(); ++i ) { BSONArrayBuilder a( b.subarrayStart( k.next().fieldName() ) ); for( vector< FieldInterval >::const_iterator j = _ranges[ i ].intervals().begin(); - j != _ranges[ i ].intervals().end(); ++j ) { + j != _ranges[ i ].intervals().end(); ++j ) { a << BSONArray( BSON_ARRAY( j->_lower._bound << j->_upper._bound ).clientReadable() ); } a.done(); } return b.obj(); } + /** + * @return true iff the provided document matches valid ranges on all + * of this FieldRangeVector's fields, which is the case iff this document + * would be returned while scanning the index corresponding to this + * FieldRangeVector. This function is used for $or clause deduping. + */ bool matches( const BSONObj &obj ) const; class Iterator { public: - Iterator( const FieldRangeVector &v ) : _v( v ), _i( _v._ranges.size(), -1 ), _cmp( _v._ranges.size(), 0 ), _superlative( _v._ranges.size(), 0 ) { - static BSONObj minObj = minObject(); - static BSONElement minElt = minObj.firstElement(); - static BSONObj maxObj = maxObject(); - static BSONElement maxElt = maxObj.firstElement(); - BSONObjIterator i( _v._keyPattern ); - for( int j = 0; j < (int)_superlative.size(); ++j ) { - int number = (int) i.next().number(); - bool forward = ( ( number >= 0 ? 1 : -1 ) * ( _v._direction >= 0 ? 1 : -1 ) > 0 ); - _superlative[ j ] = forward ? 
&maxElt : &minElt; - } + Iterator( const FieldRangeVector &v ) : _v( v ), _i( _v._ranges.size(), -1 ), _cmp( _v._ranges.size(), 0 ), _inc( _v._ranges.size(), false ), _after() { } static BSONObj minObject() { BSONObjBuilder b; @@ -413,7 +445,8 @@ namespace mongo { for( unsigned j = i + 1; j < _i.size(); ++j ) { _i[ j ] = 0; } - } else { + } + else { _i[ 0 ] = _v._ranges[ 0 ].intervals().size(); } return ok(); @@ -424,6 +457,9 @@ namespace mongo { // >= 0 skip parameter int advance( const BSONObj &curr ); const vector< const BSONElement * > &cmp() const { return _cmp; } + const vector< bool > &inc() const { return _inc; } + bool after() const { return _after; } + void prepDive(); void setZero( int i ) { for( int j = i; j < (int)_i.size(); ++j ) { _i[ j ] = 0; @@ -452,55 +488,61 @@ namespace mongo { const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ]; b.appendAs( fi._upper._bound, "" ); } - return b.obj(); + return b.obj(); } // check private: const FieldRangeVector &_v; vector< int > _i; vector< const BSONElement* > _cmp; - vector< const BSONElement* > _superlative; + vector< bool > _inc; + bool _after; }; private: - int matchingLowElement( const BSONElement &e, int i, bool direction ) const; + int matchingLowElement( const BSONElement &e, int i, bool direction, bool &lowEquality ) const; bool matchesElement( const BSONElement &e, int i, bool direction ) const; vector< FieldRange > _ranges; BSONObj _keyPattern; int _direction; vector< BSONObj > _queries; // make sure mem owned + // This IndexSpec is lazily constructed directly from _keyPattern if needed. + mutable shared_ptr< IndexSpec > _indexSpec; }; - + // generages FieldRangeSet objects, accounting for or clauses class FieldRangeOrSet { public: FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize=true ); // if there's a useless or clause, we won't use or ranges to help with scanning bool orFinished() const { return _orFound && _orSets.empty(); } - // removes first or clause, and removes the field ranges it covers from all subsequent or clauses - // this could invalidate the result of the last topFrs() - void popOrClause() { - massert( 13274, "no or clause to pop", !orFinished() ); - const FieldRangeSet &toPop = _orSets.front(); - list< FieldRangeSet >::iterator i = _orSets.begin(); - ++i; - while( i != _orSets.end() ) { - *i -= toPop; - if( !i->matchPossible() ) { - i = _orSets.erase( i ); - } else { - ++i; - } - } - _oldOrSets.push_front( toPop ); - _orSets.pop_front(); - } + /** + * Removes the top or clause, which would have been recently scanned, and + * removes the field ranges it covers from all subsequent or clauses. As a + * side effect, this function may invalidate the return values of topFrs() + * calls made before this function was called. + * @param indexSpec - Keys of the index that was used to satisfy the last or + * clause. Used to determine the range of keys that were scanned. If + * empty we do not constrain the previous clause's ranges using index keys, + * which may reduce opportunities for range elimination. 
+ */ + void popOrClause( const BSONObj &indexSpec = BSONObj() ); FieldRangeSet *topFrs() const { FieldRangeSet *ret = new FieldRangeSet( _baseSet ); - if (_orSets.size()){ + if (_orSets.size()) { *ret &= _orSets.front(); } return ret; } + // while the original bounds are looser, they are composed of fewer + // ranges and it is faster to do operations with them; when they can be + // used instead of more precise bounds, they should + FieldRangeSet *topFrsOriginal() const { + FieldRangeSet *ret = new FieldRangeSet( _baseSet ); + if (_originalOrSets.size()) { + *ret &= _originalOrSets.front(); + } + return ret; + } void allClausesSimplified( vector< BSONObj > &ret ) const { for( list< FieldRangeSet >::const_iterator i = _orSets.begin(); i != _orSets.end(); ++i ) { if ( i->matchPossible() ) { @@ -514,47 +556,10 @@ namespace mongo { private: FieldRangeSet _baseSet; list< FieldRangeSet > _orSets; + list< FieldRangeSet > _originalOrSets; list< FieldRangeSet > _oldOrSets; // make sure memory is owned bool _orFound; }; - - /** - used for doing field limiting - */ - class FieldMatcher { - public: - FieldMatcher() - : _include(true) - , _special(false) - , _includeID(true) - , _skip(0) - , _limit(-1) - {} - - void add( const BSONObj& o ); - - void append( BSONObjBuilder& b , const BSONElement& e ) const; - - BSONObj getSpec() const; - bool includeID() { return _includeID; } - private: - - void add( const string& field, bool include ); - void add( const string& field, int skip, int limit ); - void appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested=false) const; - - bool _include; // true if default at this level is to include - bool _special; // true if this level can't be skipped or included without recursing - //TODO: benchmark vector vs map - typedef map > FieldMap; - FieldMap _fields; - BSONObj _source; - bool _includeID; - - // used for $slice operator - int _skip; - int _limit; - }; /** returns a string that when used as a matcher, would match a super set of regex() returns "" for complex regular expressions diff --git a/db/rec.h b/db/rec.h deleted file mode 100644 index 7b79c73..0000000 --- a/db/rec.h +++ /dev/null @@ -1,137 +0,0 @@ -// rec.h -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - - -/* TODO for _RECSTORE - - _ support > 2GB data per file - _ multiple files, not just indexes.dat - _ lazier writes? (may be done?) 
- _ configurable cache size - _ fix on abnormal terminations to be able to restart some -*/ - -#pragma once - -#include "reci.h" -//#include "reccache.h" - -namespace mongo { - -/* -------------------------------------------------------------------------- - A RecStoreInterface for the normal mongo mem mapped file (MongoDataFile) - storage -*/ - -NamespaceDetails* nsdetails_notinline(const char *ns); - -class MongoMemMapped_RecStore : public RecStoreInterface { -public: - VIRT char* get(DiskLoc d, unsigned len) { return d.rec()->data; } - - VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) { - return theDataFileMgr.insert(ns, obuf, len, god); - } - - VIRT void deleteRecord(const char *ns, DiskLoc d) { - theDataFileMgr._deleteRecord(nsdetails_notinline(ns), ns, d.rec(), d); - } - - VIRT void modified(DiskLoc d) { } - - VIRT void drop(const char *ns) { - dropNS(ns); - } - - VIRT void rename(const char *fromNs, const char *toNs) { - renameNamespace( fromNs, toNs ); - } - - /* close datafiles associated with the db specified. */ - VIRT void closeFiles(string dbname, string path) { - /* as this is only used for indexes so far, and we are in the same - PDFiles as the nonindex data, we just rely on them having been closed - at the same time. one day this may need to change. - */ - } - -}; - -/* An in memory RecStoreInterface implementation ---------------------------- -*/ - -#if 0 -class InMem_RecStore : public RecStoreInterface { - enum InmemfileValue { INMEMFILE = 0x70000000 }; -public: - static char* get(DiskLoc d, unsigned len) { - assert( d.a() == INMEMFILE ); -#ifdef __LP64__ - massert( 10372 , "64 bit not done", false); - return 0; -#else - return (char *) d.getOfs(); -#endif - } - - static DiskLoc insert(const char *ns, const void *obuf, int len, bool god) { -#ifdef __LP64__ - assert( 0 ); - throw -1; -#else - char *p = (char *) malloc(len); - assert( p ); - memcpy(p, obuf, len); - int b = (int) p; - assert( b > 0 ); - return DiskLoc(INMEMFILE, b); -#endif - } - - static void modified(DiskLoc d) { } - - static void drop(const char *ns) { - log() << "warning: drop() not yet implemented for InMem_RecStore" << endl; - } - - virtual void rename(const char *fromNs, const char *toNs) { - massert( 10373 , "rename not yet implemented for InMem_RecStore", false ); - } -}; -#endif - -/* Glue btree to RecStoreInterface: ---------------------------- */ - -typedef MongoMemMapped_RecStore StoreToUse; - -extern StoreToUse *btreeStore; - -const int BucketSize = 8192; - -inline BtreeBucket* DiskLoc::btree() const { - assert( fileNo != -1 ); - return (BtreeBucket*) btreeStore->get(*this, BucketSize); -} - -inline BtreeBucket* DiskLoc::btreemod() const { - assert( fileNo != -1 ); - BtreeBucket *b = (BtreeBucket*) btreeStore->get(*this, BucketSize); - btreeStore->modified(*this); - return b; -} - -} diff --git a/db/reccache.cpp b/db/reccache.cpp deleted file mode 100644 index eb20728..0000000 --- a/db/reccache.cpp +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -// storage.cpp - -#include "pch.h" -#include "pdfile.h" -//#include "reccache.h" -#include "rec.h" -#include "db.h" - -#error deprecated - do not include in project - -namespace mongo { - -//RecCache theRecCache(BucketSize); - -// 100k * 8KB = 800MB -unsigned RecCache::MAXNODES = 50000; - -void setRecCacheSize(unsigned mb) { - unsigned long long MB = mb; - log(2) << "reccache size: " << MB << "MB\n"; - uassert( 10114 , "bad cache size", MB > 0 && MB < 1000000 ); - RecCache::MAXNODES = (unsigned) MB * 1024 * 1024 / 8192; - log(3) << "RecCache::MAXNODES=" << RecCache::MAXNODES << '\n'; -} - -void writerThread() { - sleepsecs(10); - while( 1 ) { - try { - theRecCache.writeLazily(); - } - catch(...) { - log() << "exception in writerThread()" << endl; - sleepsecs(3); - } - } -} - -// called on program exit. -void recCacheCloseAll() { -#if defined(_RECSTORE) - theRecCache.closing(); -#endif -} - -int ndirtywritten; - -inline static string escape(const char *ns) { - char buf[256]; - char *p = buf; - while( 1 ) { - if( *ns == '$' ) *p = '~'; - else - *p = *ns; - if( *ns == 0 ) - break; - p++; ns++; - } - assert( p - buf < (int) sizeof(buf) ); - return buf; -} - -inline static string unescape(const char *ns) { - char buf[256]; - char *p = buf; - while( 1 ) { - if( *ns == '~' ) *p = '$'; - else - *p = *ns; - if( *ns == 0 ) - break; - p++; ns++; - } - assert( p - buf < (int) sizeof(buf) ); - return buf; -} - -string RecCache::directory() { - return cc().database()->path; -} - -/* filename format is - - -.idx -*/ - -BasicRecStore* RecCache::_initStore(string fname) { - - assert( strchr(fname.c_str(), '/') == 0 ); - assert( strchr(fname.c_str(), '\\') == 0 ); - - stringstream ss(fname); - int n; - ss >> n; - assert( n >= 0 ); - char ch; - ss >> ch; - assert( ch == '-' ); - string rest; - ss >> rest; - const char *p = rest.c_str(); - const char *q = strstr(p, ".idx"); - assert( q ); - string escaped_ns(p, q-p); - - // arbitrary limit. if you are hitting, we should use fewer files and put multiple - // indexes in a single file (which is easy to do) - massert( 10374 , "too many index files", n < 10000 ); - - if( stores.size() < (unsigned)n+1 ) - stores.resize(n+1); - assert( stores[n] == 0 ); - BasicRecStore *rs = new BasicRecStore(n); - path pf(directory()); - pf /= fname; - string full = pf.string(); - rs->init(full.c_str(), recsize); - stores[n] = rs; - string ns = unescape(escaped_ns.c_str()); - storesByNsKey[mknskey(ns.c_str())] = rs; - return rs; -} - -BasicRecStore* RecCache::initStore(int n) { - string ns; - { - stringstream ss; - ss << '/' << n << '-'; - ns = ss.str(); - } - - /* this will be slow if there are thousands of files */ - path dir(directory()); - directory_iterator end; - try { - directory_iterator i(dir); - while ( i != end ) { - string s = i->string(); - const char *p = strstr(s.c_str(), ns.c_str()); - if( p && strstr(p, ".idx") ) { - // found it - path P = *i; - return _initStore(P.leaf()); - } - i++; - } - } - catch( DBException & ) { - throw; - } - catch (...) { - string s = string("i/o error looking for .idx file in ") + directory(); - massert( 10375 , s, false); - } - stringstream ss; - ss << "index datafile missing? n=" << n; - uasserted(12500,ss.str()); - return 0; -} - -/* find the filename for a given ns. - format is - -.idx - returns filename. found is true if found. If false, a proposed name is returned for (optional) creation - of the file. 
-*/ -string RecCache::findStoreFilename(const char *_ns, bool& found) { - string namefrag; - { - stringstream ss; - ss << '-'; - ss << escape(_ns); - ss << ".idx"; - namefrag = ss.str(); - } - - path dir(directory()); - directory_iterator end; - int nmax = -1; - try { - directory_iterator i(dir); - while ( i != end ) { - string s = path(*i).leaf(); - const char *p = strstr(s.c_str(), namefrag.c_str()); - if( p ) { - found = true; - return s; - } - if( strstr(s.c_str(), ".idx") ) { - stringstream ss(s); - int n = -1; - ss >> n; - if( n > nmax ) - nmax = n; - } - i++; - } - } - catch (...) { - string s = string("i/o error looking for .idx file in ") + directory(); - massert( 10376 , s, false); - } - - // DNE. return a name that would work. - stringstream ss; - ss << nmax+1 << namefrag; - found = false; - return ss.str(); -} - -void RecCache::initStoreByNs(const char *_ns, const string& nskey) { - bool found; - string fn = findStoreFilename(_ns, found); - _initStore(fn); -} - -inline void RecCache::writeIfDirty(Node *n) { - if( n->dirty ) { - ndirtywritten++; - n->dirty = false; - store(n->loc).update(fileOfs(n->loc), n->data, recsize); - } -} - -void RecCache::closeFiles(string dbname, string path) { - assertInWriteLock(); - scoped_lock lk(rcmutex); - - // first we write all dirty pages. it is not easy to check which Nodes are for a particular - // db, so we just write them all. - writeDirty( dirtyl.begin(), true ); - - string key = path + dbname + '.'; - unsigned sz = key.size(); - for( map::iterator i = storesByNsKey.begin(); i != storesByNsKey.end(); i++ ) { - map::iterator j = i; - i++; - if( strncmp(j->first.c_str(), key.c_str(), sz) == 0 ) { - assert( stores[j->second->fileNumber] != 0 ); - stores[j->second->fileNumber] = 0; - delete j->second; - storesByNsKey.erase(j); - } - } -} - -void RecCache::closing() { - scoped_lock lk(rcmutex); - (cout << "TEMP: recCacheCloseAll() writing dirty pages...\n").flush(); - writeDirty( dirtyl.begin(), true ); - for( unsigned i = 0; i < stores.size(); i++ ) { - if( stores[i] ) { - delete stores[i]; - } - } - (cout << "TEMP: write dirty done\n").flush(); -} - -/* note that this is written in order, as much as possible, given that dirtyl is of type set. */ -void RecCache::writeDirty( set::iterator startAt, bool rawLog ) { - try { - ndirtywritten=0; - for( set::iterator i = startAt; i != dirtyl.end(); i++ ) { - map::iterator j = m.find(*i); - if( j != m.end() ) - writeIfDirty(j->second); - } - OCCASIONALLY out() << "TEMP: ndirtywritten: " << ndirtywritten << endl; - } - catch(...) { - const char *message = "Problem: bad() in RecCache::writeDirty, file io error\n"; - - if ( rawLog ) - rawOut( message ); - else - ( log() << message ).flush(); - } - dirtyl.clear(); -} - -void RecCache::writeLazily() { - int sleep = 0; - int k; - { - scoped_lock lk(rcmutex); - Timer t; - set::iterator i = dirtyl.end(); - for( k = 0; k < 100; k++ ) { - if( i == dirtyl.begin() ) { - // we're not very far behind - sleep = k < 20 ? 
2000 : 1000; - break; - } - i--; - } - writeDirty(i); - if( sleep == 0 ) { - sleep = t.millis() * 4 + 10; - } - } - - OCCASIONALLY cout << "writeLazily " << k << " sleep:" << sleep << '\n'; - sleepmillis(sleep); -} - -void RecCache::_ejectOld() { - scoped_lock lk(rcmutex); - if( nnodes <= MAXNODES ) - return; - Node *n = oldest; - while( 1 ) { - if( nnodes <= MAXNODES - 4 ) { - n->older = 0; - oldest = n; - assert( oldest ) ; - break; - } - nnodes--; - assert(n); - Node *nxt = n->newer; - writeIfDirty(n); - m.erase(n->loc); - delete n; - n = nxt; - } -} - -void RecCache::dump() { - Node *n = oldest; - Node *last = 0; - while( n ) { - assert( n->older == last ); - last = n; -// cout << n << ' ' << n->older << ' ' << n->newer << '\n'; - n=n->newer; - } - assert( newest == last ); -// cout << endl; -} - -/* cleans up everything EXCEPT storesByNsKey. - note this function is slow should not be invoked often -*/ -void RecCache::closeStore(BasicRecStore *rs) { - int n = rs->fileNumber + Base; - for( set::iterator i = dirtyl.begin(); i != dirtyl.end(); ) { - DiskLoc k = *i++; - if( k.a() == n ) - dirtyl.erase(k); - } - - for( map::iterator i = m.begin(); i != m.end(); ) { - DiskLoc k = i->first; - i++; - if( k.a() == n ) - m.erase(k); - } - - assert( stores[rs->fileNumber] != 0 ); - stores[rs->fileNumber] = 0; -/* - for( unsigned i = 0; i < stores.size(); i++ ) { - if( stores[i] == rs ) { - stores[i] = 0; - break; - } - }*/ - delete rs; // closes file -} - -void RecCache::drop(const char *_ns) { - // todo: test with a non clean shutdown file - scoped_lock lk(rcmutex); - - map::iterator it = storesByNsKey.find(mknskey(_ns)); - string fname; - if( it != storesByNsKey.end() ) { - fname = it->second->filename; - closeStore(it->second); // cleans up stores[] etc. - storesByNsKey.erase(it); - } - else { - bool found; - fname = findStoreFilename(_ns, found); - if( !found ) { - log() << "RecCache::drop: no idx file found for " << _ns << endl; - return; - } - path pf(directory()); - pf /= fname; - fname = pf.string(); - } - try { - if( !boost::filesystem::exists(fname) ) - log() << "RecCache::drop: can't find file to remove " << fname << endl; - boost::filesystem::remove(fname); - } - catch(...) { - log() << "RecCache::drop: exception removing file " << fname << endl; - } -} - -} diff --git a/db/reccache.h b/db/reccache.h deleted file mode 100644 index d0fd118..0000000 --- a/db/reccache.h +++ /dev/null @@ -1,262 +0,0 @@ -// reccache.h -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - - -/* CachedBasicRecStore - This is our store which implements a traditional page-cache type of storage - (not memory mapped files). -*/ - -/* LOCK HIERARCHY - - dblock - RecCache::rcmutex - - i.e. 
always lock dblock first if you lock both - -*/ - -#pragma once - -#error deprecated - -#include "reci.h" -#include "recstore.h" - -namespace mongo { - -class RecCache { - struct Node { - Node(void* _data) : data((char *) _data) { dirty = false; newer = 0; } - ~Node() { - free(data); - data = 0; - } - char *data; - DiskLoc loc; - bool dirty; - Node *older, *newer; // lru - }; - mongo::mutex rcmutex; // mainly to coordinate with the lazy writer thread - unsigned recsize; - map m; // the cache - Node *newest, *oldest; - unsigned nnodes; - set dirtyl; - vector stores; // DiskLoc::a() indicates the index into this vector - map storesByNsKey; // nskey -> BasicRecStore* -public: - static unsigned MAXNODES; - enum BaseValue { Base = 10000 }; -private: - BasicRecStore* _initStore(string fname); - BasicRecStore* initStore(int n); - string findStoreFilename(const char *_ns, bool& found); - void initStoreByNs(const char *ns, const string& nskey); - void closeStore(BasicRecStore *rs); - - static string directory(); - static string mknskey(const char *ns) { - return directory() + ns; - } - - /* get the right file for a given diskloc */ - BasicRecStore& store(DiskLoc& d) { - int n = d.a() - Base; - if( (int) stores.size() > n ) { - BasicRecStore *rs = stores[n]; - if( rs ) { - assert( rs->fileNumber == n ); - return *rs; - } - } - return *initStore(n); - } - BasicRecStore& store(const char *ns) { - string nskey = mknskey(ns); - BasicRecStore *&rs = storesByNsKey[nskey]; - if( rs ) - return *rs; - initStoreByNs(ns, nskey); - return *rs; - } - - void writeDirty( set::iterator i, bool rawLog = false ); - void writeIfDirty(Node *n); - void touch(Node* n) { - if( n == newest ) - return; - if( n == oldest ) { - oldest = oldest->newer; - assert( oldest || nnodes == 1 ); - } - if( n->older ) - n->older->newer = n->newer; - if( n->newer ) - n->newer->older = n->older; - n->newer = 0; - n->older = newest; - newest->newer = n; - newest = n; - } - Node* mkNode() { - Node *n = new Node(calloc(recsize,1)); // calloc is TEMP for testing. change to malloc - n->older = newest; - if( newest ) - newest->newer = n; - else { - assert( oldest == 0 ); - oldest = n; - } - newest = n; - nnodes++; - return n; - } - fileofs fileOfs(DiskLoc d) { - return ((fileofs) d.getOfs()) * recsize; - } - - void dump(); - void _ejectOld(); - -public: - /* all public functions (except constructor) should use the mutex */ - - RecCache(unsigned recsz) : recsize(recsz) { - nnodes = 0; - newest = oldest = 0; - } - - /* call this after doing some work, after you are sure you are done with modifications. - we call it from dbunlocking(). - */ - void ejectOld() { - if( nnodes > MAXNODES ) // just enough here to be inlineable for speed reasons. _ejectOld does the real work - _ejectOld(); - } - - /* bg writer thread invokes this */ - void writeLazily(); - - /* Note that this may be called BEFORE the actual writing to the node - takes place. We do flushing later on a dbunlocking() call, which happens - after the writing. 
- */ - void dirty(DiskLoc d) { - assert( d.a() >= Base ); - scoped_lock lk(rcmutex); - map::iterator i = m.find(d); - if( i != m.end() ) { - Node *n = i->second; - if( !n->dirty ) { - n->dirty = true; - dirtyl.insert(n->loc); - } - } - } - - char* get(DiskLoc d, unsigned len) { - assert( d.a() >= Base ); - assert( len == recsize ); - - scoped_lock lk(rcmutex); - map::iterator i = m.find(d); - if( i != m.end() ) { - touch(i->second); - return i->second->data; - } - - Node *n = mkNode(); - n->loc = d; - store(d).get(fileOfs(d), n->data, recsize); // could throw exception - m.insert( pair(d, n) ); - return n->data; - } - - void drop(const char *ns); - - DiskLoc insert(const char *ns, const void *obuf, int len, bool god) { - scoped_lock lk(rcmutex); - BasicRecStore& rs = store(ns); - fileofs o = rs.insert((const char *) obuf, len); - assert( o % recsize == 0 ); - fileofs recnum = o / recsize; - massert( 10377 , "RecCache file too large?", recnum <= 0x7fffffff ); - Node *n = mkNode(); - memcpy(n->data, obuf, len); - DiskLoc d(rs.fileNumber + Base, (int) recnum); - n->loc = d; - m[d] = n; - return d; - } - - void closeFiles(string dbname, string path); - - // at termination: write dirty pages and close all files - void closing(); -}; - -extern RecCache theRecCache; - -class CachedBasicRecStore : public RecStoreInterface { -public: - VIRT char* get(DiskLoc d, unsigned len) { - return theRecCache.get(d, len); - } - - VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) { - return theRecCache.insert(ns, obuf, len, god); - } - - VIRT void modified(DiskLoc d) { - theRecCache.dirty(d); - } - - /* drop collection */ - VIRT void drop(const char *ns) { - theRecCache.drop(ns); - } - - VIRT void rename(const char *fromNs, const char *toNs) { - massert( 10378 , "rename not yet implemented for CachedBasicRecStore", false ); - } - - /* close datafiles associated with the db specified. */ - VIRT void closeFiles(string dbname, string path) { - theRecCache.closeFiles(dbname, dbpath); - } -}; - -/* see concurrency.h - note on a lock reset from read->write we don't - call dbunlocking_read, we just wait for the final dbunlocking_write - call -*/ - -//inline void dbunlocking_read() { - /* - Client *c = currentClient.get(); - if ( c ) - c->top.clientStop(); - */ -//} - -//inline void dbunlocking_write() { - //theRecCache.ejectOld(); -// dbunlocking_read(); -//} - -} /*namespace*/ diff --git a/db/reci.h b/db/reci.h deleted file mode 100644 index a22f1f1..0000000 --- a/db/reci.h +++ /dev/null @@ -1,64 +0,0 @@ -// reci.h -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - - -#pragma once - -#include "diskloc.h" - -namespace mongo { - -// #define VIRT virtual -#define VIRT - -/* Subclass this and implement your real storage interface. -*/ -class RecStoreInterface { -public: - //VIRT ~RecStoreInterface() {} - - /* Get a pointer to the data at diskloc d. 
Pointer guaranteed to stay in - scope through the current database operation's life. - */ - //VIRT char* get(DiskLoc d, unsigned len) = 0; - - /* indicate that the diskloc specified has been updated. note that as-is today, the modification may come AFTER this - call -- we handle that currently -- until the dblock finishes. - */ - //VIRT void modified(DiskLoc d) = 0; - - /* insert specified data as a record */ - //VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) = 0; - - //VIRT void deleteRecord(const char *ns, DiskLoc d) { massert( 10379 , "not implemented RecStoreInterface::deleteRecord", false); } - - /* drop the collection */ - //VIRT void drop(const char *ns) = 0; - - /* rename collection */ - //VIRT void rename(const char *fromNs, const char *toNs) = 0; - - /* close datafiles associated with the db specified. */ - //VIRT void closeFiles(string dbname, string path) = 0; - - /* todo add: - closeFiles(dbname) - eraseFiles(dbname) - */ -}; - -} diff --git a/db/recstore.h b/db/recstore.h deleted file mode 100644 index 913070f..0000000 --- a/db/recstore.h +++ /dev/null @@ -1,126 +0,0 @@ -// recstore.h -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - - -#pragma once - -#error deprecated - -#include "../util/file.h" - -namespace mongo { - -using boost::uint32_t; -using boost::uint64_t; - -/* Current version supports only consistent record sizes within a store. */ - -class BasicRecStore { - struct RecStoreHeader { - uint32_t version; - uint32_t recsize; - uint64_t leof; // logical eof, actual file might be prealloc'd further - uint64_t firstDeleted; // 0 = no deleted recs - uint32_t cleanShutdown; // 0 = clean - char reserved[8192-8-8-4-4-4]; // we want our records page-aligned in the file if they are a multiple of a page's size -- so we make this 8KB with that goal - RecStoreHeader() { - version = 65; - recsize = 0; - leof = sizeof(RecStoreHeader); - firstDeleted = 0; - cleanShutdown = 1; - memset(reserved, 0, sizeof(reserved)); - } - }; - -public: - BasicRecStore(int _fileNumber) : fileNumber(_fileNumber) { } - ~BasicRecStore(); - void init(const char *fn, unsigned recsize); - fileofs insert(const char *buf, unsigned len); - void update(fileofs o, const char *buf, unsigned len); - void remove(fileofs o, unsigned len); - void get(fileofs o, char *buf, unsigned len); - - int fileNumber; // this goes in DiskLoc::a - - string filename; - -private: - - void writeHeader(); - File f; - fileofs len; - RecStoreHeader h; // h.reserved is wasteful here; fix later. 
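// Aside (illustrative standalone sketch, not part of this patch): the reserved
// array above pads the fixed header fields out to exactly one 8 KB page so that
// 8 KB records stay page-aligned; the logical EOF starts at sizeof(header), so
// the first allocated record is number 1 and begins right after the header.
// Typical LP64 field layout is assumed for the size check.
#include <cstdint>
#include <cstdio>

struct HeaderSketch {
    uint32_t version;
    uint32_t recsize;
    uint64_t leof;           // logical end of file
    uint64_t firstDeleted;   // free-list head, 0 = none
    uint32_t cleanShutdown;  // 0 = clean
    char     reserved[8192 - 8 - 8 - 4 - 4 - 4];
};
static_assert(sizeof(HeaderSketch) == 8192, "header should fill one 8 KB page");

int main() {
    const uint64_t recsize = 8192;
    for (uint64_t recnum = 1; recnum <= 3; ++recnum)  // record n sits at n * recsize
        std::printf("record %llu at byte offset %llu\n",
                    (unsigned long long)recnum, (unsigned long long)(recnum * recsize));
    return 0;
}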
- void write(fileofs ofs, const char *data, unsigned len) { - f.write(ofs, data, len); - massert( 10380 , "basicrecstore write io error", !f.bad()); - } -}; - -/* --- implementation --- */ - -inline BasicRecStore::~BasicRecStore() { - h.cleanShutdown = 0; - if( f.is_open() ) { - writeHeader(); - f.fsync(); - } -} - -inline void BasicRecStore::writeHeader() { - write(0, (const char *) &h, 28); // update header in file for new leof - uassert( 10115 , "file io error in BasicRecStore [1]", !f.bad()); -} - -inline fileofs BasicRecStore::insert(const char *buf, unsigned reclen) { - if( h.firstDeleted ) { - uasserted(11500, "deleted not yet implemented recstoreinsert"); - } - massert( 10381 , "bad len", reclen == h.recsize); - fileofs ofs = h.leof; - h.leof += reclen; - if( h.leof > len ) { - // grow the file. we grow quite a bit to avoid excessive file system fragmentations - len += (len / 8) + h.recsize; - uassert( 10116 , "recstore file too big for 32 bit", len <= 0x7fffffff || sizeof(std::streamoff) > 4 ); - write(len, "", 0); - } - writeHeader(); - write(ofs, buf, reclen); - uassert( 10117 , "file io error in BasicRecStore [2]", !f.bad()); - return ofs; -} - -/* so far, it's ok to read or update a subset of a record */ - -inline void BasicRecStore::update(fileofs o, const char *buf, unsigned len) { - assert(o <= h.leof && o >= sizeof(RecStoreHeader)); - write(o, buf, len); -} - -inline void BasicRecStore::get(fileofs o, char *buf, unsigned len) { - assert(o <= h.leof && o >= sizeof(RecStoreHeader)); - f.read(o, buf, len); - massert( 10382 , "basicrestore::get I/O error", !f.bad()); -} - -inline void BasicRecStore::remove(fileofs o, unsigned len) { - uasserted(11501, "not yet implemented recstoreremove"); -} - -} diff --git a/db/repl.cpp b/db/repl.cpp index ea0eab9..b14034d 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -25,7 +25,7 @@ local.sources - indicates what sources we pull from as a "slave", and the last update of each local.oplog.$main - our op log as "master" - local.dbinfo. + local.dbinfo. - no longer used??? local.pair.startup - can contain a special value indicating for a pair that we have the master copy. used when replacing other half of the pair which has permanently failed. local.pair.sync - { initialsynccomplete: 1 } @@ -49,13 +49,13 @@ #include "repl/rs.h" namespace mongo { - + // our config from command line etc. ReplSettings replSettings; /* if 1 sync() is running */ volatile int syncing = 0; - static volatile int relinquishSyncingSome = 0; + static volatile int relinquishSyncingSome = 0; /* if true replace our peer in a replication pair -- don't worry about if his local.oplog.$main is empty. @@ -68,9 +68,9 @@ namespace mongo { const char *replAllDead = 0; time_t lastForcedResync = 0; - + IdTracker &idTracker = *( new IdTracker() ); - + } // namespace mongo #include "replpair.h" @@ -159,8 +159,8 @@ namespace mongo { break; { dbtemprelease t; - relinquishSyncingSome = 1; - sleepmillis(1); + relinquishSyncingSome = 1; + sleepmillis(1); } } if ( syncing ) { @@ -206,7 +206,7 @@ namespace mongo { return true; } } cmdForceDead; - + /* operator requested resynchronization of replication (on the slave). 
{ resync : 1 } */ class CmdResync : public Command { public: @@ -221,22 +221,28 @@ namespace mongo { void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://www.mongodb.org/display/DOCS/Master+Slave"; } CmdResync() : Command("resync") { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if( cmdLine.usingReplSets() ) { + errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations"; + result.append("info", "http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member"); + return false; + } + if ( cmdObj.getBoolField( "force" ) ) { if ( !waitForSyncToFinish( errmsg ) ) return false; replAllDead = "resync forced"; - } + } if ( !replAllDead ) { errmsg = "not dead, no need to resync"; return false; } if ( !waitForSyncToFinish( errmsg ) ) return false; - + ReplSource::forceResyncDead( "client" ); result.append( "info", "triggered resync for all sources" ); - return true; - } + return true; + } bool waitForSyncToFinish( string &errmsg ) const { // Wait for slave thread to finish syncing, so sources will be be // reloaded with new saved state on next pass. @@ -246,7 +252,7 @@ namespace mongo { break; { dbtemprelease t; - relinquishSyncingSome = 1; + relinquishSyncingSome = 1; sleepmillis(1); } } @@ -257,16 +263,31 @@ namespace mongo { return true; } } cmdResync; - - bool anyReplEnabled(){ - return replPair || replSettings.slave || replSettings.master; + + bool anyReplEnabled() { + return replPair || replSettings.slave || replSettings.master || theReplSet; } - void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level ){ - + bool replAuthenticate(DBClientBase *conn); + + void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level ) { + + if ( replSet ) { + if( theReplSet == 0 ) { + result.append("ismaster", false); + result.append("secondary", false); + result.append("info", ReplSet::startupStatusMsg); + result.append( "isreplicaset" , true ); + return; + } + + theReplSet->fillIsMaster(result); + return; + } + if ( replAllDead ) { result.append("ismaster", 0); - if( authed ) { + if( authed ) { if ( replPair ) result.append("remote", replPair->remote); } @@ -285,25 +306,25 @@ namespace mongo { result.appendBool("ismaster", _isMaster() ); } - if ( level && replSet ){ + if ( level && replSet ) { result.append( "info" , "is replica set" ); } - else if ( level ){ + else if ( level ) { BSONObjBuilder sources( result.subarrayStart( "sources" ) ); - + readlock lk( "local.sources" ); - Client::Context ctx( "local.sources" ); + Client::Context ctx( "local.sources", dbpath, 0, authed ); shared_ptr c = findTableScan("local.sources", BSONObj()); int n = 0; - while ( c->ok() ){ + while ( c->ok() ) { BSONObj s = c->current(); - + BSONObjBuilder bb; bb.append( s["host"] ); string sourcename = s["source"].valuestr(); if ( sourcename != "main" ) bb.append( s["source"] ); - + { BSONElement e = s["syncedTo"]; BSONObjBuilder t( bb.subobjStart( "syncedTo" ) ); @@ -311,23 +332,27 @@ namespace mongo { t.append( "inc" , e.timestampInc() ); t.done(); } - - if ( level > 1 ){ + + if ( level > 1 ) { dbtemprelease unlock; + // note: there is no so-style timeout on this connection; perhaps we should have one. 
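// Aside (illustrative sketch, not the driver API): the hunk below reports slave
// lag as the newest op time seen on the master minus the slave's syncedTo
// marker, divided by 1000 to convert milliseconds to seconds.
#include <cstdint>
#include <iostream>

double lagSeconds(int64_t masterLastMillis, int64_t syncedToMillis) {
    // A negative result just means the saved syncedTo is ahead of the sampled op.
    return (double)(masterLastMillis - syncedToMillis) / 1000.0;
}

int main() {
    std::cout << lagSeconds(1700000123000LL, 1700000120500LL) << '\n';  // prints 2.5
    return 0;
}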
ScopedDbConnection conn( s["host"].valuestr() ); - BSONObj first = conn->findOne( (string)"local.oplog.$" + sourcename , Query().sort( BSON( "$natural" << 1 ) ) ); - BSONObj last = conn->findOne( (string)"local.oplog.$" + sourcename , Query().sort( BSON( "$natural" << -1 ) ) ); - bb.appendDate( "masterFirst" , first["ts"].timestampTime() ); - bb.appendDate( "masterLast" , last["ts"].timestampTime() ); - double lag = (double) (last["ts"].timestampTime() - s["syncedTo"].timestampTime()); - bb.append( "lagSeconds" , lag / 1000 ); + DBClientConnection *cliConn = dynamic_cast< DBClientConnection* >( &conn.conn() ); + if ( cliConn && replAuthenticate( cliConn ) ) { + BSONObj first = conn->findOne( (string)"local.oplog.$" + sourcename , Query().sort( BSON( "$natural" << 1 ) ) ); + BSONObj last = conn->findOne( (string)"local.oplog.$" + sourcename , Query().sort( BSON( "$natural" << -1 ) ) ); + bb.appendDate( "masterFirst" , first["ts"].timestampTime() ); + bb.appendDate( "masterLast" , last["ts"].timestampTime() ); + double lag = (double) (last["ts"].timestampTime() - s["syncedTo"].timestampTime()); + bb.append( "lagSeconds" , lag / 1000 ); + } conn.done(); } sources.append( BSONObjBuilder::numStr( n++ ) , bb.obj() ); c->advance(); } - + sources.done(); } } @@ -345,26 +370,15 @@ namespace mongo { virtual LockType locktype() const { return NONE; } CmdIsMaster() : Command("isMaster", true, "ismaster") { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { - /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not - authenticated. - we allow unauthenticated ismaster but we aren't as verbose informationally if - one is not authenticated for admin db to be safe. - */ - - if( replSet ) { - if( theReplSet == 0 ) { - result.append("ismaster", false); - result.append("secondary", false); - errmsg = "replSet still trying to initialize"; - result.append("info", ReplSet::startupStatusMsg); - return true; - } - theReplSet->fillIsMaster(result); - return true; - } - - bool authed = cc().getAuthenticationInfo()->isAuthorizedReads("admin"); + /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not + authenticated. + we allow unauthenticated ismaster but we aren't as verbose informationally if + one is not authenticated for admin db to be safe. + */ + bool authed = cc().getAuthenticationInfo()->isAuthorizedReads("admin"); appendReplicationInfo( result , authed ); + + result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize); return true; } } cmdismaster; @@ -375,14 +389,14 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return NONE; } CmdIsInitialSyncComplete() : Command( "isinitialsynccomplete" ) {} virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { result.appendBool( "initialsynccomplete", getInitialSyncCompleted() ); return true; } } cmdisinitialsynccomplete; - + /* negotiate who is master -1=not set (probably means we just booted) @@ -482,7 +496,7 @@ namespace mongo { return true; } } cmdnegotiatemaster; - + int ReplPair::negotiate(DBClientConnection *conn, string method) { BSONObjBuilder b; b.append("negotiatemaster",1); @@ -491,7 +505,7 @@ namespace mongo { b.append("your_port", remotePort); BSONObj cmd = b.done(); BSONObj res = conn->findOne("admin.$cmd", cmd); - if ( ! res["ok"].trueValue() ){ + if ( ! 
res["ok"].trueValue() ) { string message = method + " negotiate failed"; problem() << message << ": " << res.toString() << '\n'; setMasterLocked(State_Confused, message.c_str()); @@ -503,7 +517,8 @@ namespace mongo { // choose who is master. if ( x != State_Slave && x != State_Master && x != State_Negotiating ) { problem() << method << " negotiate: bad you_are value " << res.toString() << endl; - } else if ( x != State_Negotiating ) { + } + else if ( x != State_Negotiating ) { string message = method + " negotiation"; setMasterLocked(x, message.c_str()); } @@ -542,8 +557,8 @@ namespace mongo { break; addDbNextPass.insert( e.fieldName() ); } - } - + } + dbsObj = o.getObjectField("incompleteCloneDbs"); if ( !dbsObj.isEmpty() ) { BSONObjIterator i(dbsObj); @@ -553,7 +568,7 @@ namespace mongo { break; incompleteCloneDbs.insert( e.fieldName() ); } - } + } _lastSavedLocalTs = OpTime( o.getField( "localLogTs" ).date() ); } @@ -569,7 +584,7 @@ namespace mongo { b.appendTimestamp("syncedTo", syncedTo.asDate()); b.appendTimestamp("localLogTs", _lastSavedLocalTs.asDate()); - + BSONObjBuilder dbsNextPassBuilder; int n = 0; for ( set::iterator i = addDbNextPass.begin(); i != addDbNextPass.end(); i++ ) { @@ -622,7 +637,7 @@ namespace mongo { } } - static void addSourceToList(ReplSource::SourceVector &v, ReplSource& s, const BSONObj &spec, ReplSource::SourceVector &old) { + static void addSourceToList(ReplSource::SourceVector &v, ReplSource& s, ReplSource::SourceVector &old) { if ( !s.syncedTo.isNull() ) { // Don't reuse old ReplSource if there was a forced resync. for ( ReplSource::SourceVector::iterator i = old.begin(); i != old.end(); ) { if ( s == **i ) { @@ -684,11 +699,12 @@ namespace mongo { else { try { massert( 10384 , "--only requires use of --source", cmdLine.only.empty()); - } catch ( ... ) { + } + catch ( ... ) { dbexit( EXIT_BADOPTIONS ); } } - + if ( replPair ) { const string &remote = replPair->remote; // --pairwith host specified. 
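// Aside on the addSourceToList() change earlier in this hunk (illustrative
// standalone sketch, not part of this patch): when a freshly parsed source has
// already synced (syncedTo non-null), the visible loop searches the previously
// loaded sources for a match so the existing object and its state can be kept.
// How the loop completes is an assumption here; the types are simplified.
#include <memory>
#include <string>
#include <vector>

struct Source {                       // simplified stand-in for ReplSource
    std::string host, name;
    long long syncedTo = 0;           // 0 = never synced
    bool operator==(const Source& o) const { return host == o.host && name == o.name; }
};

void addSourceToList(std::vector<std::shared_ptr<Source>>& v, const Source& s,
                     std::vector<std::shared_ptr<Source>>& old) {
    if (s.syncedTo != 0) {            // don't reuse old state after a forced resync
        for (auto i = old.begin(); i != old.end(); ++i) {
            if (**i == s) { v.push_back(*i); old.erase(i); return; }
        }
    }
    v.push_back(std::make_shared<Source>(s));   // otherwise start from the new spec
}

int main() { return 0; }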
@@ -730,9 +746,9 @@ namespace mongo { tmp.syncedTo = OpTime(); tmp.replacing = true; } - } + } if ( ( !replPair && tmp.syncedTo.isNull() ) || - ( replPair && replSettings.fastsync ) ) { + ( replPair && replSettings.fastsync ) ) { DBDirectClient c; if ( c.exists( "local.oplog.$main" ) ) { BSONObj op = c.findOne( "local.oplog.$main", QUERY( "op" << NE << "n" ).sort( BSON( "$natural" << -1 ) ) ); @@ -742,7 +758,7 @@ namespace mongo { } } } - addSourceToList(v, tmp, c->current(), old); + addSourceToList(v, tmp, old); c->advance(); } @@ -766,7 +782,7 @@ namespace mongo { } return false; } - + void ReplSource::forceResyncDead( const char *requester ) { if ( !replAllDead ) return; @@ -775,9 +791,9 @@ namespace mongo { for( SourceVector::iterator i = sources.begin(); i != sources.end(); ++i ) { (*i)->forceResync( requester ); } - replAllDead = 0; + replAllDead = 0; } - + void ReplSource::forceResync( const char *requester ) { BSONObj info; { @@ -800,7 +816,7 @@ namespace mongo { } } } - } + } syncedTo = OpTime(); addDbNextPass.clear(); save(); @@ -812,7 +828,7 @@ namespace mongo { dropDatabase(db); return db; } - + /* grab initial copy of a database from the master */ bool ReplSource::resync(string db) { string dummyNs = resyncDrop( db.c_str(), "internal" ); @@ -841,7 +857,7 @@ namespace mongo { log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;; } catch ( DBException& e ) { - log() << "sync: caught db exception " << e << " while applying op: " << op << endl;; + log() << "sync: caught db exception " << e << " while applying op: " << op << endl;; } } @@ -850,15 +866,17 @@ namespace mongo { { ts: ..., op: , ns: ..., o: , o2: , b: } ... see logOp() comments. + + @param alreadyLocked caller already put us in write lock if true */ - void ReplSource::sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail) { + void ReplSource::sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail, bool alreadyLocked) { if( logLevel >= 6 ) // op.tostring is expensive so doing this check explicitly log(6) << "processing op: " << op << endl; if( op.getStringField("op")[0] == 'n' ) return; - char clientName[MaxDatabaseLen]; + char clientName[MaxDatabaseNameLen]; const char *ns = op.getStringField("ns"); nsToDatabase(ns, clientName); @@ -867,22 +885,27 @@ namespace mongo { return; } else if ( *ns == 0 ) { - problem() << "halting replication, bad op in oplog:\n " << op.toString() << endl; - replAllDead = "bad object in oplog"; - throw SyncException(); + /*if( op.getStringField("op")[0] != 'n' )*/ { + problem() << "halting replication, bad op in oplog:\n " << op.toString() << endl; + replAllDead = "bad object in oplog"; + throw SyncException(); + } + //ns = "local.system.x"; + //nsToDatabase(ns, clientName); } if ( !only.empty() && only != clientName ) return; - if( cmdLine.pretouch ) { + if( cmdLine.pretouch && !alreadyLocked/*doesn't make sense if in write lock already*/ ) { if( cmdLine.pretouch > 1 ) { /* note: this is bad - should be put in ReplSource. but this is first test... */ static int countdown; + assert( countdown >= 0 ); if( countdown > 0 ) { countdown--; // was pretouched on a prev pass - assert( countdown >= 0 ); - } else { + } + else { const int m = 4; if( tp.get() == 0 ) { int nthr = min(8, cmdLine.pretouch); @@ -911,7 +934,7 @@ namespace mongo { } } - dblock lk; + scoped_ptr lk( alreadyLocked ? 
0 : new writelock() ); if ( localLogTail && replPair && replPair->state == ReplPair::State_Master ) { updateSetsWithLocalOps( *localLogTail, true ); // allow unlocking @@ -923,7 +946,7 @@ namespace mongo { log() << "replAllDead, throwing SyncException: " << replAllDead << endl; throw SyncException(); } - + Client::Context ctx( ns ); ctx.getClient()->curop()->reset(); @@ -932,14 +955,14 @@ namespace mongo { if( logLevel >= 6 ) log(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty << ", incompleteClone: " << incompleteClone << endl; - + // always apply admin command command // this is a bit hacky -- the semantics of replication/commands aren't well specified if ( strcmp( clientName, "admin" ) == 0 && *op.getStringField( "op" ) == 'c' ) { applyOperation( op ); return; } - + if ( ctx.justCreated() || empty || incompleteClone ) { // we must add to incomplete list now that setClient has been called incompleteCloneDbs.insert( clientName ); @@ -950,7 +973,8 @@ namespace mongo { clone 100 databases in one pass.) */ addDbNextPass.insert( clientName ); - } else { + } + else { if ( incompleteClone ) { log() << "An earlier initial clone of '" << clientName << "' did not complete, now resyncing." << endl; } @@ -962,21 +986,25 @@ namespace mongo { incompleteCloneDbs.erase( clientName ); } save(); - } else { + } + else { bool mod; if ( replPair && replPair->state == ReplPair::State_Master ) { BSONObj id = idForOp( op, mod ); if ( !idTracker.haveId( ns, id ) ) { - applyOperation( op ); - } else if ( idTracker.haveModId( ns, id ) ) { + applyOperation( op ); + } + else if ( idTracker.haveModId( ns, id ) ) { log( 6 ) << "skipping operation matching mod id object " << op << endl; BSONObj existing; if ( Helpers::findOne( ns, id, existing ) ) logOp( "i", ns, existing ); - } else { + } + else { log( 6 ) << "skipping operation matching changed id object " << op << endl; } - } else { + } + else { applyOperation( op ); } addDbNextPass.erase( clientName ); @@ -988,33 +1016,33 @@ namespace mongo { const char *opType = op.getStringField( "op" ); BSONObj o = op.getObjectField( "o" ); switch( opType[ 0 ] ) { - case 'i': { - BSONObjBuilder idBuilder; - BSONElement id; - if ( !o.getObjectID( id ) ) - return BSONObj(); - idBuilder.append( id ); - return idBuilder.obj(); - } - case 'u': { - BSONObj o2 = op.getObjectField( "o2" ); - if ( strcmp( o2.firstElement().fieldName(), "_id" ) != 0 ) - return BSONObj(); - if ( o.firstElement().fieldName()[ 0 ] == '$' ) - mod = true; - return o2; - } - case 'd': { - if ( opType[ 1 ] != '\0' ) - return BSONObj(); // skip "db" op type - return o; - } - default: - break; - } + case 'i': { + BSONObjBuilder idBuilder; + BSONElement id; + if ( !o.getObjectID( id ) ) + return BSONObj(); + idBuilder.append( id ); + return idBuilder.obj(); + } + case 'u': { + BSONObj o2 = op.getObjectField( "o2" ); + if ( strcmp( o2.firstElement().fieldName(), "_id" ) != 0 ) + return BSONObj(); + if ( o.firstElement().fieldName()[ 0 ] == '$' ) + mod = true; + return o2; + } + case 'd': { + if ( opType[ 1 ] != '\0' ) + return BSONObj(); // skip "db" op type + return o; + } + default: + break; + } return BSONObj(); } - + void ReplSource::updateSetsWithOp( const BSONObj &op, bool mayUnlock ) { if ( mayUnlock ) { idTracker.mayUpgradeStorage(); @@ -1029,42 +1057,42 @@ namespace mongo { if ( mod ) idTracker.haveModId( ns, id, true ); idTracker.haveId( ns, id, true ); - } + } } - + void ReplSource::syncToTailOfRemoteLog() { string _ns = ns(); BSONObjBuilder b; if ( !only.empty() ) { 
b.appendRegex("ns", string("^") + only); - } + } BSONObj last = oplogReader.findOne( _ns.c_str(), Query( b.done() ).sort( BSON( "$natural" << -1 ) ) ); if ( !last.isEmpty() ) { BSONElement ts = last.getField( "ts" ); massert( 10386 , "non Date ts found: " + last.toString(), ts.type() == Date || ts.type() == Timestamp ); syncedTo = OpTime( ts.date() ); - } + } } - + OpTime ReplSource::nextLastSavedLocalTs() const { Client::Context ctx( "local.oplog.$main" ); shared_ptr c = findTableScan( "local.oplog.$main", BSON( "$natural" << -1 ) ); if ( c->ok() ) - return OpTime( c->current().getField( "ts" ).date() ); + return OpTime( c->current().getField( "ts" ).date() ); return OpTime(); } - + void ReplSource::setLastSavedLocalTs( const OpTime &nextLocalTs ) { _lastSavedLocalTs = nextLocalTs; log( 3 ) << "updated _lastSavedLocalTs to: " << _lastSavedLocalTs << endl; } - + void ReplSource::resetSlave() { log() << "**********************************************************\n"; log() << "Sending forcedead command to slave to stop its replication\n"; log() << "Host: " << hostName << " paired: " << paired << endl; massert( 10387 , "request to kill slave replication failed", - oplogReader.conn()->simpleCommand( "admin", 0, "forcedead" ) ); + oplogReader.conn()->simpleCommand( "admin", 0, "forcedead" ) ); syncToTailOfRemoteLog(); { dblock lk; @@ -1073,7 +1101,7 @@ namespace mongo { oplogReader.resetCursor(); } } - + bool ReplSource::updateSetsWithLocalOps( OpTime &localLogTail, bool mayUnlock ) { Client::Context ctx( "local.oplog.$main" ); shared_ptr localLog = findTableScan( "local.oplog.$main", BSON( "$natural" << -1 ) ); @@ -1099,14 +1127,16 @@ namespace mongo { dbtemprelease t; resetSlave(); massert( 10388 , "local master log filled, forcing slave resync", false ); - } + } if ( !newTail.isNull() ) localLogTail = newTail; return true; } - + + extern unsigned replApplyBatchSize; + /* slave: pull some data from the master's oplog - note: not yet in db mutex at this point. + note: not yet in db mutex at this point. @return -1 error 0 ok, don't sleep 1 ok, sleep @@ -1126,7 +1156,7 @@ namespace mongo { OpTime localLogTail = _lastSavedLocalTs; bool initial = syncedTo.isNull(); - + if ( !oplogReader.haveCursor() || initial ) { if ( initial ) { // Important to grab last oplog timestamp before listing databases. @@ -1152,13 +1182,13 @@ namespace mongo { dblock lk; save(); } - + BSONObjBuilder q; q.appendDate("$gte", syncedTo.asDate()); BSONObjBuilder query; query.append("ts", q.done()); if ( !only.empty() ) { - // note we may here skip a LOT of data table scanning, a lot of work for the master. + // note we may here skip a LOT of data table scanning, a lot of work for the master. query.appendRegex("ns", string("^") + only); // maybe append "\\." here? 
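// Aside on the "maybe append \\." question above (illustrative helper, not part
// of this patch): the '^' + only regex is a bare prefix test, so only == "foo"
// would also match namespaces in a database named "foobar"; requiring a '.'
// immediately after the database name is the stricter check.
#include <iostream>
#include <string>

// True when namespace ns ("db.collection", or "db." for database-level ops)
// belongs to database db.
bool nsInDatabase(const std::string& ns, const std::string& db) {
    return ns.size() > db.size() &&
           ns.compare(0, db.size(), db) == 0 &&
           ns[db.size()] == '.';
}

int main() {
    std::cout << nsInDatabase("foo.bar", "foo")       // 1
              << nsInDatabase("foobar.baz", "foo")    // 0
              << '\n';
    return 0;
}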
} BSONObj queryObj = query.done(); @@ -1185,7 +1215,7 @@ namespace mongo { b.append("ns", *i + '.'); b.append("op", "db"); BSONObj op = b.done(); - sync_pullOpLog_applyOperation(op, 0); + sync_pullOpLog_applyOperation(op, 0, false); } } @@ -1195,7 +1225,8 @@ namespace mongo { if( oplogReader.awaitCapable() ) okResultCode = 0; // don't sleep - } else { + } + else { log() << "repl: " << ns << " oplog is empty\n"; } { @@ -1207,11 +1238,11 @@ namespace mongo { setLastSavedLocalTs( nextLastSaved ); } } - save(); + save(); } return okResultCode; } - + OpTime nextOpTime; { BSONObj op = oplogReader.next(); @@ -1234,32 +1265,31 @@ namespace mongo { massert( 10391 , "repl: bad object read from remote oplog", false); } } - + if ( replPair && replPair->state == ReplPair::State_Master ) { - + OpTime next( ts.date() ); if ( !tailing && !initial && next != syncedTo ) { log() << "remote slave log filled, forcing slave resync" << endl; resetSlave(); return 1; - } - + } + dblock lk; updateSetsWithLocalOps( localLogTail, true ); } - + nextOpTime = OpTime( ts.date() ); log(2) << "repl: first op time received: " << nextOpTime.toString() << '\n'; - if ( tailing || initial ) { - if ( initial ) - log(1) << "repl: initial run\n"; - else { - if( !( syncedTo <= nextOpTime ) ) { - log() << "repl ASSERTION failed : syncedTo <= nextOpTime" << endl; - log() << "repl syncTo: " << syncedTo.toStringLong() << endl; - log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl; - assert(false); - } + if ( initial ) { + log(1) << "repl: initial run\n"; + } + if( tailing ) { + if( !( syncedTo < nextOpTime ) ) { + log() << "repl ASSERTION failed : syncedTo < nextOpTime" << endl; + log() << "repl syncTo: " << syncedTo.toStringLong() << endl; + log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl; + assert(false); } oplogReader.putBack( op ); // op will be processed in the loop below nextOpTime = OpTime(); // will reread the op below @@ -1281,14 +1311,14 @@ namespace mongo { throw SyncException(); } else { - /* t == syncedTo, so the first op was applied previously. */ + /* t == syncedTo, so the first op was applied previously or it is the first op of initial query and need not be applied. 
*/ } } // apply operations { int n = 0; - time_t saveLast = time(0); + time_t saveLast = time(0); while ( 1 ) { /* from a.s.: I think the idea here is that we can establish a sync point between the local op log and the remote log with the following steps: @@ -1316,7 +1346,8 @@ namespace mongo { if ( getInitialSyncCompleted() ) { // if initial sync hasn't completed, break out of loop so we can set to completed or clone more dbs continue; } - } else { + } + else { setLastSavedLocalTs( nextLastSaved ); } } @@ -1332,109 +1363,132 @@ namespace mongo { else { } - OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) { - // periodically note our progress, in case we are doing a lot of work and crash - dblock lk; + OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) { + // periodically note our progress, in case we are doing a lot of work and crash + dblock lk; syncedTo = nextOpTime; // can't update local log ts since there are pending operations from our peer - save(); + save(); log() << "repl: checkpoint applied " << n << " operations" << endl; log() << "repl: syncedTo: " << syncedTo.toStringLong() << endl; - saveLast = time(0); - n = 0; - } + saveLast = time(0); + n = 0; + } BSONObj op = oplogReader.next(); - BSONElement ts = op.getField("ts"); - if( !( ts.type() == Date || ts.type() == Timestamp ) ) { - log() << "sync error: problem querying remote oplog record\n"; - log() << "op: " << op.toString() << '\n'; - log() << "halting replication" << endl; - replInfo = replAllDead = "sync error: no ts found querying remote oplog record"; - throw SyncException(); - } - OpTime last = nextOpTime; - nextOpTime = OpTime( ts.date() ); - if ( !( last < nextOpTime ) ) { - log() << "sync error: last applied optime at slave >= nextOpTime from master" << endl; - log() << " last: " << last.toStringLong() << '\n'; - log() << " nextOpTime: " << nextOpTime.toStringLong() << '\n'; - log() << " halting replication" << endl; - replInfo = replAllDead = "sync error last >= nextOpTime"; - uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false); - } - if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) { - oplogReader.putBack( op ); - _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1; - dblock lk; - if ( n > 0 ) { - syncedTo = last; - save(); + + unsigned b = replApplyBatchSize; + bool justOne = b == 1; + scoped_ptr lk( justOne ? 
0 : new writelock() ); + while( 1 ) { + + BSONElement ts = op.getField("ts"); + if( !( ts.type() == Date || ts.type() == Timestamp ) ) { + log() << "sync error: problem querying remote oplog record" << endl; + log() << "op: " << op.toString() << endl; + log() << "halting replication" << endl; + replInfo = replAllDead = "sync error: no ts found querying remote oplog record"; + throw SyncException(); + } + OpTime last = nextOpTime; + nextOpTime = OpTime( ts.date() ); + if ( !( last < nextOpTime ) ) { + log() << "sync error: last applied optime at slave >= nextOpTime from master" << endl; + log() << " last: " << last.toStringLong() << endl; + log() << " nextOpTime: " << nextOpTime.toStringLong() << endl; + log() << " halting replication" << endl; + replInfo = replAllDead = "sync error last >= nextOpTime"; + uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false); + } + if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) { + assert( justOne ); + oplogReader.putBack( op ); + _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1; + dblock lk; + if ( n > 0 ) { + syncedTo = last; + save(); + } + log() << "repl: applied " << n << " operations" << endl; + log() << "repl: syncedTo: " << syncedTo.toStringLong() << endl; + log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl; + return okResultCode; } - log() << "repl: applied " << n << " operations" << endl; - log() << "repl: syncedTo: " << syncedTo.toStringLong() << endl; - log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl; - break; - } - sync_pullOpLog_applyOperation(op, &localLogTail); - n++; + sync_pullOpLog_applyOperation(op, &localLogTail, !justOne); + n++; + + if( --b == 0 ) + break; + // if to here, we are doing mulpile applications in a singel write lock acquisition + if( !oplogReader.moreInCurrentBatch() ) { + // break if no more in batch so we release lock while reading from the master + break; + } + op = oplogReader.next(); + + getDur().commitIfNeeded(); + } } } return okResultCode; } - BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}"); - - bool replAuthenticate(DBClientConnection *conn) { - if( ! cc().isAdmin() ){ - log() << "replauthenticate: requires admin permissions, failing\n"; - return false; - } - - BSONObj user; - { - dblock lk; - Client::Context ctxt("local."); - if( !Helpers::findOne("local.system.users", userReplQuery, user) ) { - // try the first user is local - if( !Helpers::getSingleton("local.system.users", user) ) { - if( noauth ) - return true; // presumably we are running a --noauth setup all around. - - log() << "replauthenticate: no user in local.system.users to use for authentication\n"; - return false; - } - } - - } - - string u = user.getStringField("user"); - string p = user.getStringField("pwd"); - massert( 10392 , "bad user object? [1]", !u.empty()); - massert( 10393 , "bad user object? [2]", !p.empty()); - string err; - if( !conn->auth("local", u.c_str(), p.c_str(), err, false) ) { - log() << "replauthenticate: can't authenticate to master server, user:" << u << endl; - return false; - } - return true; - } + BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}"); + + bool replAuthenticate(DBClientBase *conn) { + if( ! 
cc().isAdmin() ) { + log() << "replauthenticate: requires admin permissions, failing\n"; + return false; + } + + string u; + string p; + if (internalSecurity.pwd.length() > 0) { + u = internalSecurity.user; + p = internalSecurity.pwd; + } + else { + BSONObj user; + { + dblock lk; + Client::Context ctxt("local."); + if( !Helpers::findOne("local.system.users", userReplQuery, user) || + // try the first user in local + !Helpers::getSingleton("local.system.users", user) ) { + log() << "replauthenticate: no user in local.system.users to use for authentication\n"; + return noauth; + } + } + u = user.getStringField("user"); + p = user.getStringField("pwd"); + massert( 10392 , "bad user object? [1]", !u.empty()); + massert( 10393 , "bad user object? [2]", !p.empty()); + } + + string err; + if( !conn->auth("local", u.c_str(), p.c_str(), err, false) ) { + log() << "replauthenticate: can't authenticate to master server, user:" << u << endl; + return false; + } + return true; + } bool replHandshake(DBClientConnection *conn) { - + BSONObj me; { dblock l; - if ( ! Helpers::getSingleton( "local.me" , me ) ){ + // local.me is an identifier for a server for getLastError w:2+ + if ( ! Helpers::getSingleton( "local.me" , me ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); me = b.obj(); Helpers::putSingleton( "local.me" , me ); } } - + BSONObjBuilder cmd; cmd.appendAs( me["_id"] , "handshake" ); @@ -1450,9 +1504,9 @@ namespace mongo { _conn = auto_ptr(new DBClientConnection( false, 0, replPair ? 20 : 0 /* tcp timeout */)); string errmsg; ReplInfo r("trying to connect to sync source"); - if ( !_conn->connect(hostName.c_str(), errmsg) || - !replAuthenticate(_conn.get()) || - !replHandshake(_conn.get()) ) { + if ( !_conn->connect(hostName.c_str(), errmsg) || + (!noauth && !replAuthenticate(_conn.get())) || + !replHandshake(_conn.get()) ) { resetConnection(); log() << "repl: " << errmsg << endl; return false; @@ -1460,7 +1514,7 @@ namespace mongo { } return true; } - + /* note: not yet in mutex at this point. returns >= 0 if ok. return -1 if you want to reconnect. return value of zero indicates no sleep necessary before next call @@ -1486,14 +1540,14 @@ namespace mongo { } if ( !oplogReader.connect(hostName) ) { - log(4) << "repl: can't connect to sync source" << endl; + log(4) << "repl: can't connect to sync source" << endl; if ( replPair && paired ) { assert( startsWith(hostName.c_str(), replPair->remoteHost.c_str()) ); replPair->arbitrate(); } return -1; } - + if ( paired ) { int remote = replPair->negotiate(oplogReader.conn(), "direct"); int nMasters = ( remote == ReplPair::State_Master ) + ( replPair->state == ReplPair::State_Master ); @@ -1504,17 +1558,17 @@ namespace mongo { } /* - // get current mtime at the server. - BSONObj o = conn->findOne("admin.$cmd", opTimeQuery); - BSONElement e = o.getField("optime"); - if( e.eoo() ) { - log() << "repl: failed to get cur optime from master" << endl; - log() << " " << o.toString() << endl; - return false; - } - uassert( 10124 , e.type() == Date ); - OpTime serverCurTime; - serverCurTime.asDate() = e.date(); + // get current mtime at the server. 
+ BSONObj o = conn->findOne("admin.$cmd", opTimeQuery); + BSONElement e = o.getField("optime"); + if( e.eoo() ) { + log() << "repl: failed to get cur optime from master" << endl; + log() << " " << o.toString() << endl; + return false; + } + uassert( 10124 , e.type() == Date ); + OpTime serverCurTime; + serverCurTime.asDate() = e.date(); */ return sync_pullOpLog(nApplied); } @@ -1527,7 +1581,7 @@ namespace mongo { _ reuse that cursor when we can */ - /* returns: # of seconds to sleep before next pass + /* returns: # of seconds to sleep before next pass 0 = no sleep recommended 1 = special sentinel indicating adaptive sleep recommended */ @@ -1543,6 +1597,7 @@ namespace mongo { /* replication is not configured yet (for --slave) in local.sources. Poll for config it every 20 seconds. */ + log() << "no source given, add a master to local.sources to start replication" << endl; return 20; } @@ -1553,7 +1608,7 @@ namespace mongo { try { res = s->sync(nApplied); bool moreToSync = s->haveMoreDbsToSync(); - if( res < 0 ) { + if( res < 0 ) { sleepAdvice = 3; } else if( moreToSync ) { @@ -1562,7 +1617,7 @@ namespace mongo { else if ( s->sleepAdvice() ) { sleepAdvice = s->sleepAdvice(); } - else + else sleepAdvice = res; if ( res >= 0 && !moreToSync /*&& !s->syncedTo.isNull()*/ ) { pairSync->setInitialSyncCompletedLocking(); @@ -1588,9 +1643,9 @@ namespace mongo { } catch ( const std::exception &e ) { log() << "repl: std::exception " << e.what() << endl; - replInfo = "replMain caught std::exception"; + replInfo = "replMain caught std::exception"; } - catch ( ... ) { + catch ( ... ) { log() << "unexpected exception during replication. replication will halt" << endl; replAllDead = "caught unexpected exception during replication"; } @@ -1616,15 +1671,16 @@ namespace mongo { try { int nApplied = 0; s = _replMain(sources, nApplied); - if( s == 1 ) { + if( s == 1 ) { if( nApplied == 0 ) s = 2; - else if( nApplied > 100 ) { + else if( nApplied > 100 ) { // sleep very little - just enought that we aren't truly hammering master sleepmillis(75); s = 0; } } - } catch (...) { + } + catch (...) { out() << "caught exception in _replMain" << endl; s = 4; } @@ -1634,10 +1690,10 @@ namespace mongo { syncing--; } - if( relinquishSyncingSome ) { - relinquishSyncingSome = 0; - s = 1; // sleep before going back in to syncing=1 - } + if( relinquishSyncingSome ) { + relinquishSyncingSome = 0; + s = 1; // sleep before going back in to syncing=1 + } if ( s ) { stringstream ss; @@ -1660,21 +1716,21 @@ namespace mongo { while( 1 ) { sleepsecs( toSleep ); - /* write a keep-alive like entry to the log. this will make things like + /* write a keep-alive like entry to the log. this will make things like printReplicationStatus() and printSlaveReplicationStatus() stay up-to-date even when things are idle. */ { writelocktry lk("",1); - if ( lk.got() ){ + if ( lk.got() ) { toSleep = 10; - - cc().getAuthenticationInfo()->authorize("admin"); - - try { + + cc().getAuthenticationInfo()->authorize("admin"); + + try { logKeepalive(); } - catch(...) { + catch(...) 
{ log() << "caught exception in replMasterThread()" << endl; } } @@ -1690,11 +1746,11 @@ namespace mongo { sleepsecs(1); Client::initThread("replslave"); cc().iAmSyncThread(); - + { dblock lk; cc().getAuthenticationInfo()->authorize("admin"); - + BSONObj obj; if ( Helpers::getSingleton("local.pair.startup", obj) ) { // should be: {replacepeer:1} @@ -1730,12 +1786,11 @@ namespace mongo { void startReplication() { /* if we are going to be a replica set, we aren't doing other forms of replication. */ if( !cmdLine._replSet.empty() ) { - if( replSettings.slave || replSettings.master || replPair ) { + if( replSettings.slave || replSettings.master || replPair ) { log() << "***" << endl; log() << "ERROR: can't use --slave or --master replication options with --replSet" << endl; log() << "***" << endl; } - createOplog(); newRepl(); return; } @@ -1773,7 +1828,7 @@ namespace mongo { createOplog(); boost::thread t(replMasterThread); } - + while( replSettings.fastsync ) // don't allow writes until we've set up from log sleepmillis( 50 ); } @@ -1807,5 +1862,29 @@ namespace mongo { } tp.join(); } - + + class ReplApplyBatchSizeValidator : public ParameterValidator { + public: + ReplApplyBatchSizeValidator() : ParameterValidator( "replApplyBatchSize" ) {} + + virtual bool isValid( BSONElement e , string& errmsg ) { + int b = e.numberInt(); + if( b < 1 || b > 1024 ) { + errmsg = "replApplyBatchSize has to be >= 1 and < 1024"; + return false; + } + + if ( replSettings.slavedelay != 0 && b > 1 ) { + errmsg = "can't use a batch size > 1 with slavedelay"; + return false; + } + if ( ! replSettings.slave ) { + errmsg = "can't set replApplyBatchSize on a non-slave machine"; + return false; + } + + return true; + } + } replApplyBatchSizeValidator; + } // namespace mongo diff --git a/db/repl.h b/db/repl.h index f33acad..45036fa 100644 --- a/db/repl.h +++ b/db/repl.h @@ -40,16 +40,16 @@ namespace mongo { - /* replication slave? (possibly with slave or repl pair nonmaster) + /* replication slave? (possibly with slave or repl pair nonmaster) --slave cmd line setting -> SimpleSlave - */ - typedef enum { NotSlave=0, SimpleSlave, ReplPairSlave } SlaveTypes; + */ + typedef enum { NotSlave=0, SimpleSlave, ReplPairSlave } SlaveTypes; class ReplSettings { public: SlaveTypes slave; - /* true means we are master and doing replication. if we are not writing to oplog (no --master or repl pairing), + /* true means we are master and doing replication. if we are not writing to oplog (no --master or repl pairing), this won't be true. */ bool master; @@ -57,9 +57,9 @@ namespace mongo { int opIdMem; bool fastsync; - + bool autoresync; - + int slavedelay; ReplSettings() @@ -69,14 +69,14 @@ namespace mongo { }; extern ReplSettings replSettings; - - bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, - bool slaveOk, bool useReplAuth, bool snapshot); + + bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, + bool slaveOk, bool useReplAuth, bool snapshot); /* A replication exception */ class SyncException : public DBException { public: - SyncException() : DBException( "sync exception" , 10001 ){} + SyncException() : DBException( "sync exception" , 10001 ) {} }; /* A Source is a source from which we can pull (replicate) data. @@ -94,11 +94,14 @@ namespace mongo { bool resync(string db); - /* pull some operations from the master's oplog, and apply them. 
*/ + /** @param alreadyLocked caller already put us in write lock if true */ + void sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail, bool alreadyLocked); + + /* pull some operations from the master's oplog, and apply them. + calls sync_pullOpLog_applyOperation + */ int sync_pullOpLog(int& nApplied); - void sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail); - /* we only clone one database per pass, even if a lot need done. This helps us avoid overflowing the master's transaction log by doing too much work before going back to read more transactions. (Imagine a scenario of slave startup where we try to @@ -109,7 +112,7 @@ namespace mongo { set incompleteCloneDbs; ReplSource(); - + // returns the dummy ns used to do the drop string resyncDrop( const char *db, const char *requester ); // returns possibly unowned id spec for the operation. @@ -127,7 +130,7 @@ namespace mongo { bool updateSetsWithLocalOps( OpTime &localLogTail, bool mayUnlock ); string ns() const { return string( "local.oplog.$" ) + sourceName(); } unsigned _sleepAdviceTime; - + public: OplogReader oplogReader; @@ -136,9 +139,7 @@ namespace mongo { bool paired; // --pair in use string hostName; // ip addr or hostname plus optionally, ":" string _sourceName; // a logical source name. - string sourceName() const { - return _sourceName.empty() ? "main" : _sourceName; - } + string sourceName() const { return _sourceName.empty() ? "main" : _sourceName; } string only; // only a certain db. note that in the sources collection, this may not be changed once you start replicating. /* the last time point we have already synced up to (in the remote/master's oplog). */ @@ -146,8 +147,8 @@ namespace mongo { /* This is for repl pairs. _lastSavedLocalTs is the most recent point in the local log that we know is consistent - with the remote log ( ie say the local op log has entries ABCDE and the remote op log - has ABCXY, then _lastSavedLocalTs won't be greater than C until we have reconciled + with the remote log ( ie say the local op log has entries ABCDE and the remote op log + has ABCXY, then _lastSavedLocalTs won't be greater than C until we have reconciled the DE-XY difference.) */ OpTime _lastSavedLocalTs; @@ -171,15 +172,15 @@ namespace mongo { return hostName == r.hostName && sourceName() == r.sourceName(); } string toString() const { return sourceName() + "@" + hostName; } - - bool haveMoreDbsToSync() const { return !addDbNextPass.empty(); } + + bool haveMoreDbsToSync() const { return !addDbNextPass.empty(); } int sleepAdvice() const { if ( !_sleepAdviceTime ) return 0; int wait = _sleepAdviceTime - unsigned( time( 0 ) ); return wait > 0 ? wait : 0; } - + static bool throttledForceResyncDead( const char *requester ); static void forceResyncDead( const char *requester ); void forceResync( const char *requester ); @@ -200,7 +201,8 @@ namespace mongo { if ( imp_[ ns ].insert( id.getOwned() ).second ) { size_ += id.objsize() + sizeof( BSONObj ); } - } else { + } + else { if ( imp_[ ns ].erase( id ) == 1 ) { size_ -= id.objsize() + sizeof( BSONObj ); } @@ -236,7 +238,7 @@ namespace mongo { // rename _id to id since there may be duplicates b.appendAs( id.firstElement(), "id" ); return b.obj(); - } + } DbSet impl_; }; @@ -244,14 +246,14 @@ namespace mongo { // All functions must be called with db mutex held // Kind of sloppy class structure, for now just want to keep the in mem // version speedy. 
- // see http://www.mongodb.org/display/DOCS/Pairing+Internals + // see http://www.mongodb.org/display/DOCS/Pairing+Internals class IdTracker { public: IdTracker() : - dbIds_( "local.temp.replIds" ), - dbModIds_( "local.temp.replModIds" ), - inMem_( true ), - maxMem_( replSettings.opIdMem ) { + dbIds_( "local.temp.replIds" ), + dbModIds_( "local.temp.replModIds" ), + inMem_( true ), + maxMem_( replSettings.opIdMem ) { } void reset( int maxMem = replSettings.opIdMem ) { memIds_.reset(); @@ -309,7 +311,7 @@ namespace mongo { void upgrade( MemIds &a, DbIds &b ) { for( MemIds::IdSets::const_iterator i = a.imp_.begin(); i != a.imp_.end(); ++i ) { for( BSONObjSetDefaultOrder::const_iterator j = i->second.begin(); j != i->second.end(); ++j ) { - set( b, i->first.c_str(), *j, true ); + set( b, i->first.c_str(), *j, true ); RARELY { dbtemprelease t; } @@ -323,9 +325,9 @@ namespace mongo { bool inMem_; int maxMem_; }; - + bool anyReplEnabled(); void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level = 0 ); - - + + } // namespace mongo diff --git a/db/repl/connections.h b/db/repl/connections.h index cdf2fad..7e7bfe5 100644 --- a/db/repl/connections.h +++ b/db/repl/connections.h @@ -1,4 +1,4 @@ -// @file +// @file /* * Copyright (C) 2010 10gen Inc. @@ -20,11 +20,12 @@ #include #include "../../client/dbclient.h" +#include "../security_key.h" -namespace mongo { +namespace mongo { - /** here we keep a single connection (with reconnect) for a set of hosts, - one each, and allow one user at a time per host. if in use already for that + /** here we keep a single connection (with reconnect) for a set of hosts, + one each, and allow one user at a time per host. if in use already for that host, we block. so this is an easy way to keep a 1-deep pool of connections that many threads can share. @@ -39,35 +40,37 @@ namespace mongo { throws exception on connect error (but fine to try again later with a new scopedconn object for same host). */ - class ScopedConn { + class ScopedConn { public: /** throws assertions if connect failure etc. */ ScopedConn(string hostport); ~ScopedConn(); /* If we were to run a query and not exhaust the cursor, future use of the connection would be problematic. - So here what we do is wrapper known safe methods and not allow cursor-style queries at all. This makes + So here what we do is wrapper known safe methods and not allow cursor-style queries at all. This makes ScopedConn limited in functionality but very safe. More non-cursor wrappers can be added here if needed. */ bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0) { return conn()->runCommand(dbname, cmd, info, options); } - unsigned long long count(const string &ns) { - return conn()->count(ns); + unsigned long long count(const string &ns) { + return conn()->count(ns); } - BSONObj findOne(const string &ns, const Query& q, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { + BSONObj findOne(const string &ns, const Query& q, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { return conn()->findOne(ns, q, fieldsToReturn, queryOptions); } + void setTimeout(double to) { + conn()->setSoTimeout(to); + } private: auto_ptr connLock; - static mutex mapMutex; - struct X { - mutex z; + static mongo::mutex mapMutex; + struct X { + mongo::mutex z; DBClientConnection cc; - X() : z("X"), cc(/*reconnect*/ true, 0, - /*timeout*/ theReplSet ? 
theReplSet->config().ho.heartbeatTimeoutMillis/1000.0 : 10.0) { + X() : z("X"), cc(/*reconnect*/ true, 0, /*timeout*/ 10.0) { cc._logLevel = 2; } } *x; @@ -87,22 +90,30 @@ namespace mongo { connLock.reset( new scoped_lock(x->z) ); } } - if( !first ) { + if( !first ) { connLock.reset( new scoped_lock(x->z) ); return; } // we already locked above... string err; - x->cc.connect(hostport, err); + if (!x->cc.connect(hostport, err)) { + log() << "couldn't connect to " << hostport << ": " << err << rsLog; + return; + } + + if (!noauth && !x->cc.auth("local", internalSecurity.user, internalSecurity.pwd, err, false)) { + log() << "could not authenticate against " << conn()->toString() << ", " << err << rsLog; + return; + } } - inline ScopedConn::~ScopedConn() { + inline ScopedConn::~ScopedConn() { // conLock releases... } - /*inline DBClientConnection* ScopedConn::operator->() { - return &x->cc; + /*inline DBClientConnection* ScopedConn::operator->() { + return &x->cc; }*/ } diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp index 1519c26..f764abe 100644 --- a/db/repl/consensus.cpp +++ b/db/repl/consensus.cpp @@ -19,9 +19,9 @@ #include "rs.h" #include "multicmd.h" -namespace mongo { +namespace mongo { - class CmdReplSetFresh : public ReplSetCommand { + class CmdReplSetFresh : public ReplSetCommand { public: CmdReplSetFresh() : ReplSetCommand("replSetFresh") { } private: @@ -29,23 +29,23 @@ namespace mongo { if( !check(errmsg, result) ) return false; - if( cmdObj["set"].String() != theReplSet->name() ) { + if( cmdObj["set"].String() != theReplSet->name() ) { errmsg = "wrong repl set name"; return false; } string who = cmdObj["who"].String(); int cfgver = cmdObj["cfgver"].Int(); - OpTime opTime(cmdObj["opTime"].Date()); + OpTime opTime(cmdObj["opTime"].Date()); bool weAreFresher = false; - if( theReplSet->config().version > cfgver ) { + if( theReplSet->config().version > cfgver ) { log() << "replSet member " << who << " is not yet aware its cfg version " << cfgver << " is stale" << rsLog; - result.append("info", "config version stale"); + result.append("info", "config version stale"); + weAreFresher = true; + } + else if( opTime < theReplSet->lastOpTimeWritten ) { weAreFresher = true; } - else if( opTime < theReplSet->lastOpTimeWritten ) { - weAreFresher = true; - } result.appendDate("opTime", theReplSet->lastOpTimeWritten.asDate()); result.append("fresher", weAreFresher); return true; @@ -66,19 +66,19 @@ namespace mongo { } } cmdReplSetElect; - int Consensus::totalVotes() const { + int Consensus::totalVotes() const { static int complain = 0; int vTot = rs._self->config().votes; - for( Member *m = rs.head(); m; m=m->next() ) + for( Member *m = rs.head(); m; m=m->next() ) vTot += m->config().votes; if( vTot % 2 == 0 && vTot && complain++ == 0 ) - log() << "replSet warning total number of votes is even - considering giving one member an extra vote" << rsLog; + log() << "replSet " /*buildbot! warning */ "total number of votes is even - add arbiter or give one member an extra vote" << rsLog; return vTot; } bool Consensus::aMajoritySeemsToBeUp() const { int vUp = rs._self->config().votes; - for( Member *m = rs.head(); m; m=m->next() ) + for( Member *m = rs.head(); m; m=m->next() ) vUp += m->hbinfo().up() ? 
m->config().votes : 0; return vUp * 2 > totalVotes(); } @@ -98,13 +98,13 @@ namespace mongo { const time_t LeaseTime = 30; - unsigned Consensus::yea(unsigned memberId) /* throws VoteException */ { + unsigned Consensus::yea(unsigned memberId) { /* throws VoteException */ Atomic::tran t(ly); LastYea &ly = t.ref(); time_t now = time(0); if( ly.when + LeaseTime >= now && ly.who != memberId ) { log(1) << "replSet not voting yea for " << memberId << - " voted for " << ly.who << ' ' << now-ly.when << " secs ago" << rsLog; + " voted for " << ly.who << ' ' << now-ly.when << " secs ago" << rsLog; throw VoteException(); } ly.when = now; @@ -112,7 +112,7 @@ namespace mongo { return rs._self->config().votes; } - /* we vote for ourself at start of election. once it fails, we can cancel the lease we had in + /* we vote for ourself at start of election. once it fails, we can cancel the lease we had in place instead of leaving it for a long time. */ void Consensus::electionFailed(unsigned meid) { @@ -124,7 +124,7 @@ namespace mongo { } /* todo: threading **************** !!!!!!!!!!!!!!!! */ - void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { + void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { BSONObjBuilder& b = *_b; DEV log() << "replSet received elect msg " << cmd.toString() << rsLog; else log(2) << "replSet received elect msg " << cmd.toString() << rsLog; @@ -138,14 +138,14 @@ namespace mongo { const Member* hopeful = rs.findById(whoid); int vote = 0; - if( set != rs.name() ) { + if( set != rs.name() ) { log() << "replSet error received an elect request for '" << set << "' but our set name is '" << rs.name() << "'" << rsLog; } - else if( myver < cfgver ) { + else if( myver < cfgver ) { // we are stale. don't vote } - else if( myver > cfgver ) { + else if( myver > cfgver ) { // they are stale! log() << "replSet info got stale version # during election" << rsLog; vote = -10000; @@ -154,10 +154,10 @@ namespace mongo { log() << "couldn't find member with id " << whoid << rsLog; vote = -10000; } - else if( primary && primary->hbinfo().opTime > hopeful->hbinfo().opTime ) { + else if( primary && primary->hbinfo().opTime >= hopeful->hbinfo().opTime ) { // other members might be aware of more up-to-date nodes log() << hopeful->fullName() << " is trying to elect itself but " << - primary->fullName() << " is already primary and more up-to-date" << rsLog; + primary->fullName() << " is already primary and more up-to-date" << rsLog; vote = -10000; } else { @@ -166,7 +166,7 @@ namespace mongo { rs.relinquish(); log() << "replSet info voting yea for " << whoid << rsLog; } - catch(VoteException&) { + catch(VoteException&) { log() << "replSet voting no already voted for another" << rsLog; } } @@ -182,10 +182,10 @@ namespace mongo { L.push_back( Target(m->fullName()) ); } - /* config version is returned as it is ok to use this unlocked. BUT, if unlocked, you would need + /* config version is returned as it is ok to use this unlocked. BUT, if unlocked, you would need to check later that the config didn't change. 
*/ void ReplSetImpl::getTargets(list& L, int& configVersion) { - if( lockedByMe() ) { + if( lockedByMe() ) { _getTargets(L, configVersion); return; } @@ -200,15 +200,21 @@ namespace mongo { bool Consensus::weAreFreshest(bool& allUp, int& nTies) { const OpTime ord = theReplSet->lastOpTimeWritten; nTies = 0; - assert( !ord.isNull() ); + assert( !ord.isNull() ); BSONObj cmd = BSON( - "replSetFresh" << 1 << - "set" << rs.name() << - "opTime" << Date_t(ord.asDate()) << - "who" << rs._self->fullName() << - "cfgver" << rs._cfg->version ); + "replSetFresh" << 1 << + "set" << rs.name() << + "opTime" << Date_t(ord.asDate()) << + "who" << rs._self->fullName() << + "cfgver" << rs._cfg->version ); list L; int ver; + /* the following queries arbiters, even though they are never fresh. wonder if that makes sense. + it doesn't, but it could, if they "know" what freshness it one day. so consider removing + arbiters from getTargets() here. although getTargets is used elsewhere for elections; there + arbiters are certainly targets - so a "includeArbs" bool would be necessary if we want to make + not fetching them herein happen. + */ rs.getTargets(L, ver); multiCommand(cmd, L); int nok = 0; @@ -228,25 +234,25 @@ namespace mongo { allUp = false; } } - DEV log() << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; + log(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; assert( ord <= theReplSet->lastOpTimeWritten ); // <= as this may change while we are working... return true; } extern time_t started; - void Consensus::multiCommand(BSONObj cmd, list& L) { + void Consensus::multiCommand(BSONObj cmd, list& L) { assert( !rs.lockedByMe() ); mongo::multiCommand(cmd, L); } void Consensus::_electSelf() { - if( time(0) < steppedDown ) + if( time(0) < steppedDown ) return; { const OpTime ord = theReplSet->lastOpTimeWritten; - if( ord == 0 ) { + if( ord == 0 ) { log() << "replSet info not trying to elect self, do not yet have a complete set of data from any point in time" << rsLog; return; } @@ -254,16 +260,16 @@ namespace mongo { bool allUp; int nTies; - if( !weAreFreshest(allUp, nTies) ) { + if( !weAreFreshest(allUp, nTies) ) { log() << "replSet info not electing self, we are not freshest" << rsLog; return; } rs.sethbmsg("",9); - if( !allUp && time(0) - started < 60 * 5 ) { - /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data - if we don't have to -- we'd rather be offline and wait a little longer instead + if( !allUp && time(0) - started < 60 * 5 ) { + /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data + if we don't have to -- we'd rather be offline and wait a little longer instead todo: make this configurable. */ rs.sethbmsg("not electing self, not all members up and we have been up less than 5 minutes"); @@ -276,9 +282,10 @@ namespace mongo { /* tie? we then randomly sleep to try to not collide on our voting. */ /* todo: smarter. 
*/ if( me.id() == 0 || sleptLast ) { - // would be fine for one node not to sleep + // would be fine for one node not to sleep // todo: biggest / highest priority nodes should be the ones that get to not sleep - } else { + } + else { assert( !rs.lockedByMe() ); // bad to go to sleep locked unsigned ms = ((unsigned) rand()) % 1000 + 50; DEV log() << "replSet tie " << nTies << " sleeping a little " << ms << "ms" << rsLog; @@ -297,13 +304,13 @@ namespace mongo { log() << "replSet info electSelf " << meid << rsLog; BSONObj electCmd = BSON( - "replSetElect" << 1 << - "set" << rs.name() << - "who" << me.fullName() << - "whoid" << me.hbinfo().id() << - "cfgver" << rs._cfg->version << - "round" << OID::gen() /* this is just for diagnostics */ - ); + "replSetElect" << 1 << + "set" << rs.name() << + "who" << me.fullName() << + "whoid" << me.hbinfo().id() << + "cfgver" << rs._cfg->version << + "round" << OID::gen() /* this is just for diagnostics */ + ); int configVersion; list L; @@ -326,7 +333,7 @@ namespace mongo { // defensive; should never happen as we have timeouts on connection and operation for our conn log() << "replSet too much time passed during our election, ignoring result" << rsLog; } - else if( configVersion != rs.config().version ) { + else if( configVersion != rs.config().version ) { log() << "replSet config version changed during our election, ignoring result" << rsLog; } else { @@ -334,9 +341,10 @@ namespace mongo { log(1) << "replSet election succeeded, assuming primary role" << rsLog; success = true; rs.assumePrimary(); - } + } } - } catch( std::exception& ) { + } + catch( std::exception& ) { if( !success ) electionFailed(meid); throw; } @@ -347,19 +355,19 @@ namespace mongo { assert( !rs.lockedByMe() ); assert( !rs.myConfig().arbiterOnly ); assert( rs.myConfig().slaveDelay == 0 ); - try { - _electSelf(); - } - catch(RetryAfterSleepException&) { + try { + _electSelf(); + } + catch(RetryAfterSleepException&) { throw; } - catch(VoteException& ) { + catch(VoteException& ) { log() << "replSet not trying to elect self as responded yea to someone else recently" << rsLog; } - catch(DBException& e) { + catch(DBException& e) { log() << "replSet warning caught unexpected exception in electSelf() " << e.toString() << rsLog; } - catch(...) { + catch(...) 
{ log() << "replSet warning caught unexpected exception in electSelf()" << rsLog; } } diff --git a/db/repl/health.cpp b/db/repl/health.cpp index c75221c..762ca90 100644 --- a/db/repl/health.cpp +++ b/db/repl/health.cpp @@ -32,20 +32,22 @@ #include "../dbhelpers.h" namespace mongo { + /* decls for connections.h */ - ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M()); + ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M()); mutex ScopedConn::mapMutex("ScopedConn::mapMutex"); } -namespace mongo { +namespace mongo { using namespace mongoutils::html; using namespace bson; static RamLog _rsLog; Tee *rsLog = &_rsLog; + extern bool replSetBlind; - string ago(time_t t) { + string ago(time_t t) { if( t == 0 ) return ""; time_t x = time(0) - t; @@ -58,14 +60,14 @@ namespace mongo { s.precision(2); s << x / 60.0 << " mins"; } - else { + else { s.precision(2); s << x / 3600.0 << " hrs"; } return s.str(); } - void Member::summarizeMember(stringstream& s) const { + void Member::summarizeMember(stringstream& s) const { s << tr(); { stringstream u; @@ -89,27 +91,29 @@ namespace mongo { s << td(h); } s << td(config().votes); - { + s << td(config().priority); + { string stateText = state().toString(); if( _config.hidden ) stateText += " (hidden)"; - if( ok || stateText.empty() ) + if( ok || stateText.empty() ) s << td(stateText); // text blank if we've never connected else s << td( grey(str::stream() << "(was " << state().toString() << ')', true) ); } s << td( grey(hbinfo().lastHeartbeatMsg,!ok) ); stringstream q; - q << "/_replSetOplog?" << id(); + q << "/_replSetOplog?_id=" << id(); s << td( a(q.str(), "", never ? "?" : hbinfo().opTime.toString()) ); if( hbinfo().skew > INT_MIN ) { s << td( grey(str::stream() << hbinfo().skew,!ok) ); - } else + } + else s << td(""); s << _tr(); } - - string ReplSetImpl::stateAsHtml(MemberState s) { + + string ReplSetImpl::stateAsHtml(MemberState s) { if( s.s == MemberState::RS_STARTUP ) return a("", "serving still starting up, or still trying to initiate the set", "STARTUP"); if( s.s == MemberState::RS_PRIMARY ) return a("", "this server thinks it is primary", "PRIMARY"); if( s.s == MemberState::RS_SECONDARY ) return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY"); @@ -122,7 +126,7 @@ namespace mongo { return ""; } - string MemberState::toString() const { + string MemberState::toString() const { if( s == MemberState::RS_STARTUP ) return "STARTUP"; if( s == MemberState::RS_PRIMARY ) return "PRIMARY"; if( s == MemberState::RS_SECONDARY ) return "SECONDARY"; @@ -143,9 +147,9 @@ namespace mongo { set skip; be e = op["ts"]; - if( e.type() == Date || e.type() == Timestamp ) { + if( e.type() == Date || e.type() == Timestamp ) { OpTime ot = e._opTime(); - ss << td( time_t_to_String_short( ot.getSecs() ) ); + ss << td( time_t_to_String_short( ot.getSecs() ) ); ss << td( ot.toString() ); skip.insert("ts"); } @@ -155,7 +159,8 @@ namespace mongo { if( e.type() == NumberLong ) { ss << "" << hex << e.Long() << "\n"; skip.insert("h"); - } else + } + else ss << td("?"); ss << td(op["op"].valuestrsafe()); @@ -164,20 +169,17 @@ namespace mongo { skip.insert("ns"); ss << ""; - for( bo::iterator i(op); i.more(); ) { + for( bo::iterator i(op); i.more(); ) { be e = i.next(); if( skip.count(e.fieldName()) ) continue; ss << e.toString() << ' '; } - ss << ""; - - ss << ""; - ss << '\n'; + ss << "\n"; } - void ReplSetImpl::_getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const { + void ReplSetImpl::_getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) 
const { const Member *m = findById(server_id); - if( m == 0 ) { + if( m == 0 ) { ss << "Error : can't find a member with id: " << server_id << '\n'; return; } @@ -187,21 +189,29 @@ namespace mongo { //const bo fields = BSON( "o" << false << "o2" << false ); const bo fields; - ScopedDbConnection conn(m->fullName()); + /** todo fix we might want an so timeout here */ + DBClientConnection conn(false, 0, /*timeout*/ 20); + { + string errmsg; + if( !conn.connect(m->fullName(), errmsg) ) { + ss << "couldn't connect to " << m->fullName() << ' ' << errmsg; + return; + } + } - auto_ptr c = conn->query(rsoplog, Query().sort("$natural",1), 20, 0, &fields); - if( c.get() == 0 ) { + auto_ptr c = conn.query(rsoplog, Query().sort("$natural",1), 20, 0, &fields); + if( c.get() == 0 ) { ss << "couldn't query " << rsoplog; return; } static const char *h[] = {"ts","optime", "h","op","ns","rest",0}; ss << "\n"; - + "\n"; + ss << table(h, true); //ss << "
\n";
         int n = 0;
@@ -211,17 +221,17 @@ namespace mongo {
         while( c->more() ) {
             bo o = c->next();
             otLast = o["ts"]._opTime();
-            if( otFirst.isNull() ) 
+            if( otFirst.isNull() )
                 otFirst = otLast;
             say(ss, o);
-            n++;            
+            n++;
         }
         if( n == 0 ) {
             ss << rsoplog << " is empty\n";
         }
-        else { 
-            auto_ptr<DBClientCursor> c = conn->query(rsoplog, Query().sort("$natural",-1), 20, 0, &fields);
-            if( c.get() == 0 ) { 
+        else {
+            auto_ptr<DBClientCursor> c = conn.query(rsoplog, Query().sort("$natural",-1), 20, 0, &fields);
+            if( c.get() == 0 ) {
                 ss << "couldn't query [2] " << rsoplog;
                 return;
             }
@@ -230,7 +240,7 @@ namespace mongo {
             otEnd = o["ts"]._opTime();
             while( 1 ) {
                 stringstream z;
-                if( o["ts"]._opTime() == otLast ) 
+                if( o["ts"]._opTime() == otLast )
                     break;
                 say(z, o);
                 x = z.str() + x;
@@ -253,32 +263,31 @@ namespace mongo {
             ss.precision(3);
             if( h < 72 )
                 ss << h << " hours";
-            else 
+            else
                 ss << h / 24.0 << " days";
            ss << "</p>
\n"; } - - conn.done(); } - void ReplSetImpl::_summarizeAsHtml(stringstream& s) const { + void ReplSetImpl::_summarizeAsHtml(stringstream& s) const { s << table(0, false); s << tr("Set name:", _name); s << tr("Majority up:", elect.aMajoritySeemsToBeUp()?"yes":"no" ); s << _table(); - const char *h[] = {"Member", - "id", - "Up", - "cctime", - "Last heartbeat", - "Votes", "State", "Status", - "optime", - "skew", - 0}; + const char *h[] = {"Member", + "id", + "Up", + "cctime", + "Last heartbeat", + "Votes", "Priority", "State", "Messages", + "optime", + "skew", + 0 + }; s << table(h); - /* this is to sort the member rows by their ordinal _id, so they show up in the same + /* this is to sort the member rows by their ordinal _id, so they show up in the same order on all the different web ui's; that is less confusing for the operator. */ map mp; @@ -287,13 +296,13 @@ namespace mongo { readlocktry lk("local.replset.minvalid", 300); if( lk.got() ) { BSONObj mv; - if( Helpers::getSingleton("local.replset.minvalid", mv) ) { + if( Helpers::getSingleton("local.replset.minvalid", mv) ) { myMinValid = "minvalid:" + mv["ts"]._opTime().toString(); } } else myMinValid = "."; } - catch(...) { + catch(...) { myMinValid = "exception fetching minvalid"; } @@ -301,25 +310,26 @@ namespace mongo { stringstream s; /* self row */ s << tr() << td(_self->fullName() + " (me)") << - td(_self->id()) << - td("1") << //up - td(ago(started)) << - td("") << // last heartbeat - td(ToString(_self->config().votes)) << - td( stateAsHtml(box.getState()) + (_self->config().hidden?" (hidden)":"") ); + td(_self->id()) << + td("1") << //up + td(ago(started)) << + td("") << // last heartbeat + td(ToString(_self->config().votes)) << + td(ToString(_self->config().priority)) << + td( stateAsHtml(box.getState()) + (_self->config().hidden?" (hidden)":"") ); s << td( _hbmsg ); stringstream q; - q << "/_replSetOplog?" 
<< _self->id(); + q << "/_replSetOplog?_id=" << _self->id(); s << td( a(q.str(), myMinValid, theReplSet->lastOpTimeWritten.toString()) ); s << td(""); // skew s << _tr(); - mp[_self->hbinfo().id()] = s.str(); + mp[_self->hbinfo().id()] = s.str(); } Member *m = head(); while( m ) { - stringstream s; + stringstream s; m->summarizeMember(s); - mp[m->hbinfo().id()] = s.str(); + mp[m->hbinfo().id()] = s.str(); m = m->next(); } @@ -333,26 +343,27 @@ namespace mongo { _rsLog.toHTML( s ); } - const Member* ReplSetImpl::findById(unsigned id) const { + const Member* ReplSetImpl::findById(unsigned id) const { if( id == _self->id() ) return _self; for( Member *m = head(); m; m = m->next() ) - if( m->id() == id ) + if( m->id() == id ) return m; return 0; } - void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const { + void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const { vector v; // add self { - HostAndPort h(getHostName(), cmdLine.port); - BSONObjBuilder bb; bb.append("_id", (int) _self->id()); - bb.append("name", h.toString()); + bb.append("name", _self->fullName()); bb.append("health", 1.0); bb.append("state", (int) box.getState().s); + bb.append("stateStr", box.getState().toString()); + bb.appendTimestamp("optime", lastOpTimeWritten.asDate()); + bb.appendDate("optimeDate", lastOpTimeWritten.getSecs() * 1000LL); string s = _self->lhb(); if( !s.empty() ) bb.append("errmsg", s); @@ -365,9 +376,19 @@ namespace mongo { BSONObjBuilder bb; bb.append("_id", (int) m->id()); bb.append("name", m->fullName()); - bb.append("health", m->hbinfo().health); + double h = m->hbinfo().health; + bb.append("health", h); bb.append("state", (int) m->state().s); + if( h == 0 ) { + // if we can't connect the state info is from the past and could be confusing to show + bb.append("stateStr", "(not reachable/healthy)"); + } + else { + bb.append("stateStr", m->state().toString()); + } bb.append("uptime", (unsigned) (m->hbinfo().upSince ? (time(0)-m->hbinfo().upSince) : 0)); + bb.appendTimestamp("optime", m->hbinfo().opTime.asDate()); + bb.appendDate("optimeDate", m->hbinfo().opTime.getSecs() * 1000LL); bb.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat); string s = m->lhb(); if( !s.empty() ) @@ -380,10 +401,12 @@ namespace mongo { b.appendTimeT("date", time(0)); b.append("myState", box.getState().s); b.append("members", v); + if( replSetBlind ) + b.append("blind",true); // to avoid confusion if set...normally never set except for testing. 
} - static struct Test : public UnitTest { - void run() { + static struct Test : public UnitTest { + void run() { HealthOptions a,b; assert( a == b ); assert( a.isDefault() ); diff --git a/db/repl/health.h b/db/repl/health.h index 645a3b5..a32db00 100644 --- a/db/repl/health.h +++ b/db/repl/health.h @@ -23,8 +23,8 @@ namespace mongo { /* throws */ bool requestHeartbeat(string setname, string fromHost, string memberFullName, BSONObj& result, int myConfigVersion, int& theirConfigVersion, bool checkEmpty = false); - struct HealthOptions { - HealthOptions() { + struct HealthOptions { + HealthOptions() { heartbeatSleepMillis = 2000; heartbeatTimeoutMillis = 10000; heartbeatConnRetries = 2; @@ -42,8 +42,8 @@ namespace mongo { uassert(13113, "bad replset heartbeat option", heartbeatTimeoutMillis >= 10); } - bool operator==(const HealthOptions& r) const { - return heartbeatSleepMillis==r.heartbeatSleepMillis && heartbeatTimeoutMillis==r.heartbeatTimeoutMillis && heartbeatConnRetries==heartbeatConnRetries; + bool operator==(const HealthOptions& r) const { + return heartbeatSleepMillis==r.heartbeatSleepMillis && heartbeatTimeoutMillis==r.heartbeatTimeoutMillis && heartbeatConnRetries==heartbeatConnRetries; } }; diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index b39fad7..3972466 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -31,7 +31,7 @@ #include "../../util/unittest.h" #include "../instance.h" -namespace mongo { +namespace mongo { using namespace bson; @@ -42,7 +42,7 @@ namespace mongo { long long HeartbeatInfo::timeDown() const { if( up() ) return 0; - if( downSince == 0 ) + if( downSince == 0 ) return 0; // still waiting on first heartbeat return jsTime() - downSince; } @@ -53,10 +53,10 @@ namespace mongo { virtual bool adminOnly() const { return false; } CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( replSetBlind ) + if( replSetBlind ) return false; - /* we don't call ReplSetCommand::check() here because heartbeat + /* we don't call ReplSetCommand::check() here because heartbeat checks many things that are pre-initialization. */ if( !replSet ) { errmsg = "not running with --replSet"; @@ -65,12 +65,12 @@ namespace mongo { /* we want to keep heartbeat connections open when relinquishing primary. tag them here. 
*/ { - MessagingPort *mp = cc()._mp; - if( mp ) + MessagingPort *mp = cc().port(); + if( mp ) mp->tag |= 1; } - if( cmdObj["pv"].Int() != 1 ) { + if( cmdObj["pv"].Int() != 1 ) { errmsg = "incompatible replset protocol version"; return false; } @@ -86,7 +86,7 @@ namespace mongo { } result.append("rs", true); - if( cmdObj["checkEmpty"].trueValue() ) { + if( cmdObj["checkEmpty"].trueValue() ) { result.append("hasData", replHasDatabases()); } if( theReplSet == 0 ) { @@ -98,7 +98,7 @@ namespace mongo { return false; } - if( theReplSet->name() != cmdObj.getStringField("replSetHeartbeat") ) { + if( theReplSet->name() != cmdObj.getStringField("replSetHeartbeat") ) { errmsg = "repl set names do not match (2)"; result.append("mismatch", true); return false; @@ -118,8 +118,8 @@ namespace mongo { } cmdReplSetHeartbeat; /* throws dbexception */ - bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result, int myCfgVersion, int& theirCfgVersion, bool checkEmpty) { - if( replSetBlind ) { + bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result, int myCfgVersion, int& theirCfgVersion, bool checkEmpty) { + if( replSetBlind ) { //sleepmillis( rand() ); return false; } @@ -144,8 +144,8 @@ namespace mongo { public: ReplSetHealthPollTask(const HostAndPort& hh, const HeartbeatInfo& mm) : h(hh), m(mm) { } - string name() { return "ReplSetHealthPollTask"; } - void doWork() { + string name() const { return "ReplSetHealthPollTask"; } + void doWork() { if ( !theReplSet ) { log(2) << "theReplSet not initialized yet, skipping health poll this round" << rsLog; return; @@ -153,7 +153,7 @@ namespace mongo { HeartbeatInfo mem = m; HeartbeatInfo old = mem; - try { + try { BSONObj info; int theirConfigVersion = -10000; @@ -163,15 +163,17 @@ namespace mongo { time_t after = mem.lastHeartbeat = time(0); // we set this on any response - we don't get this far if couldn't connect because exception is thrown - try { - mem.skew = 0; - long long t = info["time"].Long(); - if( t > after ) + if ( info["time"].isNumber() ) { + long long t = info["time"].numberLong(); + if( t > after ) mem.skew = (int) (t - after); - else if( t < before ) + else if( t < before ) mem.skew = (int) (t - before); // negative } - catch(...) { + else { + // it won't be there if remote hasn't initialized yet + if( info.hasElement("time") ) + warning() << "heatbeat.time isn't a number: " << info << endl; mem.skew = INT_MIN; } @@ -182,7 +184,7 @@ namespace mongo { } if( ok ) { if( mem.upSince == 0 ) { - log() << "replSet info " << h.toString() << " is now up" << rsLog; + log() << "replSet info " << h.toString() << " is up" << rsLog; mem.upSince = mem.lastHeartbeat; } mem.health = 1.0; @@ -193,17 +195,20 @@ namespace mongo { be cfg = info["config"]; if( cfg.ok() ) { // received a new config - boost::function f = + boost::function f = boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy()); theReplSet->mgr->send(f); } } - else { + else { down(mem, info.getStringField("errmsg")); } } - catch(...) { - down(mem, "connect/transport error"); + catch(DBException& e) { + down(mem, e.what()); + } + catch(...) 
{ + down(mem, "something unusual went wrong"); } m = mem; @@ -212,9 +217,9 @@ namespace mongo { static time_t last = 0; time_t now = time(0); bool changed = mem.changed(old); - if( changed ) { - if( old.hbstate != mem.hbstate ) - log() << "replSet " << h.toString() << ' ' << mem.hbstate.toString() << rsLog; + if( changed ) { + if( old.hbstate != mem.hbstate ) + log() << "replSet member " << h.toString() << ' ' << mem.hbstate.toString() << rsLog; } if( changed || now-last>4 ) { last = now; @@ -228,18 +233,18 @@ namespace mongo { if( mem.upSince || mem.downSince == 0 ) { mem.upSince = 0; mem.downSince = jsTime(); - log() << "replSet info " << h.toString() << " is now down (or slow to respond)" << rsLog; + log() << "replSet info " << h.toString() << " is down (or slow to respond): " << msg << rsLog; } mem.lastHeartbeatMsg = msg; } }; - void ReplSetImpl::endOldHealthTasks() { + void ReplSetImpl::endOldHealthTasks() { unsigned sz = healthTasks.size(); for( set::iterator i = healthTasks.begin(); i != healthTasks.end(); i++ ) (*i)->halt(); healthTasks.clear(); - if( sz ) + if( sz ) DEV log() << "replSet debug: cleared old tasks " << sz << endl; } @@ -251,8 +256,8 @@ namespace mongo { void startSyncThread(); - /** called during repl set startup. caller expects it to return fairly quickly. - note ReplSet object is only created once we get a config - so this won't run + /** called during repl set startup. caller expects it to return fairly quickly. + note ReplSet object is only created once we get a config - so this won't run until the initiation. */ void ReplSetImpl::startThreads() { diff --git a/db/repl/manager.cpp b/db/repl/manager.cpp index 862ac46..ed39c31 100644 --- a/db/repl/manager.cpp +++ b/db/repl/manager.cpp @@ -1,4 +1,4 @@ -/* @file manager.cpp +/* @file manager.cpp */ /** @@ -23,20 +23,20 @@ namespace mongo { - enum { + enum { NOPRIMARY = -2, SELFPRIMARY = -1 }; /* check members OTHER THAN US to see if they think they are primary */ - const Member * Manager::findOtherPrimary(bool& two) { + const Member * Manager::findOtherPrimary(bool& two) { two = false; Member *m = rs->head(); Member *p = 0; while( m ) { DEV assert( m != rs->_self ); if( m->state().primary() && m->hbinfo().up() ) { - if( p ) { + if( p ) { two = true; return 0; } @@ -44,33 +44,36 @@ namespace mongo { } m = m->next(); } - if( p ) + if( p ) noteARemoteIsPrimary(p); return p; } - Manager::Manager(ReplSetImpl *_rs) : - task::Server("rs Manager"), rs(_rs), busyWithElectSelf(false), _primary(NOPRIMARY) - { + Manager::Manager(ReplSetImpl *_rs) : + task::Server("rs Manager"), rs(_rs), busyWithElectSelf(false), _primary(NOPRIMARY) { } - - Manager::~Manager() { - log() << "ERROR: ~Manager should never be called" << rsLog; + + Manager::~Manager() { + /* we don't destroy the replset object we sit in; however, the destructor could have thrown on init. + the log message below is just a reminder to come back one day and review this code more, and to + make it cleaner. 
+ */ + log() << "info: ~Manager called" << rsLog; rs->mgr = 0; - assert(false); } - void Manager::starting() { + void Manager::starting() { Client::initThread("rs Manager"); } - void Manager::noteARemoteIsPrimary(const Member *m) { + void Manager::noteARemoteIsPrimary(const Member *m) { if( rs->box.getPrimary() == m ) return; rs->_self->lhb() = ""; if( rs->iAmArbiterOnly() ) { rs->box.set(MemberState::RS_ARBITER, m); - } else { + } + else { rs->box.noteRemoteIsPrimary(m); } } @@ -87,9 +90,8 @@ namespace mongo { const Member *p = rs->box.getPrimary(); if( p && p != rs->_self ) { - if( !p->hbinfo().up() || - !p->hbinfo().hbstate.primary() ) - { + if( !p->hbinfo().up() || + !p->hbinfo().hbstate.primary() ) { p = 0; rs->box.setOtherPrimary(0); } @@ -101,36 +103,36 @@ namespace mongo { p2 = findOtherPrimary(two); if( two ) { /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */ - log() << "replSet warning DIAG two primaries (transiently)" << rsLog; + log() << "replSet info two primaries (transiently)" << rsLog; return; } } if( p2 ) { /* someone else thinks they are primary. */ - if( p == p2 ) { + if( p == p2 ) { // we thought the same; all set. return; } if( p == 0 ) { - noteARemoteIsPrimary(p2); + noteARemoteIsPrimary(p2); return; } // todo xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx if( p != rs->_self ) { // switch primary from oldremotep->newremotep2 - noteARemoteIsPrimary(p2); + noteARemoteIsPrimary(p2); return; } /* we thought we were primary, yet now someone else thinks they are. */ if( !rs->elect.aMajoritySeemsToBeUp() ) { /* we can't see a majority. so the other node is probably the right choice. */ - noteARemoteIsPrimary(p2); + noteARemoteIsPrimary(p2); return; } - /* ignore for now, keep thinking we are master. - this could just be timing (we poll every couple seconds) or could indicate - a problem? if it happens consistently for a duration of time we should + /* ignore for now, keep thinking we are master. + this could just be timing (we poll every couple seconds) or could indicate + a problem? if it happens consistently for a duration of time we should alert the sysadmin. */ return; @@ -138,17 +140,17 @@ namespace mongo { /* didn't find anyone who wants to be primary */ - if( p ) { + if( p ) { /* we are already primary */ - if( p != rs->_self ) { + if( p != rs->_self ) { rs->sethbmsg("error p != rs->self in checkNewState"); log() << "replSet " << p->fullName() << rsLog; log() << "replSet " << rs->_self->fullName() << rsLog; return; } - if( rs->elect.shouldRelinquish() ) { + if( rs->elect.shouldRelinquish() ) { log() << "replSet can't see a majority of the set, relinquishing primary" << rsLog; rs->relinquish(); } @@ -162,7 +164,7 @@ namespace mongo { /* TODO : CHECK PRIORITY HERE. can't be elected if priority zero. */ /* no one seems to be primary. shall we try to elect ourself? */ - if( !rs->elect.aMajoritySeemsToBeUp() ) { + if( !rs->elect.aMajoritySeemsToBeUp() ) { static time_t last; static int n; int ll = 0; @@ -175,15 +177,15 @@ namespace mongo { busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one. } - try { - rs->elect.electSelf(); + try { + rs->elect.electSelf(); } catch(RetryAfterSleepException&) { /* we want to process new inbounds before trying this again. so we just put a checkNewstate in the queue for eval later. */ requeue(); } - catch(...) { - log() << "replSet error unexpected assertion in rs manager" << rsLog; + catch(...) 
{ + log() << "replSet error unexpected assertion in rs manager" << rsLog; } busyWithElectSelf = false; } diff --git a/db/repl/multicmd.h b/db/repl/multicmd.h index 9eb9a17..df7c4e5 100644 --- a/db/repl/multicmd.h +++ b/db/repl/multicmd.h @@ -21,7 +21,7 @@ #include "../../util/background.h" #include "connections.h" -namespace mongo { +namespace mongo { struct Target { Target(string hostport) : toHost(hostport), ok(false) { } @@ -33,38 +33,37 @@ namespace mongo { /* -- implementation ------------- */ - class _MultiCommandJob : public BackgroundJob { + class _MultiCommandJob : public BackgroundJob { public: BSONObj& cmd; Target& d; _MultiCommandJob(BSONObj& _cmd, Target& _d) : cmd(_cmd), d(_d) { } + private: - string name() { return "MultiCommandJob"; } + string name() const { return "MultiCommandJob"; } void run() { - try { + try { ScopedConn c(d.toHost); d.ok = c.runCommand("admin", cmd, d.result); } - catch(DBException&) { + catch(DBException&) { DEV log() << "dev caught dbexception on multiCommand " << d.toHost << rsLog; } } }; - inline void multiCommand(BSONObj cmd, list& L) { - typedef shared_ptr<_MultiCommandJob> P; - list
<P>
jobs; - list _jobs; + inline void multiCommand(BSONObj cmd, list& L) { + list jobs; - for( list::iterator i = L.begin(); i != L.end(); i++ ) { + for( list::iterator i = L.begin(); i != L.end(); i++ ) { Target& d = *i; _MultiCommandJob *j = new _MultiCommandJob(cmd, d); - jobs.push_back(P(j)); - _jobs.push_back(j); + j->go(); + jobs.push_back(j); } - BackgroundJob::go(_jobs); - BackgroundJob::wait(_jobs,5); + for( list::iterator i = jobs.begin(); i != jobs.end(); i++ ) { + (*i)->wait(); + } } - } diff --git a/db/repl/replset_commands.cpp b/db/repl/replset_commands.cpp index 328b0ab..dc8567a 100644 --- a/db/repl/replset_commands.cpp +++ b/db/repl/replset_commands.cpp @@ -24,7 +24,9 @@ #include "../../util/mongoutils/html.h" #include "../../client/dbclient.h" -namespace mongo { +using namespace bson; + +namespace mongo { void checkMembersUpForConfigChange(const ReplSetConfig& cfg, bool initial); @@ -50,7 +52,7 @@ namespace mongo { } // may not need this, but if removed check all tests still work: - if( !check(errmsg, result) ) + if( !check(errmsg, result) ) return false; if( cmdObj.hasElement("blind") ) { @@ -61,6 +63,7 @@ namespace mongo { } } cmdReplSetTest; + /** get rollback id */ class CmdReplSetGetRBID : public ReplSetCommand { public: /* todo: ideally this should only change on rollbacks NOT on mongod restarts also. fix... */ @@ -68,26 +71,28 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal"; } - CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") { + CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") { rbid = (int) curTimeMillis(); } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( !check(errmsg, result) ) + if( !check(errmsg, result) ) return false; result.append("rbid",rbid); return true; } } cmdReplSetRBID; - using namespace bson; - void incRBID() { + /** we increment the rollback id on every rollback event. */ + void incRBID() { cmdReplSetRBID.rbid++; } - int getRBID(DBClientConnection *c) { + + /** helper to get rollback id from another server. */ + int getRBID(DBClientConnection *c) { bo info; c->simpleCommand("admin", &info, "replSetGetRBID"); return info["rbid"].numberInt(); - } + } class CmdReplSetGetStatus : public ReplSetCommand { public: @@ -98,7 +103,10 @@ namespace mongo { } CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( !check(errmsg, result) ) + if ( cmdObj["forShell"].trueValue() ) + lastError.disableForCommand(); + + if( !check(errmsg, result) ) return false; theReplSet->summarizeStatus(result); return true; @@ -115,7 +123,7 @@ namespace mongo { } CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { } virtual bool run(const string& a, BSONObj& b, string& errmsg, BSONObjBuilder& c, bool d) { - try { + try { rwlock_try_write lk(mutex); return _run(a,b,errmsg,c,d); } @@ -125,16 +133,16 @@ namespace mongo { } private: bool _run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( !check(errmsg, result) ) + if( !check(errmsg, result) ) return false; - if( !theReplSet->box.getState().primary() ) { + if( !theReplSet->box.getState().primary() ) { errmsg = "replSetReconfig command must be sent to the current replica set primary."; return false; } { - // just make sure we can get a write lock before doing anything else. we'll reacquire one - // later. 
of course it could be stuck then, but this check lowers the risk if weird things + // just make sure we can get a write lock before doing anything else. we'll reacquire one + // later. of course it could be stuck then, but this check lowers the risk if weird things // are up - we probably don't want a change to apply 30 minutes after the initial attempt. time_t t = time(0); writelock lk(""); @@ -159,7 +167,7 @@ namespace mongo { log() << "replSet replSetReconfig config object parses ok, " << newConfig.members.size() << " members specified" << rsLog; - if( !ReplSetConfig::legalChange(theReplSet->getConfig(), newConfig, errmsg) ) { + if( !ReplSetConfig::legalChange(theReplSet->getConfig(), newConfig, errmsg) ) { return false; } @@ -170,7 +178,7 @@ namespace mongo { theReplSet->haveNewConfig(newConfig, true); ReplSet::startupStatusMsg = "replSetReconfig'd"; } - catch( DBException& e ) { + catch( DBException& e ) { log() << "replSet replSetReconfig exception: " << e.what() << rsLog; throw; } @@ -182,8 +190,11 @@ namespace mongo { class CmdReplSetFreeze : public ReplSetCommand { public: virtual void help( stringstream &help ) const { - help << "Enable / disable failover for the set - locks current primary as primary even if issues occur.\nFor use during system maintenance.\n"; - help << "{ replSetFreeze : }"; + help << "{ replSetFreeze : }"; + help << "'freeze' state of member to the extent we can do that. What this really means is that\n"; + help << "this node will not attempt to become primary until the time period specified expires.\n"; + help << "You can call again with {replSetFreeze:0} to unfreeze sooner.\n"; + help << "A process restart unfreezes the member also.\n"; help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } @@ -191,15 +202,22 @@ namespace mongo { virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; - errmsg = "not yet implemented"; /*TODO*/ - return false; + int secs = (int) cmdObj.firstElement().numberInt(); + if( theReplSet->freeze(secs) ) { + if( secs == 0 ) + result.append("info","unfreezing"); + } + if( secs == 1 ) + result.append("warning", "you really want to freeze for only 1 second?"); + return true; } } cmdReplSetFreeze; class CmdReplSetStepDown: public ReplSetCommand { public: virtual void help( stringstream &help ) const { - help << "Step down as primary. Will not try to reelect self or 1 minute.\n"; + help << "{ replSetStepDown : }\n"; + help << "Step down as primary. 
Will not try to reelect self for the specified time period (1 minute if no numeric secs value specified).\n"; help << "(If another member with same priority takes over in the meantime, it will stay primary.)\n"; help << "http://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } @@ -212,7 +230,10 @@ namespace mongo { errmsg = "not primary so can't step down"; return false; } - return theReplSet->stepDown(); + int secs = (int) cmdObj.firstElement().numberInt(); + if( secs == 0 ) + secs = 60; + return theReplSet->stepDown(secs); } } cmdReplSetStepDown; @@ -222,45 +243,46 @@ namespace mongo { class ReplSetHandler : public DbWebHandler { public: - ReplSetHandler() : DbWebHandler( "_replSet" , 1 , true ){} + ReplSetHandler() : DbWebHandler( "_replSet" , 1 , true ) {} virtual bool handles( const string& url ) const { return startsWith( url , "/_replSet" ); } - virtual void handle( const char *rq, string url, + virtual void handle( const char *rq, string url, BSONObj params, string& responseMsg, int& responseCode, - vector& headers, const SockAddr &from ){ - - string s = str::after(url, "/_replSetOplog?"); - if( !s.empty() ) - responseMsg = _replSetOplog(s); + vector& headers, const SockAddr &from ) { + + if( url == "/_replSetOplog" ) { + responseMsg = _replSetOplog(params); + } else responseMsg = _replSet(); responseCode = 200; } + string _replSetOplog(bo parms) { + int _id = (int) str::toUnsigned( parms["_id"].String() ); - string _replSetOplog(string parms) { stringstream s; string t = "Replication oplog"; s << start(t); s << p(t); - if( theReplSet == 0 ) { - if( cmdLine._replSet.empty() ) + if( theReplSet == 0 ) { + if( cmdLine._replSet.empty() ) s << p("Not using --replSet"); else { - s << p("Still starting up, or else set is not yet " + a("http://www.mongodb.org/display/DOCS/Replica+Set+Configuration#InitialSetup", "", "initiated") + s << p("Still starting up, or else set is not yet " + a("http://www.mongodb.org/display/DOCS/Replica+Set+Configuration#InitialSetup", "", "initiated") + ".
" + ReplSet::startupStatusMsg); } } else { try { - theReplSet->getOplogDiagsAsHtml(stringToNum(parms.c_str()), s); + theReplSet->getOplogDiagsAsHtml(_id, s); } - catch(std::exception& e) { - s << "error querying oplog: " << e.what() << '\n'; + catch(std::exception& e) { + s << "error querying oplog: " << e.what() << '\n'; } } @@ -269,20 +291,20 @@ namespace mongo { } /* /_replSet show replica set status in html format */ - string _replSet() { + string _replSet() { stringstream s; s << start("Replica Set Status " + prettyHostName()); - s << p( a("/", "back", "Home") + " | " + + s << p( a("/", "back", "Home") + " | " + a("/local/system.replset/?html=1", "", "View Replset Config") + " | " + - a("/replSetGetStatus?text", "", "replSetGetStatus") + " | " + + a("/replSetGetStatus?text=1", "", "replSetGetStatus") + " | " + a("http://www.mongodb.org/display/DOCS/Replica+Sets", "", "Docs") ); - if( theReplSet == 0 ) { - if( cmdLine._replSet.empty() ) + if( theReplSet == 0 ) { + if( cmdLine._replSet.empty() ) s << p("Not using --replSet"); else { - s << p("Still starting up, or else set is not yet " + a("http://www.mongodb.org/display/DOCS/Replica+Set+Configuration#InitialSetup", "", "initiated") + s << p("Still starting up, or else set is not yet " + a("http://www.mongodb.org/display/DOCS/Replica+Set+Configuration#InitialSetup", "", "initiated") + ".
" + ReplSet::startupStatusMsg); } } diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index 1c0444a..90ed9f4 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -20,9 +20,12 @@ #include "../client.h" #include "../../client/dbclient.h" #include "../dbhelpers.h" +#include "../../s/d_logic.h" #include "rs.h" +#include "connections.h" +#include "../repl.h" -namespace mongo { +namespace mongo { using namespace bson; @@ -30,18 +33,18 @@ namespace mongo { ReplSet *theReplSet = 0; extern string *discoveredSeed; - void ReplSetImpl::sethbmsg(string s, int logLevel) { + void ReplSetImpl::sethbmsg(string s, int logLevel) { static time_t lastLogged; _hbmsgTime = time(0); - if( s == _hbmsg ) { + if( s == _hbmsg ) { // unchanged if( _hbmsgTime - lastLogged < 60 ) return; } unsigned sz = s.size(); - if( sz >= 256 ) + if( sz >= 256 ) memcpy(_hbmsg, s.c_str(), 255); else { _hbmsg[sz] = 0; @@ -53,7 +56,7 @@ namespace mongo { } } - void ReplSetImpl::assumePrimary() { + void ReplSetImpl::assumePrimary() { assert( iAmPotentiallyHot() ); writelock lk("admin."); // so we are synchronized with _logOp() box.setSelfPrimary(_self); @@ -62,17 +65,26 @@ namespace mongo { void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); } - void ReplSetImpl::relinquish() { + const bool closeOnRelinquish = true; + + void ReplSetImpl::relinquish() { if( box.getState().primary() ) { log() << "replSet relinquishing primary state" << rsLog; - changeState(MemberState::RS_RECOVERING); - - /* close sockets that were talking to us */ - /*log() << "replSet closing sockets after reqlinquishing primary" << rsLog; - MessagingPort::closeAllSockets(1);*/ + changeState(MemberState::RS_SECONDARY); + + if( closeOnRelinquish ) { + /* close sockets that were talking to us so they don't blithly send many writes that will fail + with "not master" (of course client could check result code, but in case they are not) + */ + log() << "replSet closing client sockets after reqlinquishing primary" << rsLog; + MessagingPort::closeAllSockets(1); + } + + // now that all connections were closed, strip this mongod from all sharding details + // if and when it gets promoted to a primary again, only then it should reload the sharding state + // the rationale here is that this mongod won't bring stale state when it regains primaryhood + shardingState.resetShardingState(); - // todo: > - //changeState(MemberState::RS_SECONDARY); } else if( box.getState().startup2() ) { // ? add comment @@ -81,26 +93,48 @@ namespace mongo { } /* look freshly for who is primary - includes relinquishing ourself. */ - void ReplSetImpl::forgetPrimary() { - if( box.getState().primary() ) + void ReplSetImpl::forgetPrimary() { + if( box.getState().primary() ) relinquish(); else { box.setOtherPrimary(0); } } - bool ReplSetImpl::_stepDown() { + // for the replSetStepDown command + bool ReplSetImpl::_stepDown(int secs) { lock lk(this); - if( box.getState().primary() ) { - changeState(MemberState::RS_RECOVERING); - elect.steppedDown = time(0) + 60; - log() << "replSet info stepped down as primary" << rsLog; + if( box.getState().primary() ) { + elect.steppedDown = time(0) + secs; + log() << "replSet info stepping down as primary secs=" << secs << rsLog; + relinquish(); return true; } return false; } - void ReplSetImpl::msgUpdateHBInfo(HeartbeatInfo h) { + bool ReplSetImpl::_freeze(int secs) { + lock lk(this); + /* note if we are primary we remain primary but won't try to elect ourself again until + this time period expires. 
+ */ + if( secs == 0 ) { + elect.steppedDown = 0; + log() << "replSet info 'unfreezing'" << rsLog; + } + else { + if( !box.getState().primary() ) { + elect.steppedDown = time(0) + secs; + log() << "replSet info 'freezing' for " << secs << " seconds" << rsLog; + } + else { + log() << "replSet info received freeze command but we are primary" << rsLog; + } + } + return true; + } + + void ReplSetImpl::msgUpdateHBInfo(HeartbeatInfo h) { for( Member *m = _members.head(); m; m=m->next() ) { if( m->id() == h.id() ) { m->_hbinfo = h; @@ -109,7 +143,7 @@ namespace mongo { } } - list ReplSetImpl::memberHostnames() const { + list ReplSetImpl::memberHostnames() const { list L; L.push_back(_self->h()); for( Member *m = _members.head(); m; m = m->next() ) @@ -118,6 +152,7 @@ namespace mongo { } void ReplSetImpl::_fillIsMasterHost(const Member *m, vector& hosts, vector& passives, vector& arbiters) { + assert( m ); if( m->config().hidden ) return; @@ -126,8 +161,9 @@ namespace mongo { } else if( !m->config().arbiterOnly ) { if( m->config().slaveDelay ) { - /* hmmm - we don't list these as they are stale. */ - } else { + /* hmmm - we don't list these as they are stale. */ + } + else { passives.push_back(m->h().toString()); } } @@ -147,6 +183,7 @@ namespace mongo { _fillIsMasterHost(_self, hosts, passives, arbiters); for( Member *m = _members.head(); m; m = m->next() ) { + assert( m ); _fillIsMasterHost(m, hosts, passives, arbiters); } @@ -161,23 +198,27 @@ namespace mongo { } } - if( !isp ) { + if( !isp ) { const Member *m = sp.primary; if( m ) b.append("primary", m->h().toString()); } if( myConfig().arbiterOnly ) b.append("arbiterOnly", true); + if( myConfig().priority == 0 ) + b.append("passive", true); if( myConfig().slaveDelay ) b.append("slaveDelay", myConfig().slaveDelay); if( myConfig().hidden ) b.append("hidden", true); + if( !myConfig().buildIndexes ) + b.append("buildIndexes", false); } /** @param cfgString /, */ - void parseReplsetCmdLine(string cfgString, string& setname, vector& seeds, set& seedSet ) { - const char *p = cfgString.c_str(); + void parseReplsetCmdLine(string cfgString, string& setname, vector& seeds, set& seedSet ) { + const char *p = cfgString.c_str(); const char *slash = strchr(p, '/'); if( slash ) setname = string(p, slash-p); @@ -207,7 +248,8 @@ namespace mongo { //uassert(13101, "can't use localhost in replset host list", !m.isLocalHost()); if( m.isSelf() ) { log(1) << "replSet ignoring seed " << m.toString() << " (=self)" << rsLog; - } else + } + else seeds.push_back(m); if( *comma == 0 ) break; @@ -216,10 +258,9 @@ namespace mongo { } } - ReplSetImpl::ReplSetImpl(ReplSetCmdline& replSetCmdline) : elect(this), - _self(0), - mgr( new Manager(this) ) - { + ReplSetImpl::ReplSetImpl(ReplSetCmdline& replSetCmdline) : elect(this), + _self(0), + mgr( new Manager(this) ) { _cfg = 0; memset(_hbmsg, 0, sizeof(_hbmsg)); *_hbmsg = '.'; // temp...just to see @@ -240,20 +281,21 @@ namespace mongo { } for( set::iterator i = replSetCmdline.seedSet.begin(); i != replSetCmdline.seedSet.end(); i++ ) { if( i->isSelf() ) { - if( sss == 1 ) + if( sss == 1 ) log(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" 
<< rsLog; - } else + } + else log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog; } } void newReplUp(); - void ReplSetImpl::loadLastOpTimeWritten() { + void ReplSetImpl::loadLastOpTimeWritten() { //assert( lastOpTimeWritten.isNull() ); readlock lk(rsoplog); BSONObj o; - if( Helpers::getLast(rsoplog, o) ) { + if( Helpers::getLast(rsoplog, o) ) { lastH = o["h"].numberLong(); lastOpTimeWritten = o["ts"]._opTime(); uassert(13290, "bad replSet oplog entry?", !lastOpTimeWritten.isNull()); @@ -261,11 +303,11 @@ namespace mongo { } /* call after constructing to start - returns fairly quickly after launching its threads */ - void ReplSetImpl::_go() { - try { + void ReplSetImpl::_go() { + try { loadLastOpTimeWritten(); } - catch(std::exception& e) { + catch(std::exception& e) { log() << "replSet error fatal couldn't query the local " << rsoplog << " collection. Terminating mongod after 30 seconds." << rsLog; log() << e.what() << rsLog; sleepsecs(30); @@ -283,11 +325,17 @@ namespace mongo { extern BSONObj *getLastErrorDefault; + void ReplSetImpl::setSelfTo(Member *m) { + _self = m; + if( m ) _buildIndexes = m->config().buildIndexes; + else _buildIndexes = true; + } + /** @param reconf true if this is a reconfiguration and not an initial load of the configuration. @return true if ok; throws if config really bad; false if config doesn't include self */ bool ReplSetImpl::initFromConfig(ReplSetConfig& c, bool reconf) { - /* NOTE: haveNewConfig() writes the new config to disk before we get here. So + /* NOTE: haveNewConfig() writes the new config to disk before we get here. So we cannot error out at this point, except fatally. Check errors earlier. */ lock lk(this); @@ -302,25 +350,24 @@ namespace mongo { { unsigned nfound = 0; int me = 0; - for( vector::iterator i = c.members.begin(); i != c.members.end(); i++ ) { + for( vector::iterator i = c.members.begin(); i != c.members.end(); i++ ) { const ReplSetConfig::MemberCfg& m = *i; if( m.h.isSelf() ) { nfound++; me++; - if( !reconf || (_self && _self->id() == (unsigned) m._id) ) ; - else { + else { log() << "replSet " << _self->id() << ' ' << m._id << rsLog; assert(false); } } - else if( reconf ) { + else if( reconf ) { const Member *old = findById(m._id); - if( old ) { + if( old ) { nfound++; assert( (int) old->id() == m._id ); - if( old->config() == m ) { + if( old->config() == m ) { additive = false; } } @@ -328,16 +375,24 @@ namespace mongo { newOnes.push_back(&m); } } + + // change timeout settings, if necessary + ScopedConn conn(m.h.toString()); + conn.setTimeout(c.ho.heartbeatTimeoutMillis/1000.0); } if( me == 0 ) { + // initial startup with fastsync + if (!reconf && replSettings.fastsync) { + return false; + } // log() << "replSet config : " << _cfg->toString() << rsLog; - log() << "replSet error can't find self in the repl set configuration:" << rsLog; + log() << "replSet error self not present in the repl set configuration:" << rsLog; log() << c.toString() << rsLog; - assert(false); + uasserted(13497, "replSet error self not present in the configuration"); } uassert( 13302, "replSet error self appears twice in the repl set configuration", me<=1 ); - if( reconf && config().members.size() != nfound ) + if( reconf && config().members.size() != nfound ) additive = false; } @@ -347,14 +402,14 @@ namespace mongo { _name = _cfg->_id; assert( !_name.empty() ); - if( additive ) { + if( additive ) { log() << "replSet info : additive change to configuration" << rsLog; for( 
list::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) { const ReplSetConfig::MemberCfg* m = *i; Member *mi = new Member(m->h, m->_id, m, false); - /** we will indicate that new members are up() initially so that we don't relinquish our - primary state because we can't (transiently) see a majority. they should be up as we + /** we will indicate that new members are up() initially so that we don't relinquish our + primary state because we can't (transiently) see a majority. they should be up as we check that new members are up before getting here on reconfig anyway. */ mi->get_hbinfo().health = 0.1; @@ -373,20 +428,30 @@ namespace mongo { int oldPrimaryId = -1; { const Member *p = box.getPrimary(); - if( p ) + if( p ) oldPrimaryId = p->id(); } forgetPrimary(); - _self = 0; - for( vector::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) { + + bool iWasArbiterOnly = _self ? iAmArbiterOnly() : false; + setSelfTo(0); + for( vector::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) { const ReplSetConfig::MemberCfg& m = *i; Member *mi; if( m.h.isSelf() ) { assert( _self == 0 ); - mi = _self = new Member(m.h, m._id, &m, true); + mi = new Member(m.h, m._id, &m, true); + setSelfTo(mi); + + // if the arbiter status changed + if (iWasArbiterOnly ^ iAmArbiterOnly()) { + _changeArbiterState(); + } + if( (int)mi->id() == oldPrimaryId ) box.setSelfPrimary(mi); - } else { + } + else { mi = new Member(m.h, m._id, &m, false); _members.push(mi); startHealthTaskFor(mi); @@ -397,26 +462,57 @@ namespace mongo { return true; } + void startSyncThread(); + + void ReplSetImpl::_changeArbiterState() { + if (iAmArbiterOnly()) { + changeState(MemberState::RS_ARBITER); + + // if there is an oplog, free it + // not sure if this is necessary, maybe just leave the oplog and let + // the user delete it if they want the space? + writelock lk(rsoplog); + Client::Context c(rsoplog); + NamespaceDetails *d = nsdetails(rsoplog); + if (d) { + string errmsg; + bob res; + dropCollection(rsoplog, errmsg, res); + + // clear last op time to force initial sync (if the arbiter + // becomes a "normal" server again) + lastOpTimeWritten = OpTime(); + } + } + else { + changeState(MemberState::RS_RECOVERING); + + // oplog will be allocated when sync begins + /* TODO : could this cause two sync threads to exist (race condition)? */ + boost::thread t(startSyncThread); + } + } + // Our own config must be the first one. 
- bool ReplSetImpl::_loadConfigFinish(vector& cfgs) { + bool ReplSetImpl::_loadConfigFinish(vector& cfgs) { int v = -1; ReplSetConfig *highest = 0; int myVersion = -2000; int n = 0; - for( vector::iterator i = cfgs.begin(); i != cfgs.end(); i++ ) { + for( vector::iterator i = cfgs.begin(); i != cfgs.end(); i++ ) { ReplSetConfig& cfg = *i; if( ++n == 1 ) myVersion = cfg.version; - if( cfg.ok() && cfg.version > v ) { + if( cfg.ok() && cfg.version > v ) { highest = &cfg; v = cfg.version; } } assert( highest ); - if( !initFromConfig(*highest) ) + if( !initFromConfig(*highest) ) return false; - if( highest->version > myVersion && highest->version >= 0 ) { + if( highest->version > myVersion && highest->version >= 0 ) { log() << "replSet got config version " << highest->version << " from a remote, saving locally" << rsLog; writelock lk("admin."); highest->saveConfigLocally(BSONObj()); @@ -430,7 +526,7 @@ namespace mongo { startupStatusMsg = "loading " + rsConfigNs + " config (LOADINGCONFIG)"; try { vector configs; - try { + try { configs.push_back( ReplSetConfig(HostAndPort::me()) ); } catch(DBException& e) { @@ -438,26 +534,26 @@ namespace mongo { throw; } for( vector::const_iterator i = _seeds->begin(); i != _seeds->end(); i++ ) { - try { + try { configs.push_back( ReplSetConfig(*i) ); } - catch( DBException& e ) { + catch( DBException& e ) { log() << "replSet exception trying to load config from " << *i << " : " << e.toString() << rsLog; } } - if( discoveredSeed ) { + if( discoveredSeed ) { try { configs.push_back( ReplSetConfig(HostAndPort(*discoveredSeed)) ); } - catch( DBException& ) { + catch( DBException& ) { log(1) << "replSet exception trying to load config from discovered seed " << *discoveredSeed << rsLog; } } int nok = 0; int nempty = 0; - for( vector::iterator i = configs.begin(); i != configs.end(); i++ ) { + for( vector::iterator i = configs.begin(); i != configs.end(); i++ ) { if( i->ok() ) nok++; if( i->empty() ) @@ -469,7 +565,9 @@ namespace mongo { startupStatus = EMPTYCONFIG; startupStatusMsg = "can't get " + rsConfigNs + " config from self or any seed (EMPTYCONFIG)"; log() << "replSet can't get " << rsConfigNs << " config from self or any seed (EMPTYCONFIG)" << rsLog; - log(1) << "replSet have you ran replSetInitiate yet?" << rsLog; + static unsigned once; + if( ++once == 1 ) + log() << "replSet info you may need to run replSetInitiate -- rs.initiate() in the shell -- if that is not already done" << rsLog; if( _seeds->size() == 0 ) log(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog; } @@ -483,13 +581,13 @@ namespace mongo { continue; } - if( !_loadConfigFinish(configs) ) { + if( !_loadConfigFinish(configs) ) { log() << "replSet info Couldn't load config yet. Sleeping 20sec and will try again." 
<< rsLog; sleepsecs(20); continue; } } - catch(DBException& e) { + catch(DBException& e) { startupStatus = BADCONFIG; startupStatusMsg = "replSet error loading set config (BADCONFIG)"; log() << "replSet error loading configurations " << e.toString() << rsLog; @@ -504,30 +602,34 @@ namespace mongo { startupStatus = STARTED; } - void ReplSetImpl::_fatal() - { + void ReplSetImpl::_fatal() { //lock l(this); box.set(MemberState::RS_FATAL, 0); //sethbmsg("fatal error"); - log() << "replSet error fatal, stopping replication" << rsLog; + log() << "replSet error fatal, stopping replication" << rsLog; } - void ReplSet::haveNewConfig(ReplSetConfig& newConfig, bool addComment) { + void ReplSet::haveNewConfig(ReplSetConfig& newConfig, bool addComment) { lock l(this); // convention is to lock replset before taking the db rwlock writelock lk(""); bo comment; if( addComment ) comment = BSON( "msg" << "Reconfig set" << "version" << newConfig.version ); newConfig.saveConfigLocally(comment); - try { + try { initFromConfig(newConfig, true); log() << "replSet replSetReconfig new config saved locally" << rsLog; } - catch(DBException& e) { + catch(DBException& e) { + if( e.getCode() == 13497 /* removed from set */ ) { + cc().shutdown(); + dbexit( EXIT_CLEAN , "removed from replica set" ); // never returns + assert(0); + } log() << "replSet error unexpected exception in haveNewConfig() : " << e.toString() << rsLog; _fatal(); } - catch(...) { + catch(...) { log() << "replSet error unexpected exception in haveNewConfig()" << rsLog; _fatal(); } @@ -538,30 +640,33 @@ namespace mongo { ReplSetConfig c(o); if( c.version > rs->config().version ) theReplSet->haveNewConfig(c, false); - else { - log() << "replSet info msgReceivedNewConfig but version isn't higher " << - c.version << ' ' << rs->config().version << rsLog; + else { + log() << "replSet info msgReceivedNewConfig but version isn't higher " << + c.version << ' ' << rs->config().version << rsLog; } } - /* forked as a thread during startup - it can run quite a while looking for config. but once found, + /* forked as a thread during startup + it can run quite a while looking for config. but once found, a separate thread takes over as ReplSetImpl::Manager, and this thread terminates. 
*/ void startReplSets(ReplSetCmdline *replSetCmdline) { Client::initThread("startReplSets"); - try { + try { assert( theReplSet == 0 ); if( replSetCmdline == 0 ) { assert(!replSet); return; } + if( !noauth ) { + cc().getAuthenticationInfo()->authorize("local"); + } (theReplSet = new ReplSet(*replSetCmdline))->go(); } - catch(std::exception& e) { + catch(std::exception& e) { log() << "replSet caught exception in startReplSets thread: " << e.what() << rsLog; - if( theReplSet ) + if( theReplSet ) theReplSet->fatal(); } cc().shutdown(); @@ -569,10 +674,9 @@ namespace mongo { } -namespace boost { +namespace boost { - void assertion_failed(char const * expr, char const * function, char const * file, long line) - { + void assertion_failed(char const * expr, char const * function, char const * file, long line) { mongo::log() << "boost assertion failure " << expr << ' ' << function << ' ' << file << ' ' << line << endl; } diff --git a/db/repl/rs.h b/db/repl/rs.h index 6c4d9a8..1419ad6 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -43,6 +43,7 @@ namespace mongo { class Member : public List1::Base { public: Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self); + string fullName() const { return h().toString(); } const ReplSetConfig::MemberCfg& config() const { return _config; } const HeartbeatInfo& hbinfo() const { return _hbinfo; } @@ -51,10 +52,12 @@ namespace mongo { MemberState state() const { return _hbinfo.hbstate; } const HostAndPort& h() const { return _h; } unsigned id() const { return _hbinfo.id(); } + bool potentiallyHot() const { return _config.potentiallyHot(); } // not arbiter, not priority 0 void summarizeMember(stringstream& s) const; - friend class ReplSetImpl; + private: + friend class ReplSetImpl; const ReplSetConfig::MemberCfg _config; const HostAndPort _h; HeartbeatInfo _hbinfo; @@ -65,8 +68,8 @@ namespace mongo { bool busyWithElectSelf; int _primary; - /** @param two - if true two primaries were seen. this can happen transiently, in addition to our - polling being only occasional. in this case null is returned, but the caller should + /** @param two - if true two primaries were seen. this can happen transiently, in addition to our + polling being only occasional. in this case null is returned, but the caller should not assume primary itself in that situation. */ const Member* findOtherPrimary(bool& two); @@ -75,7 +78,7 @@ namespace mongo { virtual void starting(); public: Manager(ReplSetImpl *rs); - ~Manager(); + virtual ~Manager(); void msgReceivedNewConfig(BSONObj); void msgCheckNewState(); }; @@ -84,7 +87,7 @@ namespace mongo { class Consensus { ReplSetImpl &rs; - struct LastYea { + struct LastYea { LastYea() : when(0), who(0xffffffff) { } time_t when; unsigned who; @@ -96,12 +99,12 @@ namespace mongo { bool weAreFreshest(bool& allUp, int& nTies); bool sleptLast; // slept last elect() pass public: - Consensus(ReplSetImpl *t) : rs(*t) { + Consensus(ReplSetImpl *t) : rs(*t) { sleptLast = false; steppedDown = 0; } - /* if we've stepped down, this is when we are allowed to try to elect ourself again. + /* if we've stepped down, this is when we are allowed to try to elect ourself again. todo: handle possible weirdnesses at clock skews etc. */ time_t steppedDown; @@ -115,40 +118,40 @@ namespace mongo { }; /** most operations on a ReplSet object should be done while locked. that logic implemented here. 
*/ - class RSBase : boost::noncopyable { + class RSBase : boost::noncopyable { public: const unsigned magic; void assertValid() { assert( magic == 0x12345677 ); } private: - mutex m; + mongo::mutex m; int _locked; ThreadLocalValue _lockedByMe; protected: RSBase() : magic(0x12345677), m("RSBase"), _locked(0) { } - ~RSBase() { + ~RSBase() { /* this can happen if we throw in the constructor; otherwise never happens. thus we log it as it is quite unusual. */ log() << "replSet ~RSBase called" << rsLog; } - class lock { + class lock { RSBase& rsbase; auto_ptr sl; public: - lock(RSBase* b) : rsbase(*b) { + lock(RSBase* b) : rsbase(*b) { if( rsbase._lockedByMe.get() ) return; // recursive is ok... sl.reset( new scoped_lock(rsbase.m) ); DEV assert(rsbase._locked == 0); - rsbase._locked++; + rsbase._locked++; rsbase._lockedByMe.set(true); } - ~lock() { + ~lock() { if( sl.get() ) { assert( rsbase._lockedByMe.get() ); DEV assert(rsbase._locked == 1); rsbase._lockedByMe.set(false); - rsbase._locked--; + rsbase._locked--; } } }; @@ -157,11 +160,11 @@ namespace mongo { /* for asserts */ bool locked() const { return _locked != 0; } - /* if true, is locked, and was locked by this thread. note if false, it could be in the lock or not for another + /* if true, is locked, and was locked by this thread. note if false, it could be in the lock or not for another just for asserts & such so we can make the contracts clear on who locks what when. we don't use these locks that frequently, so the little bit of overhead is fine. */ - bool lockedByMe() { return _lockedByMe.get(); } + bool lockedByMe() { return _lockedByMe.get(); } }; class ReplSetHealthPollTask; @@ -174,19 +177,19 @@ namespace mongo { MemberState state; const Member *primary; }; - const SP get() { + const SP get() { scoped_lock lk(m); return sp; } MemberState getState() const { return sp.state; } const Member* getPrimary() const { return sp.primary; } - void change(MemberState s, const Member *self) { + void change(MemberState s, const Member *self) { scoped_lock lk(m); - if( sp.state != s ) { + if( sp.state != s ) { log() << "replSet " << s.toString() << rsLog; } sp.state = s; - if( s.primary() ) { + if( s.primary() ) { sp.primary = self; } else { @@ -194,17 +197,17 @@ namespace mongo { sp.primary = 0; } } - void set(MemberState s, const Member *p) { + void set(MemberState s, const Member *p) { scoped_lock lk(m); sp.state = s; sp.primary = p; } void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); } - void setOtherPrimary(const Member *mem) { + void setOtherPrimary(const Member *mem) { scoped_lock lk(m); assert( !sp.state.primary() ); sp.primary = mem; } - void noteRemoteIsPrimary(const Member *remote) { + void noteRemoteIsPrimary(const Member *remote) { scoped_lock lk(m); if( !sp.state.secondary() && !sp.state.fatal() ) sp.state = MemberState::RS_RECOVERING; @@ -212,10 +215,10 @@ namespace mongo { } StateBox() : m("StateBox") { } private: - mutex m; + mongo::mutex m; SP sp; }; - + void parseReplsetCmdLine(string cfgString, string& setname, vector& seeds, set& seedSet ); /** Parameter given to the --replSet command line option (parsed). @@ -230,15 +233,15 @@ namespace mongo { }; /* information about the entire repl set, such as the various servers in the set, and their state */ - /* note: We currently do not free mem when the set goes away - it is assumed the replset is a + /* note: We currently do not free mem when the set goes away - it is assumed the replset is a singleton and long lived. 
*/ class ReplSetImpl : protected RSBase { public: /** info on our state if the replset isn't yet "up". for example, if we are pre-initiation. */ - enum StartupStatus { - PRESTART=0, LOADINGCONFIG=1, BADCONFIG=2, EMPTYCONFIG=3, - EMPTYUNREACHABLE=4, STARTED=5, SOON=6 + enum StartupStatus { + PRESTART=0, LOADINGCONFIG=1, BADCONFIG=2, EMPTYCONFIG=3, + EMPTYUNREACHABLE=4, STARTED=5, SOON=6 }; static StartupStatus startupStatus; static string startupStatusMsg; @@ -260,18 +263,21 @@ namespace mongo { void relinquish(); void forgetPrimary(); protected: - bool _stepDown(); + bool _stepDown(int secs); + bool _freeze(int secs); private: void assumePrimary(); void loadLastOpTimeWritten(); void changeState(MemberState s); + const Member* getMemberToSyncTo(); + void _changeArbiterState(); protected: // "heartbeat message" - // sent in requestHeartbeat respond in field "hbm" + // sent in requestHeartbeat respond in field "hbm" char _hbmsg[256]; // we change this unlocked, thus not an stl::string time_t _hbmsgTime; // when it was logged public: - void sethbmsg(string s, int logLevel = 0); + void sethbmsg(string s, int logLevel = 0); protected: bool initFromConfig(ReplSetConfig& c, bool reconf=false); // true if ok; throws if config really bad; false if config doesn't include self void _fillIsMaster(BSONObjBuilder&); @@ -281,7 +287,7 @@ namespace mongo { MemberState state() const { return box.getState(); } void _fatal(); void _getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const; - void _summarizeAsHtml(stringstream&) const; + void _summarizeAsHtml(stringstream&) const; void _summarizeStatus(BSONObjBuilder&) const; // for replSetGetStatus command /* throws exception if a problem initializing. */ @@ -295,7 +301,7 @@ namespace mongo { const vector *_seeds; ReplSetConfig *_cfg; - /** load our configuration from admin.replset. try seed machines too. + /** load our configuration from admin.replset. try seed machines too. @return true if ok; throws if config really bad; false if config doesn't include self */ bool _loadConfigFinish(vector& v); @@ -306,7 +312,9 @@ namespace mongo { bool iAmArbiterOnly() const { return myConfig().arbiterOnly; } bool iAmPotentiallyHot() const { return myConfig().potentiallyHot(); } protected: - Member *_self; + Member *_self; + bool _buildIndexes; // = _self->config().buildIndexes + void setSelfTo(Member *); // use this as it sets buildIndexes var private: List1 _members; /* all members of the set EXCEPT self. 
*/ @@ -330,7 +338,7 @@ namespace mongo { private: /* pulling data from primary related - see rs_sync.cpp */ - bool initialSyncOplogApplication(string hn, const Member *primary, OpTime applyGTE, OpTime minValid); + bool initialSyncOplogApplication(const Member *primary, OpTime applyGTE, OpTime minValid); void _syncDoInitialSync(); void syncDoInitialSync(); void _syncThread(); @@ -340,21 +348,29 @@ namespace mongo { unsigned _syncRollback(OplogReader& r); void syncRollback(OplogReader& r); void syncFixUp(HowToFixUp& h, OplogReader& r); + bool _getOplogReader(OplogReader& r, string& hn); + bool _isStale(OplogReader& r, const string& hn); public: void syncThread(); }; - class ReplSet : public ReplSetImpl { + class ReplSet : public ReplSetImpl { public: ReplSet(ReplSetCmdline& replSetCmdline) : ReplSetImpl(replSetCmdline) { } - bool stepDown() { return _stepDown(); } + // for the replSetStepDown command + bool stepDown(int secs) { return _stepDown(secs); } - string selfFullName() { + // for the replSetFreeze command + bool freeze(int secs) { return _freeze(secs); } + + string selfFullName() { lock lk(this); return _self->fullName(); } + bool buildIndexes() const { return _buildIndexes; } + /* call after constructing to start - returns fairly quickly after la[unching its threads */ void go() { _go(); } @@ -369,7 +385,7 @@ namespace mongo { void summarizeStatus(BSONObjBuilder& b) const { _summarizeStatus(b); } void fillIsMaster(BSONObjBuilder& b) { _fillIsMaster(b); } - /* we have a new config (reconfig) - apply it. + /* we have a new config (reconfig) - apply it. @param comment write a no-op comment to the oplog about it. only makes sense if one is primary and initiating the reconf. */ void haveNewConfig(ReplSetConfig& c, bool comment); @@ -380,16 +396,16 @@ namespace mongo { bool lockedByMe() { return RSBase::lockedByMe(); } // heartbeat msg to send to others; descriptive diagnostic info - string hbmsg() const { + string hbmsg() const { if( time(0)-_hbmsgTime > 120 ) return ""; - return _hbmsg; + return _hbmsg; } }; - /** base class for repl set commands. checks basic things such as in rs mode before the command + /** base class for repl set commands. checks basic things such as in rs mode before the command does its real work */ - class ReplSetCommand : public Command { + class ReplSetCommand : public Command { protected: ReplSetCommand(const char * s, bool show=false) : Command(s, show) { } virtual bool slaveOk() const { return true; } @@ -398,14 +414,14 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual void help( stringstream &help ) const { help << "internal"; } bool check(string& errmsg, BSONObjBuilder& result) { - if( !replSet ) { + if( !replSet ) { errmsg = "not running with --replSet"; return false; } if( theReplSet == 0 ) { result.append("startupStatus", ReplSet::startupStatus); errmsg = ReplSet::startupStatusMsg.empty() ? "replset unknown error 2" : ReplSet::startupStatusMsg; - if( ReplSet::startupStatus == 3 ) + if( ReplSet::startupStatus == 3 ) result.append("info", "run rs.initiate(...) 
if not yet done for the set"); return false; } @@ -415,9 +431,8 @@ namespace mongo { /** inlines ----------------- */ - inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) : - _config(*c), _h(h), _hbinfo(ord) - { + inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) : + _config(*c), _h(h), _hbinfo(ord) { if( self ) _hbinfo.health = 1.0; } diff --git a/db/repl/rs_config.cpp b/db/repl/rs_config.cpp index 371507d..5998f51 100644 --- a/db/repl/rs_config.cpp +++ b/db/repl/rs_config.cpp @@ -27,11 +27,11 @@ using namespace bson; -namespace mongo { +namespace mongo { void logOpInitiate(const bo&); - void assertOnlyHas(BSONObj o, const set& fields) { + void assertOnlyHas(BSONObj o, const set& fields) { BSONObj::iterator i(o); while( i.more() ) { BSONElement e = i.next(); @@ -41,7 +41,7 @@ namespace mongo { } } - list ReplSetConfig::otherMemberHostnames() const { + list ReplSetConfig::otherMemberHostnames() const { list L; for( vector::const_iterator i = members.begin(); i != members.end(); i++ ) { if( !i->h.isSelf() ) @@ -49,12 +49,12 @@ namespace mongo { } return L; } - + /* comment MUST only be set when initiating the set by the initiator */ - void ReplSetConfig::saveConfigLocally(bo comment) { + void ReplSetConfig::saveConfigLocally(bo comment) { checkRsConfig(); log() << "replSet info saving a newer config version to local.system.replset" << rsLog; - { + { writelock lk(""); Client::Context cx( rsConfigNs ); cx.db()->flushFiles(true); @@ -70,21 +70,21 @@ namespace mongo { } DEV log() << "replSet saveConfigLocally done" << rsLog; } - - /*static*/ - /*void ReplSetConfig::receivedNewConfig(BSONObj cfg) { + + /*static*/ + /*void ReplSetConfig::receivedNewConfig(BSONObj cfg) { if( theReplSet ) return; // this is for initial setup only, so far. 
todo ReplSetConfig c(cfg); writelock lk("admin."); - if( theReplSet ) + if( theReplSet ) return; c.saveConfigLocally(bo()); }*/ - bo ReplSetConfig::MemberCfg::asBson() const { + bo ReplSetConfig::MemberCfg::asBson() const { bob b; b << "_id" << _id; b.append("host", h.toString()); @@ -93,18 +93,28 @@ namespace mongo { if( arbiterOnly ) b << "arbiterOnly" << true; if( slaveDelay ) b << "slaveDelay" << slaveDelay; if( hidden ) b << "hidden" << hidden; + if( !buildIndexes ) b << "buildIndexes" << buildIndexes; + if( !tags.empty() ) { + BSONArrayBuilder a; + for( set::const_iterator i = tags.begin(); i != tags.end(); i++ ) + a.append(*i); + b.appendArray("tags", a.done()); + } + if( !initialSync.isEmpty() ) { + b << "initialSync" << initialSync; + } return b.obj(); } - bo ReplSetConfig::asBson() const { + bo ReplSetConfig::asBson() const { bob b; b.append("_id", _id).append("version", version); if( !ho.isDefault() || !getLastErrorDefaults.isEmpty() ) { bob settings; if( !ho.isDefault() ) - settings << "heartbeatConnRetries " << ho.heartbeatConnRetries << - "heartbeatSleep" << ho.heartbeatSleepMillis / 1000 << - "heartbeatTimeout" << ho.heartbeatTimeoutMillis / 1000; + settings << "heartbeatConnRetries " << ho.heartbeatConnRetries << + "heartbeatSleep" << ho.heartbeatSleepMillis / 1000.0 << + "heartbeatTimeout" << ho.heartbeatTimeoutMillis / 1000.0; if( !getLastErrorDefaults.isEmpty() ) settings << "getLastErrorDefaults" << getLastErrorDefaults; b << "settings" << settings.obj(); @@ -122,7 +132,7 @@ namespace mongo { uassert(13126, "bad Member config", expr); } - void ReplSetConfig::MemberCfg::check() const{ + void ReplSetConfig::MemberCfg::check() const { mchk(_id >= 0 && _id <= 255); mchk(priority >= 0 && priority <= 1000); mchk(votes >= 0 && votes <= 100); @@ -130,41 +140,80 @@ namespace mongo { uassert(13437, "slaveDelay requires priority be zero", slaveDelay == 0 || priority == 0); uassert(13438, "bad slaveDelay value", slaveDelay >= 0 && slaveDelay <= 3600 * 24 * 366); uassert(13439, "priority must be 0 when hidden=true", priority == 0 || !hidden); + uassert(13477, "priority must be 0 when buildIndexes=false", buildIndexes || priority == 0); + + if (!initialSync.isEmpty()) { + static const string legal[] = {"state", "name", "_id","optime"}; + static const set legals(legal, legal + 4); + assertOnlyHas(initialSync, legals); + + if (initialSync.hasElement("state")) { + uassert(13525, "initialSync source state must be 1 or 2", + initialSync["state"].isNumber() && + (initialSync["state"].Number() == 1 || + initialSync["state"].Number() == 2)); + } + if (initialSync.hasElement("name")) { + uassert(13526, "initialSync source name must be a string", + initialSync["name"].type() == mongo::String); + } + if (initialSync.hasElement("_id")) { + uassert(13527, "initialSync source _id must be a number", + initialSync["_id"].isNumber()); + } + if (initialSync.hasElement("optime")) { + uassert(13528, "initialSync source optime must be a timestamp", + initialSync["optime"].type() == mongo::Timestamp || + initialSync["optime"].type() == mongo::Date); + } + } } /** @param o old config - @param n new config + @param n new config */ - /*static*/ bool ReplSetConfig::legalChange(const ReplSetConfig& o, const ReplSetConfig& n, string& errmsg) { + /*static*/ + bool ReplSetConfig::legalChange(const ReplSetConfig& o, const ReplSetConfig& n, string& errmsg) { assert( theReplSet ); - if( o._id != n._id ) { - errmsg = "set name may not change"; + if( o._id != n._id ) { + errmsg = "set name may not change"; return 
false; } /* TODO : wonder if we need to allow o.version < n.version only, which is more lenient. - if someone had some intermediate config this node doesnt have, that could be + if someone had some intermediate config this node doesnt have, that could be necessary. but then how did we become primary? so perhaps we are fine as-is. */ - if( o.version + 1 != n.version ) { + if( o.version + 1 != n.version ) { errmsg = "version number wrong"; return false; } map old; - for( vector::const_iterator i = o.members.begin(); i != o.members.end(); i++ ) { + for( vector::const_iterator i = o.members.begin(); i != o.members.end(); i++ ) { old[i->h] = &(*i); } int me = 0; - for( vector::const_iterator i = n.members.begin(); i != n.members.end(); i++ ) { + for( vector::const_iterator i = n.members.begin(); i != n.members.end(); i++ ) { const ReplSetConfig::MemberCfg& m = *i; - if( old.count(m.h) ) { - if( old[m.h]->_id != m._id ) { + if( old.count(m.h) ) { + const ReplSetConfig::MemberCfg& oldCfg = *old[m.h]; + if( oldCfg._id != m._id ) { log() << "replSet reconfig error with member: " << m.h.toString() << rsLog; uasserted(13432, "_id may not change for members"); } + if( oldCfg.buildIndexes != m.buildIndexes ) { + log() << "replSet reconfig error with member: " << m.h.toString() << rsLog; + uasserted(13476, "buildIndexes may not change for members"); + } + /* are transitions to and from arbiterOnly guaranteed safe? if not, we should disallow here. + there is a test at replsets/replsetarb3.js */ + if( oldCfg.arbiterOnly != m.arbiterOnly ) { + log() << "replSet reconfig error with member: " << m.h.toString() << " arbiterOnly cannot change. remove and readd the member instead " << rsLog; + uasserted(13510, "arbiterOnly may not change for members"); + } } - if( m.h.isSelf() ) + if( m.h.isSelf() ) me++; } @@ -172,24 +221,33 @@ namespace mongo { /* TODO : MORE CHECKS HERE */ - log() << "replSet TODO : don't allow removal of a node until we handle it at the removed node end?" << endl; + DEV log() << "replSet TODO : don't allow removal of a node until we handle it at the removed node end?" << endl; // we could change its votes to zero perhaps instead as a short term... return true; } - void ReplSetConfig::clear() { + void ReplSetConfig::clear() { version = -5; _ok = false; } - void ReplSetConfig::checkRsConfig() const { + void ReplSetConfig::checkRsConfig() const { uassert(13132, - "nonmatching repl set name in _id field; check --replSet command line", - _id == cmdLine.ourSetName()); + "nonmatching repl set name in _id field; check --replSet command line", + _id == cmdLine.ourSetName()); uassert(13308, "replSet bad config version #", version > 0); uassert(13133, "replSet bad config no members", members.size() >= 1); - uassert(13309, "replSet bad config maximum number of members is 7 (for now)", members.size() <= 7); + uassert(13309, "replSet bad config maximum number of members is 12", members.size() <= 12); + { + unsigned voters = 0; + for( vector::const_iterator i = members.begin(); i != members.end(); ++i ) { + if( i->votes ) + voters++; + } + uassert(13612, "replSet bad config maximum number of voting members is 7", voters <= 7); + uassert(13613, "replSet bad config no voting members", voters > 0); + } } void ReplSetConfig::from(BSONObj o) { @@ -213,7 +271,8 @@ namespace mongo { if( settings["heartbeatTimeout"].ok() ) ho.heartbeatTimeoutMillis = (unsigned) (settings["heartbeatTimeout"].Number() * 1000); ho.check(); - try { getLastErrorDefaults = settings["getLastErrorDefaults"].Obj().copy(); } catch(...) 
{ } + try { getLastErrorDefaults = settings["getLastErrorDefaults"].Obj().copy(); } + catch(...) { } } set hosts; @@ -231,43 +290,57 @@ namespace mongo { BSONObj mobj = members[i].Obj(); MemberCfg m; try { - static const string legal[] = {"_id","votes","priority","host","hidden","slaveDelay","arbiterOnly"}; - static const set legals(legal, legal + 7); + static const string legal[] = { + "_id","votes","priority","host", "hidden","slaveDelay", + "arbiterOnly","buildIndexes","tags","initialSync" + }; + static const set legals(legal, legal + 10); assertOnlyHas(mobj, legals); - try { + try { m._id = (int) mobj["_id"].Number(); - } catch(...) { + } + catch(...) { /* TODO: use of string exceptions may be problematic for reconfig case! */ - throw "_id must be numeric"; + throw "_id must be numeric"; } string s; try { s = mobj["host"].String(); m.h = HostAndPort(s); } - catch(...) { + catch(...) { throw string("bad or missing host field? ") + mobj.toString(); } - if( m.h.isLocalHost() ) + if( m.h.isLocalHost() ) localhosts++; m.arbiterOnly = mobj.getBoolField("arbiterOnly"); m.slaveDelay = mobj["slaveDelay"].numberInt(); if( mobj.hasElement("hidden") ) m.hidden = mobj.getBoolField("hidden"); + if( mobj.hasElement("buildIndexes") ) + m.buildIndexes = mobj.getBoolField("buildIndexes"); if( mobj.hasElement("priority") ) m.priority = mobj["priority"].Number(); if( mobj.hasElement("votes") ) m.votes = (unsigned) mobj["votes"].Number(); + if( mobj.hasElement("tags") ) { + vector v = mobj["tags"].Array(); + for( unsigned i = 0; i < v.size(); i++ ) + m.tags.insert( v[i].String() ); + } + if( mobj.hasElement("initialSync")) { + m.initialSync = mobj["initialSync"].Obj().getOwned(); + } m.check(); } - catch( const char * p ) { + catch( const char * p ) { log() << "replSet cfg parsing exception for members[" << i << "] " << p << rsLog; stringstream ss; ss << "replSet members[" << i << "] " << p; uassert(13107, ss.str(), false); } - catch(DBException& e) { + catch(DBException& e) { log() << "replSet cfg parsing exception for members[" << i << "] " << e.what() << rsLog; stringstream ss; ss << "bad config for member[" << i << "] " << e.what(); @@ -289,7 +362,7 @@ namespace mongo { uassert(13122, "bad repl set config?", expr); } - ReplSetConfig::ReplSetConfig(BSONObj cfg) { + ReplSetConfig::ReplSetConfig(BSONObj cfg) { clear(); from(cfg); configAssert( version < 0 /*unspecified*/ || (version >= 1 && version <= 5000) ); @@ -315,18 +388,19 @@ namespace mongo { BSONObj cmd = BSON( "replSetHeartbeat" << setname ); int theirVersion; BSONObj info; + log() << "trying to contact " << h.toString() << rsLog; bool ok = requestHeartbeat(setname, "", h.toString(), info, -2, theirVersion); - if( info["rs"].trueValue() ) { + if( info["rs"].trueValue() ) { // yes, it is a replicate set, although perhaps not yet initialized } else { if( !ok ) { log() << "replSet TEMP !ok heartbeating " << h.toString() << " on cfg load" << rsLog; - if( !info.isEmpty() ) + if( !info.isEmpty() ) log() << "replSet info " << h.toString() << " : " << info.toString() << rsLog; return; } - { + { stringstream ss; ss << "replSet error: member " << h.toString() << " is not in --replSet mode"; msgassertedNoTrace(13260, ss.str().c_str()); // not caught as not a user exception - we want it not caught @@ -343,7 +417,7 @@ namespace mongo { cfg = conn.findOne(rsConfigNs, Query()).getOwned(); count = conn.count(rsConfigNs); } - catch ( DBException& e) { + catch ( DBException& ) { if ( !h.isSelf() ) { throw; } @@ -356,14 +430,14 @@ namespace mongo { if( count > 1 ) 
uasserted(13109, str::stream() << "multiple rows in " << rsConfigNs << " not supported host: " << h.toString()); - + if( cfg.isEmpty() ) { version = EMPTYCONFIG; return; } version = -1; } - catch( DBException& e) { + catch( DBException& e) { version = v; log(level) << "replSet load config couldn't get from " << h.toString() << ' ' << e.what() << rsLog; return; diff --git a/db/repl/rs_config.h b/db/repl/rs_config.h index e39dad7..7d43fe6 100644 --- a/db/repl/rs_config.h +++ b/db/repl/rs_config.h @@ -23,7 +23,7 @@ #include "../../util/hostandport.h" #include "health.h" -namespace mongo { +namespace mongo { /* singleton config object is stored here */ const string rsConfigNs = "local.system.replset"; @@ -31,7 +31,7 @@ namespace mongo { class ReplSetConfig { enum { EMPTYCONFIG = -2 }; public: - /* if something is misconfigured, throws an exception. + /* if something is misconfigured, throws an exception. if couldn't be queried or is just blank, ok() will be false. */ ReplSetConfig(const HostAndPort& h); @@ -41,7 +41,7 @@ namespace mongo { bool ok() const { return _ok; } struct MemberCfg { - MemberCfg() : _id(-1), votes(1), priority(1.0), arbiterOnly(false), slaveDelay(0), hidden(false) { } + MemberCfg() : _id(-1), votes(1), priority(1.0), arbiterOnly(false), slaveDelay(0), hidden(false), buildIndexes(true) { } int _id; /* ordinal */ unsigned votes; /* how many votes this node gets. default 1. */ HostAndPort h; @@ -49,15 +49,17 @@ namespace mongo { bool arbiterOnly; int slaveDelay; /* seconds. int rather than unsigned for convenient to/front bson conversion. */ bool hidden; /* if set, don't advertise to drives in isMaster. for non-primaries (priority 0) */ + bool buildIndexes; /* if false, do not create any non-_id indexes */ + set tags; /* tagging for data center, rack, etc. */ + BSONObj initialSync; /* directions for initial sync source */ void check() const; /* check validity, assert if not. 
*/ BSONObj asBson() const; - bool potentiallyHot() const { - return !arbiterOnly && priority > 0; - } - bool operator==(const MemberCfg& r) const { - return _id==r._id && votes == r.votes && h == r.h && priority == r.priority && - arbiterOnly == r.arbiterOnly && slaveDelay == r.slaveDelay && hidden == r.hidden; + bool potentiallyHot() const { return !arbiterOnly && priority > 0; } + bool operator==(const MemberCfg& r) const { + return _id==r._id && votes == r.votes && h == r.h && priority == r.priority && + arbiterOnly == r.arbiterOnly && slaveDelay == r.slaveDelay && hidden == r.hidden && + buildIndexes == buildIndexes; } bool operator!=(const MemberCfg& r) const { return !(*this == r); } }; diff --git a/db/repl/rs_exception.h b/db/repl/rs_exception.h old mode 100755 new mode 100644 index e71cad2..fc372fc --- a/db/repl/rs_exception.h +++ b/db/repl/rs_exception.h @@ -1,15 +1,15 @@ -// @file rs_exception.h - -#pragma once - -namespace mongo { - - class VoteException : public std::exception { +// @file rs_exception.h + +#pragma once + +namespace mongo { + + class VoteException : public std::exception { public: - const char * what() const throw () { return "VoteException"; } + const char * what() const throw () { return "VoteException"; } }; - class RetryAfterSleepException : public std::exception { + class RetryAfterSleepException : public std::exception { public: const char * what() const throw () { return "RetryAfterSleepException"; } }; diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp index 3851c66..5a54059 100644 --- a/db/repl/rs_initialsync.cpp +++ b/db/repl/rs_initialsync.cpp @@ -15,6 +15,7 @@ */ #include "pch.h" +#include "../repl.h" #include "../client.h" #include "../../client/dbclient.h" #include "rs.h" @@ -33,15 +34,17 @@ namespace mongo { // add try/catch with sleep - void isyncassert(const char *msg, bool expr) { - if( !expr ) { + void isyncassert(const char *msg, bool expr) { + if( !expr ) { string m = str::stream() << "initial sync " << msg; theReplSet->sethbmsg(m, 0); uasserted(13404, m); } } - void ReplSetImpl::syncDoInitialSync() { + void ReplSetImpl::syncDoInitialSync() { + createOplog(); + while( 1 ) { try { _syncDoInitialSync(); @@ -54,14 +57,14 @@ namespace mongo { } } - bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, - bool slaveOk, bool useReplAuth, bool snapshot); + bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, + bool slaveOk, bool useReplAuth, bool snapshot); /* todo : progress metering to sethbmsg. */ static bool clone(const char *master, string db) { string err; - return cloneFrom(master, err, db, false, - /*slaveok later can be true*/ false, true, false); + return cloneFrom(master, err, db, false, + /* slave_ok */ true, true, false); } void _logOpObjRS(const BSONObj& op); @@ -71,11 +74,11 @@ namespace mongo { static void emptyOplog() { writelock lk(rsoplog); Client::Context ctx(rsoplog); - NamespaceDetails *d = nsdetails(rsoplog); + NamespaceDetails *d = nsdetails(rsoplog); - // temp - if( d && d->nrecords == 0 ) - return; // already empty, ok. + // temp + if( d && d->stats.nrecords == 0 ) + return; // already empty, ok. log(1) << "replSet empty oplog" << rsLog; d->emptyCappedCollection(rsoplog); @@ -84,10 +87,10 @@ namespace mongo { string errmsg; bob res; dropCollection(rsoplog, errmsg, res); - log() << "replSet recreated oplog so it is empty. todo optimize this..." 
<< rsLog; - createOplog();*/ + log() << "replSet recreated oplog so it is empty. todo optimize this..." << rsLog; + createOplog();*/ - // TEMP: restart to recreate empty oplog + // TEMP: restart to recreate empty oplog //log() << "replSet FATAL error during initial sync. mongod restart required." << rsLog; //dbexit( EXIT_CLEAN ); @@ -100,106 +103,182 @@ namespace mongo { */ } - void ReplSetImpl::_syncDoInitialSync() { - sethbmsg("initial sync pending",0); + /** + * Choose a member to sync from. + * + * The initalSync option is an object with 1 k/v pair: + * + * "state" : 1|2 + * "name" : "host" + * "_id" : N + * "optime" : t + * + * All except optime are exact matches. "optime" will find a secondary with + * an optime >= to the optime given. + */ + const Member* ReplSetImpl::getMemberToSyncTo() { + BSONObj sync = myConfig().initialSync; + bool secondaryOnly = false, isOpTime = false; + char *name = 0; + int id = -1; + OpTime optime; StateBox::SP sp = box.get(); assert( !sp.state.primary() ); // wouldn't make sense if we were. - const Member *cp = sp.primary; - if( cp == 0 ) { - sethbmsg("initial sync need a member to be primary",0); + // if it exists, we've already checked that these fields are valid in + // rs_config.cpp + if ( !sync.isEmpty() ) { + if (sync.hasElement("state")) { + if (sync["state"].Number() == 1) { + if (sp.primary) { + sethbmsg( str::stream() << "syncing to primary: " << sp.primary->fullName(), 0); + return const_cast(sp.primary); + } + else { + sethbmsg("couldn't clone from primary"); + return NULL; + } + } + else { + secondaryOnly = true; + } + } + if (sync.hasElement("name")) { + name = (char*)sync["name"].valuestr(); + } + if (sync.hasElement("_id")) { + id = (int)sync["_id"].Number(); + } + if (sync.hasElement("optime")) { + isOpTime = true; + optime = sync["optime"]._opTime(); + } + } + + for( Member *m = head(); m; m = m->next() ) { + if (!m->hbinfo().up() || + (m->state() != MemberState::RS_SECONDARY && + m->state() != MemberState::RS_PRIMARY) || + (secondaryOnly && m->state() != MemberState::RS_SECONDARY) || + (id != -1 && (int)m->id() != id) || + (name != 0 && strcmp(name, m->fullName().c_str()) != 0) || + (isOpTime && optime >= m->hbinfo().opTime)) { + continue; + } + + sethbmsg( str::stream() << "syncing to: " << m->fullName(), 0); + return const_cast(m); + } + + sethbmsg( str::stream() << "couldn't find a member matching the sync criteria: " << + "\nstate? " << (secondaryOnly ? "2" : "none") << + "\nname? " << (name ? name : "none") << + "\n_id? " << id << + "\noptime? " << optime.toStringPretty() ); + + return NULL; + } + + /** + * Do the initial sync for this member. 
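getMemberToSyncTo() above turns the optional initialSync criteria into a filter over the member list: a candidate must be up and in a readable state, must be a secondary when "state" : 2 was requested, must match the requested "name" and "_id" exactly, and must have an optime at least as recent as the requested one. A standalone sketch of that filter under simplified types; all names here are placeholders, not MongoDB APIs.

    #include <string>
    #include <vector>

    struct Candidate {              // illustrative stand-in for a replica set member
        bool up;
        bool primary;               // readable states are primary or secondary
        bool secondary;
        int id;
        std::string name;
        long long optime;           // simplified optime
    };

    struct SyncCriteria {           // mirrors the optional initialSync fields
        bool secondaryOnly = false; // "state" : 2
        int id = -1;                // "_id"; -1 means "not specified"
        std::string name;           // "name"; empty means "not specified"
        long long minOptime = -1;   // "optime"; -1 means "not specified"
    };

    // Returns the first member satisfying all requested criteria, or nullptr.
    const Candidate* pickSyncSource(const std::vector<Candidate>& members,
                                    const SyncCriteria& c) {
        for (const Candidate& m : members) {
            if (!m.up) continue;
            if (!m.primary && !m.secondary) continue;            // must be readable
            if (c.secondaryOnly && !m.secondary) continue;
            if (c.id != -1 && m.id != c.id) continue;
            if (!c.name.empty() && m.name != c.name) continue;
            if (c.minOptime != -1 && m.optime < c.minOptime) continue;
            return &m;
        }
        return nullptr;             // caller reports "couldn't find a member matching the sync criteria"
    }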
+ */ + void ReplSetImpl::_syncDoInitialSync() { + sethbmsg("initial sync pending",0); + + const Member *source = getMemberToSyncTo(); + if (!source) { + sethbmsg("initial sync need a member to be primary or secondary to do our initial sync", 0); sleepsecs(15); return; } - string masterHostname = cp->h().toString(); + string sourceHostname = source->h().toString(); OplogReader r; - if( !r.connect(masterHostname) ) { - sethbmsg( str::stream() << "initial sync couldn't connect to " << cp->h().toString() , 0); + if( !r.connect(sourceHostname) ) { + sethbmsg( str::stream() << "initial sync couldn't connect to " << source->h().toString() , 0); sleepsecs(15); return; } BSONObj lastOp = r.getLastOp(rsoplog); - if( lastOp.isEmpty() ) { + if( lastOp.isEmpty() ) { sethbmsg("initial sync couldn't read remote oplog", 0); sleepsecs(15); return; } OpTime startingTS = lastOp["ts"]._opTime(); - - { - /* make sure things aren't too flappy */ - sleepsecs(5); - isyncassert( "flapping?", box.getPrimary() == cp ); - BSONObj o = r.getLastOp(rsoplog); - isyncassert( "flapping [2]?", !o.isEmpty() ); - } - - sethbmsg("initial sync drop all databases", 0); - dropAllDatabasesExceptLocal(); -// sethbmsg("initial sync drop oplog", 0); -// emptyOplog(); - - list dbs = r.conn()->getDatabaseNames(); - for( list::iterator i = dbs.begin(); i != dbs.end(); i++ ) { - string db = *i; - if( db != "local" ) { - sethbmsg( str::stream() << "initial sync cloning db: " << db , 0); - bool ok; - { - writelock lk(db); - Client::Context ctx(db); - ok = clone(masterHostname.c_str(), db); - } - if( !ok ) { - sethbmsg( str::stream() << "initial sync error clone of " << db << " failed sleeping 5 minutes" ,0); - sleepsecs(300); - return; + if (replSettings.fastsync) { + log() << "fastsync: skipping database clone" << rsLog; + } + else { + sethbmsg("initial sync drop all databases", 0); + dropAllDatabasesExceptLocal(); + + sethbmsg("initial sync clone all databases", 0); + + list dbs = r.conn()->getDatabaseNames(); + for( list::iterator i = dbs.begin(); i != dbs.end(); i++ ) { + string db = *i; + if( db != "local" ) { + sethbmsg( str::stream() << "initial sync cloning db: " << db , 0); + bool ok; + { + writelock lk(db); + Client::Context ctx(db); + ok = clone(sourceHostname.c_str(), db); + } + if( !ok ) { + sethbmsg( str::stream() << "initial sync error clone of " << db << " failed sleeping 5 minutes" ,0); + sleepsecs(300); + return; + } } } } sethbmsg("initial sync query minValid",0); - /* our cloned copy will be strange until we apply oplog events that occurred + isyncassert( "initial sync source must remain readable throughout our initial sync", source->state().readable() ); + + /* our cloned copy will be strange until we apply oplog events that occurred through the process. we note that time point here. */ BSONObj minValid = r.getLastOp(rsoplog); - assert( !minValid.isEmpty() ); + isyncassert( "getLastOp is empty ", !minValid.isEmpty() ); OpTime mvoptime = minValid["ts"]._opTime(); assert( !mvoptime.isNull() ); - /* copy the oplog + /* apply relevant portion of the oplog */ { - sethbmsg("initial sync copy+apply oplog"); - if( ! initialSyncOplogApplication(masterHostname, cp, startingTS, mvoptime) ) { // note we assume here that this call does not throw + sethbmsg("initial sync initial oplog application"); + isyncassert( "initial sync source must remain readable throughout our initial sync [2]", source->state().readable() ); + if( ! 
initialSyncOplogApplication(source, /*applyGTE*/startingTS, /*minValid*/mvoptime) ) { // note we assume here that this call does not throw log() << "replSet initial sync failed during applyoplog" << rsLog; emptyOplog(); // otherwise we'll be up! - lastOpTimeWritten = OpTime(); - lastH = 0; + lastOpTimeWritten = OpTime(); + lastH = 0; log() << "replSet cleaning up [1]" << rsLog; { writelock lk("local."); Client::Context cx( "local." ); - cx.db()->flushFiles(true); + cx.db()->flushFiles(true); } log() << "replSet cleaning up [2]" << rsLog; - sleepsecs(2); + sleepsecs(5); return; } } sethbmsg("initial sync finishing up",0); - + assert( !box.getState().primary() ); // wouldn't make sense if we were. { writelock lk("local."); Client::Context cx( "local." ); - cx.db()->flushFiles(true); + cx.db()->flushFiles(true); try { log() << "replSet set minValid=" << minValid["ts"]._opTime().toString() << rsLog; } diff --git a/db/repl/rs_initiate.cpp b/db/repl/rs_initiate.cpp index 9c74be0..cf1941f 100644 --- a/db/repl/rs_initiate.cpp +++ b/db/repl/rs_initiate.cpp @@ -26,47 +26,63 @@ #include "rs.h" #include "rs_config.h" #include "../dbhelpers.h" +#include "../oplog.h" using namespace bson; using namespace mongoutils; -namespace mongo { +namespace mongo { /* called on a reconfig AND on initiate - throws + throws @param initial true when initiating */ void checkMembersUpForConfigChange(const ReplSetConfig& cfg, bool initial) { int failures = 0; int me = 0; + stringstream selfs; for( vector::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { if( i->h.isSelf() ) { me++; - if( !i->potentiallyHot() ) { + if( me > 1 ) + selfs << ','; + selfs << i->h.toString(); + if( !i->potentiallyHot() ) { uasserted(13420, "initiation and reconfiguration of a replica set must be sent to a node that can become primary"); } } } - uassert(13278, "bad config - dups?", me <= 1); // dups? - uassert(13279, "can't find self in the replset config", me == 1); + uassert(13278, "bad config: isSelf is true for multiple hosts: " + selfs.str(), me <= 1); // dups? + if( me != 1 ) { + stringstream ss; + ss << "can't find self in the replset config"; + if( !cmdLine.isDefaultPort() ) ss << " my port: " << cmdLine.port; + if( me != 0 ) ss << " found: " << me; + uasserted(13279, ss.str()); + } for( vector::const_iterator i = cfg.members.begin(); i != cfg.members.end(); i++ ) { + // we know we're up + if (i->h.isSelf()) { + continue; + } + BSONObj res; { bool ok = false; try { int theirVersion = -1000; - ok = requestHeartbeat(cfg._id, "", i->h.toString(), res, -1, theirVersion, initial/*check if empty*/); - if( theirVersion >= cfg.version ) { + ok = requestHeartbeat(cfg._id, "", i->h.toString(), res, -1, theirVersion, initial/*check if empty*/); + if( theirVersion >= cfg.version ) { stringstream ss; ss << "replSet member " << i->h.toString() << " has too new a config version (" << theirVersion << ") to reconfigure"; uasserted(13259, ss.str()); } } - catch(DBException& e) { + catch(DBException& e) { log() << "replSet cmufcc requestHeartbeat " << i->h.toString() << " : " << e.toString() << rsLog; } - catch(...) { + catch(...) { log() << "replSet cmufcc error exception in requestHeartbeat?" << rsLog; } if( res.getBoolField("mismatch") ) @@ -96,7 +112,7 @@ namespace mongo { trying to keep change small as release is near. 
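checkMembersUpForConfigChange() above now records every host for which isSelf() returned true, so the 13278 error can name the duplicates, and the 13279 message includes the local port. A standalone restatement of the "self appears exactly once" rule, with a plain exception standing in for uasserted() and all names illustrative:

    #include <sstream>
    #include <stdexcept>
    #include <string>
    #include <vector>

    struct CfgMember { std::string host; bool isSelf; };

    // The local node must appear in the replica set config exactly once.
    void checkSelfAppearsOnce(const std::vector<CfgMember>& members, int myPort) {
        int me = 0;
        std::stringstream selfs;
        for (const CfgMember& m : members) {
            if (!m.isSelf) continue;
            if (me++) selfs << ',';
            selfs << m.host;
        }
        if (me > 1)
            throw std::runtime_error("13278 bad config: isSelf is true for multiple hosts: " + selfs.str());
        if (me != 1) {
            std::stringstream ss;
            ss << "13279 can't find self in the replset config, my port: " << myPort;
            throw std::runtime_error(ss.str());
        }
    }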
*/ const Member* m = theReplSet->findById( i->_id ); - if( m ) { + if( m ) { // ok, so this was an existing member (wouldn't make sense to add to config a new member that is down) assert( m->h().toString() == i->h.toString() ); allowFailure = true; @@ -113,24 +129,24 @@ namespace mongo { } if( initial ) { bool hasData = res["hasData"].Bool(); - uassert(13311, "member " + i->h.toString() + " has data already, cannot initiate set. All members except initiator must be empty.", - !hasData || i->h.isSelf()); + uassert(13311, "member " + i->h.toString() + " has data already, cannot initiate set. All members except initiator must be empty.", + !hasData || i->h.isSelf()); } } } - class CmdReplSetInitiate : public ReplSetCommand { + class CmdReplSetInitiate : public ReplSetCommand { public: virtual LockType locktype() const { return NONE; } CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") { } - virtual void help(stringstream& h) const { - h << "Initiate/christen a replica set."; + virtual void help(stringstream& h) const { + h << "Initiate/christen a replica set."; h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetInitiate admin command received from client" << rsLog; - if( !replSet ) { + if( !replSet ) { errmsg = "server is not running with --replSet"; return false; } @@ -141,12 +157,12 @@ namespace mongo { } { - // just make sure we can get a write lock before doing anything else. we'll reacquire one - // later. of course it could be stuck then, but this check lowers the risk if weird things + // just make sure we can get a write lock before doing anything else. we'll reacquire one + // later. of course it could be stuck then, but this check lowers the risk if weird things // are up. time_t t = time(0); writelock lk(""); - if( time(0)-t > 10 ) { + if( time(0)-t > 10 ) { errmsg = "took a long time to get write lock, so not initiating. Initiate when server less busy?"; return false; } @@ -155,7 +171,7 @@ namespace mongo { it is ok if the initiating member has *other* data than that. */ BSONObj o; - if( Helpers::getFirst(rsoplog, o) ) { + if( Helpers::getFirst(rsoplog, o) ) { errmsg = rsoplog + string(" is not empty on the initiating member. 
cannot initiate."); return false; } @@ -194,7 +210,7 @@ namespace mongo { configObj = b.obj(); log() << "replSet created this configuration for initiation : " << configObj.toString() << rsLog; } - else { + else { configObj = cmdObj["replSetInitiate"].Obj(); } @@ -203,7 +219,7 @@ namespace mongo { ReplSetConfig newConfig(configObj); parsed = true; - if( newConfig.version > 1 ) { + if( newConfig.version > 1 ) { errmsg = "can't initiate with a version number greater than 1"; return false; } @@ -214,6 +230,8 @@ namespace mongo { log() << "replSet replSetInitiate all members seem up" << rsLog; + createOplog(); + writelock lk(""); bo comment = BSON( "msg" << "initiating set"); newConfig.saveConfigLocally(comment); @@ -222,9 +240,9 @@ namespace mongo { ReplSet::startupStatus = ReplSet::SOON; ReplSet::startupStatusMsg = "Received replSetInitiate - should come online shortly."; } - catch( DBException& e ) { + catch( DBException& e ) { log() << "replSet replSetInitiate exception: " << e.what() << rsLog; - if( !parsed ) + if( !parsed ) errmsg = string("couldn't parse cfg object ") + e.what(); else errmsg = string("couldn't initiate : ") + e.what(); diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h index 099cb22..017b6ea 100644 --- a/db/repl/rs_member.h +++ b/db/repl/rs_member.h @@ -30,18 +30,18 @@ namespace mongo { RS_FATAL something bad has occurred and server is not completely offline with regard to the replica set. fatal error. RS_STARTUP2 loaded config, still determining who is primary */ - struct MemberState { - enum MS { - RS_STARTUP, - RS_PRIMARY, - RS_SECONDARY, - RS_RECOVERING, - RS_FATAL, - RS_STARTUP2, - RS_UNKNOWN, /* remote node not yet reached */ - RS_ARBITER, - RS_DOWN, /* node not reachable for a report */ - RS_ROLLBACK + struct MemberState { + enum MS { + RS_STARTUP = 0, + RS_PRIMARY = 1, + RS_SECONDARY = 2, + RS_RECOVERING = 3, + RS_FATAL = 4, + RS_STARTUP2 = 5, + RS_UNKNOWN = 6, /* remote node not yet reached */ + RS_ARBITER = 7, + RS_DOWN = 8, /* node not reachable for a report */ + RS_ROLLBACK = 9 } s; MemberState(MS ms = RS_UNKNOWN) : s(ms) { } @@ -53,6 +53,7 @@ namespace mongo { bool startup2() const { return s == RS_STARTUP2; } bool fatal() const { return s == RS_FATAL; } bool rollback() const { return s == RS_ROLLBACK; } + bool readable() const { return s == RS_PRIMARY || s == RS_SECONDARY; } string toString() const; @@ -60,9 +61,9 @@ namespace mongo { bool operator!=(const MemberState& r) const { return s != r.s; } }; - /* this is supposed to be just basic information on a member, + /* this is supposed to be just basic information on a member, and copy constructable. 
*/ - class HeartbeatInfo { + class HeartbeatInfo { unsigned _id; public: HeartbeatInfo() : _id(0xffffffff),hbstate(MemberState::RS_UNKNOWN),health(-1.0),downSince(0),skew(INT_MIN) { } @@ -88,15 +89,15 @@ namespace mongo { bool changed(const HeartbeatInfo& old) const; }; - inline HeartbeatInfo::HeartbeatInfo(unsigned id) : _id(id) { + inline HeartbeatInfo::HeartbeatInfo(unsigned id) : _id(id) { hbstate = MemberState::RS_UNKNOWN; health = -1.0; downSince = 0; - lastHeartbeat = upSince = 0; + lastHeartbeat = upSince = 0; skew = INT_MIN; } - inline bool HeartbeatInfo::changed(const HeartbeatInfo& old) const { + inline bool HeartbeatInfo::changed(const HeartbeatInfo& old) const { return health != old.health || hbstate != old.hbstate; } diff --git a/db/repl/rs_optime.h b/db/repl/rs_optime.h index b3607fa..f0ca569 100644 --- a/db/repl/rs_optime.h +++ b/db/repl/rs_optime.h @@ -1,58 +1,58 @@ -// @file rs_optime.h - -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#pragma once - -#include "../../util/optime.h" - -namespace mongo { - +// @file rs_optime.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include "../../util/optime.h" + +namespace mongo { + const char rsoplog[] = "local.oplog.rs"; - - /* - class RSOpTime : public OpTime { - public: - bool initiated() const { return getSecs() != 0; } - };*/ - - /*struct RSOpTime { - unsigned long long ord; - - RSOpTime() : ord(0) { } - - bool initiated() const { return ord > 0; } - - void initiate() { - assert( !initiated() ); - ord = 1000000; - } - - ReplTime inc() { - DEV assertInWriteLock(); - return ++ord; - } - - string toString() const { return str::stream() << ord; } - - // query the oplog and set the highest value herein. acquires a db read lock. throws. - void load(); - }; - - extern RSOpTime rsOpTime;*/ - -} + + /* + class RSOpTime : public OpTime { + public: + bool initiated() const { return getSecs() != 0; } + };*/ + + /*struct RSOpTime { + unsigned long long ord; + + RSOpTime() : ord(0) { } + + bool initiated() const { return ord > 0; } + + void initiate() { + assert( !initiated() ); + ord = 1000000; + } + + ReplTime inc() { + DEV assertInWriteLock(); + return ++ord; + } + + string toString() const { return str::stream() << ord; } + + // query the oplog and set the highest value herein. acquires a db read lock. throws. 
+ void load(); + }; + + extern RSOpTime rsOpTime;*/ + +} diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp index 6b2544c..0b4cc28 100644 --- a/db/repl/rs_rollback.cpp +++ b/db/repl/rs_rollback.cpp @@ -1,5 +1,5 @@ /* @file rs_rollback.cpp -* +* * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify @@ -25,7 +25,7 @@ /* Scenarios We went offline with ops not replicated out. - + F = node that failed and coming back. P = node that took over, new primary @@ -33,11 +33,11 @@ F : a b c d e f g P : a b c d q - The design is "keep P". One could argue here that "keep F" has some merits, however, in most cases P - will have significantly more data. Also note that P may have a proper subset of F's stream if there were + The design is "keep P". One could argue here that "keep F" has some merits, however, in most cases P + will have significantly more data. Also note that P may have a proper subset of F's stream if there were no subsequent writes. - For now the model is simply : get F back in sync with P. If P was really behind or something, we should have + For now the model is simply : get F back in sync with P. If P was really behind or something, we should have just chosen not to fail over anyway. #2: @@ -50,9 +50,9 @@ Steps find an event in common. 'd'. - undo our events beyond that by: + undo our events beyond that by: (1) taking copy from other server of those objects - (2) do not consider copy valid until we pass reach an optime after when we fetched the new version of object + (2) do not consider copy valid until we pass reach an optime after when we fetched the new version of object -- i.e., reset minvalid. (3) we could skip operations on objects that are previous in time to our capture of the object as an optimization. @@ -65,15 +65,15 @@ namespace mongo { bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string& errmsg, bool logforrepl); void incRBID(); - class rsfatal : public std::exception { + class rsfatal : public std::exception { public: - virtual const char* what() const throw(){ return "replica set fatal exception"; } + virtual const char* what() const throw() { return "replica set fatal exception"; } }; struct DocID { const char *ns; be _id; - bool operator<(const DocID& d) const { + bool operator<(const DocID& d) const { int c = strcmp(ns, d.ns); if( c < 0 ) return true; if( c > 0 ) return false; @@ -82,7 +82,7 @@ namespace mongo { }; struct HowToFixUp { - /* note this is a set -- if there are many $inc's on a single document we need to rollback, we only + /* note this is a set -- if there are many $inc's on a single document we need to rollback, we only need to refetch it once. */ set toRefetch; @@ -97,9 +97,9 @@ namespace mongo { int rbid; // remote server's current rollback sequence # }; - static void refetch(HowToFixUp& h, const BSONObj& ourObj) { + static void refetch(HowToFixUp& h, const BSONObj& ourObj) { const char *op = ourObj.getStringField("op"); - if( *op == 'n' ) + if( *op == 'n' ) return; unsigned long long totSize = 0; @@ -108,53 +108,54 @@ namespace mongo { throw "rollback too large"; DocID d; + // NOTE The assigned ns value may become invalid if we yield. d.ns = ourObj.getStringField("ns"); - if( *d.ns == 0 ) { + if( *d.ns == 0 ) { log() << "replSet WARNING ignoring op on rollback no ns TODO : " << ourObj.toString() << rsLog; return; } bo o = ourObj.getObjectField(*op=='u' ? 
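HowToFixUp::toRefetch above is a set keyed by (ns, _id), so a document touched by many rolled-back ops (say repeated $inc's) is re-fetched from the source only once. A toy illustration of that dedup, with std::string standing in for the BSONElement _id:

    #include <iostream>
    #include <set>
    #include <string>
    #include <utility>

    int main() {
        // (namespace, _id) pairs; a set collapses repeated ops on the same doc.
        std::set<std::pair<std::string, std::string>> toRefetch;
        toRefetch.insert({"test.foo", "42"});   // $inc on test.foo _id 42
        toRefetch.insert({"test.foo", "42"});   // another $inc on the same doc
        toRefetch.insert({"test.bar", "7"});
        std::cout << toRefetch.size() << " documents to refetch\n";  // prints 2
    }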
"o2" : "o"); - if( o.isEmpty() ) { + if( o.isEmpty() ) { log() << "replSet warning ignoring op on rollback : " << ourObj.toString() << rsLog; return; } - if( *op == 'c' ) { + if( *op == 'c' ) { be first = o.firstElement(); NamespaceString s(d.ns); // foo.$cmd string cmdname = first.fieldName(); Command *cmd = Command::findCommand(cmdname.c_str()); - if( cmd == 0 ) { + if( cmd == 0 ) { log() << "replSet warning rollback no suchcommand " << first.fieldName() << " - different mongod versions perhaps?" << rsLog; return; } else { /* findandmodify - tranlated? - godinsert?, + godinsert?, renamecollection a->b. just resync a & b */ if( cmdname == "create" ) { - /* Create collection operation - { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } } + /* Create collection operation + { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } } */ string ns = s.db + '.' + o["create"].String(); // -> foo.abc h.toDrop.insert(ns); return; } - else if( cmdname == "drop" ) { + else if( cmdname == "drop" ) { string ns = s.db + '.' + first.valuestr(); h.collectionsToResync.insert(ns); return; } - else if( cmdname == "dropIndexes" || cmdname == "deleteIndexes" ) { + else if( cmdname == "dropIndexes" || cmdname == "deleteIndexes" ) { /* TODO: this is bad. we simply full resync the collection here, which could be very slow. */ log() << "replSet info rollback of dropIndexes is slow in this version of mongod" << rsLog; string ns = s.db + '.' + first.valuestr(); h.collectionsToResync.insert(ns); return; } - else if( cmdname == "renameCollection" ) { + else if( cmdname == "renameCollection" ) { /* TODO: slow. */ log() << "replSet info rollback of renameCollection is slow in this version of mongod" << rsLog; string from = first.valuestr(); @@ -163,15 +164,15 @@ namespace mongo { h.collectionsToResync.insert(to); return; } - else if( cmdname == "reIndex" ) { + else if( cmdname == "reIndex" ) { return; } - else if( cmdname == "dropDatabase" ) { + else if( cmdname == "dropDatabase" ) { log() << "replSet error rollback : can't rollback drop database full resync will be required" << rsLog; log() << "replSet " << o.toString() << rsLog; throw rsfatal(); } - else { + else { log() << "replSet error can't rollback this command yet: " << o.toString() << rsLog; log() << "replSet cmdname=" << cmdname << rsLog; throw rsfatal(); @@ -190,15 +191,15 @@ namespace mongo { int getRBID(DBClientConnection*); - static void syncRollbackFindCommonPoint(DBClientConnection *them, HowToFixUp& h) { + static void syncRollbackFindCommonPoint(DBClientConnection *them, HowToFixUp& h) { static time_t last; - if( time(0)-last < 60 ) { + if( time(0)-last < 60 ) { throw "findcommonpoint waiting a while before trying again"; } last = time(0); assert( dbMutex.atLeastReadLocked() ); - Client::Context c(rsoplog, dbpath, 0, false); + Client::Context c(rsoplog); NamespaceDetails *nsd = nsdetails(rsoplog); assert(nsd); ReverseCappedCursor u(nsd); @@ -226,7 +227,7 @@ namespace mongo { log() << "replSet info rollback our last optime: " << ourTime.toStringPretty() << rsLog; log() << "replSet info rollback their last optime: " << theirTime.toStringPretty() << rsLog; log() << "replSet info rollback diff in end of log times: " << diff << " seconds" << rsLog; - if( diff > 3600 ) { + if( diff > 3600 ) { log() << "replSet rollback too long a time period for a rollback." 
<< rsLog; throw "error not willing to roll back more than one hour of data"; } @@ -236,8 +237,8 @@ namespace mongo { while( 1 ) { scanned++; /* todo add code to assure no excessive scanning for too long */ - if( ourTime == theirTime ) { - if( ourObj["h"].Long() == theirObj["h"].Long() ) { + if( ourTime == theirTime ) { + if( ourObj["h"].Long() == theirObj["h"].Long() ) { // found the point back in time where we match. // todo : check a few more just to be careful about hash collisions. log() << "replSet rollback found matching events at " << ourTime.toStringPretty() << rsLog; @@ -249,7 +250,7 @@ namespace mongo { refetch(h, ourObj); - if( !t->more() ) { + if( !t->more() ) { log() << "replSet rollback error RS100 reached beginning of remote oplog" << rsLog; log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; @@ -270,8 +271,8 @@ namespace mongo { ourObj = u.current(); ourTime = ourObj["ts"]._opTime(); } - else if( theirTime > ourTime ) { - if( !t->more() ) { + else if( theirTime > ourTime ) { + if( !t->more() ) { log() << "replSet rollback error RS100 reached beginning of remote oplog" << rsLog; log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; @@ -281,11 +282,11 @@ namespace mongo { theirObj = t->nextSafe(); theirTime = theirObj["ts"]._opTime(); } - else { + else { // theirTime < ourTime refetch(h, ourObj); u.advance(); - if( !u.ok() ) { + if( !u.ok() ) { log() << "replSet rollback error RS101 reached beginning of local oplog" << rsLog; log() << "replSet them: " << them->toString() << " scanned: " << scanned << rsLog; log() << "replSet theirTime: " << theirTime.toStringLong() << rsLog; @@ -298,299 +299,303 @@ namespace mongo { } } - struct X { + struct X { const bson::bo *op; bson::bo goodVersionOfObject; }; - static void setMinValid(bo newMinValid) { - try { - log() << "replSet minvalid=" << newMinValid["ts"]._opTime().toStringLong() << rsLog; - } - catch(...) { } - { - Helpers::putSingleton("local.replset.minvalid", newMinValid); - Client::Context cx( "local." ); - cx.db()->flushFiles(true); - } + static void setMinValid(bo newMinValid) { + try { + log() << "replSet minvalid=" << newMinValid["ts"]._opTime().toStringLong() << rsLog; + } + catch(...) { } + { + Helpers::putSingleton("local.replset.minvalid", newMinValid); + Client::Context cx( "local." ); + cx.db()->flushFiles(true); + } } void ReplSetImpl::syncFixUp(HowToFixUp& h, OplogReader& r) { - DBClientConnection *them = r.conn(); - - // fetch all first so we needn't handle interruption in a fancy way - - unsigned long long totSize = 0; - - list< pair > goodVersions; - - bo newMinValid; - - /* fetch all the goodVersions of each document from current primary */ - DocID d; - unsigned long long n = 0; - try { - for( set::iterator i = h.toRefetch.begin(); i != h.toRefetch.end(); i++ ) { - d = *i; - - assert( !d._id.eoo() ); - - { - /* TODO : slow. lots of round trips. 
*/ - n++; - bo good= them->findOne(d.ns, d._id.wrap()).getOwned(); - totSize += good.objsize(); - uassert( 13410, "replSet too much data to roll back", totSize < 300 * 1024 * 1024 ); - - // note good might be eoo, indicating we should delete it - goodVersions.push_back(pair(d,good)); - } - } - newMinValid = r.getLastOp(rsoplog); - if( newMinValid.isEmpty() ) { - sethbmsg("rollback error newMinValid empty?"); - return; - } - } - catch(DBException& e) { - sethbmsg(str::stream() << "rollback re-get objects: " << e.toString(),0); - log() << "rollback couldn't re-get ns:" << d.ns << " _id:" << d._id << ' ' << n << '/' << h.toRefetch.size() << rsLog; - throw e; - } - - MemoryMappedFile::flushAll(true); - - sethbmsg("rollback 3.5"); - if( h.rbid != getRBID(r.conn()) ) { - // our source rolled back itself. so the data we received isn't necessarily consistent. - sethbmsg("rollback rbid on source changed during rollback, cancelling this attempt"); - return; - } - - // update them - sethbmsg(str::stream() << "rollback 4 n:" << goodVersions.size()); - - bool warn = false; - - assert( !h.commonPointOurDiskloc.isNull() ); - - dbMutex.assertWriteLocked(); - - /* we have items we are writing that aren't from a point-in-time. thus best not to come online - until we get to that point in freshness. */ - setMinValid(newMinValid); - - /** any full collection resyncs required? */ - if( !h.collectionsToResync.empty() ) { - for( set::iterator i = h.collectionsToResync.begin(); i != h.collectionsToResync.end(); i++ ) { - string ns = *i; - sethbmsg(str::stream() << "rollback 4.1 coll resync " << ns); - Client::Context c(*i, dbpath, 0, /*doauth*/false); - try { - bob res; - string errmsg; - dropCollection(ns, errmsg, res); - { - dbtemprelease r; - bool ok = copyCollectionFromRemote(them->getServerAddress(), ns, bo(), errmsg, false); - if( !ok ) { - log() << "replSet rollback error resyncing collection " << ns << ' ' << errmsg << rsLog; - throw "rollback error resyncing rollection [1]"; - } - } - } - catch(...) { - log() << "replset rollback error resyncing collection " << ns << rsLog; - throw "rollback error resyncing rollection [2]"; - } - } - - /* we did more reading from primary, so check it again for a rollback (which would mess us up), and - make minValid newer. - */ - sethbmsg("rollback 4.2"); - { - string err; - try { - newMinValid = r.getLastOp(rsoplog); - if( newMinValid.isEmpty() ) { - err = "can't get minvalid from primary"; - } else { - setMinValid(newMinValid); - } - } - catch(...) { - err = "can't get/set minvalid"; - } - if( h.rbid != getRBID(r.conn()) ) { - // our source rolled back itself. so the data we received isn't necessarily consistent. - // however, we've now done writes. thus we have a problem. - err += "rbid at primary changed during resync/rollback"; - } - if( !err.empty() ) { - log() << "replSet error rolling back : " << err << ". A full resync will be necessary." << rsLog; - /* todo: reset minvalid so that we are permanently in fatal state */ - /* todo: don't be fatal, but rather, get all the data first. 
*/ - sethbmsg("rollback error"); - throw rsfatal(); - } - } - sethbmsg("rollback 4.3"); - } - - sethbmsg("rollback 4.6"); - /** drop collections to drop before doing individual fixups - that might make things faster below actually if there were subsequent inserts to rollback */ - for( set::iterator i = h.toDrop.begin(); i != h.toDrop.end(); i++ ) { - Client::Context c(*i, dbpath, 0, /*doauth*/false); - try { - bob res; - string errmsg; - log(1) << "replSet rollback drop: " << *i << rsLog; - dropCollection(*i, errmsg, res); - } - catch(...) { - log() << "replset rollback error dropping collection " << *i << rsLog; - } - } - - sethbmsg("rollback 4.7"); - Client::Context c(rsoplog, dbpath, 0, /*doauth*/false); - NamespaceDetails *oplogDetails = nsdetails(rsoplog); - uassert(13423, str::stream() << "replSet error in rollback can't find " << rsoplog, oplogDetails); - - map > removeSavers; - - unsigned deletes = 0, updates = 0; - for( list >::iterator i = goodVersions.begin(); i != goodVersions.end(); i++ ) { - const DocID& d = i->first; - bo pattern = d._id.wrap(); // { _id : ... } - try { - assert( d.ns && *d.ns ); - if( h.collectionsToResync.count(d.ns) ) { - /* we just synced this entire collection */ - continue; - } - - /* keep an archive of items rolled back */ - shared_ptr& rs = removeSavers[d.ns]; - if ( ! rs ) - rs.reset( new RemoveSaver( "rollback" , "" , d.ns ) ); - - // todo: lots of overhead in context, this can be faster - Client::Context c(d.ns, dbpath, 0, /*doauth*/false); - if( i->second.isEmpty() ) { - // wasn't on the primary; delete. - /* TODO1.6 : can't delete from a capped collection. need to handle that here. */ - deletes++; - - NamespaceDetails *nsd = nsdetails(d.ns); - if( nsd ) { - if( nsd->capped ) { - /* can't delete from a capped collection - so we truncate instead. if this item must go, - so must all successors!!! */ - try { - /** todo: IIRC cappedTrunateAfter does not handle completely empty. todo. */ - // this will crazy slow if no _id index. - long long start = Listener::getElapsedTimeMillis(); - DiskLoc loc = Helpers::findOne(d.ns, pattern, false); - if( Listener::getElapsedTimeMillis() - start > 200 ) - log() << "replSet warning roll back slow no _id index for " << d.ns << " perhaps?" << rsLog; - //would be faster but requires index: DiskLoc loc = Helpers::findById(nsd, pattern); - if( !loc.isNull() ) { - try { - nsd->cappedTruncateAfter(d.ns, loc, true); - } - catch(DBException& e) { - if( e.getCode() == 13415 ) { - // hack: need to just make cappedTruncate do this... - nsd->emptyCappedCollection(d.ns); - } else { - throw; - } - } - } - } - catch(DBException& e) { - log() << "replSet error rolling back capped collection rec " << d.ns << ' ' << e.toString() << rsLog; - } - } - else { - try { - deletes++; - deleteObjects(d.ns, pattern, /*justone*/true, /*logop*/false, /*god*/true, rs.get() ); - } - catch(...) { - log() << "replSet error rollback delete failed ns:" << d.ns << rsLog; - } - } - // did we just empty the collection? if so let's check if it even exists on the source. 
- if( nsd->nrecords == 0 ) { - try { - string sys = cc().database()->name + ".system.namespaces"; - bo o = them->findOne(sys, QUERY("name"<second, pattern, /*upsert=*/true, /*multi=*/false , /*logtheop=*/false , debug, rs.get() ); - } - } - catch(DBException& e) { - log() << "replSet exception in rollback ns:" << d.ns << ' ' << pattern.toString() << ' ' << e.toString() << " ndeletes:" << deletes << rsLog; - warn = true; - } - } - - removeSavers.clear(); // this effectively closes all of them - - sethbmsg(str::stream() << "rollback 5 d:" << deletes << " u:" << updates); - MemoryMappedFile::flushAll(true); - sethbmsg("rollback 6"); - - // clean up oplog - log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog; - // todo: fatal error if this throws? - oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false); - - /* reset cached lastoptimewritten and h value */ - loadLastOpTimeWritten(); - - sethbmsg("rollback 7"); - MemoryMappedFile::flushAll(true); - - // done - if( warn ) - sethbmsg("issues during syncRollback, see log"); - else - sethbmsg("rollback done"); - } - - void ReplSetImpl::syncRollback(OplogReader&r) { + DBClientConnection *them = r.conn(); + + // fetch all first so we needn't handle interruption in a fancy way + + unsigned long long totSize = 0; + + list< pair > goodVersions; + + bo newMinValid; + + /* fetch all the goodVersions of each document from current primary */ + DocID d; + unsigned long long n = 0; + try { + for( set::iterator i = h.toRefetch.begin(); i != h.toRefetch.end(); i++ ) { + d = *i; + + assert( !d._id.eoo() ); + + { + /* TODO : slow. lots of round trips. */ + n++; + bo good= them->findOne(d.ns, d._id.wrap()).getOwned(); + totSize += good.objsize(); + uassert( 13410, "replSet too much data to roll back", totSize < 300 * 1024 * 1024 ); + + // note good might be eoo, indicating we should delete it + goodVersions.push_back(pair(d,good)); + } + } + newMinValid = r.getLastOp(rsoplog); + if( newMinValid.isEmpty() ) { + sethbmsg("rollback error newMinValid empty?"); + return; + } + } + catch(DBException& e) { + sethbmsg(str::stream() << "rollback re-get objects: " << e.toString(),0); + log() << "rollback couldn't re-get ns:" << d.ns << " _id:" << d._id << ' ' << n << '/' << h.toRefetch.size() << rsLog; + throw e; + } + + MemoryMappedFile::flushAll(true); + + sethbmsg("rollback 3.5"); + if( h.rbid != getRBID(r.conn()) ) { + // our source rolled back itself. so the data we received isn't necessarily consistent. + sethbmsg("rollback rbid on source changed during rollback, cancelling this attempt"); + return; + } + + // update them + sethbmsg(str::stream() << "rollback 4 n:" << goodVersions.size()); + + bool warn = false; + + assert( !h.commonPointOurDiskloc.isNull() ); + + dbMutex.assertWriteLocked(); + + /* we have items we are writing that aren't from a point-in-time. thus best not to come online + until we get to that point in freshness. */ + setMinValid(newMinValid); + + /** any full collection resyncs required? 
*/ + if( !h.collectionsToResync.empty() ) { + for( set::iterator i = h.collectionsToResync.begin(); i != h.collectionsToResync.end(); i++ ) { + string ns = *i; + sethbmsg(str::stream() << "rollback 4.1 coll resync " << ns); + Client::Context c(*i); + try { + bob res; + string errmsg; + dropCollection(ns, errmsg, res); + { + dbtemprelease r; + bool ok = copyCollectionFromRemote(them->getServerAddress(), ns, bo(), errmsg, false); + if( !ok ) { + log() << "replSet rollback error resyncing collection " << ns << ' ' << errmsg << rsLog; + throw "rollback error resyncing rollection [1]"; + } + } + } + catch(...) { + log() << "replset rollback error resyncing collection " << ns << rsLog; + throw "rollback error resyncing rollection [2]"; + } + } + + /* we did more reading from primary, so check it again for a rollback (which would mess us up), and + make minValid newer. + */ + sethbmsg("rollback 4.2"); + { + string err; + try { + newMinValid = r.getLastOp(rsoplog); + if( newMinValid.isEmpty() ) { + err = "can't get minvalid from primary"; + } + else { + setMinValid(newMinValid); + } + } + catch(...) { + err = "can't get/set minvalid"; + } + if( h.rbid != getRBID(r.conn()) ) { + // our source rolled back itself. so the data we received isn't necessarily consistent. + // however, we've now done writes. thus we have a problem. + err += "rbid at primary changed during resync/rollback"; + } + if( !err.empty() ) { + log() << "replSet error rolling back : " << err << ". A full resync will be necessary." << rsLog; + /* todo: reset minvalid so that we are permanently in fatal state */ + /* todo: don't be fatal, but rather, get all the data first. */ + sethbmsg("rollback error"); + throw rsfatal(); + } + } + sethbmsg("rollback 4.3"); + } + + sethbmsg("rollback 4.6"); + /** drop collections to drop before doing individual fixups - that might make things faster below actually if there were subsequent inserts to rollback */ + for( set::iterator i = h.toDrop.begin(); i != h.toDrop.end(); i++ ) { + Client::Context c(*i); + try { + bob res; + string errmsg; + log(1) << "replSet rollback drop: " << *i << rsLog; + dropCollection(*i, errmsg, res); + } + catch(...) { + log() << "replset rollback error dropping collection " << *i << rsLog; + } + } + + sethbmsg("rollback 4.7"); + Client::Context c(rsoplog); + NamespaceDetails *oplogDetails = nsdetails(rsoplog); + uassert(13423, str::stream() << "replSet error in rollback can't find " << rsoplog, oplogDetails); + + map > removeSavers; + + unsigned deletes = 0, updates = 0; + for( list >::iterator i = goodVersions.begin(); i != goodVersions.end(); i++ ) { + const DocID& d = i->first; + bo pattern = d._id.wrap(); // { _id : ... } + try { + assert( d.ns && *d.ns ); + if( h.collectionsToResync.count(d.ns) ) { + /* we just synced this entire collection */ + continue; + } + + getDur().commitIfNeeded(); + + /* keep an archive of items rolled back */ + shared_ptr& rs = removeSavers[d.ns]; + if ( ! rs ) + rs.reset( new RemoveSaver( "rollback" , "" , d.ns ) ); + + // todo: lots of overhead in context, this can be faster + Client::Context c(d.ns); + if( i->second.isEmpty() ) { + // wasn't on the primary; delete. + /* TODO1.6 : can't delete from a capped collection. need to handle that here. */ + deletes++; + + NamespaceDetails *nsd = nsdetails(d.ns); + if( nsd ) { + if( nsd->capped ) { + /* can't delete from a capped collection - so we truncate instead. if this item must go, + so must all successors!!! 
*/ + try { + /** todo: IIRC cappedTrunateAfter does not handle completely empty. todo. */ + // this will crazy slow if no _id index. + long long start = Listener::getElapsedTimeMillis(); + DiskLoc loc = Helpers::findOne(d.ns, pattern, false); + if( Listener::getElapsedTimeMillis() - start > 200 ) + log() << "replSet warning roll back slow no _id index for " << d.ns << " perhaps?" << rsLog; + //would be faster but requires index: DiskLoc loc = Helpers::findById(nsd, pattern); + if( !loc.isNull() ) { + try { + nsd->cappedTruncateAfter(d.ns, loc, true); + } + catch(DBException& e) { + if( e.getCode() == 13415 ) { + // hack: need to just make cappedTruncate do this... + nsd->emptyCappedCollection(d.ns); + } + else { + throw; + } + } + } + } + catch(DBException& e) { + log() << "replSet error rolling back capped collection rec " << d.ns << ' ' << e.toString() << rsLog; + } + } + else { + try { + deletes++; + deleteObjects(d.ns, pattern, /*justone*/true, /*logop*/false, /*god*/true, rs.get() ); + } + catch(...) { + log() << "replSet error rollback delete failed ns:" << d.ns << rsLog; + } + } + // did we just empty the collection? if so let's check if it even exists on the source. + if( nsd->stats.nrecords == 0 ) { + try { + string sys = cc().database()->name + ".system.namespaces"; + bo o = them->findOne(sys, QUERY("name"<second, pattern, /*upsert=*/true, /*multi=*/false , /*logtheop=*/false , debug, rs.get() ); + } + } + catch(DBException& e) { + log() << "replSet exception in rollback ns:" << d.ns << ' ' << pattern.toString() << ' ' << e.toString() << " ndeletes:" << deletes << rsLog; + warn = true; + } + } + + removeSavers.clear(); // this effectively closes all of them + + sethbmsg(str::stream() << "rollback 5 d:" << deletes << " u:" << updates); + MemoryMappedFile::flushAll(true); + sethbmsg("rollback 6"); + + // clean up oplog + log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog; + // todo: fatal error if this throws? + oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false); + + /* reset cached lastoptimewritten and h value */ + loadLastOpTimeWritten(); + + sethbmsg("rollback 7"); + MemoryMappedFile::flushAll(true); + + // done + if( warn ) + sethbmsg("issues during syncRollback, see log"); + else + sethbmsg("rollback done"); + } + + void ReplSetImpl::syncRollback(OplogReader&r) { unsigned s = _syncRollback(r); - if( s ) + if( s ) sleepsecs(s); } - unsigned ReplSetImpl::_syncRollback(OplogReader&r) { + unsigned ReplSetImpl::_syncRollback(OplogReader&r) { assert( !lockedByMe() ); assert( !dbMutex.atLeastReadLocked() ); @@ -604,7 +609,7 @@ namespace mongo { if( box.getState().secondary() ) { /* by doing this, we will not service reads (return an error as we aren't in secondary staate. - that perhaps is moot becasue of the write lock above, but that write lock probably gets deferred + that perhaps is moot becasue of the write lock above, but that write lock probably gets deferred or removed or yielded later anyway. also, this is better for status reporting - we know what is happening. 
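For each refetched document, syncFixUp above applies one rule: an empty "good version" means the document no longer exists on the source, so it is deleted locally (capped collections are truncated instead, and a collection that becomes empty and is absent on the source is dropped); a non-empty good version is upserted by _id. The core decision as a standalone toy, with a map standing in for the collection:

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
        std::map<std::string, std::string> coll = {{"1", "old-a"}, {"2", "old-b"}};

        // _id -> good version fetched from the source; "" means "not found there"
        std::map<std::string, std::string> goodVersions =
            {{"1", ""}, {"2", "new-b"}, {"3", "new-c"}};

        unsigned deletes = 0, updates = 0;
        for (const auto& g : goodVersions) {
            if (g.second.empty()) { coll.erase(g.first); ++deletes; }     // delete locally
            else                  { coll[g.first] = g.second; ++updates; } // upsert by _id
        }
        std::cout << "rollback d:" << deletes << " u:" << updates << "\n";  // d:1 u:2
    }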
@@ -618,7 +623,7 @@ namespace mongo { r.resetCursor(); /*DBClientConnection us(false, 0, 0); string errmsg; - if( !us.connect(HostAndPort::me().toString(),errmsg) ) { + if( !us.connect(HostAndPort::me().toString(),errmsg) ) { sethbmsg("rollback connect to self failure" + errmsg); return; }*/ @@ -627,15 +632,15 @@ namespace mongo { try { syncRollbackFindCommonPoint(r.conn(), how); } - catch( const char *p ) { + catch( const char *p ) { sethbmsg(string("rollback 2 error ") + p); return 10; } - catch( rsfatal& ) { + catch( rsfatal& ) { _fatal(); return 2; } - catch( DBException& e ) { + catch( DBException& e ) { sethbmsg(string("rollback 2 exception ") + e.toString() + "; sleeping 1 min"); dbtemprelease r; sleepsecs(60); @@ -647,20 +652,20 @@ namespace mongo { { incRBID(); - try { + try { syncFixUp(how, r); } - catch( rsfatal& ) { + catch( rsfatal& ) { sethbmsg("rollback fixup error"); _fatal(); return 2; } - catch(...) { + catch(...) { incRBID(); throw; } incRBID(); - /* success - leave "ROLLBACK" state + /* success - leave "ROLLBACK" state can go to SECONDARY once minvalid is achieved */ box.change(MemberState::RS_RECOVERING, _self); diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 9de3f60..8d06fcc 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -19,30 +19,21 @@ #include "../../client/dbclient.h" #include "rs.h" #include "../repl.h" - +#include "connections.h" namespace mongo { using namespace bson; - extern unsigned replSetForceInitialSyncFailure; - void startSyncThread() { - Client::initThread("rs_sync"); - cc().iAmSyncThread(); - theReplSet->syncThread(); - cc().shutdown(); - } - + /* apply the log op that is in param o */ void ReplSetImpl::syncApply(const BSONObj &o) { - //const char *op = o.getStringField("op"); - - char db[MaxDatabaseLen]; + char db[MaxDatabaseNameLen]; const char *ns = o.getStringField("ns"); nsToDatabase(ns, db); if ( *ns == '.' || *ns == 0 ) { - if( *o.getStringField("op") == 'n' ) - return; + if( *o.getStringField("op") == 'n' ) + return; log() << "replSet skipping bad op in oplog: " << o.toString() << endl; return; } @@ -54,19 +45,21 @@ namespace mongo { applyOperation_inlock(o); } + /* initial oplog application, during initial sync, after cloning. + @return false on failure. + this method returns an error and doesn't throw exceptions (i think). + */ bool ReplSetImpl::initialSyncOplogApplication( - string hn, - const Member *primary, + const Member *source, OpTime applyGTE, - OpTime minValid) - { - if( primary == 0 ) return false; + OpTime minValid) { + if( source == 0 ) return false; - OpTime ts; + const string hn = source->h().toString(); + OplogReader r; try { - OplogReader r; - if( !r.connect(hn) ) { - log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; + if( !r.connect(hn) ) { + log() << "replSet initial sync error can't connect to " << hn << " to read " << rsoplog << rsLog; return false; } @@ -80,48 +73,63 @@ namespace mongo { } assert( r.haveCursor() ); - /* we lock outside the loop to avoid the overhead of locking on every operation. server isn't usable yet anyway! */ - writelock lk(""); - { - if( !r.more() ) { + if( !r.more() ) { sethbmsg("replSet initial sync error reading remote oplog"); + log() << "replSet initial sync error remote oplog (" << rsoplog << ") on host " << hn << " is empty?" 
<< rsLog; return false; } bo op = r.next(); OpTime t = op["ts"]._opTime(); r.putBack(op); - assert( !t.isNull() ); + + if( op.firstElement().fieldName() == string("$err") ) { + log() << "replSet initial sync error querying " << rsoplog << " on " << hn << " : " << op.toString() << rsLog; + return false; + } + + uassert( 13508 , str::stream() << "no 'ts' in first op in oplog: " << op , !t.isNull() ); if( t > applyGTE ) { sethbmsg(str::stream() << "error " << hn << " oplog wrapped during initial sync"); + log() << "replSet initial sync expected first optime of " << applyGTE << rsLog; + log() << "replSet initial sync but received a first optime of " << t << " from " << hn << rsLog; return false; } } + } + catch(DBException& e) { + log() << "replSet initial sync failing: " << e.toString() << rsLog; + return false; + } - // todo : use exhaust - unsigned long long n = 0; - while( 1 ) { + /* we lock outside the loop to avoid the overhead of locking on every operation. */ + writelock lk(""); + // todo : use exhaust + OpTime ts; + unsigned long long n = 0; + while( 1 ) { + try { if( !r.more() ) break; BSONObj o = r.nextSafe(); /* note we might get "not master" at some point */ { - //writelock lk(""); - ts = o["ts"]._opTime(); /* if we have become primary, we dont' want to apply things from elsewhere - anymore. assumePrimary is in the db lock so we are safe as long as + anymore. assumePrimary is in the db lock so we are safe as long as we check after we locked above. */ - const Member *p1 = box.getPrimary(); - if( p1 != primary || replSetForceInitialSyncFailure ) { + if( (source->state() != MemberState::RS_PRIMARY && + source->state() != MemberState::RS_SECONDARY) || + replSetForceInitialSyncFailure ) { + int f = replSetForceInitialSyncFailure; if( f > 0 ) { replSetForceInitialSyncFailure = f-1; log() << "replSet test code invoked, replSetForceInitialSyncFailure" << rsLog; + throw DBException("forced error",0); } - log() << "replSet primary was:" << primary->fullName() << " now:" << - (p1 != 0 ? p1->fullName() : "none") << rsLog; + log() << "replSet we are now primary" << rsLog; throw DBException("primary changed",0); } @@ -131,38 +139,48 @@ namespace mongo { } _logOpObjRS(o); /* with repl sets we write the ops to our oplog too */ } - if( ++n % 100000 == 0 ) { + if( ++n % 100000 == 0 ) { // simple progress metering log() << "replSet initialSyncOplogApplication " << n << rsLog; } + + getDur().commitIfNeeded(); } - } - catch(DBException& e) { - if( ts <= minValid ) { - // didn't make it far enough - log() << "replSet initial sync failing, error applying oplog " << e.toString() << rsLog; - return false; + catch (DBException& e) { + if( e.getCode() == 11000 || e.getCode() == 11001 ) { + // skip duplicate key exceptions + continue; + } + + if( ts <= minValid ) { + // didn't make it far enough + log() << "replSet initial sync failing, error applying oplog " << e.toString() << rsLog; + return false; + } + + // otherwise, whatever + break; } } return true; } - /* should be in RECOVERING state on arrival here. + /* should be in RECOVERING state on arrival here. 
readlocks @return true if transitioned to SECONDARY */ - bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) { - bool golive = false; + bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) { + bool golive = false; { readlock lk("local.replset.minvalid"); BSONObj mv; - if( Helpers::getSingleton("local.replset.minvalid", mv) ) { + if( Helpers::getSingleton("local.replset.minvalid", mv) ) { minvalid = mv["ts"]._opTime(); - if( minvalid <= lastOpTimeWritten ) { + if( minvalid <= lastOpTimeWritten ) { golive=true; } } - else + else golive = true; /* must have been the original member */ } if( golive ) { @@ -172,44 +190,104 @@ namespace mongo { return golive; } - /* tail the primary's oplog. ok to return, will be re-called. */ - void ReplSetImpl::syncTail() { - // todo : locking vis a vis the mgr... + /** + * Checks if the oplog given is too far ahead to read from. + * + * @param r the oplog + * @param hn the hostname (for log messages) + * + * @return if we are stale compared to the oplog on hn + */ + bool ReplSetImpl::_isStale(OplogReader& r, const string& hn) { + BSONObj remoteOldestOp = r.findOne(rsoplog, Query()); + OpTime ts = remoteOldestOp["ts"]._opTime(); + DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; + else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; + DEV { + // debugging sync1.js... + log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog; + log() << "replSet our state: " << state().toString() << rsLog; + } + if( lastOpTimeWritten < ts ) { + log() << "replSet error RS102 too stale to catch up, at least from " << hn << rsLog; + log() << "replSet our last optime : " << lastOpTimeWritten.toStringLong() << rsLog; + log() << "replSet oldest at " << hn << " : " << ts.toStringLong() << rsLog; + log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog; + sethbmsg("error RS102 too stale to catch up"); + changeState(MemberState::RS_RECOVERING); + sleepsecs(120); + return true; + } + return false; + } - const Member *primary = box.getPrimary(); - if( primary == 0 ) return; - string hn = primary->h().toString(); - OplogReader r; - if( !r.connect(primary->h().toString()) ) { + /** + * Tries to connect the oplog reader to a potential sync source. If + * successful, it checks that we are not stale compared to this source. + * + * @param r reader to populate + * @param hn hostname to try + * + * @return if both checks pass, it returns true, otherwise false. + */ + bool ReplSetImpl::_getOplogReader(OplogReader& r, string& hn) { + assert(r.conn() == 0); + + if( !r.connect(hn) ) { log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; - return; + r.resetConnection(); + return false; + } + if( _isStale(r, hn)) { + r.resetConnection(); + return false; } + return true; + } - /* first make sure we are not hopelessly out of sync by being very stale. */ - { - BSONObj remoteOldestOp = r.findOne(rsoplog, Query()); - OpTime ts = remoteOldestOp["ts"]._opTime(); - DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; - else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; - DEV { - // debugging sync1.js... - log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog; - log() << "replSet our state: " << state().toString() << rsLog; + /* tail an oplog. ok to return, will be re-called. */ + void ReplSetImpl::syncTail() { + // todo : locking vis a vis the mgr... 
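_isStale() above declares this member too stale to catch up when its last applied optime is older than the oldest entry still present in the source's oplog: everything it would need to replay has already rolled off. The check itself is a single comparison, shown standalone with simplified optimes:

    #include <iostream>

    // Too stale if everything we still need has already fallen off the source's oplog.
    bool isStale(long long ourLastApplied, long long sourceOldestInOplog) {
        return ourLastApplied < sourceOldestInOplog;
    }

    int main() {
        std::cout << std::boolalpha
                  << isStale(/*ours*/  95, /*their oldest*/ 100) << "\n"   // true  -> RS102
                  << isStale(/*ours*/ 105, /*their oldest*/ 100) << "\n";  // false -> can catch up
    }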
+ OplogReader r; + string hn; + + const Member *target = box.getPrimary(); + if (target != 0) { + hn = target->h().toString(); + if (!_getOplogReader(r, hn)) { + // we might be stale wrt the primary, but could still sync from + // a secondary + target = 0; + } + } + + // if we cannot reach the master but someone else is more up-to-date + // than we are, sync from them. + if( target == 0 ) { + for(Member *m = head(); m; m=m->next()) { + hn = m->h().toString(); + if (m->hbinfo().up() && m->state().readable() && + (m->hbinfo().opTime > lastOpTimeWritten) && + m->config().slaveDelay == 0 && + _getOplogReader(r, hn)) { + target = m; + break; + } } - if( lastOpTimeWritten < ts ) { - log() << "replSet error RS102 too stale to catch up, at least from primary: " << hn << rsLog; - log() << "replSet our last optime : " << lastOpTimeWritten.toStringLong() << rsLog; - log() << "replSet oldest at " << hn << " : " << ts.toStringLong() << rsLog; - log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog; - sethbmsg("error RS102 too stale to catch up"); - sleepsecs(120); + + // no server found + if (target == 0) { + // if there is no one to sync from + OpTime minvalid; + tryToGoLiveAsASecondary(minvalid); return; } } r.tailingQueryGTE(rsoplog, lastOpTimeWritten); assert( r.haveCursor() ); - assert( r.awaitCapable() ); + + uassert(1000, "replSet source for syncing doesn't seem to be await capable -- is it an older version of mongodb?", r.awaitCapable() ); { if( !r.more() ) { @@ -222,7 +300,7 @@ namespace mongo { return; } OpTime theirTS = theirLastOp["ts"]._opTime(); - if( theirTS < lastOpTimeWritten ) { + if( theirTS < lastOpTimeWritten ) { log() << "replSet we are ahead of the primary, will try to roll back" << rsLog; syncRollback(r); return; @@ -231,7 +309,7 @@ namespace mongo { log() << "replSet syncTail condition 1" << rsLog; sleepsecs(1); } - catch(DBException& e) { + catch(DBException& e) { log() << "replSet error querying " << hn << ' ' << e.toString() << rsLog; sleepsecs(2); } @@ -249,12 +327,9 @@ namespace mongo { BSONObj o = r.nextSafe(); OpTime ts = o["ts"]._opTime(); long long h = o["h"].numberLong(); - if( ts != lastOpTimeWritten || h != lastH ) { - log(1) << "TEMP our last op time written: " << lastOpTimeWritten.toStringPretty() << endl; - log(1) << "TEMP primary's GTE: " << ts.toStringPretty() << endl; - /* - }*/ - + if( ts != lastOpTimeWritten || h != lastH ) { + log() << "replSet our last op time written: " << lastOpTimeWritten.toStringPretty() << endl; + log() << "replset source's GTE: " << ts.toStringPretty() << endl; syncRollback(r); return; } @@ -268,49 +343,45 @@ namespace mongo { while( 1 ) { while( 1 ) { - if( !r.moreInCurrentBatch() ) { - /* we need to occasionally check some things. between + if( !r.moreInCurrentBatch() ) { + /* we need to occasionally check some things. between batches is probably a good time. */ /* perhaps we should check this earlier? but not before the rollback checks. */ - if( state().recovering() ) { + if( state().recovering() ) { /* can we go to RS_SECONDARY state? 
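When the primary is unreachable (or we are stale relative to it), syncTail() above falls back to any member that is up, in a readable state, strictly ahead of our last written optime, and not configured with a slaveDelay. A standalone restatement of that fallback filter; the types are placeholders, not the patch's Member and HeartbeatInfo:

    #include <string>
    #include <vector>

    struct TailCandidate {          // illustrative stand-in for a member plus heartbeat info
        bool up;
        bool readable;              // primary or secondary
        long long optime;
        int slaveDelay;             // seconds
        std::string host;
    };

    // Pick someone to tail when the primary is unavailable: they must have ops we lack.
    // (The real loop additionally requires a successful, non-stale connection to them.)
    const TailCandidate* pickTailSource(const std::vector<TailCandidate>& members,
                                        long long ourLastOpTime) {
        for (const TailCandidate& m : members) {
            if (m.up && m.readable && m.optime > ourLastOpTime && m.slaveDelay == 0)
                return &m;
        }
        return nullptr;             // no one to sync from; try to go live as a secondary
    }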
we can if not too old and if minvalid achieved */ OpTime minvalid; bool golive = ReplSetImpl::tryToGoLiveAsASecondary(minvalid); if( golive ) { ; } - else { + else { sethbmsg(str::stream() << "still syncing, not yet to minValid optime" << minvalid.toString()); } /* todo: too stale capability */ } - if( box.getPrimary() != primary ) - return; + { + const Member *primary = box.getPrimary(); + + if( !target->hbinfo().hbstate.readable() || + // if we are not syncing from the primary, return (if + // it's up) so that we can try accessing it again + (target != primary && primary != 0)) { + return; + } + } } if( !r.more() ) break; - { + { BSONObj o = r.nextSafe(); /* note we might get "not master" at some point */ - { - writelock lk(""); - /* if we have become primary, we dont' want to apply things from elsewhere - anymore. assumePrimary is in the db lock so we are safe as long as - we check after we locked above. */ - if( box.getPrimary() != primary ) { - if( box.getState().primary() ) - log(0) << "replSet stopping syncTail we are now primary" << rsLog; - return; - } - - syncApply(o); - _logOpObjRS(o); /* with repl sets we write the ops to our oplog too: */ - } int sd = myConfig().slaveDelay; - if( sd ) { + // ignore slaveDelay if the box is still initializing. once + // it becomes secondary we can worry about it. + if( sd && box.getState().secondary() ) { const OpTime ts = o["ts"]._opTime(); long long a = ts.getSecs(); long long b = time(0); @@ -329,13 +400,30 @@ namespace mongo { sleepsecs(6); if( time(0) >= waitUntil ) break; - if( box.getPrimary() != primary ) + if( !target->hbinfo().hbstate.readable() ) { break; + } if( myConfig().slaveDelay != sd ) // reconf break; } } } + + } + + { + writelock lk(""); + + /* if we have become primary, we dont' want to apply things from elsewhere + anymore. assumePrimary is in the db lock so we are safe as long as + we check after we locked above. */ + if( box.getState().primary() ) { + log(0) << "replSet stopping syncTail we are now primary" << rsLog; + return; + } + + syncApply(o); + _logOpObjRS(o); /* with repl sets we write the ops to our oplog too: */ } } } @@ -345,8 +433,9 @@ namespace mongo { // TODO : reuse our connection to the primary. return; } - if( box.getPrimary() != primary ) + if( !target->hbinfo().hbstate.readable() ) { return; + } // looping back is ok because this is a tailable cursor } } @@ -357,15 +446,11 @@ namespace mongo { sleepsecs(1); return; } - if( sp.state.fatal() ) { + if( sp.state.fatal() ) { sleepsecs(5); return; } - /* later, we can sync from up secondaries if we want. tbd. */ - if( sp.primary == 0 ) - return; - /* do we have anything at all? */ if( lastOpTimeWritten.isNull() ) { syncDoInitialSync(); @@ -377,23 +462,64 @@ namespace mongo { } void ReplSetImpl::syncThread() { - if( myConfig().arbiterOnly ) - return; - while( 1 ) { + /* test here was to force a receive timeout + ScopedConn c("localhost"); + bo info; + try { + log() << "this is temp" << endl; + c.runCommand("admin", BSON("sleep"<<120), info); + log() << info.toString() << endl; + c.runCommand("admin", BSON("sleep"<<120), info); + log() << "temp" << endl; + } + catch( DBException& e ) { + log() << e.toString() << endl; + c.runCommand("admin", BSON("sleep"<<120), info); + log() << "temp" << endl; + } + */ + + while( 1 ) { + if( myConfig().arbiterOnly ) + return; + try { _syncThread(); } - catch(DBException& e) { + catch(DBException& e) { sethbmsg("syncThread: " + e.toString()); sleepsecs(10); } - catch(...) { + catch(...) 
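The slaveDelay handling above only takes effect once the node is SECONDARY: each op is held until it is at least slaveDelay seconds old, sleeping in short increments so a config change or an unreadable sync target can cut the wait short. The wait computation, standalone and with the sleeping stubbed out:

    #include <ctime>
    #include <iostream>

    // Returns how many seconds we still have to wait before applying an op whose
    // timestamp (in seconds) is opSecs, given a slaveDelay in seconds.
    long long secondsToWait(long long opSecs, int slaveDelay, long long nowSecs) {
        long long waitUntil = opSecs + slaveDelay;
        return waitUntil > nowSecs ? waitUntil - nowSecs : 0;
    }

    int main() {
        long long now = std::time(nullptr);
        // op written 10 seconds ago, delay of 60 -> roughly 50 seconds left to wait
        std::cout << secondsToWait(now - 10, 60, now) << "\n";
        // op written 2 minutes ago, delay of 60 -> apply immediately
        std::cout << secondsToWait(now - 120, 60, now) << "\n";
    }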
{ sethbmsg("unexpected exception in syncThread()"); - // TODO : SET NOT SECONDARY here. + // TODO : SET NOT SECONDARY here? sleepsecs(60); } sleepsecs(1); + + /* normally msgCheckNewState gets called periodically, but in a single node repl set there + are no heartbeat threads, so we do it here to be sure. this is relevant if the singleton + member has done a stepDown() and needs to come back up. + */ + OCCASIONALLY mgr->send( boost::bind(&Manager::msgCheckNewState, theReplSet->mgr) ); + } + } + + void startSyncThread() { + static int n; + if( n != 0 ) { + log() << "replSet ERROR : more than one sync thread?" << rsLog; + assert( n == 0 ); + } + n++; + + Client::initThread("replica set sync"); + cc().iAmSyncThread(); + if (!noauth) { + cc().getAuthenticationInfo()->authorize("local"); } + theReplSet->syncThread(); + cc().shutdown(); } } diff --git a/db/repl_block.cpp b/db/repl_block.cpp index 9cff24f..05be343 100644 --- a/db/repl_block.cpp +++ b/db/repl_block.cpp @@ -35,13 +35,13 @@ namespace mongo { class SlaveTracking : public BackgroundJob { public: - string name() { return "SlaveTracking"; } + string name() const { return "SlaveTracking"; } static const char * NS; struct Ident { - - Ident(BSONObj r,string h,string n){ + + Ident(BSONObj r,string h,string n) { BSONObjBuilder b; b.appendElements( r ); b.append( "host" , h ); @@ -52,18 +52,18 @@ namespace mongo { bool operator<( const Ident& other ) const { return obj.woCompare( other.obj ) < 0; } - + BSONObj obj; }; struct Info { - Info() : loc(0){} - ~Info(){ - if ( loc && owned ){ + Info() : loc(0) {} + ~Info() { + if ( loc && owned ) { delete loc; } } - bool owned; + bool owned; // true if loc is a pointer of our creation (and not a pointer into a MMF) OpTime * loc; }; @@ -72,33 +72,33 @@ namespace mongo { _started = false; } - void run(){ + void run() { Client::initThread( "slaveTracking" ); DBDirectClient db; - while ( ! inShutdown() ){ + while ( ! inShutdown() ) { sleepsecs( 1 ); if ( ! 
_dirty ) continue; - + writelock lk(NS); list< pair > todo; - + { scoped_lock mylk(_mutex); - - for ( map::iterator i=_slaves.begin(); i!=_slaves.end(); i++ ){ + + for ( map::iterator i=_slaves.begin(); i!=_slaves.end(); i++ ) { BSONObjBuilder temp; temp.appendTimestamp( "syncedTo" , i->second.loc[0].asDate() ); - todo.push_back( pair( i->first.obj.getOwned() , + todo.push_back( pair( i->first.obj.getOwned() , BSON( "$set" << temp.obj() ).getOwned() ) ); } - + _slaves.clear(); } - for ( list< pair >::iterator i=todo.begin(); i!=todo.end(); i++ ){ + for ( list< pair >::iterator i=todo.begin(); i!=todo.end(); i++ ) { db.update( NS , i->first , i->second , true ); } @@ -106,52 +106,54 @@ namespace mongo { } } - void reset(){ + void reset() { scoped_lock mylk(_mutex); _slaves.clear(); } - void update( const BSONObj& rid , const string& host , const string& ns , OpTime last ){ + void update( const BSONObj& rid , const string& host , const string& ns , OpTime last ) { REPLDEBUG( host << " " << rid << " " << ns << " " << last ); scoped_lock mylk(_mutex); - + #ifdef _DEBUG MongoFileAllowWrites allowWrites; #endif Ident ident(rid,host,ns); Info& i = _slaves[ ident ]; - if ( i.loc ){ - i.loc[0] = last; + if ( i.loc ) { + if( i.owned ) + i.loc[0] = last; + else + getDur().setNoJournal(i.loc, &last, sizeof(last)); return; } - + dbMutex.assertAtLeastReadLocked(); BSONObj res; - if ( Helpers::findOne( NS , ident.obj , res ) ){ + if ( Helpers::findOne( NS , ident.obj , res ) ) { assert( res["syncedTo"].type() ); i.owned = false; i.loc = (OpTime*)res["syncedTo"].value(); - i.loc[0] = last; + getDur().setNoJournal(i.loc, &last, sizeof(last)); return; } - + i.owned = true; - i.loc = new OpTime[1]; - i.loc[0] = last; + i.loc = new OpTime(last); _dirty = true; - if ( ! _started ){ + if ( ! 
_started ) { // start background thread here since we definitely need it _started = true; go(); } } - - bool opReplicatedEnough( OpTime op , int w ){ + + bool opReplicatedEnough( OpTime op , int w ) { RARELY { REPLDEBUG( "looking for : " << op << " w=" << w ); } @@ -161,9 +163,9 @@ namespace mongo { w--; // now this is the # of slaves i need scoped_lock mylk(_mutex); - for ( map::iterator i=_slaves.begin(); i!=_slaves.end(); i++){ + for ( map::iterator i=_slaves.begin(); i!=_slaves.end(); i++) { OpTime s = *(i->second.loc); - if ( s < op ){ + if ( s < op ) { continue; } if ( --w == 0 ) @@ -171,9 +173,15 @@ namespace mongo { } return w <= 0; } - + + unsigned getSlaveCount() const { + scoped_lock mylk(_mutex); + + return _slaves.size(); + } + // need to be careful not to deadlock with this - mongo::mutex _mutex; + mutable mongo::mutex _mutex; map _slaves; bool _dirty; bool _started; @@ -182,12 +190,12 @@ namespace mongo { const char * SlaveTracking::NS = "local.slaves"; - void updateSlaveLocation( CurOp& curop, const char * ns , OpTime lastOp ){ + void updateSlaveLocation( CurOp& curop, const char * ns , OpTime lastOp ) { if ( lastOp.isNull() ) return; - + assert( str::startsWith(ns, "local.oplog.") ); - + Client * c = curop.getClient(); assert(c); BSONObj rid = c->getRemoteID(); @@ -197,11 +205,15 @@ namespace mongo { slaveTracking.update( rid , curop.getRemoteString( false ) , ns , lastOp ); } - bool opReplicatedEnough( OpTime op , int w ){ + bool opReplicatedEnough( OpTime op , int w ) { return slaveTracking.opReplicatedEnough( op , w ); } - void resetSlaveCache(){ + void resetSlaveCache() { slaveTracking.reset(); } + + unsigned getSlaveCount() { + return slaveTracking.getSlaveCount(); + } } diff --git a/db/repl_block.h b/db/repl_block.h index e9a990a..978932d 100644 --- a/db/repl_block.h +++ b/db/repl_block.h @@ -24,11 +24,15 @@ /** local.slaves - current location for all slaves - + */ namespace mongo { - - void updateSlaveLocation( CurOp& curop, const char * ns , OpTime lastOp ); + + void updateSlaveLocation( CurOp& curop, const char * oplog_ns , OpTime lastOp ); + + /** @return true if op has made it to w servers */ bool opReplicatedEnough( OpTime op , int w ); + void resetSlaveCache(); + unsigned getSlaveCount(); } diff --git a/db/replpair.h b/db/replpair.h index 1da8b78..a551308 100644 --- a/db/replpair.h +++ b/db/replpair.h @@ -55,8 +55,8 @@ namespace mongo { int remotePort; string remoteHost; string remote; // host:port if port specified. - // int date; // -1 not yet set; 0=slave; 1=master - + // int date; // -1 not yet set; 0=slave; 1=master + string getInfo() { stringstream ss; ss << " state: "; @@ -113,12 +113,12 @@ namespace mongo { */ inline bool _isMaster() { if( replSet ) { - if( theReplSet ) + if( theReplSet ) return theReplSet->isPrimary(); return false; } - if( ! replSettings.slave ) + if( ! replSettings.slave ) return true; if ( replAllDead ) @@ -128,17 +128,17 @@ namespace mongo { if( replPair->state == ReplPair::State_Master ) return true; } - else { + else { if( replSettings.master ) { - // if running with --master --slave, allow. note that master is also true + // if running with --master --slave, allow. note that master is also true // for repl pairs so the check for replPair above is important. 
return true; } } - + if ( cc().isGod() ) return true; - + return false; } inline bool isMaster(const char *client = 0) { @@ -152,20 +152,22 @@ namespace mongo { return strcmp( client, "local" ) == 0; } - inline void notMasterUnless(bool expr) { + inline void notMasterUnless(bool expr) { uassert( 10107 , "not master" , expr ); } - /* we allow queries to SimpleSlave's -- but not to the slave (nonmaster) member of a replica pair - so that queries to a pair are realtime consistent as much as possible. use setSlaveOk() to + /* we allow queries to SimpleSlave's -- but not to the slave (nonmaster) member of a replica pair + so that queries to a pair are realtime consistent as much as possible. use setSlaveOk() to query the nonmaster member of a replica pair. */ inline void replVerifyReadsOk(ParsedQuery& pq) { if( replSet ) { - /* todo: speed up the secondary case. as written here there are 2 mutex entries, it can be 1. */ + /* todo: speed up the secondary case. as written here there are 2 mutex entries, it can b 1. */ if( isMaster() ) return; - notMasterUnless( pq.hasOption(QueryOption_SlaveOk) && theReplSet && theReplSet->isSecondary() ); - } else { + uassert(13435, "not master and slaveok=false", pq.hasOption(QueryOption_SlaveOk)); + uassert(13436, "not master or secondary, can't read", theReplSet && theReplSet->isSecondary() ); + } + else { notMasterUnless(isMaster() || pq.hasOption(QueryOption_SlaveOk) || replSettings.slave == SimpleSlave ); } } diff --git a/db/resource.h b/db/resource.h old mode 100755 new mode 100644 index bee8d30..9ba1ed2 --- a/db/resource.h +++ b/db/resource.h @@ -1,16 +1,16 @@ -//{{NO_DEPENDENCIES}} -// Microsoft Visual C++ generated include file. -// Used by db.rc -// -#define IDI_ICON2 102 - -// Next default values for new objects -// -#ifdef APSTUDIO_INVOKED -#ifndef APSTUDIO_READONLY_SYMBOLS -#define _APS_NEXT_RESOURCE_VALUE 104 -#define _APS_NEXT_COMMAND_VALUE 40001 -#define _APS_NEXT_CONTROL_VALUE 1001 -#define _APS_NEXT_SYMED_VALUE 101 -#endif -#endif +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by db.rc +// +#define IDI_ICON2 102 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 104 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1001 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/db/restapi.cpp b/db/restapi.cpp index e9a7ae2..7460c94 100644 --- a/db/restapi.cpp +++ b/db/restapi.cpp @@ -29,6 +29,8 @@ #include "clientcursor.h" #include "background.h" +#include "restapi.h" + namespace mongo { extern const char *replInfo; @@ -39,17 +41,17 @@ namespace mongo { class RESTHandler : public DbWebHandler { public: - RESTHandler() : DbWebHandler( "DUMMY REST" , 1000 , true ){} + RESTHandler() : DbWebHandler( "DUMMY REST" , 1000 , true ) {} - virtual bool handles( const string& url ) const { - return + virtual bool handles( const string& url ) const { + return url[0] == '/' && url.find_last_of( '/' ) > 0; } - virtual void handle( const char *rq, string url, + virtual void handle( const char *rq, string url, BSONObj params, string& responseMsg, int& responseCode, - vector& headers, const SockAddr &from ){ + vector& headers, const SockAddr &from ) { string::size_type first = url.find( "/" , 1 ); if ( first == string::npos ) { @@ -62,12 +64,6 @@ namespace mongo { string coll = url.substr( first + 1 ); string action = ""; - BSONObj params; - if ( coll.find( "?" 
) != string::npos ) { - MiniWebServer::parseParams( params , coll.substr( coll.find( "?" ) + 1 ) ); - coll = coll.substr( 0 , coll.find( "?" ) ); - } - string::size_type last = coll.find_last_of( "/" ); if ( last == string::npos ) { action = coll; @@ -107,7 +103,7 @@ namespace mongo { out() << "don't know how to handle a [" << method << "]" << endl; } - if( html ) + if( html ) headers.push_back("Content-Type: text/html;charset=utf-8"); else headers.push_back("Content-Type: text/plain;charset=utf-8"); @@ -118,7 +114,7 @@ namespace mongo { bool handleRESTQuery( string ns , string action , BSONObj & params , int & responseCode , stringstream & out ) { Timer t; - int html = _getOption( params["html"] , 0 ); + int html = _getOption( params["html"] , 0 ); int skip = _getOption( params["skip"] , 0 ); int num = _getOption( params["limit"] , _getOption( params["count" ] , 1000 ) ); // count is old, limit is new @@ -131,7 +127,7 @@ namespace mongo { BSONObjBuilder queryBuilder; BSONObjIterator i(params); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); string name = e.fieldName(); if ( ! name.find( "filter_" ) == 0 ) @@ -167,10 +163,11 @@ namespace mongo { if( html ) { string title = string("query ") + ns; - out << start(title) + out << start(title) << p(title) << "
<pre>";
-            } else {
+            }
+            else {
                 out << "{\n";
                 out << "  \"offset\" : " << skip << ",\n";
                 out << "  \"rows\": [\n";
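The handleRESTQuery hunks around this point read the URL parameters skip and limit (count is the older spelling; 1000 is the default when neither is given) and turn every filter_<field> parameter into a query term, then emit either an HTML table or the JSON envelope built above. The following is only a rough, self-contained C++ sketch of that parameter handling, not the patched code; HttpParams, RestQuery and extractRestQuery are names invented for the illustration.

    // Sketch only: mirrors the skip/limit/count and filter_* handling of handleRESTQuery,
    // using plain std::map in place of the BSON parameter object.
    #include <cstdlib>
    #include <iostream>
    #include <map>
    #include <string>

    typedef std::map<std::string, std::string> HttpParams;

    struct RestQuery {
        std::map<std::string, std::string> filter; // field -> value taken from filter_<field>
        int skip;
        int limit;
    };

    static int getOption(const HttpParams& p, const std::string& name, int def) {
        HttpParams::const_iterator it = p.find(name);
        return it == p.end() ? def : std::atoi(it->second.c_str());
    }

    static RestQuery extractRestQuery(const HttpParams& params) {
        RestQuery q;
        q.skip = getOption(params, "skip", 0);
        // "count" is the old name, "limit" the new one; fall back to 1000 if both are absent
        q.limit = getOption(params, "limit", getOption(params, "count", 1000));
        for (HttpParams::const_iterator i = params.begin(); i != params.end(); ++i) {
            const std::string& name = i->first;
            if (name.compare(0, 7, "filter_") == 0)       // only filter_* params become query terms
                q.filter[name.substr(7)] = i->second;
        }
        return q;
    }

    int main() {
        HttpParams params;
        params["filter_name"] = "mongo";
        params["count"] = "50";                           // legacy alias for limit
        RestQuery q = extractRestQuery(params);
        std::cout << "skip=" << q.skip << " limit=" << q.limit
                  << " name filter=" << q.filter["name"] << std::endl; // skip=0 limit=50 name filter=mongo
        return 0;
    }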
@@ -195,7 +192,7 @@ namespace mongo {
                 }
             }
 
-            if( html ) { 
+            if( html ) {
                 out << "
\n"; if( howMany == 0 ) out << p("Collection is empty"); out << _end(); @@ -216,7 +213,8 @@ namespace mongo { try { BSONObj obj = fromjson( body ); db.insert( ns.c_str(), obj ); - } catch ( ... ) { + } + catch ( ... ) { responseCode = 400; // Bad Request. Seems reasonable for now. out << "{ \"ok\" : false }"; return; @@ -233,18 +231,18 @@ namespace mongo { return atoi( e.valuestr() ); return def; } - + DBDirectClient db; } restHandler; - bool webHaveAdminUsers(){ + bool RestAdminAccess::haveAdminUsers() const { readlocktryassert rl("admin.system.users", 10000); - Client::Context cx( "admin.system.users" ); - return ! Helpers::isEmpty("admin.system.users"); + Client::Context cx( "admin.system.users", dbpath, NULL, false ); + return ! Helpers::isEmpty("admin.system.users", false); } - BSONObj webGetAdminUser( const string& username ){ + BSONObj RestAdminAccess::getAdminUser( const string& username ) const { Client::GodScope gs; readlocktryassert rl("admin.system.users", 10000); Client::Context cx( "admin.system.users" ); @@ -256,19 +254,19 @@ namespace mongo { class LowLevelMongodStatus : public WebStatusPlugin { public: - LowLevelMongodStatus() : WebStatusPlugin( "low level" , 5 , "requires read lock" ){} + LowLevelMongodStatus() : WebStatusPlugin( "low level" , 5 , "requires read lock" ) {} - virtual void init(){} + virtual void init() {} - void _gotLock( int millis , stringstream& ss ){ + void _gotLock( int millis , stringstream& ss ) { ss << "
<pre>\n";
             ss << "time to get readlock: " << millis << "ms\n";
-            
+
             ss << "# databases: " << dbHolder.size() << '\n';
-            
+
             if( ClientCursor::numCursors()>500 )
                 ss << "# Cursors: " << ClientCursor::numCursors() << '\n';
-            
+
             ss << "\nreplication: ";
             if( *replInfo )
                 ss << "\nreplInfo:  " << replInfo << "\n\n";
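The run() hunk that follows only renders this status section when readlocktry lk("", 300) actually obtains the read lock within 300 milliseconds, so a long-held write lock degrades the admin page instead of hanging it. Below is a rough standalone analogue of that bounded-wait pattern using the standard timed mutex rather than MongoDB's readlocktry; statusSection and the database count are illustrative only.

    // Sketch only: try to take a lock for at most 300ms and report a fallback message
    // when it cannot be obtained, the same shape as readlocktry + lk.got() below.
    #include <chrono>
    #include <iostream>
    #include <mutex>
    #include <sstream>

    std::timed_mutex dbLock;   // stand-in for the server-wide lock, not the real dbMutex

    void statusSection(std::ostream& out) {
        std::ostringstream ss;
        std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
        if (dbLock.try_lock_for(std::chrono::milliseconds(300))) {
            long long ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                               std::chrono::steady_clock::now() - start).count();
            ss << "time to get lock: " << ms << "ms\n";
            ss << "# databases: " << 3 << '\n';            // placeholder figure for the demo
            dbLock.unlock();
        }
        else {
            ss << "could not get lock within 300ms, not printing details\n";
        }
        out << ss.str();
    }

    int main() {
        statusSection(std::cout);
        return 0;
    }

The actual plugin measures the wait with a Timer and calls _gotLock only when lk.got() returns true, as the hunk below shows.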
@@ -296,10 +294,10 @@ namespace mongo {
ss << "</pre>
\n"; } - virtual void run( stringstream& ss ){ + virtual void run( stringstream& ss ) { Timer t; readlocktry lk( "" , 300 ); - if ( lk.got() ){ + if ( lk.got() ) { _gotLock( t.millis() , ss ); } else { diff --git a/db/restapi.h b/db/restapi.h new file mode 100644 index 0000000..e5ac520 --- /dev/null +++ b/db/restapi.h @@ -0,0 +1,34 @@ +/** @file restapi.h + */ + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../util/admin_access.h" + +namespace mongo { + + class RestAdminAccess : public AdminAccess { + public: + virtual ~RestAdminAccess() { } + + virtual bool haveAdminUsers() const; + virtual BSONObj getAdminUser( const string& username ) const; + }; + +} // namespace mongo diff --git a/db/scanandorder.h b/db/scanandorder.h index 8d63b9a..4c491fa 100644 --- a/db/scanandorder.h +++ b/db/scanandorder.h @@ -50,34 +50,25 @@ namespace mongo { _ response size limit from runquery; push it up a bit. */ - inline void fillQueryResultFromObj(BufBuilder& bb, FieldMatcher *filter, BSONObj& js, DiskLoc* loc=NULL) { + inline void fillQueryResultFromObj(BufBuilder& bb, Projection *filter, const BSONObj& js, DiskLoc* loc=NULL) { if ( filter ) { BSONObjBuilder b( bb ); - BSONObjIterator i( js ); - while ( i.more() ){ - BSONElement e = i.next(); - const char * fname = e.fieldName(); - - if ( strcmp( fname , "_id" ) == 0 ){ - if (filter->includeID()) - b.append( e ); - } else { - filter->append( b , e ); - } - } + filter->transform( js , b ); if (loc) b.append("$diskLoc", loc->toBSONObj()); b.done(); - } else if (loc) { + } + else if (loc) { BSONObjBuilder b( bb ); b.appendElements(js); b.append("$diskLoc", loc->toBSONObj()); b.done(); - } else { + } + else { bb.appendBuf((void*) js.objdata(), js.objsize()); } } - + typedef multimap BestMap; class ScanAndOrder { BestMap best; // key -> full object @@ -87,9 +78,10 @@ namespace mongo { unsigned approxSize; void _add(BSONObj& k, BSONObj o, DiskLoc* loc) { - if (!loc){ + if (!loc) { best.insert(make_pair(k.getOwned(),o.getOwned())); - } else { + } + else { BSONObjBuilder b; b.appendElements(o); b.append("$diskLoc", loc->toBSONObj()); @@ -110,8 +102,8 @@ namespace mongo { public: ScanAndOrder(int _startFrom, int _limit, BSONObj _order) : - best( BSONObjCmp( _order ) ), - startFrom(_startFrom), order(_order) { + best( BSONObjCmp( _order ) ), + startFrom(_startFrom), order(_order) { limit = _limit > 0 ? _limit + startFrom : 0x7fffffff; approxSize = 0; } @@ -140,7 +132,7 @@ namespace mongo { _addIfBetter(k, o, i, loc); } - void _fill(BufBuilder& b, FieldMatcher *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) { + void _fill(BufBuilder& b, Projection *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) { int n = 0; int nFilled = 0; for ( BestMap::iterator i = begin; i != end; i++ ) { @@ -158,7 +150,7 @@ namespace mongo { } /* scanning complete. stick the query result in b for n objects. 
*/ - void fill(BufBuilder& b, FieldMatcher *filter, int& nout) { + void fill(BufBuilder& b, Projection *filter, int& nout) { _fill(b, filter, nout, best.begin(), best.end()); } diff --git a/db/security.cpp b/db/security.cpp index c552b53..1ec4218 100644 --- a/db/security.cpp +++ b/db/security.cpp @@ -20,19 +20,17 @@ #include "security.h" #include "instance.h" #include "client.h" -#include "curop.h" +#include "curop-inl.h" #include "db.h" #include "dbhelpers.h" namespace mongo { - bool noauth = true; - - int AuthenticationInfo::warned = 0; + int AuthenticationInfo::warned = 0; - void AuthenticationInfo::print(){ + void AuthenticationInfo::print() { cout << "AuthenticationInfo: " << this << '\n'; - for ( map::iterator i=m.begin(); i!=m.end(); i++ ){ + for ( map::iterator i=m.begin(); i!=m.end(); i++ ) { cout << "\t" << i->first << "\t" << i->second.level << '\n'; } cout << "END" << endl; @@ -40,16 +38,16 @@ namespace mongo { bool AuthenticationInfo::_isAuthorizedSpecialChecks( const string& dbname ) { - if ( cc().isGod() ){ + if ( cc().isGod() ) { return true; } - - if ( isLocalHost ){ - atleastreadlock l(""); + + if ( isLocalHost ) { + atleastreadlock l(""); Client::GodScope gs; Client::Context c("admin.system.users"); BSONObj result; - if( ! Helpers::getSingleton("admin.system.users", result) ){ + if( ! Helpers::getSingleton("admin.system.users", result) ) { if( warned == 0 ) { warned++; log() << "note: no users configured in admin.system.users, allowing localhost access" << endl; diff --git a/db/security.h b/db/security.h index a6a9103..2b947c1 100644 --- a/db/security.h +++ b/db/security.h @@ -20,12 +20,10 @@ #include "nonce.h" #include "concurrency.h" +#include "security_key.h" namespace mongo { - // --noauth cmd line option - extern bool noauth; - /* for a particular db */ struct Auth { Auth() { level = 0; } @@ -35,36 +33,36 @@ namespace mongo { class AuthenticationInfo : boost::noncopyable { mongo::mutex _lock; map m; // dbname -> auth - static int warned; + static int warned; public: - bool isLocalHost; + bool isLocalHost; AuthenticationInfo() : _lock("AuthenticationInfo") { isLocalHost = false; } ~AuthenticationInfo() { } - void logout(const string& dbname ) { + void logout(const string& dbname ) { scoped_lock lk(_lock); - m.erase(dbname); - } - void authorize(const string& dbname ) { + m.erase(dbname); + } + void authorize(const string& dbname ) { scoped_lock lk(_lock); m[dbname].level = 2; } void authorizeReadOnly(const string& dbname) { scoped_lock lk(_lock); - m[dbname].level = 1; + m[dbname].level = 1; } bool isAuthorized(const string& dbname) { return _isAuthorized( dbname, 2 ); } bool isAuthorizedReads(const string& dbname) { return _isAuthorized( dbname, 1 ); } bool isAuthorizedForLock(const string& dbname, int lockType ) { return _isAuthorized( dbname , lockType > 0 ? 
2 : 1 ); } - + void print(); protected: - bool _isAuthorized(const string& dbname, int level) { + bool _isAuthorized(const string& dbname, int level) { if( m[dbname].level >= level ) return true; - if( noauth ) return true; - if( m["admin"].level >= level ) return true; - if( m["local"].level >= level ) return true; + if( noauth ) return true; + if( m["admin"].level >= level ) return true; + if( m["local"].level >= level ) return true; return _isAuthorizedSpecialChecks( dbname ); } diff --git a/db/security_commands.cpp b/db/security_commands.cpp index 7bf2813..67605aa 100644 --- a/db/security_commands.cpp +++ b/db/security_commands.cpp @@ -22,7 +22,7 @@ #include "pch.h" #include "security.h" #include "../util/md5.hpp" -#include "json.h" +#include "json.h" #include "pdfile.h" #include "db.h" #include "dbhelpers.h" @@ -32,17 +32,17 @@ namespace mongo { -/* authentication + /* authentication - system.users contains - { user : , pwd : , ... } + system.users contains + { user : , pwd : , ... } - getnonce sends nonce to client + getnonce sends nonce to client - client then sends { authenticate:1, nonce:, user:, key: } + client then sends { authenticate:1, nonce:, user:, key: } - where is md5() as a string -*/ + where is md5() as a string + */ boost::thread_specific_ptr lastNonce; @@ -83,7 +83,7 @@ namespace mongo { return true; } } cmdLogout; - + class CmdAuthenticate : public Command { public: virtual bool requiresAuth() { return false; } @@ -93,7 +93,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return WRITE; } // TODO: make this READ + virtual LockType locktype() const { return WRITE; } virtual void help(stringstream& ss) const { ss << "internal"; } CmdAuthenticate() : Command("authenticate") {} bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { @@ -102,16 +102,16 @@ namespace mongo { string user = cmdObj.getStringField("user"); string key = cmdObj.getStringField("key"); string received_nonce = cmdObj.getStringField("nonce"); - - if( user.empty() || key.empty() || received_nonce.empty() ) { - log() << "field missing/wrong type in received authenticate command " - << dbname - << endl; + + if( user.empty() || key.empty() || received_nonce.empty() ) { + log() << "field missing/wrong type in received authenticate command " + << dbname + << endl; errmsg = "auth fails"; sleepmillis(10); return false; } - + stringstream digestBuilder; { @@ -120,12 +120,13 @@ namespace mongo { if ( ln == 0 ) { reject = true; log(1) << "auth: no lastNonce" << endl; - } else { + } + else { digestBuilder << hex << *ln; reject = digestBuilder.str() != received_nonce; if ( reject ) log(1) << "auth: different lastNonce" << endl; } - + if ( reject ) { log() << "auth: bad nonce received or getnonce not called. could be a driver bug or a security attack. 
db:" << cc().database()->name << endl; errmsg = "auth fails"; @@ -134,52 +135,60 @@ namespace mongo { } } - static BSONObj userPattern = fromjson("{\"user\":1}"); - string systemUsers = dbname + ".system.users"; - OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1"); - BSONObj userObj; - { - BSONObjBuilder b; - b << "user" << user; - BSONObj query = b.done(); - if( !Helpers::findOne(systemUsers.c_str(), query, userObj) ) { - log() << "auth: couldn't find user " << user << ", " << systemUsers << endl; - errmsg = "auth fails"; - return false; + string pwd; + + if (user == internalSecurity.user) { + pwd = internalSecurity.pwd; + } + else { + static BSONObj userPattern = fromjson("{\"user\":1}"); + string systemUsers = dbname + ".system.users"; + OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1"); + { + BSONObjBuilder b; + b << "user" << user; + BSONObj query = b.done(); + if( !Helpers::findOne(systemUsers.c_str(), query, userObj) ) { + log() << "auth: couldn't find user " << user << ", " << systemUsers << endl; + errmsg = "auth fails"; + return false; + } } + + pwd = userObj.getStringField("pwd"); } - + + md5digest d; { - - string pwd = userObj.getStringField("pwd"); digestBuilder << user << pwd; string done = digestBuilder.str(); - + md5_state_t st; md5_init(&st); md5_append(&st, (const md5_byte_t *) done.c_str(), done.size()); md5_finish(&st, d); } - + string computed = digestToString( d ); - - if ( key != computed ){ + + if ( key != computed ) { log() << "auth: key mismatch " << user << ", ns:" << dbname << endl; errmsg = "auth fails"; return false; } AuthenticationInfo *ai = cc().getAuthenticationInfo(); - + if ( userObj[ "readOnly" ].isBoolean() && userObj[ "readOnly" ].boolean() ) { ai->authorizeReadOnly( cc().database()->name.c_str() ); - } else { + } + else { ai->authorize( cc().database()->name.c_str() ); } return true; } } cmdAuthenticate; - + } // namespace mongo diff --git a/db/security_key.cpp b/db/security_key.cpp new file mode 100644 index 0000000..1ea7021 --- /dev/null +++ b/db/security_key.cpp @@ -0,0 +1,105 @@ +// security_key.cpp +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +/** + * This file contains inter-mongo instance security helpers. Due to the + * requirement that it be possible to compile this into mongos and mongod, it + * should not depend on much external stuff. 
+ */ + +#include "pch.h" +#include "security_key.h" +#include "../client/dbclient.h" + +#include + +namespace mongo { + + bool noauth = true; + AuthInfo internalSecurity; + + bool setUpSecurityKey(const string& filename) { + struct stat stats; + + // check obvious file errors + if (stat(filename.c_str(), &stats) == -1) { + log() << "error getting file " << filename << ": " << strerror(errno) << endl; + return false; + } + +#if !defined(WIN32) + // check permissions: must be X00, where X is >= 4 + if ((stats.st_mode & (S_IRWXG|S_IRWXO)) != 0) { + log() << "permissions on " << filename << " are too open" << endl; + return false; + } +#endif + + const unsigned long long fileLength = stats.st_size; + if (fileLength < 6 || fileLength > 1024) { + log() << " key file " << filename << " has length " << stats.st_size + << ", must be between 6 and 1024 chars" << endl; + return false; + } + + FILE* file = fopen( filename.c_str(), "rb" ); + if (!file) { + log() << "error opening file: " << filename << ": " << strerror(errno) << endl; + return false; + } + + string str = ""; + + // strip key file + unsigned long long read = 0; + while (read < fileLength) { + char buf; + int readLength = fread(&buf, 1, 1, file); + if (readLength < 1) { + log() << "error reading file " << filename << endl; + return false; + } + read++; + + // check for whitespace + if ((buf >= '\x09' && buf <= '\x0D') || buf == ' ') { + continue; + } + + // check valid base64 + if ((buf < 'A' || buf > 'Z') && (buf < 'a' || buf > 'z') && (buf < '0' || buf > '9') && buf != '+' && buf != '/') { + log() << "invalid char in key file " << filename << ": " << buf << endl; + return false; + } + + str += buf; + } + + if (str.size() < 6) { + log() << "security key must be at least 6 characters" << endl; + return false; + } + + log(1) << "security key: " << str << endl; + + // createPWDigest should really not be a member func + DBClientConnection conn; + internalSecurity.pwd = conn.createPasswordDigest(internalSecurity.user, str); + + return true; + } +} // namespace mongo diff --git a/db/security_key.h b/db/security_key.h new file mode 100644 index 0000000..86f1307 --- /dev/null +++ b/db/security_key.h @@ -0,0 +1,47 @@ +// security_key.h + +/** +* Copyright (C) 2009 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +namespace mongo { + + /** + * Internal secret key info. + */ + struct AuthInfo { + AuthInfo() { + user = "__system"; + } + string user; + string pwd; + }; + + // --noauth cmd line option + extern bool noauth; + extern AuthInfo internalSecurity; + + /** + * This method checks the validity of filename as a security key, hashes its + * contents, and stores it in the internalSecurity variable. Prints an + * error message to the logs if there's an error. 
+ * @param filename the file containing the key + * @return if the key was successfully stored + */ + bool setUpSecurityKey(const string& filename); + +} // namespace mongo diff --git a/db/stats/counters.cpp b/db/stats/counters.cpp index a2d4cfb..889e8a8 100644 --- a/db/stats/counters.cpp +++ b/db/stats/counters.cpp @@ -22,7 +22,7 @@ namespace mongo { - OpCounters::OpCounters(){ + OpCounters::OpCounters() { int zero = 0; BSONObjBuilder b; @@ -42,16 +42,16 @@ namespace mongo { _command = (AtomicUInt*)_obj["command"].value(); } - void OpCounters::gotOp( int op , bool isCommand ){ - switch ( op ){ + void OpCounters::gotOp( int op , bool isCommand ) { + switch ( op ) { case dbInsert: /*gotInsert();*/ break; // need to handle multi-insert - case dbQuery: + case dbQuery: if ( isCommand ) gotCommand(); - else - gotQuery(); + else + gotQuery(); break; - + case dbUpdate: gotUpdate(); break; case dbDelete: gotDelete(); break; case dbGetMore: gotGetMore(); break; @@ -62,24 +62,48 @@ namespace mongo { default: log() << "OpCounters::gotOp unknown op: " << op << endl; } } - - IndexCounters::IndexCounters(){ + + BSONObj& OpCounters::getObj() { + const unsigned MAX = 1 << 30; + RARELY { + bool wrap = + _insert->get() > MAX || + _query->get() > MAX || + _update->get() > MAX || + _delete->get() > MAX || + _getmore->get() > MAX || + _command->get() > MAX; + + if ( wrap ) { + _insert->zero(); + _query->zero(); + _update->zero(); + _delete->zero(); + _getmore->zero(); + _command->zero(); + } + + } + return _obj; + } + + IndexCounters::IndexCounters() { _memSupported = _pi.blockCheckSupported(); - + _btreeMemHits = 0; _btreeMemMisses = 0; _btreeAccesses = 0; - - + + _maxAllowed = ( numeric_limits< long long >::max() ) / 2; _resets = 0; _sampling = 0; _samplingrate = 100; } - - void IndexCounters::append( BSONObjBuilder& b ){ - if ( ! _memSupported ){ + + void IndexCounters::append( BSONObjBuilder& b ) { + if ( ! _memSupported ) { b.append( "note" , "not supported on this platform" ); return; } @@ -90,33 +114,33 @@ namespace mongo { bb.appendNumber( "misses" , _btreeMemMisses ); bb.append( "resets" , _resets ); - + bb.append( "missRatio" , (_btreeAccesses ? (_btreeMemMisses / (double)_btreeAccesses) : 0) ); - + bb.done(); - - if ( _btreeAccesses > _maxAllowed ){ + + if ( _btreeAccesses > _maxAllowed ) { _btreeAccesses = 0; _btreeMemMisses = 0; _btreeMemHits = 0; _resets++; } } - + FlushCounters::FlushCounters() : _total_time(0) , _flushes(0) , _last() {} - void FlushCounters::flushed(int ms){ + void FlushCounters::flushed(int ms) { _flushes++; _total_time += ms; _last_time = ms; _last = jsTime(); } - void FlushCounters::append( BSONObjBuilder& b ){ + void FlushCounters::append( BSONObjBuilder& b ) { b.appendNumber( "flushes" , _flushes ); b.appendNumber( "total_ms" , _total_time ); b.appendNumber( "average_ms" , (_flushes ? 
(_total_time / double(_flushes)) : 0.0) ); @@ -125,25 +149,59 @@ namespace mongo { } - void GenericCounter::hit( const string& name , int count ){ + void GenericCounter::hit( const string& name , int count ) { scoped_lock lk( _mutex ); _counts[name]++; } - + BSONObj GenericCounter::getObj() { BSONObjBuilder b(128); { mongo::mutex::scoped_lock lk( _mutex ); - for ( map::iterator i=_counts.begin(); i!=_counts.end(); i++ ){ + for ( map::iterator i=_counts.begin(); i!=_counts.end(); i++ ) { b.appendNumber( i->first , i->second ); } } return b.obj(); } - + + void NetworkCounter::hit( long long bytesIn , long long bytesOut ) { + const long long MAX = 1ULL << 60; + + // don't care about the race as its just a counter + bool overflow = _bytesIn > MAX || _bytesOut > MAX; + + if ( overflow ) { + _lock.lock(); + _overflows++; + _bytesIn = bytesIn; + _bytesOut = bytesOut; + _requests = 1; + _lock.unlock(); + } + else { + _lock.lock(); + _bytesIn += bytesIn; + _bytesOut += bytesOut; + _requests++; + _lock.unlock(); + } + } + + void NetworkCounter::append( BSONObjBuilder& b ) { + _lock.lock(); + b.appendNumber( "bytesIn" , _bytesIn ); + b.appendNumber( "bytesOut" , _bytesOut ); + b.appendNumber( "numRequests" , _requests ); + _lock.unlock(); + } + OpCounters globalOpCounters; + OpCounters replOpCounters; IndexCounters globalIndexCounters; FlushCounters globalFlushCounters; + NetworkCounter networkCounter; + } diff --git a/db/stats/counters.h b/db/stats/counters.h index 2704464..b5cad85 100644 --- a/db/stats/counters.h +++ b/db/stats/counters.h @@ -21,6 +21,7 @@ #include "../jsobj.h" #include "../../util/message.h" #include "../../util/processinfo.h" +#include "../../util/concurrency/spin_lock.h" namespace mongo { @@ -30,28 +31,33 @@ namespace mongo { */ class OpCounters { public: - + OpCounters(); - AtomicUInt * getInsert(){ return _insert; } - AtomicUInt * getQuery(){ return _query; } - AtomicUInt * getUpdate(){ return _update; } - AtomicUInt * getDelete(){ return _delete; } - AtomicUInt * getGetMore(){ return _getmore; } - AtomicUInt * getCommand(){ return _command; } - - void gotInsert(){ _insert[0]++; } - void gotQuery(){ _query[0]++; } - void gotUpdate(){ _update[0]++; } - void gotDelete(){ _delete[0]++; } - void gotGetMore(){ _getmore[0]++; } - void gotCommand(){ _command[0]++; } + AtomicUInt * getInsert() { return _insert; } + AtomicUInt * getQuery() { return _query; } + AtomicUInt * getUpdate() { return _update; } + AtomicUInt * getDelete() { return _delete; } + AtomicUInt * getGetMore() { return _getmore; } + AtomicUInt * getCommand() { return _command; } + + void incInsertInWriteLock(int n) { _insert->x += n; } + void gotInsert() { _insert[0]++; } + void gotQuery() { _query[0]++; } + void gotUpdate() { _update[0]++; } + void gotDelete() { _delete[0]++; } + void gotGetMore() { _getmore[0]++; } + void gotCommand() { _command[0]++; } void gotOp( int op , bool isCommand ); - BSONObj& getObj(){ return _obj; } + BSONObj& getObj(); + private: BSONObj _obj; + + // todo: there will be a lot of cache line contention on these. need to do something + // else eventually. AtomicUInt * _insert; AtomicUInt * _query; AtomicUInt * _update; @@ -59,14 +65,16 @@ namespace mongo { AtomicUInt * _getmore; AtomicUInt * _command; }; - + extern OpCounters globalOpCounters; + extern OpCounters replOpCounters; + class IndexCounters { public: IndexCounters(); - - void btree( char * node ){ + + void btree( char * node ) { if ( ! 
_memSupported ) return; if ( _sampling++ % _samplingrate ) @@ -74,28 +82,28 @@ namespace mongo { btree( _pi.blockInMemory( node ) ); } - void btree( bool memHit ){ + void btree( bool memHit ) { if ( memHit ) _btreeMemHits++; else _btreeMemMisses++; _btreeAccesses++; } - void btreeHit(){ _btreeMemHits++; _btreeAccesses++; } - void btreeMiss(){ _btreeMemMisses++; _btreeAccesses++; } - + void btreeHit() { _btreeMemHits++; _btreeAccesses++; } + void btreeMiss() { _btreeMemMisses++; _btreeAccesses++; } + void append( BSONObjBuilder& b ); - + private: ProcessInfo _pi; bool _memSupported; int _sampling; int _samplingrate; - + int _resets; long long _maxAllowed; - + long long _btreeMemMisses; long long _btreeMemHits; long long _btreeAccesses; @@ -108,7 +116,7 @@ namespace mongo { FlushCounters(); void flushed(int ms); - + void append( BSONObjBuilder& b ); private: @@ -130,4 +138,21 @@ namespace mongo { map _counts; // TODO: replace with thread safe map mongo::mutex _mutex; }; + + class NetworkCounter { + public: + NetworkCounter() : _bytesIn(0), _bytesOut(0), _requests(0), _overflows(0) {} + void hit( long long bytesIn , long long bytesOut ); + void append( BSONObjBuilder& b ); + private: + long long _bytesIn; + long long _bytesOut; + long long _requests; + + long long _overflows; + + SpinLock _lock; + }; + + extern NetworkCounter networkCounter; } diff --git a/db/stats/fine_clock.h b/db/stats/fine_clock.h index 1f23175..02600e7 100644 --- a/db/stats/fine_clock.h +++ b/db/stats/fine_clock.h @@ -36,29 +36,30 @@ namespace mongo { * Really, you shouldn't be using this class in hot code paths for * platforms you're not sure whether the overhead is low. */ - class FineClock{ + class FineClock { public: typedef timespec WallTime; - static WallTime now(){ + static WallTime now() { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts; } - static uint64_t diffInNanos( WallTime end, WallTime start ){ + static uint64_t diffInNanos( WallTime end, WallTime start ) { uint64_t diff; - if ( end.tv_nsec < start.tv_nsec ){ + if ( end.tv_nsec < start.tv_nsec ) { diff = 1000000000 * ( end.tv_sec - start.tv_sec - 1); diff += 1000000000 + end.tv_nsec - start.tv_nsec; - } else { + } + else { diff = 1000000000 * ( end.tv_sec - start.tv_sec ); diff += end.tv_nsec - start.tv_nsec; } return diff; } - + }; } diff --git a/db/stats/service_stats.cpp b/db/stats/service_stats.cpp index 5574ecb..d69147f 100644 --- a/db/stats/service_stats.cpp +++ b/db/stats/service_stats.cpp @@ -25,7 +25,7 @@ namespace mongo { using std::ostringstream; - ServiceStats::ServiceStats(){ + ServiceStats::ServiceStats() { // Time histogram covers up to 128msec in exponential intervals // starting at 125usec. 
Histogram::Options timeOpts; @@ -43,12 +43,12 @@ namespace mongo { _spaceHistogram = new Histogram( spaceOpts ); } - ServiceStats::~ServiceStats(){ + ServiceStats::~ServiceStats() { delete _timeHistogram; delete _spaceHistogram; } - void ServiceStats::logResponse( uint64_t duration, uint64_t bytes ){ + void ServiceStats::logResponse( uint64_t duration, uint64_t bytes ) { _spinLock.lock(); _timeHistogram->insert( duration / 1000 /* in usecs */ ); _spaceHistogram->insert( bytes ); diff --git a/db/stats/snapshots.cpp b/db/stats/snapshots.cpp index 3ce80ca..a81568d 100644 --- a/db/stats/snapshots.cpp +++ b/db/stats/snapshots.cpp @@ -27,28 +27,27 @@ handles snapshotting performance metrics and other such things */ namespace mongo { - void SnapshotData::takeSnapshot(){ - _created = curTimeMicros64(); - _globalUsage = Top::global.getGlobalData(); + void SnapshotData::takeSnapshot() { + _created = curTimeMicros64(); + _globalUsage = Top::global.getGlobalData(); _totalWriteLockedTime = dbMutex.info().getTimeLocked(); Top::global.cloneMap(_usage); } SnapshotDelta::SnapshotDelta( const SnapshotData& older , const SnapshotData& newer ) - : _older( older ) , _newer( newer ) - { + : _older( older ) , _newer( newer ) { assert( _newer._created > _older._created ); _elapsed = _newer._created - _older._created; - + } - - Top::CollectionData SnapshotDelta::globalUsageDiff(){ + + Top::CollectionData SnapshotDelta::globalUsageDiff() { return Top::CollectionData( _older._globalUsage , _newer._globalUsage ); } - Top::UsageMap SnapshotDelta::collectionUsageDiff(){ + Top::UsageMap SnapshotDelta::collectionUsageDiff() { Top::UsageMap u; - - for ( Top::UsageMap::const_iterator i=_newer._usage.begin(); i != _newer._usage.end(); i++ ){ + + for ( Top::UsageMap::const_iterator i=_newer._usage.begin(); i != _newer._usage.end(); i++ ) { Top::UsageMap::const_iterator j = _older._usage.find(i->first); if (j != _older._usage.end()) u[i->first] = Top::CollectionData( j->second , i->second ); @@ -62,8 +61,8 @@ namespace mongo { , _loc(0) , _stored(0) {} - - const SnapshotData* Snapshots::takeSnapshot(){ + + const SnapshotData* Snapshots::takeSnapshot() { scoped_lock lk(_lock); _loc = ( _loc + 1 ) % _n; _snapshots[_loc].takeSnapshot(); @@ -72,7 +71,7 @@ namespace mongo { return &_snapshots[_loc]; } - auto_ptr Snapshots::computeDelta( int numBack ){ + auto_ptr Snapshots::computeDelta( int numBack ) { scoped_lock lk(_lock); auto_ptr p; if ( numBack < numDeltas() ) @@ -80,43 +79,43 @@ namespace mongo { return p; } - const SnapshotData& Snapshots::getPrev( int numBack ){ + const SnapshotData& Snapshots::getPrev( int numBack ) { int x = _loc - numBack; if ( x < 0 ) x += _n; return _snapshots[x]; } - void Snapshots::outputLockInfoHTML( stringstream& ss ){ + void Snapshots::outputLockInfoHTML( stringstream& ss ) { scoped_lock lk(_lock); ss << "\n
"; - for ( int i=0; i 4100 ) + if( e < 3900 || e > 4100 ) ss << '(' << e / 1000.0 << "s)"; ss << ' '; } ss << "
\n"; } - void SnapshotThread::run(){ + void SnapshotThread::run() { Client::initThread("snapshotthread"); Client& client = cc(); long long numLoops = 0; - + const SnapshotData* prev = 0; - while ( ! inShutdown() ){ + while ( ! inShutdown() ) { try { const SnapshotData* s = statsSnapshots.takeSnapshot(); - - if ( prev ){ + + if ( prev ) { unsigned long long elapsed = s->_created - prev->_created; - if ( cmdLine.cpu ){ + if ( cmdLine.cpu ) { SnapshotDelta d( *prev , *s ); log() << "cpu: elapsed:" << (elapsed/1000) <<" writelock: " << (int)(100*d.percentWriteLocked()) << "%" << endl; } @@ -125,14 +124,14 @@ namespace mongo { prev = s; } - catch ( std::exception& e ){ + catch ( std::exception& e ) { log() << "ERROR in SnapshotThread: " << e.what() << endl; } - + numLoops++; sleepsecs(4); } - + client.shutdown(); } @@ -140,15 +139,15 @@ namespace mongo { class WriteLockStatus : public WebStatusPlugin { public: - WriteLockStatus() : WebStatusPlugin( "write lock" , 51 , "% time in write lock, by 4 sec periods" ){} - virtual void init(){} + WriteLockStatus() : WebStatusPlugin( "write lock" , 51 , "% time in write lock, by 4 sec periods" ) {} + virtual void init() {} - virtual void run( stringstream& ss ){ + virtual void run( stringstream& ss ) { statsSnapshots.outputLockInfoHTML( ss ); ss << ""; + "href=\"http://www.mongodb.org/pages/viewpage.action?pageId=7209296\" " + "title=\"snapshot: was the db in the write lock when this page was generated?\">"; ss << "write locked now: " << (dbMutex.info().isLocked() ? "true" : "false") << "\n"; } @@ -156,22 +155,26 @@ namespace mongo { class DBTopStatus : public WebStatusPlugin { public: - DBTopStatus() : WebStatusPlugin( "dbtop" , 50 , "(occurences|percent of elapsed)" ){} + DBTopStatus() : WebStatusPlugin( "dbtop" , 50 , "(occurences|percent of elapsed)" ) {} - void display( stringstream& ss , double elapsed , const Top::UsageData& usage ){ + void display( stringstream& ss , double elapsed , const Top::UsageData& usage ) { ss << ""; ss << usage.count; ss << ""; double per = 100 * ((double)usage.time)/elapsed; - ss << setprecision(1) << fixed << per << "%"; + if( per == (int) per ) + ss << (int) per; + else + ss << setprecision(1) << fixed << per; + ss << '%'; ss << ""; } - void display( stringstream& ss , double elapsed , const string& ns , const Top::CollectionData& data ){ - if ( ns != "GLOBAL" && data.total.count == 0 ) + void display( stringstream& ss , double elapsed , const string& ns , const Top::CollectionData& data ) { + if ( ns != "TOTAL" && data.total.count == 0 ) return; ss << "" << ns << ""; - + display( ss , elapsed , data.total ); display( ss , elapsed , data.readLock ); @@ -182,43 +185,43 @@ namespace mongo { display( ss , elapsed , data.insert ); display( ss , elapsed , data.update ); display( ss , elapsed , data.remove ); - + ss << "\n"; } - void run( stringstream& ss ){ + void run( stringstream& ss ) { auto_ptr delta = statsSnapshots.computeDelta(); if ( ! 
delta.get() ) return; - + ss << ""; ss << "" - "" - "" - "" - "" - "" - "" - "" - ""; + ss << a("http://www.mongodb.org/display/DOCS/Developer+FAQ#DeveloperFAQ-What%27sa%22namespace%22%3F", "namespace") << + "NS" + "" + "" + "" + "" + "" + "" + "" + ""; ss << "\n"; - - display( ss , (double) delta->elapsed() , "GLOBAL" , delta->globalUsageDiff() ); - + + display( ss , (double) delta->elapsed() , "TOTAL" , delta->globalUsageDiff() ); + Top::UsageMap usage = delta->collectionUsageDiff(); - for ( Top::UsageMap::iterator i=usage.begin(); i != usage.end(); i++ ){ + for ( Top::UsageMap::iterator i=usage.begin(); i != usage.end(); i++ ) { display( ss , (double) delta->elapsed() , i->first , i->second ); } - + ss << "
"; - ss << a("http://www.mongodb.org/display/DOCS/Developer+FAQ#DeveloperFAQ-What%27sa%22namespace%22%3F", "namespace") << - "NStotalReadsWritesQueriesGetMoresInsertsUpdatesRemovestotalReadsWritesQueriesGetMoresInsertsUpdatesRemoves
"; - + } - virtual void init(){} + virtual void init() {} } dbtopStatus; Snapshots statsSnapshots; - SnapshotThread snapshotThread; + SnapshotThread snapshotThread; } diff --git a/db/stats/snapshots.h b/db/stats/snapshots.h index 6d8e23d..d9b8e5e 100644 --- a/db/stats/snapshots.h +++ b/db/stats/snapshots.h @@ -28,7 +28,7 @@ namespace mongo { class SnapshotThread; - + /** * stores a point in time snapshot * i.e. all counters at a given time @@ -45,14 +45,14 @@ namespace mongo { friend class SnapshotDelta; friend class Snapshots; }; - + /** * contains performance information for a time period */ class SnapshotDelta { public: SnapshotDelta( const SnapshotData& older , const SnapshotData& newer ); - + unsigned long long start() const { return _older._created; } @@ -60,7 +60,7 @@ namespace mongo { unsigned long long elapsed() const { return _elapsed; } - + unsigned long long timeInWriteLock() const { return _newer._totalWriteLockedTime - _older._totalWriteLockedTime; } @@ -83,15 +83,15 @@ namespace mongo { class Snapshots { public: Snapshots(int n=100); - + const SnapshotData* takeSnapshot(); - + int numDeltas() const { return _stored-1; } const SnapshotData& getPrev( int numBack = 0 ); auto_ptr computeDelta( int numBack = 0 ); - - + + void outputLockInfoHTML( stringstream& ss ); private: mongo::mutex _lock; @@ -103,10 +103,10 @@ namespace mongo { class SnapshotThread : public BackgroundJob { public: - string name() { return "snapshot"; } + virtual string name() const { return "snapshot"; } void run(); }; - + extern Snapshots statsSnapshots; extern SnapshotThread snapshotThread; diff --git a/db/stats/top.cpp b/db/stats/top.cpp index 3e65261..77aef0d 100644 --- a/db/stats/top.cpp +++ b/db/stats/top.cpp @@ -22,16 +22,16 @@ #include "../commands.h" namespace mongo { - - Top::UsageData::UsageData( const UsageData& older , const UsageData& newer ) - : time(newer.time-older.time) , - count(newer.count-older.count) - { - + + Top::UsageData::UsageData( const UsageData& older , const UsageData& newer ) { + // this won't be 100% accurate on rollovers and drop(), but at least it won't be negative + time = (newer.time > older.time) ? (newer.time - older.time) : newer.time; + count = (newer.count > older.count) ? (newer.count - older.count) : newer.count; + } Top::CollectionData::CollectionData( const CollectionData& older , const CollectionData& newer ) - : total( older.total , newer.total ) , + : total( older.total , newer.total ) , readLock( older.readLock , newer.readLock ) , writeLock( older.writeLock , newer.writeLock ) , queries( older.queries , newer.queries ) , @@ -39,17 +39,18 @@ namespace mongo { insert( older.insert , newer.insert ) , update( older.update , newer.update ) , remove( older.remove , newer.remove ), - commands( older.commands , newer.commands ) - { - + commands( older.commands , newer.commands ) { + } - - void Top::record( const string& ns , int op , int lockType , long long micros , bool command ){ + void Top::record( const string& ns , int op , int lockType , long long micros , bool command ) { + if ( ns[0] == '?' 
) + return; + //cout << "record: " << ns << "\t" << op << "\t" << command << endl; scoped_lock lk(_lock); - - if ( ( command || op == dbQuery ) && ns == _lastDropped ){ + + if ( ( command || op == dbQuery ) && ns == _lastDropped ) { _lastDropped = ""; return; } @@ -59,22 +60,15 @@ namespace mongo { _record( _global , op , lockType , micros , command ); } - void Top::collectionDropped( const string& ns ){ - //cout << "collectionDropped: " << ns << endl; - scoped_lock lk(_lock); - _usage.erase(ns); - _lastDropped = ns; - } - - void Top::_record( CollectionData& c , int op , int lockType , long long micros , bool command ){ + void Top::_record( CollectionData& c , int op , int lockType , long long micros , bool command ) { c.total.inc( micros ); - + if ( lockType > 0 ) c.writeLock.inc( micros ); else if ( lockType < 0 ) c.readLock.inc( micros ); - - switch ( op ){ + + switch ( op ) { case 0: // use 0 for unknown, non-specific break; @@ -98,7 +92,7 @@ namespace mongo { break; case dbKillCursors: break; - case opReply: + case opReply: case dbMsg: log() << "unexpected op in Top::record: " << op << endl; break; @@ -108,55 +102,62 @@ namespace mongo { } - void Top::cloneMap(Top::UsageMap& out){ + void Top::collectionDropped( const string& ns ) { + //cout << "collectionDropped: " << ns << endl; + scoped_lock lk(_lock); + _usage.erase(ns); + _lastDropped = ns; + } + + void Top::cloneMap(Top::UsageMap& out) const { scoped_lock lk(_lock); out = _usage; } - void Top::append( BSONObjBuilder& b ){ + void Top::append( BSONObjBuilder& b ) { scoped_lock lk( _lock ); - append( b , _usage ); + _appendToUsageMap( b , _usage ); } - void Top::append( BSONObjBuilder& b , const char * name , const UsageData& map ){ - BSONObjBuilder bb( b.subobjStart( name ) ); - bb.appendNumber( "time" , map.time ); - bb.appendNumber( "count" , map.count ); - bb.done(); - } + void Top::_appendToUsageMap( BSONObjBuilder& b , const UsageMap& map ) const { + for ( UsageMap::const_iterator i=map.begin(); i!=map.end(); i++ ) { + BSONObjBuilder bb( b.subobjStart( i->first ) ); - void Top::append( BSONObjBuilder& b , const UsageMap& map ){ - for ( UsageMap::const_iterator i=map.begin(); i!=map.end(); i++ ){ - BSONObjBuilder bb( b.subobjStart( i->first.c_str() ) ); - const CollectionData& coll = i->second; - - append( b , "total" , coll.total ); - - append( b , "readLock" , coll.readLock ); - append( b , "writeLock" , coll.writeLock ); - - append( b , "queries" , coll.queries ); - append( b , "getmore" , coll.getmore ); - append( b , "insert" , coll.insert ); - append( b , "update" , coll.update ); - append( b , "remove" , coll.remove ); - append( b , "commands" , coll.commands ); - + + _appendStatsEntry( b , "total" , coll.total ); + + _appendStatsEntry( b , "readLock" , coll.readLock ); + _appendStatsEntry( b , "writeLock" , coll.writeLock ); + + _appendStatsEntry( b , "queries" , coll.queries ); + _appendStatsEntry( b , "getmore" , coll.getmore ); + _appendStatsEntry( b , "insert" , coll.insert ); + _appendStatsEntry( b , "update" , coll.update ); + _appendStatsEntry( b , "remove" , coll.remove ); + _appendStatsEntry( b , "commands" , coll.commands ); + bb.done(); } } + void Top::_appendStatsEntry( BSONObjBuilder& b , const char * statsName , const UsageData& map ) const { + BSONObjBuilder bb( b.subobjStart( statsName ) ); + bb.appendNumber( "time" , map.time ); + bb.appendNumber( "count" , map.count ); + bb.done(); + } + class TopCmd : public Command { public: - TopCmd() : Command( "top", true ){} + TopCmd() : Command( "top", true ) 
{} virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream& help ) const { help << "usage by collection"; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl){ + virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { { BSONObjBuilder b( result.subobjStart( "totals" ) ); Top::global.append( b ); @@ -164,11 +165,11 @@ namespace mongo { } return true; } - + } topCmd; Top Top::global; - + TopOld::T TopOld::_snapshotStart = TopOld::currentTime(); TopOld::D TopOld::_snapshotDuration; TopOld::UsageMap TopOld::_totalUsage; diff --git a/db/stats/top.h b/db/stats/top.h index 135e8f8..9645ed1 100644 --- a/db/stats/top.h +++ b/db/stats/top.h @@ -31,29 +31,27 @@ namespace mongo { public: Top() : _lock("Top") { } - class UsageData { - public: - UsageData() : time(0) , count(0){} + struct UsageData { + UsageData() : time(0) , count(0) {} UsageData( const UsageData& older , const UsageData& newer ); long long time; long long count; - void inc( long long micros ){ + void inc( long long micros ) { count++; time += micros; } }; - class CollectionData { - public: + struct CollectionData { /** * constructs a diff */ - CollectionData(){} + CollectionData() {} CollectionData( const CollectionData& older , const CollectionData& newer ); - + UsageData total; - + UsageData readLock; UsageData writeLock; @@ -66,25 +64,23 @@ namespace mongo { }; typedef map UsageMap; - + public: void record( const string& ns , int op , int lockType , long long micros , bool command ); void append( BSONObjBuilder& b ); - void cloneMap(UsageMap& out); - CollectionData getGlobalData(){ return _global; } + void cloneMap(UsageMap& out) const; + CollectionData getGlobalData() const { return _global; } void collectionDropped( const string& ns ); public: // static stuff static Top global; - - void append( BSONObjBuilder& b , const char * name , const UsageData& map ); - void append( BSONObjBuilder& b , const UsageMap& map ); - + private: - + void _appendToUsageMap( BSONObjBuilder& b , const UsageMap& map ) const; + void _appendStatsEntry( BSONObjBuilder& b , const char * statsName , const UsageData& map ) const; void _record( CollectionData& c , int op , int lockType , long long micros , bool command ); - mongo::mutex _lock; + mutable mongo::mutex _lock; CollectionData _global; UsageMap _usage; string _lastDropped; @@ -99,9 +95,9 @@ namespace mongo { typedef boost::tuple< D, int, int, int > UsageData; public: TopOld() : _read(false), _write(false) { } - + /* these are used to record activity: */ - + void clientStart( const char *client ) { clientStop(); _currentStart = currentTime(); @@ -130,11 +126,11 @@ namespace mongo { /* these are used to fetch the stats: */ - struct Usage { - string ns; - D time; - double pct; - int reads, writes, calls; + struct Usage { + string ns; + D time; + double pct; + int reads, writes, calls; }; static void usage( vector< Usage > &res ) { @@ -145,7 +141,7 @@ namespace mongo { UsageMap totalUsage; fillParentNamespaces( snapshot, _snapshot ); fillParentNamespaces( totalUsage, _totalUsage ); - + multimap< D, string, more > sorted; for( UsageMap::iterator i = snapshot.begin(); i != snapshot.end(); ++i ) sorted.insert( make_pair( i->second.get<0>(), i->first ) ); @@ -181,7 +177,8 @@ namespace mongo { if ( &_snapshot == &_snapshotA ) { 
_snapshot = _snapshotB; _nextSnapshot = _snapshotA; - } else { + } + else { _snapshot = _snapshotA; _nextSnapshot = _snapshotB; } @@ -211,7 +208,7 @@ namespace mongo { g.get< 1 >()++; else if ( !_read && _write ) g.get< 2 >()++; - g.get< 3 >()++; + g.get< 3 >()++; } static void fillParentNamespaces( UsageMap &to, const UsageMap &from ) { for( UsageMap::const_iterator i = from.begin(); i != from.end(); ++i ) { @@ -224,8 +221,8 @@ namespace mongo { current = current.substr( 0, dot ); inc( to[ current ], i->second ); dot = current.rfind( "." ); - } - } + } + } } static void inc( UsageData &to, const UsageData &from ) { to.get<0>() += from.get<0>(); diff --git a/db/storage.cpp b/db/storage.cpp deleted file mode 100644 index 63e7639..0000000 --- a/db/storage.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// storage.cpp -/* - * Copyright (C) 2010 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - - -#include "pch.h" -#include "pdfile.h" -//#include "reccache.h" -#include "rec.h" -#include "db.h" - -namespace mongo { - -// pick your store for indexes by setting this typedef -// this doesn't need to be an ifdef, we can make it dynamic -#if defined(_RECSTORE) -RecStoreInterface *btreeStore = new CachedBasicRecStore(); -#else -MongoMemMapped_RecStore *btreeStore = new MongoMemMapped_RecStore(); -#endif - -#if 0 - -#if defined(_RECSTORE) - static int inited; -#endif - -void writerThread(); - -void BasicRecStore::init(const char *fn, unsigned recsize) -{ - massert( 10394 , "compile packing problem recstore?", sizeof(RecStoreHeader) == 8192); - filename = fn; - f.open(fn); - uassert( 10130 , string("couldn't open file:")+fn, f.is_open() ); - len = f.len(); - if( len == 0 ) { - log() << "creating recstore file " << fn << '\n'; - h.recsize = recsize; - len = sizeof(RecStoreHeader); - f.write(0, (const char *) &h, sizeof(RecStoreHeader)); - } - else { - f.read(0, (char *) &h, sizeof(RecStoreHeader)); - massert( 10395 , string("recstore was not closed cleanly: ")+fn, h.cleanShutdown==0); - massert( 10396 , string("recstore recsize mismatch, file:")+fn, h.recsize == recsize); - massert( 10397 , string("bad recstore [1], file:")+fn, (h.leof-sizeof(RecStoreHeader)) % recsize == 0); - if( h.leof > len ) { - stringstream ss; - ss << "bad recstore, file:" << fn << " leof:" << h.leof << " len:" << len; - massert( 10398 , ss.str(), false); - } - if( h.cleanShutdown ) - log() << "warning: non-clean shutdown for file " << fn << '\n'; - h.cleanShutdown = 2; - writeHeader(); - f.fsync(); - } -#if defined(_RECSTORE) - if( inited++ == 0 ) { - boost::thread t(writerThread); - } -#endif -} - -#endif - -} diff --git a/db/taskqueue.h b/db/taskqueue.h new file mode 100644 index 0000000..c6a5667 --- /dev/null +++ b/db/taskqueue.h @@ -0,0 +1,106 @@ +// @file deferredinvoker.h + +/** + * Copyright (C) 2008 10gen Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "mongomutex.h" + +namespace mongo { + + /** defer work items by queueing them for invocation by another thread. presumption is that + consumer thread is outside of locks more than the source thread. Additional presumption + is that several objects or micro-tasks will be queued and that having a single thread + processing them in batch is helpful as they (in the first use case) use a common data + structure that can then be in local cpu caches. + + this class is in db/ as it is dbMutex (mongomutex) specific (so far). + + using a functor instead of go() might be more elegant too, once again, would like to test any + performance differential. also worry that operator() hides things? + + MT - copyable "micro task" object we can queue + must have a static method void MT::go(const MT&) + + see DefInvoke in dbtests/ for an example. + */ + template< class MT > + class TaskQueue { + public: + TaskQueue() : _which(0), _invokeMutex("deferredinvoker") { } + + void defer(MT mt) { + // only one writer allowed. however the invoke processing below can occur concurrently with + // writes (for the most part) + DEV dbMutex.assertWriteLocked(); + + _queues[_which].push_back(mt); + } + + /** call to process deferrals. + + concurrency: handled herein. multiple threads could call invoke(), but their efforts will be + serialized. the common case is that there is a single processor calling invoke(). + + normally, you call this outside of any lock. but if you want to fully drain the queue, + call from within a read lock. for example: + { + // drain with minimal time in lock + d.invoke(); + readlock lk; + d.invoke(); + ... + } + you can also call invoke periodically to do some work and then pick up later on more.
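      As an illustrative aside, not part of the original header: the MT parameter is expected to be
      a copyable micro-task type exposing a static void go(const MT&); the type name LogIntent below
      is hypothetical, and the class comment above points at DefInvoke in dbtests/ for the real example.

          struct LogIntent {
              const void* src;
              unsigned len;
              static void go(const LogIntent& it) { }   // consume one queued item on the draining thread
          };

          TaskQueue<LogIntent> q;        // shared between the producing and draining threads

          // producer side, inside the dbMutex write lock:
          LogIntent it = { 0, 0 };
          q.defer(it);

          // consumer side, normally outside the write lock:
          q.invoke();                    // flips the double buffer, then drains the retired queue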
+ */ + void invoke() { + mutex::scoped_lock lk2(_invokeMutex); + int toDrain = 0; + { + // flip queueing to the other queue (we are double buffered) + readlocktry lk("", 5); + if( !lk.got() ) + return; + toDrain = _which; + _which = _which ^ 1; + wassert( _queues[_which].empty() ); // we are in dbMutex, so it should be/stay empty til we exit dbMutex + } + + _drain( _queues[toDrain] ); + assert( _queues[toDrain].empty() ); + } + + private: + int _which; // 0 or 1 + typedef vector< MT > Queue; + Queue _queues[2]; + + // lock order when multiple locks: dbMutex, _invokeMutex + mongo::mutex _invokeMutex; + + void _drain(Queue& queue) { + unsigned oldCap = queue.capacity(); + for( typename Queue::iterator i = queue.begin(); i != queue.end(); i++ ) { + const MT& v = *i; + MT::go(v); + } + queue.clear(); + DEV assert( queue.capacity() == oldCap ); // just checking that clear() doesn't deallocate, we don't want that + } + }; + +} diff --git a/db/tests.cpp b/db/tests.cpp index 1218f1b..00f299e 100644 --- a/db/tests.cpp +++ b/db/tests.cpp @@ -32,7 +32,7 @@ namespace mongo { MemoryMappedFile f; - long len = 64*1024*1024; + unsigned long long len = 64*1024*1024; char *p = (char *) f.map("/tmp/test.dat", len); char *start = p; char *end = p + 64*1024*1024-2; diff --git a/db/update.cpp b/db/update.cpp index e178e0f..7de9bb1 100644 --- a/db/update.cpp +++ b/db/update.cpp @@ -31,21 +31,25 @@ namespace mongo { const char* Mod::modNames[] = { "$inc", "$set", "$push", "$pushAll", "$pull", "$pullAll" , "$pop", "$unset" , - "$bitand" , "$bitor" , "$bit" , "$addToSet" }; + "$bitand" , "$bitor" , "$bit" , "$addToSet", "$rename", "$rename" + }; unsigned Mod::modNamesNum = sizeof(Mod::modNames)/sizeof(char*); bool Mod::_pullElementMatch( BSONElement& toMatch ) const { - - if ( elt.type() != Object ){ + + if ( elt.type() != Object ) { // if elt isn't an object, then comparison will work return toMatch.woCompare( elt , false ) == 0; } - if ( toMatch.type() != Object ){ + if ( matcherOnPrimitive ) + return matcher->matches( toMatch.wrap( "" ) ); + + if ( toMatch.type() != Object ) { // looking for an object, so this can't match return false; } - + // now we have an object on both sides return matcher->matches( toMatch.embeddedObject() ); } @@ -54,41 +58,53 @@ namespace mongo { void Mod::appendIncremented( Builder& bb , const BSONElement& in, ModState& ms ) const { BSONType a = in.type(); BSONType b = elt.type(); - - if ( a == NumberDouble || b == NumberDouble ){ + + if ( a == NumberDouble || b == NumberDouble ) { ms.incType = NumberDouble; ms.incdouble = elt.numberDouble() + in.numberDouble(); } - else if ( a == NumberLong || b == NumberLong ){ + else if ( a == NumberLong || b == NumberLong ) { ms.incType = NumberLong; ms.inclong = elt.numberLong() + in.numberLong(); } else { - ms.incType = NumberInt; - ms.incint = elt.numberInt() + in.numberInt(); + int x = elt.numberInt() + in.numberInt(); + if ( x < 0 && elt.numberInt() > 0 && in.numberInt() > 0 ) { + // overflow + ms.incType = NumberLong; + ms.inclong = elt.numberLong() + in.numberLong(); + } + else { + ms.incType = NumberInt; + ms.incint = elt.numberInt() + in.numberInt(); + } } - + ms.appendIncValue( bb , false ); } template< class Builder > void appendUnset( Builder &b ) { } - + template<> void appendUnset( BSONArrayBuilder &b ) { b.appendNull(); } - + template< class Builder > void Mod::apply( Builder& b , BSONElement in , ModState& ms ) const { - switch ( op ){ - + if ( ms.dontApply ) { + return; + } + + switch ( op ) { + case INC: { appendIncremented( b , in 
, ms ); break; } - + case SET: { _checkForAppending( elt ); b.appendAs( elt , shortFieldName ); @@ -99,13 +115,13 @@ namespace mongo { appendUnset( b ); break; } - + case PUSH: { uassert( 10131 , "$push can only be applied to an array" , in.type() == Array ); BSONObjBuilder bb( b.subarrayStart( shortFieldName ) ); BSONObjIterator i( in.embeddedObject() ); int n=0; - while ( i.more() ){ + while ( i.more() ) { bb.append( i.next() ); n++; } @@ -116,28 +132,35 @@ namespace mongo { bb.done(); break; } - + case ADDTOSET: { uassert( 12592 , "$addToSet can only be applied to an array" , in.type() == Array ); BSONObjBuilder bb( b.subarrayStart( shortFieldName ) ); - + BSONObjIterator i( in.embeddedObject() ); - int n=0; + int n=0; + + if ( isEach() ) { - if ( isEach() ){ - BSONElementSet toadd; parseEach( toadd ); - - while ( i.more() ){ + + while ( i.more() ) { BSONElement cur = i.next(); bb.append( cur ); - n++; + n++; toadd.erase( cur ); } - - for ( BSONElementSet::iterator j=toadd.begin(); j!=toadd.end(); j++ ){ - bb.appendAs( *j , BSONObjBuilder::numStr( n++ ) ); + + { + BSONObjIterator i( getEach() ); + while ( i.more() ) { + BSONElement e = i.next(); + if ( toadd.count(e) ) { + bb.appendAs( e , BSONObjBuilder::numStr( n++ ) ); + toadd.erase( e ); + } + } } } @@ -145,34 +168,34 @@ namespace mongo { bool found = false; - while ( i.more() ){ + while ( i.more() ) { BSONElement cur = i.next(); bb.append( cur ); n++; if ( elt.woCompare( cur , false ) == 0 ) found = true; } - + if ( ! found ) bb.appendAs( elt , bb.numStr( n ) ); - + } - + bb.done(); break; } - + case PUSH_ALL: { uassert( 10132 , "$pushAll can only be applied to an array" , in.type() == Array ); uassert( 10133 , "$pushAll has to be passed an array" , elt.type() ); BSONObjBuilder bb( b.subarrayStart( shortFieldName ) ); - + BSONObjIterator i( in.embeddedObject() ); int n=0; - while ( i.more() ){ + while ( i.more() ) { bb.append( i.next() ); n++; } @@ -180,34 +203,34 @@ namespace mongo { ms.pushStartSize = n; i = BSONObjIterator( elt.embeddedObject() ); - while ( i.more() ){ + while ( i.more() ) { bb.appendAs( i.next() , bb.numStr( n++ ) ); } bb.done(); break; } - + case PULL: case PULL_ALL: { uassert( 10134 , "$pull/$pullAll can only be applied to an array" , in.type() == Array ); BSONObjBuilder bb( b.subarrayStart( shortFieldName ) ); - + int n = 0; BSONObjIterator i( in.embeddedObject() ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); bool allowed = true; - if ( op == PULL ){ + if ( op == PULL ) { allowed = ! 
_pullElementMatch( e ); } else { BSONObjIterator j( elt.embeddedObject() ); while( j.more() ) { BSONElement arrJ = j.next(); - if ( e.woCompare( arrJ, false ) == 0 ){ + if ( e.woCompare( arrJ, false ) == 0 ) { allowed = false; break; } @@ -217,7 +240,7 @@ namespace mongo { if ( allowed ) bb.appendAs( e , bb.numStr( n++ ) ); } - + bb.done(); break; } @@ -225,13 +248,13 @@ namespace mongo { case POP: { uassert( 10135 , "$pop can only be applied to an array" , in.type() == Array ); BSONObjBuilder bb( b.subarrayStart( shortFieldName ) ); - + int n = 0; BSONObjIterator i( in.embeddedObject() ); - if ( elt.isNumber() && elt.number() < 0 ){ + if ( elt.isNumber() && elt.number() < 0 ) { // pop from front - if ( i.more() ){ + if ( i.more() ) { i.next(); n++; } @@ -246,7 +269,7 @@ namespace mongo { while( i.more() ) { n++; BSONElement arrI = i.next(); - if ( i.more() ){ + if ( i.more() ) { bb.append( arrI ); } } @@ -262,23 +285,23 @@ namespace mongo { uassert( 10136 , "$bit needs an array" , elt.type() == Object ); uassert( 10137 , "$bit can only be applied to numbers" , in.isNumber() ); uassert( 10138 , "$bit can't use a double" , in.type() != NumberDouble ); - + int x = in.numberInt(); long long y = in.numberLong(); BSONObjIterator it( elt.embeddedObject() ); - while ( it.more() ){ + while ( it.more() ) { BSONElement e = it.next(); uassert( 10139 , "$bit field must be number" , e.isNumber() ); - if ( strcmp( e.fieldName() , "and" ) == 0 ){ - switch( in.type() ){ + if ( strcmp( e.fieldName() , "and" ) == 0 ) { + switch( in.type() ) { case NumberInt: x = x&e.numberInt(); break; case NumberLong: y = y&e.numberLong(); break; default: assert( 0 ); } } - else if ( strcmp( e.fieldName() , "or" ) == 0 ){ - switch( in.type() ){ + else if ( strcmp( e.fieldName() , "or" ) == 0 ) { + switch( in.type() ) { case NumberInt: x = x|e.numberInt(); break; case NumberLong: y = y|e.numberLong(); break; default: assert( 0 ); @@ -289,8 +312,8 @@ namespace mongo { throw UserException( 9016, (string)"unknown bit mod:" + e.fieldName() ); } } - - switch( in.type() ){ + + switch( in.type() ) { case NumberInt: b.append( shortFieldName , x ); break; case NumberLong: b.append( shortFieldName , y ); break; default: assert( 0 ); @@ -299,6 +322,15 @@ namespace mongo { break; } + case RENAME_FROM: { + break; + } + + case RENAME_TO: { + ms.handleRename( b, shortFieldName ); + break; + } + default: stringstream ss; ss << "Mod::apply can't handle type: " << op; @@ -306,11 +338,30 @@ namespace mongo { } } + // -1 inside a non-object (non-object could be array) + // 0 missing + // 1 found + int validRenamePath( BSONObj obj, const char *path ) { + while( const char *p = strchr( path, '.' ) ) { + string left( path, p - path ); + BSONElement e = obj.getField( left ); + if ( e.eoo() ) { + return 0; + } + if ( e.type() != Object ) { + return -1; + } + obj = e.embeddedObject(); + path = p + 1; + } + return !obj.getField( path ).eoo(); + } + auto_ptr ModSet::prepare(const BSONObj &obj) const { DEBUGUPDATE( "\t start prepare" ); - ModSetState * mss = new ModSetState( obj ); - - + auto_ptr mss( new ModSetState( obj ) ); + + // Perform this check first, so that we don't leave a partially modified object on uassert. 
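A hedged illustration of validRenamePath()'s return convention, not part of the patch; the function and variable names below are for demonstration only, and the snippet assumes the BSON() builder and the existing includes of db/update.cpp:

    // 1 = leaf exists, 0 = a path component is missing, -1 = the path descends into a non-object
    void validRenamePathExample() {
        BSONObj doc = BSON( "a" << BSON( "b" << 1 ) << "c" << 5 );
        int found   = validRenamePath( doc, "a.b" );   // 1  : "a" is an object and contains "b"
        int missing = validRenamePath( doc, "a.x" );   // 0  : "a" is an object but has no "x"
        int invalid = validRenamePath( doc, "c.d" );   // -1 : "c" is a number, so the path cannot descend
        // prepare() uses these codes for $rename: 1 applies the rename, 0 marks it dontApply,
        // and -1 trips the uasserts below (13489/13490).
    }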
for ( ModHolder::const_iterator i = _mods.begin(); i != _mods.end(); ++i ) { DEBUGUPDATE( "\t\t prepare : " << i->first ); @@ -318,23 +369,51 @@ namespace mongo { const Mod& m = i->second; BSONElement e = obj.getFieldDotted(m.fieldName); - + ms.m = &m; ms.old = e; + if ( m.op == Mod::RENAME_FROM ) { + int source = validRenamePath( obj, m.fieldName ); + uassert( 13489, "$rename source field invalid", source != -1 ); + if ( source != 1 ) { + ms.dontApply = true; + } + continue; + } + + if ( m.op == Mod::RENAME_TO ) { + int source = validRenamePath( obj, m.renameFrom() ); + if ( source == 1 ) { + int target = validRenamePath( obj, m.fieldName ); + uassert( 13490, "$rename target field invalid", target != -1 ); + ms.newVal = obj.getFieldDotted( m.renameFrom() ); + mss->amIInPlacePossible( false ); + } + else { + ms.dontApply = true; + } + continue; + } + if ( e.eoo() ) { mss->amIInPlacePossible( m.op == Mod::UNSET ); continue; - } - + } + switch( m.op ) { case Mod::INC: uassert( 10140 , "Cannot apply $inc modifier to non-number", e.isNumber() || e.eoo() ); - if ( mss->amIInPlacePossible( e.isNumber() ) ){ + if ( mss->amIInPlacePossible( e.isNumber() ) ) { // check more typing info here - if ( m.elt.type() != e.type() ){ + if ( m.elt.type() != e.type() ) { // if i'm incrememnting with a double, then the storage has to be a double - mss->amIInPlacePossible( m.elt.type() != NumberDouble ); + mss->amIInPlacePossible( m.elt.type() != NumberDouble ); + } + + // check for overflow + if ( e.type() == NumberInt && e.numberLong() + m.elt.numberLong() > numeric_limits::max() ) { + mss->amIInPlacePossible( false ); } } break; @@ -343,7 +422,7 @@ namespace mongo { mss->amIInPlacePossible( m.elt.type() == e.type() && m.elt.valuesize() == e.valuesize() ); break; - + case Mod::PUSH: case Mod::PUSH_ALL: uassert( 10141 , "Cannot apply $push/$pushAll modifier to non-array", e.type() == Array || e.eoo() ); @@ -358,7 +437,7 @@ namespace mongo { BSONElement arrI = i.next(); if ( m.op == Mod::PULL ) { mss->amIInPlacePossible( ! 
m._pullElementMatch( arrI ) ); - } + } else if ( m.op == Mod::PULL_ALL ) { BSONObjIterator j( m.elt.embeddedObject() ); while( mss->_inPlacePossible && j.moreWithEOO() ) { @@ -377,12 +456,12 @@ namespace mongo { mss->amIInPlacePossible( e.embeddedObject().isEmpty() ); break; } - + case Mod::ADDTOSET: { uassert( 12591 , "Cannot apply $addToSet modifier to non-array", e.type() == Array || e.eoo() ); - + BSONObjIterator i( e.embeddedObject() ); - if ( m.isEach() ){ + if ( m.isEach() ) { BSONElementSet toadd; m.parseEach( toadd ); while( i.more() ) { @@ -395,7 +474,7 @@ namespace mongo { bool found = false; while( i.more() ) { BSONElement arrI = i.next(); - if ( arrI.woCompare( m.elt , false ) == 0 ){ + if ( arrI.woCompare( m.elt , false ) == 0 ) { found = true; break; } @@ -404,7 +483,7 @@ namespace mongo { } break; } - + default: // mods we don't know about shouldn't be done in place mss->amIInPlacePossible( false ); @@ -412,28 +491,49 @@ namespace mongo { } DEBUGUPDATE( "\t mss\n" << mss->toString() << "\t--" ); - - return auto_ptr( mss ); + + return mss; } void ModState::appendForOpLog( BSONObjBuilder& b ) const { - if ( incType ){ + if ( dontApply ) { + return; + } + + if ( incType ) { DEBUGUPDATE( "\t\t\t\t\t appendForOpLog inc fieldname: " << m->fieldName << " short:" << m->shortFieldName ); BSONObjBuilder bb( b.subobjStart( "$set" ) ); appendIncValue( bb , true ); bb.done(); return; } - + + if ( m->op == Mod::RENAME_FROM ) { + DEBUGUPDATE( "\t\t\t\t\t appendForOpLog RENAME_FROM fielName:" << m->fieldName ); + BSONObjBuilder bb( b.subobjStart( "$unset" ) ); + bb.append( m->fieldName, 1 ); + bb.done(); + return; + } + + if ( m->op == Mod::RENAME_TO ) { + DEBUGUPDATE( "\t\t\t\t\t appendForOpLog RENAME_TO fielName:" << m->fieldName ); + BSONObjBuilder bb( b.subobjStart( "$set" ) ); + bb.appendAs( newVal, m->fieldName ); + return; + } + const char * name = fixedOpName ? fixedOpName : Mod::modNames[op()]; DEBUGUPDATE( "\t\t\t\t\t appendForOpLog name:" << name << " fixed: " << fixed << " fn: " << m->fieldName ); BSONObjBuilder bb( b.subobjStart( name ) ); - if ( fixed ) + if ( fixed ) { bb.appendAs( *fixed , m->fieldName ); - else + } + else { bb.appendAs( m->elt , m->fieldName ); + } bb.done(); } @@ -445,30 +545,55 @@ namespace mongo { ss << " fixed: " << fixed; return ss.str(); } - - void ModSetState::applyModsInPlace() { + + template< class Builder > + void ModState::handleRename( Builder &newObjBuilder, const char *shortFieldName ) { + newObjBuilder.appendAs( newVal , shortFieldName ); + BSONObjBuilder b; + b.appendAs( newVal, shortFieldName ); + assert( _objData.isEmpty() ); + _objData = b.obj(); + newVal = _objData.firstElement(); + } + + void ModSetState::applyModsInPlace( bool isOnDisk ) { + // TODO i think this assert means that we can get rid of the isOnDisk param + // and just use isOwned as the determination + DEV assert( isOnDisk == ! _obj.isOwned() ); + for ( ModStateHolder::iterator i = _mods.begin(); i != _mods.end(); ++i ) { ModState& m = i->second; - - switch ( m.m->op ){ + + if ( m.dontApply ) { + continue; + } + + switch ( m.m->op ) { case Mod::UNSET: case Mod::PULL: case Mod::PULL_ALL: case Mod::ADDTOSET: + case Mod::RENAME_FROM: + case Mod::RENAME_TO: // this should have been handled by prepare break; - - // [dm] the BSONElementManipulator statements below are for replication (correct?) + // [dm] the BSONElementManipulator statements below are for replication (correct?) 
case Mod::INC: - m.m->incrementMe( m.old ); + if ( isOnDisk ) + m.m->IncrementMe( m.old ); + else + m.m->incrementMe( m.old ); m.fixedOpName = "$set"; m.fixed = &(m.old); break; case Mod::SET: - BSONElementManipulator( m.old ).replaceTypeAndValue( m.m->elt ); + if ( isOnDisk ) + BSONElementManipulator( m.old ).ReplaceTypeAndValue( m.m->elt ); + else + BSONElementManipulator( m.old ).replaceTypeAndValue( m.m->elt ); break; default: - uassert( 10144 , "can't apply mod in place - shouldn't have gotten here" , 0 ); + uassert( 13478 , "can't apply mod in place - shouldn't have gotten here" , 0 ); } } } @@ -488,61 +613,62 @@ namespace mongo { empty = false; } if ( empty ) - fields[ base + top.fieldName() ] = top; + fields[ base + top.fieldName() ] = top; } - + template< class Builder > - void ModSetState::_appendNewFromMods( const string& root , ModState& m , Builder& b , set& onedownseen ){ + void ModSetState::_appendNewFromMods( const string& root , ModState& m , Builder& b , set& onedownseen ) { const char * temp = m.fieldName(); temp += root.size(); const char * dot = strchr( temp , '.' ); - if ( dot ){ + if ( dot ) { string nr( m.fieldName() , 0 , 1 + ( dot - m.fieldName() ) ); string nf( temp , 0 , dot - temp ); if ( onedownseen.count( nf ) ) return; onedownseen.insert( nf ); - BSONObjBuilder bb ( b.subobjStart( nf.c_str() ) ); + BSONObjBuilder bb ( b.subobjStart( nf ) ); createNewFromMods( nr , bb , BSONObj() ); // don't infer an array from name bb.done(); } else { appendNewFromMod( m , b ); } - + } - + template< class Builder > - void ModSetState::createNewFromMods( const string& root , Builder& b , const BSONObj &obj ){ + void ModSetState::createNewFromMods( const string& root , Builder& b , const BSONObj &obj ) { DEBUGUPDATE( "\t\t createNewFromMods root: " << root ); BSONObjIteratorSorted es( obj ); BSONElement e = es.next(); - + ModStateHolder::iterator m = _mods.lower_bound( root ); StringBuilder buf(root.size() + 2 ); buf << root << (char)255; ModStateHolder::iterator mend = _mods.lower_bound( buf.str() ); - + set onedownseen; - - while ( e.type() && m != mend ){ + + while ( e.type() && m != mend ) { string field = root + e.fieldName(); FieldCompareResult cmp = compareDottedFieldNames( m->second.m->fieldName , field ); DEBUGUPDATE( "\t\t\t field:" << field << "\t mod:" << m->second.m->fieldName << "\t cmp:" << cmp << "\t short: " << e.fieldName() ); - - switch ( cmp ){ - + + switch ( cmp ) { + case LEFT_SUBFIELD: { // Mod is embeddeed under this element - uassert( 10145 , "LEFT_SUBFIELD only supports Object" , e.type() == Object || e.type() == Array ); - if ( onedownseen.count( e.fieldName() ) == 0 ){ + uassert( 10145 , str::stream() << "LEFT_SUBFIELD only supports Object: " << field << " not: " << e.type() , e.type() == Object || e.type() == Array ); + if ( onedownseen.count( e.fieldName() ) == 0 ) { onedownseen.insert( e.fieldName() ); if ( e.type() == Object ) { BSONObjBuilder bb( b.subobjStart( e.fieldName() ) ); stringstream nr; nr << root << e.fieldName() << "."; createNewFromMods( nr.str() , bb , e.embeddedObject() ); - bb.done(); - } else { + bb.done(); + } + else { BSONArrayBuilder ba( b.subarrayStart( e.fieldName() ) ); stringstream nr; nr << root << e.fieldName() << "."; createNewFromMods( nr.str() , ba , e.embeddedObject() ); @@ -578,22 +704,22 @@ namespace mongo { e = es.next(); continue; case RIGHT_SUBFIELD: - massert( 10399 , "ModSet::createNewFromMods - RIGHT_SUBFIELD should be impossible" , 0 ); + massert( 10399 , "ModSet::createNewFromMods - RIGHT_SUBFIELD should 
be impossible" , 0 ); break; default: massert( 10400 , "unhandled case" , 0 ); } } - + // finished looping the mods, just adding the rest of the elements - while ( e.type() ){ + while ( e.type() ) { DEBUGUPDATE( "\t\t\t copying: " << e.fieldName() ); b.append( e ); // if array, ignore field name e = es.next(); } - + // do mods that don't have fields already - for ( ; m != mend; m++ ){ + for ( ; m != mend; m++ ) { DEBUGUPDATE( "\t\t\t\t appending from mod at end: " << m->second.m->fieldName ); _appendNewFromMods( root , m->second , b , onedownseen ); } @@ -602,30 +728,30 @@ namespace mongo { BSONObj ModSetState::createNewFromMods() { BSONObjBuilder b( (int)(_obj.objsize() * 1.1) ); createNewFromMods( "" , b , _obj ); - return b.obj(); + return _newFromMods = b.obj(); } string ModSetState::toString() const { stringstream ss; - for ( ModStateHolder::const_iterator i=_mods.begin(); i!=_mods.end(); ++i ){ + for ( ModStateHolder::const_iterator i=_mods.begin(); i!=_mods.end(); ++i ) { ss << "\t\t" << i->first << "\t" << i->second.toString() << "\n"; } return ss.str(); } - BSONObj ModSet::createNewFromQuery( const BSONObj& query ){ + BSONObj ModSet::createNewFromQuery( const BSONObj& query ) { BSONObj newObj; { BSONObjBuilder bb; EmbeddedBuilder eb( &bb ); BSONObjIteratorSorted i( query ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); if ( e.fieldName()[0] == '$' ) // for $atomic and anything else we add continue; - if ( e.type() == Object && e.embeddedObject().firstElement().fieldName()[0] == '$' ){ + if ( e.type() == Object && e.embeddedObject().firstElement().fieldName()[0] == '$' ) { // this means this is a $gt type filter, so don't make part of the new object continue; } @@ -635,17 +761,17 @@ namespace mongo { eb.done(); newObj = bb.obj(); } - + auto_ptr mss = prepare( newObj ); if ( mss->canApplyInPlace() ) - mss->applyModsInPlace(); + mss->applyModsInPlace( false ); else newObj = mss->createNewFromMods(); - + return newObj; } - + /* get special operations like $inc { $inc: { a:1, b:1 } } { $set: { a:77 } } @@ -656,21 +782,21 @@ namespace mongo { NOTE: MODIFIES source from object! */ ModSet::ModSet( - const BSONObj &from , + const BSONObj &from , const set& idxKeys, const set *backgroundKeys) : _isIndexed(0) , _hasDynamicArray( false ) { - + BSONObjIterator it(from); - + while ( it.more() ) { BSONElement e = it.next(); const char *fn = e.fieldName(); - + uassert( 10147 , "Invalid modifier specified" + string( fn ), e.type() == Object ); BSONObj j = e.embeddedObject(); DEBUGUPDATE( "\t" << j ); - + BSONObjIterator jt(j); Mod::Op op = opFromStr( fn ); @@ -685,18 +811,45 @@ namespace mongo { uassert( 10151 , "have conflicting mods in update" , ! haveConflictingMod( fieldName ) ); uassert( 10152 , "Modifier $inc allowed for numbers only", f.isNumber() || op != Mod::INC ); uassert( 10153 , "Modifier $pushAll/pullAll allowed for arrays only", f.type() == Array || ( op != Mod::PUSH_ALL && op != Mod::PULL_ALL ) ); - + + if ( op == Mod::RENAME_TO ) { + uassert( 13494, "$rename target must be a string", f.type() == String ); + const char *target = f.valuestr(); + uassert( 13495, "$rename source must differ from target", strcmp( fieldName, target ) != 0 ); + uassert( 13496, "invalid mod field name, source may not be empty", fieldName[0] ); + uassert( 13479, "invalid mod field name, target may not be empty", target[0] ); + uassert( 13480, "invalid mod field name, source may not begin or end in period", fieldName[0] != '.' && fieldName[ strlen( fieldName ) - 1 ] != '.' 
); + uassert( 13481, "invalid mod field name, target may not begin or end in period", target[0] != '.' && target[ strlen( target ) - 1 ] != '.' ); + uassert( 13482, "$rename affecting _id not allowed", !( fieldName[0] == '_' && fieldName[1] == 'i' && fieldName[2] == 'd' && ( !fieldName[3] || fieldName[3] == '.' ) ) ); + uassert( 13483, "$rename affecting _id not allowed", !( target[0] == '_' && target[1] == 'i' && target[2] == 'd' && ( !target[3] || target[3] == '.' ) ) ); + uassert( 13484, "field name duplication not allowed with $rename target", !haveModForField( target ) ); + uassert( 13485, "conflicting mods not allowed with $rename target", !haveConflictingMod( target ) ); + uassert( 13486, "$rename target may not be a parent of source", !( strncmp( fieldName, target, strlen( target ) ) == 0 && fieldName[ strlen( target ) ] == '.' ) ); + uassert( 13487, "$rename source may not be dynamic array", strstr( fieldName , ".$" ) == 0 ); + uassert( 13488, "$rename target may not be dynamic array", strstr( target , ".$" ) == 0 ); + + Mod from; + from.init( Mod::RENAME_FROM, f ); + from.setFieldName( fieldName ); + updateIsIndexed( from, idxKeys, backgroundKeys ); + _mods[ from.fieldName ] = from; + + Mod to; + to.init( Mod::RENAME_TO, f ); + to.setFieldName( target ); + updateIsIndexed( to, idxKeys, backgroundKeys ); + _mods[ to.fieldName ] = to; + + DEBUGUPDATE( "\t\t " << fieldName << "\t" << from.fieldName << "\t" << to.fieldName ); + continue; + } + _hasDynamicArray = _hasDynamicArray || strstr( fieldName , ".$" ) > 0; - + Mod m; m.init( op , f ); m.setFieldName( f.fieldName() ); - - if ( m.isIndexed( idxKeys ) || - (backgroundKeys && m.isIndexed(*backgroundKeys)) ) { - _isIndexed++; - } - + updateIsIndexed( m, idxKeys, backgroundKeys ); _mods[m.fieldName] = m; DEBUGUPDATE( "\t\t " << fieldName << "\t" << m.fieldName << "\t" << _hasDynamicArray ); @@ -709,10 +862,10 @@ namespace mongo { ModSet * n = new ModSet(); n->_isIndexed = _isIndexed; n->_hasDynamicArray = _hasDynamicArray; - for ( ModHolder::const_iterator i=_mods.begin(); i!=_mods.end(); i++ ){ + for ( ModHolder::const_iterator i=_mods.begin(); i!=_mods.end(); i++ ) { string s = i->first; size_t idx = s.find( ".$" ); - if ( idx == string::npos ){ + if ( idx == string::npos ) { n->_mods[s] = i->second; continue; } @@ -726,7 +879,7 @@ namespace mongo { } return n; } - + void checkNoMods( BSONObj o ) { BSONObjIterator i( o ); while( i.moreWithEOO() ) { @@ -736,10 +889,10 @@ namespace mongo { uassert( 10154 , "Modifiers and non-modifiers cannot be mixed", e.fieldName()[ 0 ] != '$' ); } } - + class UpdateOp : public MultiCursor::CursorOp { public: - UpdateOp( bool hasPositionalField ) : _nscanned(), _hasPositionalField( hasPositionalField ){} + UpdateOp( bool hasPositionalField ) : _nscanned(), _hasPositionalField( hasPositionalField ) {} virtual void _init() { _c = qp().newCursor(); if ( ! _c->ok() ) { @@ -751,14 +904,18 @@ namespace mongo { _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , qp().ns() ) ); } return _cc->prepareToYield( _yieldData ); - } + } virtual void recoverFromYield() { if ( !ClientCursor::recoverFromYield( _yieldData ) ) { _c.reset(); _cc.reset(); massert( 13339, "cursor dropped during update", false ); } - } + } + virtual long long nscanned() { + assert( _c.get() ); + return _c->nscanned(); + } virtual void next() { if ( ! 
_c->ok() ) { setComplete(); @@ -789,64 +946,62 @@ namespace mongo { }; static void checkTooLarge(const BSONObj& newObj) { - uassert( 12522 , "$ operator made object too large" , newObj.objsize() <= ( 4 * 1024 * 1024 ) ); + uassert( 12522 , "$ operator made object too large" , newObj.objsize() <= BSONObjMaxUserSize ); } - /* note: this is only (as-is) called for + /* note: this is only (as-is) called for - not multi - not mods is indexed - not upsert */ - static UpdateResult _updateById(bool isOperatorUpdate, int idIdxNo, ModSet *mods, int profile, NamespaceDetails *d, + static UpdateResult _updateById(bool isOperatorUpdate, int idIdxNo, ModSet *mods, int profile, NamespaceDetails *d, NamespaceDetailsTransient *nsdt, - bool god, const char *ns, - const BSONObj& updateobj, BSONObj patternOrig, bool logop, OpDebug& debug) - { + bool god, const char *ns, + const BSONObj& updateobj, BSONObj patternOrig, bool logop, OpDebug& debug) { DiskLoc loc; { IndexDetails& i = d->idx(idIdxNo); BSONObj key = i.getKeyFromQuery( patternOrig ); loc = i.head.btree()->findSingle(i, i.head, key); - if( loc.isNull() ) { + if( loc.isNull() ) { // no upsert support in _updateById yet, so we are done. return UpdateResult(0, 0, 0); } } Record *r = loc.rec(); - + /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some regular ones at the moment. */ - if ( isOperatorUpdate ) { - const BSONObj& onDisk = loc.obj(); + if ( isOperatorUpdate ) { + const BSONObj& onDisk = loc.obj(); auto_ptr mss = mods->prepare( onDisk ); - + if( mss->canApplyInPlace() ) { - mss->applyModsInPlace(); + mss->applyModsInPlace(true); DEBUGUPDATE( "\t\t\t updateById doing in place update" ); /*if ( profile ) ss << " fastmod "; */ - } + } else { BSONObj newObj = mss->createNewFromMods(); checkTooLarge(newObj); - bool changedId; assert(nsdt); - DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug, changedId); + DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug); } - + if ( logop ) { DEV assert( mods->size() ); - + BSONObj pattern = patternOrig; if ( mss->haveArrayDepMod() ) { BSONObjBuilder patternBuilder; patternBuilder.appendElements( pattern ); mss->appendSizeSpecForArrayDepMods( patternBuilder ); - pattern = patternBuilder.obj(); + pattern = patternBuilder.obj(); } - + if( mss->needOpLogRewrite() ) { DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() ); logOp("u", ns, mss->getOpLogRewrite() , &pattern ); @@ -857,24 +1012,18 @@ namespace mongo { } return UpdateResult( 1 , 1 , 1); } // end $operator update - + // regular update BSONElementManipulator::lookForTimestamps( updateobj ); checkNoMods( updateobj ); - bool changedId = false; assert(nsdt); - theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, changedId); + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug ); if ( logop ) { - if ( !changedId ) { - logOp("u", ns, updateobj, &patternOrig ); - } else { - logOp("d", ns, patternOrig ); - logOp("i", ns, updateobj ); - } + logOp("u", ns, updateobj, &patternOrig ); } return UpdateResult( 1 , 0 , 1 ); } - + UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs ) { DEBUGUPDATE( "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << 
" multi: " << multi ); Client& client = cc(); @@ -883,20 +1032,20 @@ namespace mongo { if ( logLevel > 2 ) ss << " update: " << updateobj.toString(); - + /* idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case */ /* NOTE: when yield() is added herein, these must be refreshed after each call to yield! */ NamespaceDetails *d = nsdetails(ns); // can be null if an upsert... NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns); /* end note */ - + auto_ptr mods; bool isOperatorUpdate = updateobj.firstElement().fieldName()[0] == '$'; int modsIsIndexed = false; // really the # of indexes - if ( isOperatorUpdate ){ - if( d && d->backgroundIndexBuildInProgress ) { + if ( isOperatorUpdate ) { + if( d && d->indexBuildInProgress ) { set bgKeys; - d->backgroundIdx().keyPattern().getFieldNames(bgKeys); + d->inProgIdx().keyPattern().getFieldNames(bgKeys); mods.reset( new ModSet(updateobj, nsdt->indexKeys(), &bgKeys) ); } else { @@ -914,30 +1063,30 @@ namespace mongo { } set seenObjects; - + int numModded = 0; long long nscanned = 0; MatchDetails details; shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) ); shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) ); - + auto_ptr cc; - + while ( c->ok() ) { nscanned++; bool atomic = c->matcher()->docMatcher().atomic(); - + // May have already matched in UpdateOp, but do again to get details set correctly - if ( ! c->matcher()->matches( c->currKey(), c->currLoc(), &details ) ){ + if ( ! c->matcher()->matches( c->currKey(), c->currLoc(), &details ) ) { c->advance(); - - if ( nscanned % 256 == 0 && ! atomic ){ + + if ( nscanned % 256 == 0 && ! atomic ) { if ( cc.get() == 0 ) { shared_ptr< Cursor > cPtr = c; cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); } - if ( ! cc->yield() ){ + if ( ! cc->yield() ) { cc.release(); // TODO should we assert or something? break; @@ -948,20 +1097,20 @@ namespace mongo { } continue; } - + Record *r = c->_current(); DiskLoc loc = c->currLoc(); - + // TODO Maybe this is unnecessary since we have seenObjects - if ( c->getsetdup( loc ) ){ + if ( c->getsetdup( loc ) ) { c->advance(); continue; } - + BSONObj js(r); - + BSONObj pattern = patternOrig; - + if ( logop ) { BSONObjBuilder idPattern; BSONElement id; @@ -977,80 +1126,79 @@ namespace mongo { uassert( 10157 , "multi-update requires all modified objects to have an _id" , ! multi ); } } - + if ( profile ) ss << " nscanned:" << nscanned; - + /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some regular ones at the moment. */ if ( isOperatorUpdate ) { - - if ( multi ){ + + if ( multi ) { c->advance(); // go to next record in case this one moves if ( seenObjects.count( loc ) ) continue; } - + const BSONObj& onDisk = loc.obj(); - + ModSet * useMods = mods.get(); bool forceRewrite = false; - + auto_ptr mymodset; - if ( details.elemMatchKey && mods->hasDynamicArray() ){ + if ( details.elemMatchKey && mods->hasDynamicArray() ) { useMods = mods->fixDynamicArray( details.elemMatchKey ); mymodset.reset( useMods ); forceRewrite = true; } - + auto_ptr mss = useMods->prepare( onDisk ); - + bool indexHack = multi && ( modsIsIndexed || ! 
mss->canApplyInPlace() ); - - if ( indexHack ){ + + if ( indexHack ) { if ( cc.get() ) cc->updateLocation(); else c->noteLocation(); } - - if ( modsIsIndexed <= 0 && mss->canApplyInPlace() ){ - mss->applyModsInPlace();// const_cast(onDisk) ); - + + if ( modsIsIndexed <= 0 && mss->canApplyInPlace() ) { + mss->applyModsInPlace( true );// const_cast(onDisk) ); + DEBUGUPDATE( "\t\t\t doing in place update" ); if ( profile ) ss << " fastmod "; - - if ( modsIsIndexed ){ + + if ( modsIsIndexed ) { seenObjects.insert( loc ); } - } + } else { if ( rs ) rs->goingToDelete( onDisk ); BSONObj newObj = mss->createNewFromMods(); checkTooLarge(newObj); - bool changedId; - DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug, changedId); + DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug); if ( newLoc != loc || modsIsIndexed ) { // object moved, need to make sure we don' get again seenObjects.insert( newLoc ); } - + } - + if ( logop ) { DEV assert( mods->size() ); - + if ( mss->haveArrayDepMod() ) { BSONObjBuilder patternBuilder; patternBuilder.appendElements( pattern ); mss->appendSizeSpecForArrayDepMods( patternBuilder ); - pattern = patternBuilder.obj(); + pattern = patternBuilder.obj(); } - - if ( forceRewrite || mss->needOpLogRewrite() ){ + + if ( forceRewrite || mss->needOpLogRewrite() ) { DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() ); logOp("u", ns, mss->getOpLogRewrite() , &pattern ); } @@ -1063,13 +1211,13 @@ namespace mongo { return UpdateResult( 1 , 1 , numModded ); if ( indexHack ) c->checkLocation(); - - if ( nscanned % 64 == 0 && ! atomic ){ + + if ( nscanned % 64 == 0 && ! atomic ) { if ( cc.get() == 0 ) { shared_ptr< Cursor > cPtr = c; cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); } - if ( ! cc->yield() ){ + if ( ! cc->yield() ) { cc.release(); break; } @@ -1077,35 +1225,32 @@ namespace mongo { break; } } - + + if (atomic) + getDur().commitIfNeeded(); + continue; - } - + } + uassert( 10158 , "multi update only works with $ operators" , ! multi ); - + BSONElementManipulator::lookForTimestamps( updateobj ); checkNoMods( updateobj ); - bool changedId = false; - theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, changedId, god); + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, god); if ( logop ) { DEV if( god ) log() << "REALLY??" << endl; // god doesn't get logged, this would be bad. - if ( !changedId ) { - logOp("u", ns, updateobj, &pattern ); - } else { - logOp("d", ns, pattern ); - logOp("i", ns, updateobj ); - } + logOp("u", ns, updateobj, &pattern ); } return UpdateResult( 1 , 0 , 1 ); } - + if ( numModded ) return UpdateResult( 1 , 1 , numModded ); - + if ( profile ) ss << " nscanned:" << nscanned; - + if ( upsert ) { if ( updateobj.firstElement().fieldName()[0] == '$' ) { /* upsert of an $inc. build a default */ @@ -1115,7 +1260,7 @@ namespace mongo { theDataFileMgr.insertWithObjMod(ns, newObj, god); if ( logop ) logOp( "i", ns, newObj ); - + return UpdateResult( 0 , 1 , 1 , newObj ); } uassert( 10159 , "multi update only works with $ operators" , ! 
multi ); @@ -1130,14 +1275,14 @@ namespace mongo { } return UpdateResult( 0 , 0 , 0 ); } - + UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug ) { uassert( 10155 , "cannot update reserved $ collection", strchr(ns, '$') == 0 ); if ( strstr(ns, ".system.") ) { /* dm: it's very important that system.indexes is never updated as IndexDetails has pointers into it */ - uassert( 10156 , "cannot update system collection", legalClientSystemNS( ns , true ) ); + uassert( 10156 , str::stream() << "cannot update system collection: " << ns << " q: " << patternOrig << " u: " << updateobj , legalClientSystemNS( ns , true ) ); } return _updateObjects(false, ns, updateobj, patternOrig, upsert, multi, logop, debug); } - + } diff --git a/db/update.h b/db/update.h index b7950de..d8396b5 100644 --- a/db/update.h +++ b/db/update.h @@ -26,32 +26,42 @@ namespace mongo { class ModState; class ModSetState; - /* Used for modifiers such as $inc, $set, $push, ... + /* Used for modifiers such as $inc, $set, $push, ... * stores the info about a single operation * once created should never be modified */ struct Mod { // See opFromStr below - // 0 1 2 3 4 5 6 7 8 9 10 11 - enum Op { INC, SET, PUSH, PUSH_ALL, PULL, PULL_ALL , POP, UNSET, BITAND, BITOR , BIT , ADDTOSET } op; - + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + enum Op { INC, SET, PUSH, PUSH_ALL, PULL, PULL_ALL , POP, UNSET, BITAND, BITOR , BIT , ADDTOSET, RENAME_FROM, RENAME_TO } op; + static const char* modNames[]; static unsigned modNamesNum; const char *fieldName; const char *shortFieldName; - + BSONElement elt; // x:5 note: this is the actual element from the updateobj boost::shared_ptr matcher; + bool matcherOnPrimitive; - void init( Op o , BSONElement& e ){ + void init( Op o , BSONElement& e ) { op = o; elt = e; - if ( op == PULL && e.type() == Object ) - matcher.reset( new Matcher( e.embeddedObject() ) ); + if ( op == PULL && e.type() == Object ) { + BSONObj t = e.embeddedObject(); + if ( t.firstElement().getGtLtOp() == 0 ) { + matcher.reset( new Matcher( t ) ); + matcherOnPrimitive = false; + } + else { + matcher.reset( new Matcher( BSON( "" << t ) ) ); + matcherOnPrimitive = true; + } + } } - void setFieldName( const char * s ){ + void setFieldName( const char * s ) { fieldName = s; shortFieldName = strrchr( fieldName , '.' 
); if ( shortFieldName ) @@ -59,14 +69,13 @@ namespace mongo { else shortFieldName = fieldName; } - + /** * @param in incrememnts the actual value inside in */ void incrementMe( BSONElement& in ) const { BSONElementManipulator manip( in ); - - switch ( in.type() ){ + switch ( in.type() ) { case NumberDouble: manip.setNumber( elt.numberDouble() + in.numberDouble() ); break; @@ -79,18 +88,33 @@ namespace mongo { default: assert(0); } - } - + void IncrementMe( BSONElement& in ) const { + BSONElementManipulator manip( in ); + switch ( in.type() ) { + case NumberDouble: + manip.SetNumber( elt.numberDouble() + in.numberDouble() ); + break; + case NumberLong: + manip.SetLong( elt.numberLong() + in.numberLong() ); + break; + case NumberInt: + manip.SetInt( elt.numberInt() + in.numberInt() ); + break; + default: + assert(0); + } + } + template< class Builder > void appendIncremented( Builder& bb , const BSONElement& in, ModState& ms ) const; - + bool operator<( const Mod &other ) const { return strcmp( fieldName, other.fieldName ) < 0; } - + bool arrayDep() const { - switch (op){ + switch (op) { case PUSH: case PUSH_ALL: case POP: @@ -99,8 +123,8 @@ namespace mongo { return false; } } - - static bool isIndexed( const string& fullName , const set& idxKeys ){ + + static bool isIndexed( const string& fullName , const set& idxKeys ) { const char * fieldName = fullName.c_str(); // check if there is an index key that is a parent of mod for( const char *dot = strchr( fieldName, '.' ); dot; dot = strchr( dot + 1, '.' ) ) @@ -117,23 +141,23 @@ namespace mongo { return false; } - + bool isIndexed( const set& idxKeys ) const { string fullName = fieldName; - + if ( isIndexed( fullName , idxKeys ) ) return true; - - if ( strstr( fieldName , "." ) ){ + + if ( strstr( fieldName , "." ) ) { // check for a.0.1 StringBuilder buf( fullName.size() + 1 ); - for ( size_t i=0; i 0 && fullName[i-1] == '.' && - i+1 0 && fullName[i-1] == '.' 
&& + i+1 void apply( Builder& b , BSONElement in , ModState& ms ) const; - + /** * @return true iff toMatch should be removed from the array */ bool _pullElementMatch( BSONElement& toMatch ) const; void _checkForAppending( const BSONElement& e ) const { - if ( e.type() == Object ){ + if ( e.type() == Object ) { // this is a tiny bit slow, but rare and important // only when setting something TO an object, not setting something in an object - // and it checks for { $set : { x : { 'a.b' : 1 } } } + // and it checks for { $set : { x : { 'a.b' : 1 } } } // which is feel has been common uassert( 12527 , "not okForStorage" , e.embeddedObject().okForStorage() ); } } - + bool isEach() const { if ( elt.type() != Object ) return false; @@ -199,14 +223,18 @@ namespace mongo { BSONObj getEach() const { return elt.embeddedObjectUserCheck().firstElement().embeddedObjectUserCheck(); } - + void parseEach( BSONElementSet& s ) const { BSONObjIterator i(getEach()); - while ( i.more() ){ + while ( i.more() ) { s.insert( i.next() ); } } - + + const char *renameFrom() const { + massert( 13492, "mod must be RENAME_TO type", op == Mod::RENAME_TO ); + return elt.fieldName(); + } }; /** @@ -220,7 +248,7 @@ namespace mongo { bool _hasDynamicArray; static void extractFields( map< string, BSONElement > &fields, const BSONElement &top, const string &base ); - + FieldCompareResult compare( const ModHolder::iterator &m, map< string, BSONElement >::iterator &p, const map< string, BSONElement >::iterator &pEnd ) const { bool mDone = ( m == _mods.end() ); bool pDone = ( p == pEnd ); @@ -236,11 +264,11 @@ namespace mongo { return compareDottedFieldNames( m->first, p->first.c_str() ); } - + bool mayAddEmbedded( map< string, BSONElement > &existing, string right ) { for( string left = EmbeddedBuilder::splitDot( right ); - left.length() > 0 && left[ left.length() - 1 ] != '.'; - left += "." + EmbeddedBuilder::splitDot( right ) ) { + left.length() > 0 && left[ left.length() - 1 ] != '.'; + left += "." 
+ EmbeddedBuilder::splitDot( right ) ) { if ( existing.count( left ) > 0 && existing[ left ].type() != Object ) return false; if ( haveModForField( left.c_str() ) ) @@ -250,7 +278,7 @@ namespace mongo { } static Mod::Op opFromStr( const char *fn ) { assert( fn[0] == '$' ); - switch( fn[1] ){ + switch( fn[1] ) { case 'i': { if ( fn[2] == 'n' && fn[3] == 'c' && fn[4] == 0 ) return Mod::INC; @@ -262,14 +290,14 @@ namespace mongo { break; } case 'p': { - if ( fn[2] == 'u' ){ - if ( fn[3] == 's' && fn[4] == 'h' ){ + if ( fn[2] == 'u' ) { + if ( fn[3] == 's' && fn[4] == 'h' ) { if ( fn[5] == 0 ) return Mod::PUSH; if ( fn[5] == 'A' && fn[6] == 'l' && fn[7] == 'l' && fn[8] == 0 ) return Mod::PUSH_ALL; } - else if ( fn[3] == 'l' && fn[4] == 'l' ){ + else if ( fn[3] == 'l' && fn[4] == 'l' ) { if ( fn[5] == 0 ) return Mod::PULL; if ( fn[5] == 'A' && fn[6] == 'l' && fn[7] == 'l' && fn[8] == 0 ) @@ -286,7 +314,7 @@ namespace mongo { break; } case 'b': { - if ( fn[2] == 'i' && fn[3] == 't' ){ + if ( fn[2] == 'i' && fn[3] == 't' ) { if ( fn[4] == 0 ) return Mod::BIT; if ( fn[4] == 'a' && fn[5] == 'n' && fn[6] == 'd' && fn[7] == 0 ) @@ -297,27 +325,41 @@ namespace mongo { break; } case 'a': { - if ( fn[2] == 'd' && fn[3] == 'd' ){ + if ( fn[2] == 'd' && fn[3] == 'd' ) { // add if ( fn[4] == 'T' && fn[5] == 'o' && fn[6] == 'S' && fn[7] == 'e' && fn[8] == 't' && fn[9] == 0 ) return Mod::ADDTOSET; - + + } + break; + } + case 'r': { + if ( fn[2] == 'e' && fn[3] == 'n' && fn[4] == 'a' && fn[5] == 'm' && fn[6] =='e' ) { + return Mod::RENAME_TO; // with this return code we handle both RENAME_TO and RENAME_FROM } + break; } default: break; } uassert( 10161 , "Invalid modifier specified " + string( fn ), false ); return Mod::INC; } - - ModSet(){} + + ModSet() {} + + void updateIsIndexed( const Mod &m, const set &idxKeys, const set *backgroundKeys ) { + if ( m.isIndexed( idxKeys ) || + (backgroundKeys && m.isIndexed(*backgroundKeys)) ) { + _isIndexed++; + } + } public: - - ModSet( const BSONObj &from , - const set& idxKeys = set(), - const set* backgroundKeys = 0 - ); + + ModSet( const BSONObj &from , + const set& idxKeys = set(), + const set* backgroundKeys = 0 + ); // TODO: this is inefficient - should probably just handle when iterating ModSet * fixDynamicArray( const char * elemMatchKey ) const; @@ -329,7 +371,7 @@ namespace mongo { * doesn't change or modify this ModSet or any underying Mod */ auto_ptr prepare( const BSONObj& obj ) const; - + /** * given a query pattern, builds an object suitable for an upsert * will take the query spec and combine all $ operators @@ -349,15 +391,15 @@ namespace mongo { return _mods.find( fieldName ) != _mods.end(); } - bool haveConflictingMod( const string& fieldName ){ + bool haveConflictingMod( const string& fieldName ) { size_t idx = fieldName.find( '.' 
); if ( idx == string::npos ) idx = fieldName.size(); - + ModHolder::const_iterator start = _mods.lower_bound(fieldName.substr(0,idx)); - for ( ; start != _mods.end(); start++ ){ + for ( ; start != _mods.end(); start++ ) { FieldCompareResult r = compareDottedFieldNames( fieldName , start->first ); - switch ( r ){ + switch ( r ) { case LEFT_SUBFIELD: return true; case LEFT_BEFORE: return false; case SAME: return true; @@ -367,9 +409,9 @@ namespace mongo { } return false; - + } - + }; /** @@ -379,23 +421,28 @@ namespace mongo { public: const Mod * m; BSONElement old; - + BSONElement newVal; + BSONObj _objData; + const char * fixedOpName; BSONElement * fixed; int pushStartSize; - + BSONType incType; int incint; double incdouble; long long inclong; - - ModState(){ + + bool dontApply; + + ModState() { fixedOpName = 0; fixed = 0; pushStartSize = -1; incType = EOO; + dontApply = false; } - + Mod::Op op() const { return m->op; } @@ -403,12 +450,18 @@ namespace mongo { const char * fieldName() const { return m->fieldName; } - + bool needOpLogRewrite() const { + if ( dontApply ) + return false; + if ( fixed || fixedOpName || incType ) return true; - - switch( op() ){ + + switch( op() ) { + case Mod::RENAME_FROM: + case Mod::RENAME_TO: + return true; case Mod::BIT: case Mod::BITAND: case Mod::BITOR: @@ -418,19 +471,19 @@ namespace mongo { return false; } } - + void appendForOpLog( BSONObjBuilder& b ) const; template< class Builder > - void apply( Builder& b , BSONElement in ){ + void apply( Builder& b , BSONElement in ) { m->apply( b , in , *this ); } - + template< class Builder > void appendIncValue( Builder& b , bool useFullName ) const { const char * n = useFullName ? m->fieldName : m->shortFieldName; - switch ( incType ){ + switch ( incType ) { case NumberDouble: b.append( n , incdouble ); break; case NumberLong: @@ -443,8 +496,11 @@ namespace mongo { } string toString() const; + + template< class Builder > + void handleRename( Builder &newObjBuilder, const char *shortFieldName ); }; - + /** * this is used to hold state, meta data while applying a ModSet to a BSONObj * the goal is to make ModSet const so its re-usable @@ -459,15 +515,16 @@ namespace mongo { const BSONObj& _obj; ModStateHolder _mods; bool _inPlacePossible; - - ModSetState( const BSONObj& obj ) - : _obj( obj ) , _inPlacePossible(true){ + BSONObj _newFromMods; // keep this data alive, as oplog generation may depend on it + + ModSetState( const BSONObj& obj ) + : _obj( obj ) , _inPlacePossible(true) { } - + /** * @return if in place is still possible */ - bool amIInPlacePossible( bool inPlacePossible ){ + bool amIInPlacePossible( bool inPlacePossible ) { if ( ! 
inPlacePossible ) _inPlacePossible = false; return _inPlacePossible; @@ -478,17 +535,21 @@ namespace mongo { template< class Builder > void _appendNewFromMods( const string& root , ModState& m , Builder& b , set& onedownseen ); - + template< class Builder > - void appendNewFromMod( ModState& ms , Builder& b ){ + void appendNewFromMod( ModState& ms , Builder& b ) { + if ( ms.dontApply ) { + return; + } + //const Mod& m = *(ms.m); // HACK Mod& m = *((Mod*)(ms.m)); // HACK - - switch ( m.op ){ - - case Mod::PUSH: - case Mod::ADDTOSET: { - if ( m.isEach() ){ + + switch ( m.op ) { + + case Mod::PUSH: + case Mod::ADDTOSET: { + if ( m.isEach() ) { b.appendArray( m.shortFieldName , m.getEach() ); } else { @@ -497,19 +558,19 @@ namespace mongo { arr.done(); } break; - } - + } + case Mod::PUSH_ALL: { b.appendAs( m.elt, m.shortFieldName ); break; - } - + } + case Mod::UNSET: case Mod::PULL: case Mod::PULL_ALL: // no-op b/c unset/pull of nothing does nothing break; - + case Mod::INC: ms.fixedOpName = "$set"; case Mod::SET: { @@ -517,24 +578,29 @@ namespace mongo { b.appendAs( m.elt, m.shortFieldName ); break; } - default: + // shouldn't see RENAME_FROM here + case Mod::RENAME_TO: + ms.handleRename( b, m.shortFieldName ); + break; + default: stringstream ss; ss << "unknown mod in appendNewFromMod: " << m.op; throw UserException( 9015, ss.str() ); } - + } public: - + bool canApplyInPlace() const { return _inPlacePossible; } - + /** * modified underlying _obj + * @param isOnDisk - true means this is an on disk object, and this update needs to be made durable */ - void applyModsInPlace(); + void applyModsInPlace( bool isOnDisk ); BSONObj createNewFromMods(); @@ -544,9 +610,9 @@ namespace mongo { for ( ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) if ( i->second.needOpLogRewrite() ) return true; - return false; + return false; } - + BSONObj getOpLogRewrite() const { BSONObjBuilder b; for ( ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) @@ -564,7 +630,7 @@ namespace mongo { void appendSizeSpecForArrayDepMods( BSONObjBuilder &b ) const { for ( ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) { const ModState& m = i->second; - if ( m.m->arrayDep() ){ + if ( m.m->arrayDep() ) { if ( m.pushStartSize == -1 ) b.appendNull( m.fieldName() ); else @@ -577,6 +643,6 @@ namespace mongo { friend class ModSet; }; - + } diff --git a/dbtests/background_job_test.cpp b/dbtests/background_job_test.cpp new file mode 100644 index 0000000..f2bf7d8 --- /dev/null +++ b/dbtests/background_job_test.cpp @@ -0,0 +1,109 @@ +// @file background_job_test.cpp + +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "../pch.h" +#include + +#include "dbtests.h" +#include "../util/time_support.h" +#include "../util/background.h" + +namespace BackgroundJobTests { + + // a global variable that can be accessed independent of the IncTester object below + // IncTester keeps it up-to-date + int GLOBAL_val; + + class IncTester : public mongo::BackgroundJob { + public: + explicit IncTester( long long millis , bool selfDelete = false ) + : BackgroundJob(selfDelete), _val(0), _millis(millis) { GLOBAL_val = 0; } + + void waitAndInc( long long millis ) { + if ( millis ) + mongo::sleepmillis( millis ); + ++_val; + ++GLOBAL_val; + } + + int getVal() { return _val; } + + /* --- BackgroundJob virtuals --- */ + + string name() const { return "IncTester"; } + + void run() { waitAndInc( _millis ); } + + private: + int _val; + long long _millis; + }; + + + class NormalCase { + public: + void run() { + IncTester tester( 0 /* inc without wait */ ); + tester.go(); + ASSERT( tester.wait() ); + ASSERT_EQUALS( tester.getVal() , 1 ); + } + }; + + class TimeOutCase { + public: + void run() { + IncTester tester( 1000 /* wait 1sec before inc-ing */ ); + tester.go(); + ASSERT( ! tester.wait( 100 /* ms */ ) ); // should time out + ASSERT_EQUALS( tester.getVal() , 0 ); + + // if we wait longer than the IncTester, we should see the increment + ASSERT( tester.wait( 1500 /* ms */ ) ); // should not time out + ASSERT_EQUALS( tester.getVal() , 1 ); + } + }; + + class SelfDeletingCase { + public: + void run() { + BackgroundJob* j = new IncTester( 0 /* inc without wait */ , true /* self delete */ ); + j->go(); + + + // the background thread should have continued running and this test should pass the + // heap-checker as well + mongo::sleepmillis( 1000 ); + ASSERT_EQUALS( GLOBAL_val, 1 ); + } + }; + + + class BackgroundJobSuite : public Suite { + public: + BackgroundJobSuite() : Suite( "background_job" ) {} + + void setupTests() { + add< NormalCase >(); + add< TimeOutCase >(); + add< SelfDeletingCase >(); + } + + } backgroundJobSuite; + +} // namespace BackgroundJobTests diff --git a/dbtests/balancer_policy_tests.cpp b/dbtests/balancer_policy_tests.cpp new file mode 100644 index 0000000..6f7c4a5 --- /dev/null +++ b/dbtests/balancer_policy_tests.cpp @@ -0,0 +1,203 @@ +// @file balancer_policy_test.cpp + +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "pch.h" +#include "dbtests.h" + +// TODO SERVER-1822 +//#include "../s/config.h" // for ShardFields +//#include "../s/balancer_policy.h" + +namespace BalancerPolicyTests { + +// +// TODO SERVER-1822 +// +#if 0 + + typedef mongo::ShardFields sf; // fields from 'shards' colleciton + typedef mongo::LimitsFields lf; // fields from the balancer's limits map + + class SizeMaxedShardTest { + public: + void run() { + BSONObj shard0 = BSON( sf::maxSize(0LL) << lf::currSize(0LL) ); + ASSERT( ! 
BalancerPolicy::isSizeMaxed( shard0 ) ); + + BSONObj shard1 = BSON( sf::maxSize(100LL) << lf::currSize(80LL) ); + ASSERT( ! BalancerPolicy::isSizeMaxed( shard1 ) ); + + BSONObj shard2 = BSON( sf::maxSize(100LL) << lf::currSize(110LL) ); + ASSERT( BalancerPolicy::isSizeMaxed( shard2 ) ); + + BSONObj empty; + ASSERT( ! BalancerPolicy::isSizeMaxed( empty ) ); + } + }; + + class DrainingShardTest { + public: + void run() { + BSONObj shard0 = BSON( sf::draining(true) ); + ASSERT( BalancerPolicy::isDraining( shard0 ) ); + + BSONObj shard1 = BSON( sf::draining(false) ); + ASSERT( ! BalancerPolicy::isDraining( shard1 ) ); + + BSONObj empty; + ASSERT( ! BalancerPolicy::isDraining( empty ) ); + } + }; + + class BalanceNormalTest { + public: + void run() { + // 2 chunks and 0 chunk shards + BalancerPolicy::ShardToChunksMap chunkMap; + vector chunks; + chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << + "max" << BSON( "x" << 49 ))); + chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << + "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); + chunkMap["shard0"] = chunks; + chunks.clear(); + chunkMap["shard1"] = chunks; + + // no limits + BalancerPolicy::ShardToLimitsMap limitsMap; + BSONObj limits0 = BSON( sf::maxSize(0LL) << lf::currSize(2LL) << sf::draining(false) << lf::hasOpsQueued(false) ); + BSONObj limits1 = BSON( sf::maxSize(0LL) << lf::currSize(0LL) << sf::draining(false) << lf::hasOpsQueued(false) ); + limitsMap["shard0"] = limits0; + limitsMap["shard1"] = limits1; + + BalancerPolicy::ChunkInfo* c = NULL; + c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 1 ); + ASSERT( c ); + } + }; + + class BalanceDrainingTest { + public: + void run() { + // one normal, one draining + // 2 chunks and 0 chunk shards + BalancerPolicy::ShardToChunksMap chunkMap; + vector chunks; + chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << + "max" << BSON( "x" << 49 ))); + chunkMap["shard0"] = chunks; + chunks.clear(); + chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << + "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); + chunkMap["shard1"] = chunks; + + // shard0 is draining + BalancerPolicy::ShardToLimitsMap limitsMap; + BSONObj limits0 = BSON( sf::maxSize(0LL) << lf::currSize(2LL) << sf::draining(true) ); + BSONObj limits1 = BSON( sf::maxSize(0LL) << lf::currSize(0LL) << sf::draining(false) ); + limitsMap["shard0"] = limits0; + limitsMap["shard1"] = limits1; + + BalancerPolicy::ChunkInfo* c = NULL; + c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); + ASSERT( c ); + ASSERT_EQUALS( c->to , "shard1" ); + ASSERT_EQUALS( c->from , "shard0" ); + ASSERT( ! c->chunk.isEmpty() ); + } + }; + + class BalanceEndedDrainingTest { + public: + void run() { + // 2 chunks and 0 chunk (drain completed) shards + BalancerPolicy::ShardToChunksMap chunkMap; + vector chunks; + chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << + "max" << BSON( "x" << 49 ))); + chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << + "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); + chunkMap["shard0"] = chunks; + chunks.clear(); + chunkMap["shard1"] = chunks; + + // no limits + BalancerPolicy::ShardToLimitsMap limitsMap; + BSONObj limits0 = BSON( sf::maxSize(0LL) << lf::currSize(2LL) << sf::draining(false) ); + BSONObj limits1 = BSON( sf::maxSize(0LL) << lf::currSize(0LL) << sf::draining(true) ); + limitsMap["shard0"] = limits0; + limitsMap["shard1"] = limits1; + + BalancerPolicy::ChunkInfo* c = NULL; + c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); + ASSERT( ! 
c ); + } + }; + + class BalanceImpasseTest { + public: + void run() { + // one maxed out, one draining + // 2 chunks and 0 chunk shards + BalancerPolicy::ShardToChunksMap chunkMap; + vector chunks; + chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << + "max" << BSON( "x" << 49 ))); + chunkMap["shard0"] = chunks; + chunks.clear(); + chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << + "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); + chunkMap["shard1"] = chunks; + + // shard0 is draining, shard1 is maxed out, shard2 has writebacks pending + BalancerPolicy::ShardToLimitsMap limitsMap; + BSONObj limits0 = BSON( sf::maxSize(0LL) << lf::currSize(2LL) << sf::draining(true) ); + BSONObj limits1 = BSON( sf::maxSize(1LL) << lf::currSize(1LL) << sf::draining(false) ); + BSONObj limits2 = BSON( sf::maxSize(0LL) << lf::currSize(1LL) << lf::hasOpsQueued(true) ); + limitsMap["shard0"] = limits0; + limitsMap["shard1"] = limits1; + limitsMap["shard2"] = limits2; + + BalancerPolicy::ChunkInfo* c = NULL; + c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); + ASSERT( ! c ); + } + }; + +// +// TODO SERVER-1822 +// +#endif // #if 0 + + class All : public Suite { + public: + All() : Suite( "balancer_policy" ) { + } + + void setupTests() { + // TODO SERVER-1822 + // add< SizeMaxedShardTest >(); + // add< DrainingShardTest >(); + // add< BalanceNormalTest >(); + // add< BalanceDrainingTest >(); + // add< BalanceEndedDrainingTest >(); + // add< BalanceImpasseTest >(); + } + } allTests; + +} // namespace BalancerPolicyTests diff --git a/dbtests/basictests.cpp b/dbtests/basictests.cpp index f1e788a..3e0eecd 100644 --- a/dbtests/basictests.cpp +++ b/dbtests/basictests.cpp @@ -23,6 +23,8 @@ #include "../util/base64.h" #include "../util/array.h" #include "../util/text.h" +#include "../util/queue.h" +#include "../util/paths.h" namespace BasicTests { @@ -49,21 +51,21 @@ namespace BasicTests { RARELY ++c; } }; - + class Base64Tests { public: - - void roundTrip( string s ){ + + void roundTrip( string s ) { ASSERT_EQUALS( s , base64::decode( base64::encode( s ) ) ); } - - void roundTrip( const unsigned char * _data , int len ){ + + void roundTrip( const unsigned char * _data , int len ) { const char *data = (const char *) _data; string s = base64::encode( data , len ); string out = base64::decode( s ); ASSERT_EQUALS( out.size() , static_cast(len) ); bool broke = false; - for ( int i=0; i= 0 && sec <= 2 ); + t.reset(); + } + ASSERT( matches >= 2 ); - t.reset(); sleepmicros( 1527123 ); ASSERT( t.micros() > 1000000 ); ASSERT( t.micros() < 2000000 ); @@ -202,17 +211,17 @@ namespace BasicTests { sleepmillis( 1727 ); ASSERT( t.millis() >= 1000 ); ASSERT( t.millis() <= 2500 ); - + { int total = 1200; int ms = 2; t.reset(); - for ( int i=0; i<(total/ms); i++ ){ + for ( int i=0; i<(total/ms); i++ ) { sleepmillis( ms ); } { int x = t.millis(); - if ( x < 1000 || x > 2500 ){ + if ( x < 1000 || x > 2500 ) { cout << "sleeptest x: " << x << endl; ASSERT( x >= 1000 ); ASSERT( x <= 20000 ); @@ -226,12 +235,12 @@ namespace BasicTests { int micros = 100; t.reset(); int numSleeps = 1000*(total/micros); - for ( int i=0; i 2500 ){ + if ( y < 1000 || y > 2500 ) { cout << "sleeptest y: " << y << endl; ASSERT( y >= 1000 ); /* ASSERT( y <= 100000 ); */ @@ -239,9 +248,9 @@ namespace BasicTests { } } #endif - + } - + }; class AssertTests { @@ -249,15 +258,15 @@ namespace BasicTests { int x; - AssertTests(){ + AssertTests() { x = 0; } - string foo(){ + string foo() { x++; return ""; } - void run(){ + void run() { uassert( 
-1 , foo() , 1 ); if( x != 0 ) { ASSERT_EQUALS( 0 , x ); @@ -265,7 +274,7 @@ namespace BasicTests { try { uassert( -1 , foo() , 0 ); } - catch ( ... ){} + catch ( ... ) {} ASSERT_EQUALS( 1 , x ); } }; @@ -273,13 +282,13 @@ namespace BasicTests { namespace ArrayTests { class basic1 { public: - void run(){ + void run() { FastArray a(100); a.push_back( 5 ); a.push_back( 6 ); - + ASSERT_EQUALS( 2 , a.size() ); - + FastArray::iterator i = a.begin(); ASSERT( i != a.end() ); ASSERT_EQUALS( 5 , *i ); @@ -291,10 +300,10 @@ namespace BasicTests { } }; }; - + class ThreadSafeStringTest { public: - void run(){ + void run() { ThreadSafeString s; s = "eliot"; ASSERT_EQUALS( s , "eliot" ); @@ -302,8 +311,8 @@ namespace BasicTests { ThreadSafeString s2 = s; ASSERT_EQUALS( s2 , "eliot" ); - - + + { string foo; { @@ -315,11 +324,11 @@ namespace BasicTests { } } }; - + class LexNumCmp { public: void run() { - + ASSERT( ! isNumber( (char)255 ) ); ASSERT_EQUALS( 0, lexNumCmp( "a", "a" ) ); @@ -355,7 +364,7 @@ namespace BasicTests { ASSERT_EQUALS( -1, lexNumCmp( "a1{", "a1{a" ) ); ASSERT_EQUALS( 1, lexNumCmp("21", "11") ); ASSERT_EQUALS( -1, lexNumCmp("11", "21") ); - + ASSERT_EQUALS( -1 , lexNumCmp( "a.0" , "a.1" ) ); ASSERT_EQUALS( -1 , lexNumCmp( "a.0.b" , "a.1" ) ); @@ -363,52 +372,78 @@ namespace BasicTests { ASSERT_EQUALS( -1 , lexNumCmp( "b.0e" , (string("b.") + (char)255).c_str() ) ); ASSERT_EQUALS( -1 , lexNumCmp( "b." , "b.0e" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "238947219478347782934718234", "238947219478347782934718234")); - ASSERT_EQUALS( 0, lexNumCmp( "000238947219478347782934718234", "238947219478347782934718234")); - ASSERT_EQUALS( 1, lexNumCmp( "000238947219478347782934718235", "238947219478347782934718234")); - ASSERT_EQUALS( -1, lexNumCmp( "238947219478347782934718234", "238947219478347782934718234.1")); - ASSERT_EQUALS( 0, lexNumCmp( "238", "000238")); - ASSERT_EQUALS( 0, lexNumCmp( "002384", "0002384")); - ASSERT_EQUALS( 0, lexNumCmp( "00002384", "0002384")); - ASSERT_EQUALS( 0, lexNumCmp( "0", "0")); - ASSERT_EQUALS( 0, lexNumCmp( "0000", "0")); + ASSERT_EQUALS( 0, lexNumCmp( "238947219478347782934718234", "238947219478347782934718234")); + ASSERT_EQUALS( 0, lexNumCmp( "000238947219478347782934718234", "238947219478347782934718234")); + ASSERT_EQUALS( 1, lexNumCmp( "000238947219478347782934718235", "238947219478347782934718234")); + ASSERT_EQUALS( -1, lexNumCmp( "238947219478347782934718234", "238947219478347782934718234.1")); + ASSERT_EQUALS( 0, lexNumCmp( "238", "000238")); + ASSERT_EQUALS( 0, lexNumCmp( "002384", "0002384")); + ASSERT_EQUALS( 0, lexNumCmp( "00002384", "0002384")); + ASSERT_EQUALS( 0, lexNumCmp( "0", "0")); + ASSERT_EQUALS( 0, lexNumCmp( "0000", "0")); ASSERT_EQUALS( 0, lexNumCmp( "0", "000")); ASSERT_EQUALS( -1, lexNumCmp( "0000", "0.0")); - ASSERT_EQUALS( 1, lexNumCmp( "2380", "238")); - ASSERT_EQUALS( 1, lexNumCmp( "2385", "2384")); - ASSERT_EQUALS( 1, lexNumCmp( "2385", "02384")); - ASSERT_EQUALS( 1, lexNumCmp( "2385", "002384")); - ASSERT_EQUALS( -1, lexNumCmp( "123.234.4567", "00238")); - ASSERT_EQUALS( 0, lexNumCmp( "123.234", "00123.234")); - ASSERT_EQUALS( 0, lexNumCmp( "a.123.b", "a.00123.b")); - ASSERT_EQUALS( 1, lexNumCmp( "a.123.b", "a.b.00123.b")); - ASSERT_EQUALS( -1, lexNumCmp( "a.00.0", "a.0.1")); - ASSERT_EQUALS( 0, lexNumCmp( "01.003.02", "1.3.2")); - ASSERT_EQUALS( -1, lexNumCmp( "1.3.2", "10.300.20")); - ASSERT_EQUALS( 0, lexNumCmp( "10.300.20", "000000000000010.0000300.000000020")); - ASSERT_EQUALS( 0, lexNumCmp( "0000a", "0a")); - ASSERT_EQUALS( 
-1, lexNumCmp( "a", "0a")); - ASSERT_EQUALS( -1, lexNumCmp( "000a", "001a")); - ASSERT_EQUALS( 0, lexNumCmp( "010a", "0010a")); + ASSERT_EQUALS( 1, lexNumCmp( "2380", "238")); + ASSERT_EQUALS( 1, lexNumCmp( "2385", "2384")); + ASSERT_EQUALS( 1, lexNumCmp( "2385", "02384")); + ASSERT_EQUALS( 1, lexNumCmp( "2385", "002384")); + ASSERT_EQUALS( -1, lexNumCmp( "123.234.4567", "00238")); + ASSERT_EQUALS( 0, lexNumCmp( "123.234", "00123.234")); + ASSERT_EQUALS( 0, lexNumCmp( "a.123.b", "a.00123.b")); + ASSERT_EQUALS( 1, lexNumCmp( "a.123.b", "a.b.00123.b")); + ASSERT_EQUALS( -1, lexNumCmp( "a.00.0", "a.0.1")); + ASSERT_EQUALS( 0, lexNumCmp( "01.003.02", "1.3.2")); + ASSERT_EQUALS( -1, lexNumCmp( "1.3.2", "10.300.20")); + ASSERT_EQUALS( 0, lexNumCmp( "10.300.20", "000000000000010.0000300.000000020")); + ASSERT_EQUALS( 0, lexNumCmp( "0000a", "0a")); + ASSERT_EQUALS( -1, lexNumCmp( "a", "0a")); + ASSERT_EQUALS( -1, lexNumCmp( "000a", "001a")); + ASSERT_EQUALS( 0, lexNumCmp( "010a", "0010a")); } }; class DatabaseValidNames { public: - void run(){ + void run() { ASSERT( Database::validDBName( "foo" ) ); ASSERT( ! Database::validDBName( "foo/bar" ) ); ASSERT( ! Database::validDBName( "foo.bar" ) ); - ASSERT( nsDollarCheck( "asdads" ) ); - ASSERT( ! nsDollarCheck( "asda$ds" ) ); - ASSERT( nsDollarCheck( "local.oplog.$main" ) ); + ASSERT( isANormalNSName( "asdads" ) ); + ASSERT( ! isANormalNSName( "asda$ds" ) ); + ASSERT( isANormalNSName( "local.oplog.$main" ) ); + } + }; + + class DatabaseOwnsNS { + public: + void run() { + + bool isNew = false; + // this leaks as ~Database is private + // if that changes, should put this on the stack + Database * db = new Database( "dbtests_basictests_ownsns" , isNew ); + assert( isNew ); + + ASSERT( db->ownsNS( "dbtests_basictests_ownsns.x" ) ); + ASSERT( db->ownsNS( "dbtests_basictests_ownsns.x.y" ) ); + ASSERT( ! db->ownsNS( "dbtests_basictests_ownsn.x.y" ) ); + ASSERT( ! db->ownsNS( "dbtests_basictests_ownsnsa.x.y" ) ); + } + }; + + class NSValidNames { + public: + void run() { + ASSERT( isValidNS( "test.foo" ) ); + ASSERT( ! isValidNS( "test." ) ); + ASSERT( ! isValidNS( "test" ) ); } }; - + class PtrTests { public: - void run(){ + void run() { scoped_ptr p1 (new int(1)); boost::shared_ptr p2 (new int(2)); scoped_ptr p3 (new int(3)); @@ -419,7 +454,7 @@ namespace BasicTests { ASSERT_EQUALS( p2.get() , ptr(p2) ); ASSERT_EQUALS( p2.get() , ptr(p2.get()) ); // T* constructor ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); // copy constructor - ASSERT_EQUALS( *p2 , *ptr(p2)); + ASSERT_EQUALS( *p2 , *ptr(p2)); ASSERT_EQUALS( p2.get() , ptr >(&p2)->get() ); // operator-> //const @@ -431,14 +466,14 @@ namespace BasicTests { ASSERT_EQUALS( p4.get() , ptr(p4.get()) ); ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); ASSERT_EQUALS( p2.get() , ptr(ptr(p2)) ); // constizing copy constructor - ASSERT_EQUALS( *p2 , *ptr(p2)); + ASSERT_EQUALS( *p2 , *ptr(p2)); ASSERT_EQUALS( p2.get() , ptr >(&p2)->get() ); //bool context ASSERT( ptr(p1) ); ASSERT( !ptr(NULL) ); ASSERT( !ptr() ); - + #if 0 // These shouldn't compile ASSERT_EQUALS( p3.get() , ptr(p3) ); @@ -450,12 +485,12 @@ namespace BasicTests { struct StringSplitterTest { - void test( string s ){ + void test( string s ) { vector v = StringSplitter::split( s , "," ); ASSERT_EQUALS( s , StringSplitter::join( v , "," ) ); } - void run(){ + void run() { test( "a" ); test( "a,b" ); test( "a,b,c" ); @@ -496,16 +531,68 @@ namespace BasicTests { }; + class QueueTest { + public: + void run() { + BlockingQueue q; + Timer t; + int x; + ASSERT( ! 
q.blockingPop( x , 5 ) ); + ASSERT( t.seconds() > 3 && t.seconds() < 9 ); + + } + }; + + class StrTests { + public: + + void run() { + ASSERT_EQUALS( 1u , str::count( "abc" , 'b' ) ); + ASSERT_EQUALS( 3u , str::count( "babab" , 'b' ) ); + } + + }; + + class HostAndPortTests { + public: + void run() { + HostAndPort a( "x1" , 1000 ); + HostAndPort b( "x1" , 1000 ); + HostAndPort c( "x1" , 1001 ); + HostAndPort d( "x2" , 1000 ); + + ASSERT( a == b ); + ASSERT( a != c ); + ASSERT( a != d ); + + } + }; + + class RelativePathTest { + public: + void run() { + RelativePath a = RelativePath::fromRelativePath( "a" ); + RelativePath b = RelativePath::fromRelativePath( "a" ); + RelativePath c = RelativePath::fromRelativePath( "b" ); + RelativePath d = RelativePath::fromRelativePath( "a/b" ); + + + ASSERT( a == b ); + ASSERT( a != c ); + ASSERT( a != d ); + ASSERT( c != d ); + } + }; class All : public Suite { public: - All() : Suite( "basic" ){ + All() : Suite( "basic" ) { } - - void setupTests(){ + + void setupTests() { add< Rarely >(); add< Base64Tests >(); - + add< stringbuildertests::simple1 >(); add< stringbuildertests::simple2 >(); add< stringbuildertests::reset1 >(); @@ -513,18 +600,28 @@ namespace BasicTests { add< sleeptest >(); add< AssertTests >(); - + add< ArrayTests::basic1 >(); add< LexNumCmp >(); add< DatabaseValidNames >(); + add< DatabaseOwnsNS >(); + + add< NSValidNames >(); add< PtrTests >(); add< StringSplitterTest >(); add< IsValidUTF8Test >(); + + add< QueueTest >(); + + add< StrTests >(); + + add< HostAndPortTests >(); + add< RelativePathTest >(); } } myall; - + } // namespace BasicTests diff --git a/dbtests/btreetests.cpp b/dbtests/btreetests.cpp index a90a097..4da7375 100644 --- a/dbtests/btreetests.cpp +++ b/dbtests/btreetests.cpp @@ -29,7 +29,12 @@ namespace BtreeTests { const char* ns() { return "unittests.btreetests"; } - + + // dummy, valid record loc + const DiskLoc recordLoc() { + return DiskLoc( 0, 2 ); + } + class Ensure { public: Ensure() { @@ -41,45 +46,55 @@ namespace BtreeTests { private: DBDirectClient _c; }; - + class Base : public Ensure { public: - Base() : - _context( ns() ) { + Base() : + _context( ns() ) { { bool f = false; assert( f = true ); massert( 10402 , "assert is misdefined", f); } } + virtual ~Base() {} + static string bigNumString( long long n, int len = 800 ) { + char sub[17]; + sprintf( sub, "%.16llx", n ); + string val( len, ' ' ); + for( int i = 0; i < len; ++i ) { + val[ i ] = sub[ i % 16 ]; + } + return val; + } protected: - BtreeBucket* bt() { + const BtreeBucket* bt() { return id().head.btree(); } DiskLoc dl() { return id().head; } IndexDetails& id() { - return nsdetails( ns() )->idx( 1 ); - } - // dummy, valid record loc - static DiskLoc recordLoc() { - return DiskLoc( 0, 2 ); + NamespaceDetails *nsd = nsdetails( ns() ); + assert( nsd ); + return nsd->idx( 1 ); } void checkValid( int nKeys ) { ASSERT( bt() ); ASSERT( bt()->isHead() ); bt()->assertValid( order(), true ); - ASSERT_EQUALS( nKeys, bt()->fullValidate( dl(), order() ) ); + ASSERT_EQUALS( nKeys, bt()->fullValidate( dl(), order(), 0, true ) ); } void dump() { bt()->dumpTree( dl(), order() ); } void insert( BSONObj &key ) { bt()->bt_insert( dl(), recordLoc(), key, Ordering::make(order()), true, id(), true ); + getDur().commitIfNeeded(); } - void unindex( BSONObj &key ) { - bt()->unindex( dl(), id(), key, recordLoc() ); + bool unindex( BSONObj &key ) { + getDur().commitIfNeeded(); + return bt()->unindex( dl(), id(), key, recordLoc() ); } static BSONObj simpleKey( char c, int n = 1 
) { BSONObjBuilder builder; @@ -98,9 +113,38 @@ namespace BtreeTests { ASSERT( location == expectedLocation ); ASSERT_EQUALS( expectedPos, pos ); } + bool present( BSONObj &key, int direction ) { + int pos; + bool found; + bt()->locate( id(), dl(), key, Ordering::make(order()), pos, found, recordLoc(), direction ); + return found; + } BSONObj order() { return id().keyPattern(); } + const BtreeBucket *child( const BtreeBucket *b, int i ) { + assert( i <= b->nKeys() ); + DiskLoc d; + if ( i == b->nKeys() ) { + d = b->getNextChild(); + } + else { + d = const_cast< DiskLoc& >( b->keyNode( i ).prevChildBucket ); + } + assert( !d.isNull() ); + return d.btree(); + } + void checkKey( char i ) { + stringstream ss; + ss << i; + checkKey( ss.str() ); + } + void checkKey( const string &k ) { + BSONObj key = BSON( "" << k ); +// log() << "key: " << key << endl; + ASSERT( present( key, 1 ) ); + ASSERT( present( key, -1 ) ); + } private: dblock lk_; Client::Context _context; @@ -140,6 +184,8 @@ namespace BtreeTests { insert( longKey ); } checkValid( 20 ); + ASSERT_EQUALS( 1, bt()->nKeys() ); + checkSplit(); } protected: virtual char shortToken( int i ) const = 0; @@ -150,6 +196,7 @@ namespace BtreeTests { static char rightToken( int i ) { return 'z' - i; } + virtual void checkSplit() = 0; }; class SplitRightHeavyBucket : public SplitUnevenBucketBase { @@ -160,6 +207,10 @@ namespace BtreeTests { virtual char longToken( int i ) const { return rightToken( i ); } + virtual void checkSplit() { + ASSERT_EQUALS( 15, child( bt(), 0 )->nKeys() ); + ASSERT_EQUALS( 4, child( bt(), 1 )->nKeys() ); + } }; class SplitLeftHeavyBucket : public SplitUnevenBucketBase { @@ -170,6 +221,10 @@ namespace BtreeTests { virtual char longToken( int i ) const { return leftToken( i ); } + virtual void checkSplit() { + ASSERT_EQUALS( 4, child( bt(), 0 )->nKeys() ); + ASSERT_EQUALS( 15, child( bt(), 1 )->nKeys() ); + } }; class MissingLocate : public Base { @@ -225,7 +280,7 @@ namespace BtreeTests { } void insert( int i ) { BSONObj k = key( 'b' + 2 * i ); - Base::insert( k ); + Base::insert( k ); } }; @@ -247,20 +302,21 @@ namespace BtreeTests { } void insert( int i ) { BSONObj k = key( 'b' + 2 * i ); - Base::insert( k ); - } + Base::insert( k ); + } }; - class ReuseUnused : public Base { + class DontReuseUnused : public Base { public: void run() { for ( int i = 0; i < 10; ++i ) { insert( i ); } +// dump(); BSONObj root = key( 'p' ); unindex( root ); Base::insert( root ); - locate( root, 0, true, dl(), 1 ); + locate( root, 0, true, bt()->getNextChild(), 1 ); } private: BSONObj key( char c ) { @@ -268,16 +324,17 @@ namespace BtreeTests { } void insert( int i ) { BSONObj k = key( 'b' + 2 * i ); - Base::insert( k ); - } + Base::insert( k ); + } }; - + class PackUnused : public Base { public: void run() { for ( long long i = 0; i < 1000000; i += 1000 ) { insert( i ); } +// dump(); string orig, after; { stringstream ss; @@ -294,8 +351,9 @@ namespace BtreeTests { while( c->ok() ) { if ( !c->currKeyNode().prevChildBucket.isNull() ) { toDel.push_back( c->currKey().firstElement().valuestr() ); - } else { - other.push_back( c->currKey().firstElement().valuestr() ); + } + else { + other.push_back( c->currKey().firstElement().valuestr() ); } c->advance(); } @@ -311,30 +369,25 @@ namespace BtreeTests { } int unused = 0; - ASSERT_EQUALS( 0, bt()->fullValidate( dl(), order(), &unused ) ); + ASSERT_EQUALS( 0, bt()->fullValidate( dl(), order(), &unused, true ) ); for ( long long i = 50000; i < 50100; ++i ) { insert( i ); - } + } int unused2 = 0; - 
ASSERT_EQUALS( 100, bt()->fullValidate( dl(), order(), &unused2 ) ); + ASSERT_EQUALS( 100, bt()->fullValidate( dl(), order(), &unused2, true ) ); - ASSERT( unused2 < unused ); +// log() << "old unused: " << unused << ", new unused: " << unused2 << endl; +// + ASSERT( unused2 <= unused ); } protected: void insert( long long n ) { - string val( 800, ' ' ); - for( int i = 0; i < 800; i += 8 ) { - for( int j = 0; j < 8; ++j ) { - // probably we won't get > 56 bits - unsigned char v = 0x80 | ( n >> ( ( 8 - j - 1 ) * 7 ) & 0x000000000000007f ); - val[ i + j ] = v; - } - } + string val = bigNumString( n ); BSONObj k = BSON( "a" << val ); - Base::insert( k ); - } + Base::insert( k ); + } }; class DontDropReferenceKey : public PackUnused { @@ -344,7 +397,7 @@ namespace BtreeTests { for ( long long i = 0; i < 80; i += 1 ) { insert( i ); } - + BSONObjBuilder start; start.appendMinKey( "a" ); BSONObjBuilder end; @@ -360,19 +413,1220 @@ namespace BtreeTests { c->advance(); } // too much work to try to make this happen through inserts and deletes - const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket ) = DiskLoc(); - const_cast< DiskLoc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() |= 1; // make unused + // we are intentionally manipulating the btree bucket directly here + getDur().writingDiskLoc( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket ) ) = DiskLoc(); + getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused BSONObj k = BSON( "a" << toInsert ); Base::insert( k ); } }; - + + class MergeBuckets : public Base { + public: + virtual ~MergeBuckets() {} + void run() { + for ( int i = 0; i < 10; ++i ) { + insert( i ); + } +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + int expectedCount = 10 - unindexKeys(); +// dump(); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + int unused = 0; + ASSERT_EQUALS( expectedCount, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + } + protected: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'b' + 2 * i ); + Base::insert( k ); + } + virtual int unindexKeys() = 0; + }; + + class MergeBucketsLeft : public MergeBuckets { + virtual int unindexKeys() { + BSONObj k = key( 'b' ); + unindex( k ); + k = key( 'b' + 2 ); + unindex( k ); + k = key( 'b' + 4 ); + unindex( k ); + k = key( 'b' + 6 ); + unindex( k ); + return 4; + } + }; + + class MergeBucketsRight : public MergeBuckets { + virtual int unindexKeys() { + BSONObj k = key( 'b' + 2 * 9 ); + unindex( k ); + return 1; + } + }; + + // deleting from head won't coalesce yet +// class MergeBucketsHead : public MergeBuckets { +// virtual BSONObj unindexKey() { return key( 'p' ); } +// }; + + class MergeBucketsDontReplaceHead : public Base { + public: + void run() { + for ( int i = 0; i < 18; ++i ) { + insert( i ); + } + // dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = key( 'a' + 17 ); + unindex( k ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + int unused = 0; + ASSERT_EQUALS( 17, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + } + private: + BSONObj key( char c ) { + return simpleKey( c, 800 ); + } + void insert( int i ) { + BSONObj k = key( 'a' + i ); + Base::insert( k ); + } + }; + + // Tool to construct custom trees for tests. 
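+    //
+    // The tree-spec strings handed to ArtificialTree::setTree()/checkStructure()
+    // below are JSON-ish descriptions of a bucket: each field name becomes a key
+    // in the bucket, a nested object describes the child bucket that precedes that
+    // key, a null value means the key has no child, and the special "_" field
+    // describes the rightmost (nextChild) bucket.  Field names of the form "$<hex>"
+    // (optionally "$<hex>$<len>", both hex) are expanded by expectedKey() and
+    // bigNumString() into long keys (800 characters by default) built from the hex
+    // value.  A rough illustrative sketch (this exact spec is not one of the trees
+    // used by the tests below):
+    //
+    //     ArtificialTree::setTree( "{b:{a:null},d:{c:null},_:{e:null}}", id() );
+    //     // root keys "b" and "d"; children {a} and {c}; nextChild bucket {e}
+    //     ArtificialTree::checkStructure( "{b:{a:null},d:{c:null},_:{e:null}}", id() );
+    //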
+ class ArtificialTree : public BtreeBucket { + public: + void push( const BSONObj &key, const DiskLoc &child ) { + pushBack( dummyDiskLoc(), key, Ordering::make( BSON( "a" << 1 ) ), child ); + } + void setNext( const DiskLoc &child ) { + nextChild = child; + } + static DiskLoc make( IndexDetails &id ) { + DiskLoc ret = addBucket( id ); + is( ret )->init(); + getDur().commitIfNeeded(); + return ret; + } + static ArtificialTree *is( const DiskLoc &l ) { + return static_cast< ArtificialTree * >( l.btreemod() ); + } + static DiskLoc makeTree( const string &spec, IndexDetails &id ) { + return makeTree( fromjson( spec ), id ); + } + static DiskLoc makeTree( const BSONObj &spec, IndexDetails &id ) { + DiskLoc node = make( id ); + ArtificialTree *n = ArtificialTree::is( node ); + BSONObjIterator i( spec ); + while( i.more() ) { + BSONElement e = i.next(); + DiskLoc child; + if ( e.type() == Object ) { + child = makeTree( e.embeddedObject(), id ); + } + if ( e.fieldName() == string( "_" ) ) { + n->setNext( child ); + } + else { + n->push( BSON( "" << expectedKey( e.fieldName() ) ), child ); + } + } + n->fixParentPtrs( node ); + return node; + } + static void setTree( const string &spec, IndexDetails &id ) { + set( makeTree( spec, id ), id ); + } + static void set( const DiskLoc &l, IndexDetails &id ) { + ArtificialTree::is( id.head )->deallocBucket( id.head, id ); + getDur().writingDiskLoc(id.head) = l; + } + static string expectedKey( const char *spec ) { + if ( spec[ 0 ] != '$' ) { + return spec; + } + char *endPtr; + // parsing a long long is a pain, so just allow shorter keys for now + unsigned long long num = strtol( spec + 1, &endPtr, 16 ); + int len = 800; + if( *endPtr == '$' ) { + len = strtol( endPtr + 1, 0, 16 ); + } + return Base::bigNumString( num, len ); + } + static void checkStructure( const BSONObj &spec, const IndexDetails &id, const DiskLoc node ) { + ArtificialTree *n = ArtificialTree::is( node ); + BSONObjIterator j( spec ); + for( int i = 0; i < n->n; ++i ) { + ASSERT( j.more() ); + BSONElement e = j.next(); + KeyNode kn = n->keyNode( i ); + string expected = expectedKey( e.fieldName() ); + ASSERT( present( id, BSON( "" << expected ), 1 ) ); + ASSERT( present( id, BSON( "" << expected ), -1 ) ); + ASSERT_EQUALS( expected, kn.key.firstElement().valuestr() ); + if ( kn.prevChildBucket.isNull() ) { + ASSERT( e.type() == jstNULL ); + } + else { + ASSERT( e.type() == Object ); + checkStructure( e.embeddedObject(), id, kn.prevChildBucket ); + } + } + if ( n->nextChild.isNull() ) { + // maybe should allow '_' field with null value? 
+ ASSERT( !j.more() ); + } + else { + BSONElement e = j.next(); + ASSERT_EQUALS( string( "_" ), e.fieldName() ); + ASSERT( e.type() == Object ); + checkStructure( e.embeddedObject(), id, n->nextChild ); + } + ASSERT( !j.more() ); + } + static void checkStructure( const string &spec, const IndexDetails &id ) { + checkStructure( fromjson( spec ), id, id.head ); + } + static bool present( const IndexDetails &id, const BSONObj &key, int direction ) { + int pos; + bool found; + id.head.btree()->locate( id, id.head, key, Ordering::make(id.keyPattern()), pos, found, recordLoc(), direction ); + return found; + } + int headerSize() const { return BtreeBucket::headerSize(); } + int packedDataSize( int pos ) const { return BtreeBucket::packedDataSize( pos ); } + void fixParentPtrs( const DiskLoc &thisLoc ) { BtreeBucket::fixParentPtrs( thisLoc ); } + void forcePack() { + topSize += emptySize; + emptySize = 0; + setNotPacked(); + } + private: + DiskLoc dummyDiskLoc() const { return DiskLoc( 0, 2 ); } + }; + + /** + * We could probably refactor the following tests, but it's easier to debug + * them in the present state. + */ + + class MergeBucketsDelInternal : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,_:{c:null}},_:{f:{e:null},_:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:{a:null},d:{c:null},f:{e:null},_:{g:null}}", id() ); + } + }; + + class MergeBucketsRightNull : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},_:{f:{e:null},h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}", id() ); + } + }; + + // not yet handling this case + class DontMergeSingleBucket : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},c:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{d:{b:{a:null}}}", id() ); + } + }; + + class ParentMergeNonRightToLeft : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},i:{f:{e:null},h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 10, bt()->fullValidate( 
dl(), order(), 0, true ) ); + // child does not currently replace parent in this case + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}", id() ); + } + }; + + class ParentMergeNonRightToRight : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},cc:{c:null}},i:{f:{e:null},ff:null,h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ff" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + // child does not currently replace parent in this case + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{i:{b:{a:null},cc:{c:null},d:null,f:{e:null},h:{g:null}}}", id() ); + } + }; + + class CantMergeRightNoMerge : public Base { + public: + void run() { + ArtificialTree::setTree( "{d:{b:{a:null},bb:null,cc:{c:null}},dd:null,_:{f:{e:null},h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "bb" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{d:{b:{a:null},cc:{c:null}},dd:null,_:{f:{e:null},h:{g:null}}}", id() ); + } + }; + + class CantMergeLeftNoMerge : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},d:null,_:{f:{e:null},g:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "g" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 6, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:{b:{a:null}},d:null,_:{f:{e:null}}}", id() ); + } + }; + + class MergeOption : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},f:{e:{d:null},ee:null},_:{h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ee" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:{b:{a:null}},_:{e:{d:null},f:null,h:{g:null}}}", id() ); + } + }; + + class ForceMergeLeft : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},f:{e:{d:null},ee:null},ff:null,_:{h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ee" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( 
"{f:{b:{a:null},c:null,e:{d:null}},ff:null,_:{h:{g:null}}}", id() ); + } + }; + + class ForceMergeRight : public Base { + public: + void run() { + ArtificialTree::setTree( "{c:{b:{a:null}},cc:null,f:{e:{d:null},ee:null},_:{h:{g:null}}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 7, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "ee" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:{b:{a:null}},cc:null,_:{e:{d:null},f:null,h:{g:null}}}", id() ); + } + }; + + class RecursiveMerge : public Base { + public: + void run() { + ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},j:{i:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 10, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + // height is not currently reduced in this case + ArtificialTree::checkStructure( "{j:{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}}", id() ); + } + }; + + class RecursiveMergeRightBucket : public Base { + public: + void run() { + ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},g:{f:null}},_:{i:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 9, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{g:{b:{a:null},d:null,e:null,f:null},h:null,i:null}", id() ); + } + }; + + class RecursiveMergeDoubleRightBucket : public Base { + public: + void run() { + ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}", id() ); +// dump(); + string ns = id().indexNamespace(); + ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + + BSONObj k = BSON( "" << "c" ); + assert( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + // no recursion currently in this case + ArtificialTree::checkStructure( "{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}", id() ); + } + }; + + class MergeSizeBase : public Base { + public: + MergeSizeBase() : _count() {} + virtual ~MergeSizeBase() {} + void run() { + typedef ArtificialTree A; + A::set( A::make( id() ), id() ); + A* root = A::is( dl() ); + DiskLoc left = A::make( id() ); + root->push( biggestKey( 'm' ), left ); + _count = 1; + A* l = A::is( left ); + DiskLoc right = A::make( id() ); + root->setNext( right ); + A* r = A::is( right ); + root->fixParentPtrs( dl() ); + + ASSERT_EQUALS( bigSize(), bigSize() / 2 * 2 ); + fillToExactSize( l, leftSize(), 'a' ); + fillToExactSize( r, rightSize(), 'n' ); + ASSERT( leftAdditional() <= 2 ); + if ( leftAdditional() >= 2 ) { + l->push( bigKey( 'k' ), DiskLoc() ); + } + if ( leftAdditional() >= 1 ) { + l->push( 
bigKey( 'l' ), DiskLoc() ); + } + ASSERT( rightAdditional() <= 2 ); + if ( rightAdditional() >= 2 ) { + r->push( bigKey( 'y' ), DiskLoc() ); + } + if ( rightAdditional() >= 1 ) { + r->push( bigKey( 'z' ), DiskLoc() ); + } + _count += leftAdditional() + rightAdditional(); + +// dump(); + + initCheck(); + string ns = id().indexNamespace(); + const char *keys = delKeys(); + for( const char *i = keys; *i; ++i ) { + int unused = 0; + ASSERT_EQUALS( _count, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = bigKey( *i ); + unindex( k ); +// dump(); + --_count; + } + +// dump(); + + int unused = 0; + ASSERT_EQUALS( _count, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + validate(); + if ( !merge() ) { + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + } + else { + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + } + } + protected: + virtual int leftAdditional() const { return 2; } + virtual int rightAdditional() const { return 2; } + virtual void initCheck() {} + virtual void validate() {} + virtual int leftSize() const = 0; + virtual int rightSize() const = 0; + virtual const char * delKeys() const { return "klyz"; } + virtual bool merge() const { return true; } + void fillToExactSize( ArtificialTree *t, int targetSize, char startKey ) { + int size = 0; + while( size < targetSize ) { + int space = targetSize - size; + int nextSize = space - sizeof( _KeyNode ); + assert( nextSize > 0 ); + BSONObj newKey = key( startKey++, nextSize ); + t->push( newKey, DiskLoc() ); + size += newKey.objsize() + sizeof( _KeyNode ); + _count += 1; + } + ASSERT_EQUALS( t->packedDataSize( 0 ), targetSize ); + } + static BSONObj key( char a, int size ) { + if ( size >= bigSize() ) { + return bigKey( a ); + } + return simpleKey( a, size - ( bigSize() - 801 ) ); + } + static BSONObj bigKey( char a ) { + return simpleKey( a, 801 ); + } + static BSONObj biggestKey( char a ) { + int size = BtreeBucket::getKeyMax() - bigSize() + 801; + return simpleKey( a, size ); + } + static int bigSize() { + return bigKey( 'a' ).objsize(); + } + static int biggestSize() { + return biggestKey( 'a' ).objsize(); + } + int _count; + }; + + class MergeSizeJustRightRight : public MergeSizeBase { + protected: + virtual int rightSize() const { return BtreeBucket::getLowWaterMark() - 1; } + virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) - ( BtreeBucket::getLowWaterMark() - 1 ); } + }; + + class MergeSizeJustRightLeft : public MergeSizeBase { + protected: + virtual int leftSize() const { return BtreeBucket::getLowWaterMark() - 1; } + virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) - ( BtreeBucket::getLowWaterMark() - 1 ); } + virtual const char * delKeys() const { return "yzkl"; } + }; + + class MergeSizeRight : public MergeSizeJustRightRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() - 1; } + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } + }; + + class MergeSizeLeft : public MergeSizeJustRightLeft { + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() - 1; } + }; + + class NoMergeBelowMarkRight : public MergeSizeJustRightRight { + virtual int rightSize() const { return 
MergeSizeJustRightRight::rightSize() + 1; } + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() - 1; } + virtual bool merge() const { return false; } + }; + + class NoMergeBelowMarkLeft : public MergeSizeJustRightLeft { + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() - 1; } + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() + 1; } + virtual bool merge() const { return false; } + }; + + class MergeSizeRightTooBig : public MergeSizeJustRightLeft { + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } + virtual bool merge() const { return false; } + }; + + class MergeSizeLeftTooBig : public MergeSizeJustRightRight { + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } + virtual bool merge() const { return false; } + }; + + class BalanceOneLeftToRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},b:{$20:null,$30:null,$40:null,$50:null,a:null},_:{c:null}}", id() ); + ASSERT_EQUALS( 14, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x40 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},b:{$10:null,$20:null,$30:null,$50:null,a:null},_:{c:null}}", id() ); + } + }; + + class BalanceOneRightToLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null},b:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},_:{c:null}}", id() ); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x3 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$20:{$1:null,$2:null,$4:null,$10:null},b:{$30:null,$40:null,$50:null,$60:null,$70:null},_:{c:null}}", id() ); + } + }; + + class BalanceThreeLeftToRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$20:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},$9:{$8:null},$11:{$10:null},$13:{$12:null},_:{$14:null}},b:{$30:null,$40:{$35:null},$50:{$45:null}},_:{c:null}}", id() ); + ASSERT_EQUALS( 23, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 14, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x30 ) ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 22, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 14, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$9:{$1:{$0:null},$3:{$2:null},$5:{$4:null},$7:{$6:null},_:{$8:null}},b:{$11:{$10:null},$13:{$12:null},$20:{$14:null},$40:{$35:null},$50:{$45:null}},_:{c:null}}", id() ); + } + }; + + class BalanceThreeRightToLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( 
"{$20:{$1:{$0:null},$3:{$2:null},$5:null,_:{$14:null}},b:{$30:{$25:null},$40:{$35:null},$50:{$45:null},$60:{$55:null},$70:{$65:null},$80:{$75:null},$90:{$85:null},$100:{$95:null}},_:{c:null}}", id() ); + ASSERT_EQUALS( 25, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 15, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x5 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 24, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 15, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$50:{$1:{$0:null},$3:{$2:null},$20:{$14:null},$30:{$25:null},$40:{$35:null},_:{$45:null}},b:{$60:{$55:null},$70:{$65:null},$80:{$75:null},$90:{$85:null},$100:{$95:null}},_:{c:null}}", id() ); + } + }; + + class BalanceSingleParentKey : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},_:{$20:null,$30:null,$40:null,$50:null,a:null}}", id() ); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x40 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$10:null,$20:null,$30:null,$50:null,a:null}}", id() ); + } + }; + + class PackEmpty : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null}", id() ); + BSONObj k = BSON( "" << "a" ); + ASSERT( unindex( k ) ); + ArtificialTree *t = ArtificialTree::is( dl() ); + t->forcePack(); + Tester::checkEmpty( t, id() ); + } + class Tester : public ArtificialTree { + public: + static void checkEmpty( ArtificialTree *a, const IndexDetails &id ) { + Tester *t = static_cast< Tester * >( a ); + ASSERT_EQUALS( 0, t->n ); + ASSERT( !( t->flags & Packed ) ); + Ordering o = Ordering::make( id.keyPattern() ); + int zero = 0; + t->_packReadyForMod( o, zero ); + ASSERT_EQUALS( 0, t->n ); + ASSERT_EQUALS( 0, t->topSize ); + ASSERT_EQUALS( BtreeBucket::bodySize(), t->emptySize ); + ASSERT( t->flags & Packed ); + } + }; + }; + + class PackedDataSizeEmpty : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null}", id() ); + BSONObj k = BSON( "" << "a" ); + ASSERT( unindex( k ) ); + ArtificialTree *t = ArtificialTree::is( dl() ); + t->forcePack(); + Tester::checkEmpty( t, id() ); + } + class Tester : public ArtificialTree { + public: + static void checkEmpty( ArtificialTree *a, const IndexDetails &id ) { + Tester *t = static_cast< Tester * >( a ); + ASSERT_EQUALS( 0, t->n ); + ASSERT( !( t->flags & Packed ) ); + int zero = 0; + ASSERT_EQUALS( 0, t->packedDataSize( zero ) ); + ASSERT( !( t->flags & Packed ) ); + } + }; + }; + + class BalanceSingleParentKeyPackParent : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},_:{$20:null,$30:null,$40:null,$50:null,a:null}}", id() ); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + // force parent pack + ArtificialTree::is( dl() )->forcePack(); + BSONObj k = BSON( "" << bigNumString( 0x40 ) ); +// 
dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 11, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$6:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$10:null,$20:null,$30:null,$50:null,a:null}}", id() ); + } + }; + + class BalanceSplitParent : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10$10:{$1:null,$2:null,$3:null,$4:null},$100:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null},$200:null,$300:null,$400:null,$500:null,$600:null,$700:null,$800:null,$900:null,_:{c:null}}", id() ); + ASSERT_EQUALS( 22, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x3 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 21, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$500:{$30:{$1:null,$2:null,$4:null,$10$10:null,$20:null},$100:{$40:null,$50:null,$60:null,$70:null,$80:null},$200:null,$300:null,$400:null},_:{$600:null,$700:null,$800:null,$900:null,_:{c:null}}}", id() ); + } + }; + + class RebalancedSeparatorBase : public Base { + public: + void run() { + ArtificialTree::setTree( treeSpec(), id() ); + modTree(); + Tester::checkSeparator( id(), expectedSeparator() ); + } + virtual string treeSpec() const = 0; + virtual int expectedSeparator() const = 0; + virtual void modTree() {} + struct Tester : public ArtificialTree { + static void checkSeparator( const IndexDetails& id, int expected ) { + ASSERT_EQUALS( expected, static_cast< Tester * >( id.head.btreemod() )->rebalancedSeparatorPos( id.head, 0 ) ); + } + }; + }; + + class EvenRebalanceLeft : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$7:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null,$6:null},_:{$8:null,$9:null,$10$31e:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceLeftCusp : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$6:{$1:null,$2$31f:null,$3:null,$4$31f:null,$5:null},_:{$7:null,$8:null,$9$31e:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceRight : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$3:{$1:null,$2$31f:null},_:{$4$31f:null,$5:null,$6:null,$7:null,$8$31e:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceRightCusp : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$4$31f:{$1:null,$2$31f:null,$3:null},_:{$5:null,$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class EvenRebalanceCenter : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$5:{$1:null,$2$31f:null,$3:null,$4$31f:null},_:{$6:null,$7$31e:null,$8:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class OddRebalanceLeft : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$6$31f:{$1:null,$2:null,$3:null,$4:null,$5:null},_:{$7:null,$8:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class OddRebalanceRight : public RebalancedSeparatorBase { + virtual string treeSpec() const { return 
"{$4:{$1:null,$2:null,$3:null},_:{$5:null,$6:null,$7:null,$8$31f:null,$9:null,$10:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class OddRebalanceCenter : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$5:{$1:null,$2:null,$3:null,$4:null},_:{$6:null,$7:null,$8:null,$9:null,$10$31f:null}}"; } + virtual int expectedSeparator() const { return 4; } + }; + + class RebalanceEmptyRight : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$a:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null,$7:null,$8:null,$9:null},_:{$b:null}}"; } + virtual void modTree() { + BSONObj k = BSON( "" << bigNumString( 0xb ) ); + ASSERT( unindex( k ) ); + } + virtual int expectedSeparator() const { return 4; } + }; + + class RebalanceEmptyLeft : public RebalancedSeparatorBase { + virtual string treeSpec() const { return "{$a:{$1:null},_:{$11:null,$12:null,$13:null,$14:null,$15:null,$16:null,$17:null,$18:null,$19:null}}"; } + virtual void modTree() { + BSONObj k = BSON( "" << bigNumString( 0x1 ) ); + ASSERT( unindex( k ) ); + } + virtual int expectedSeparator() const { return 4; } + }; + + class NoMoveAtLowWaterMarkRight : public MergeSizeJustRightRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize() + 1; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } + virtual void validate() { ASSERT_EQUALS( _oldTop, bt()->keyNode( 0 ).key ); } + virtual bool merge() const { return false; } + protected: + BSONObj _oldTop; + }; + + class MoveBelowLowWaterMarkRight : public NoMoveAtLowWaterMarkRight { + virtual int rightSize() const { return MergeSizeJustRightRight::rightSize(); } + virtual int leftSize() const { return MergeSizeJustRightRight::leftSize() + 1; } + // different top means we rebalanced + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } + }; + + class NoMoveAtLowWaterMarkLeft : public MergeSizeJustRightLeft { + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize() + 1; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } + virtual void validate() { ASSERT_EQUALS( _oldTop, bt()->keyNode( 0 ).key ); } + virtual bool merge() const { return false; } + protected: + BSONObj _oldTop; + }; + + class MoveBelowLowWaterMarkLeft : public NoMoveAtLowWaterMarkLeft { + virtual int leftSize() const { return MergeSizeJustRightLeft::leftSize(); } + virtual int rightSize() const { return MergeSizeJustRightLeft::rightSize() + 1; } + // different top means we rebalanced + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } + }; + + class PreferBalanceLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$1:null,$2:null,$3:null,$4:null,$5:null,$6:null},$20:{$11:null,$12:null,$13:null,$14:null},_:{$30:null}}", id() ); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x12 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$5:{$1:null,$2:null,$3:null,$4:null},$20:{$6:null,$10:null,$11:null,$13:null,$14:null},_:{$30:null}}", id() ); + } + }; + + class PreferBalanceRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( 
"{$10:{$1:null},$20:{$11:null,$12:null,$13:null,$14:null},_:{$31:null,$32:null,$33:null,$34:null,$35:null,$36:null}}", id() ); + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x12 ) ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$10:{$1:null},$31:{$11:null,$13:null,$14:null,$20:null},_:{$32:null,$33:null,$34:null,$35:null,$36:null}}", id() ); + } + }; + + class RecursiveMergeThenBalance : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:{$5:{$1:null,$2:null},$8:{$6:null,$7:null}},_:{$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null,$90:null}}", id() ); + ASSERT_EQUALS( 15, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x7 ) ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 14, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$40:{$8:{$1:null,$2:null,$5:null,$6:null},$10:null,$20:null,$30:null},_:{$50:null,$60:null,$70:null,$80:null,$90:null}}", id() ); + } + }; + + class MergeRightEmpty : public MergeSizeBase { + protected: + virtual int rightAdditional() const { return 1; } + virtual int leftAdditional() const { return 1; } + virtual const char * delKeys() const { return "lz"; } + virtual int rightSize() const { return 0; } + virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ); } + }; + + class MergeMinRightEmpty : public MergeSizeBase { + protected: + virtual int rightAdditional() const { return 1; } + virtual int leftAdditional() const { return 0; } + virtual const char * delKeys() const { return "z"; } + virtual int rightSize() const { return 0; } + virtual int leftSize() const { return bigSize() + sizeof( _KeyNode ); } + }; + + class MergeLeftEmpty : public MergeSizeBase { + protected: + virtual int rightAdditional() const { return 1; } + virtual int leftAdditional() const { return 1; } + virtual const char * delKeys() const { return "zl"; } + virtual int leftSize() const { return 0; } + virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ); } + }; + + class MergeMinLeftEmpty : public MergeSizeBase { + protected: + virtual int leftAdditional() const { return 1; } + virtual int rightAdditional() const { return 0; } + virtual const char * delKeys() const { return "l"; } + virtual int leftSize() const { return 0; } + virtual int rightSize() const { return bigSize() + sizeof( _KeyNode ); } + }; + + class BalanceRightEmpty : public MergeRightEmpty { + protected: + virtual int leftSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) + 1; } + virtual bool merge() const { return false; } + virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } + private: + BSONObj _oldTop; + }; + + class BalanceLeftEmpty : public MergeLeftEmpty { + protected: + virtual int rightSize() const { return BtreeBucket::bodySize() - biggestSize() - sizeof( _KeyNode ) + 1; } + virtual bool merge() const { return false; } + 
virtual void initCheck() { _oldTop = bt()->keyNode( 0 ).key; } + virtual void validate() { ASSERT( !( _oldTop == bt()->keyNode( 0 ).key ) ); } + private: + BSONObj _oldTop; + }; + + class DelEmptyNoNeighbors : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{b:{a:null}}", id() ); + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 1, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{b:null}", id() ); + } + }; + + class DelEmptyEmptyNeighbors : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "b" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), 0, true ) ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,c:null,d:null}", id() ); + } + }; + + class DelInternal : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); + int unused = 0; + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "c" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,b:null,d:null}", id() ); + } + }; + + class DelInternalReplaceWithUnused : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() ); + getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket.btree()->keyNode( 0 ).recordLoc ).GETOFS() ) |= 1; // make unused + int unused = 0; + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "c" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + // doesn't discriminate between used and unused + ArtificialTree::checkStructure( "{a:null,b:null,d:null}", id() ); + } + }; + + class DelInternalReplaceRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,_:{b:null}}", id() ); + int unused = 0; + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 1, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 1, nsdetails( ns.c_str() )->stats.nrecords ); + 
ArtificialTree::checkStructure( "{b:null}", id() ); + } + }; + + class DelInternalPromoteKey : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,y:{d:{c:{b:null}},_:{e:null}},z:null}", id() ); + int unused = 0; + ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 5, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "y" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 6, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,e:{c:{b:null},d:null},z:null}", id() ); + } + }; + + class DelInternalPromoteRightKey : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,_:{e:{c:null},_:{f:null}}}", id() ); + int unused = 0; + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + unused = 0; + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 2, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{c:null,_:{e:null,f:null}}", id() ); + } + }; + + class DelInternalReplacementPrevNonNull : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,d:{c:{b:null}},e:null}", id() ); + int unused = 0; + ASSERT_EQUALS( 5, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "d" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 4, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,d:{c:{b:null}},e:null}", id() ); + ASSERT( bt()->keyNode( 1 ).recordLoc.getOfs() & 1 ); // check 'unused' key + } + }; + + class DelInternalReplacementNextNonNull : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{a:null,_:{c:null,_:{d:null}}}", id() ); + int unused = 0; + ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << "a" ); + // dump(); + ASSERT( unindex( k ) ); + // dump(); + ASSERT_EQUALS( 2, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 1, unused ); + ASSERT_EQUALS( 3, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{a:null,_:{c:null,_:{d:null}}}", id() ); + ASSERT( bt()->keyNode( 0 ).recordLoc.getOfs() & 1 ); // check 'unused' key + } + }; + + class DelInternalSplitPromoteLeft : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:null,$20:null,$30$10:{$25:{$23:null},_:{$27:null}},$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100:null}", id() ); + int unused = 0; + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); 
+ BSONObj k = BSON( "" << bigNumString( 0x30, 0x10 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$60:{$10:null,$20:null,$27:{$23:null,$25:null},$40:null,$50:null},_:{$70:null,$80:null,$90:null,$100:null}}", id() ); + } + }; + + class DelInternalSplitPromoteRight : public Base { + public: + void run() { + string ns = id().indexNamespace(); + ArtificialTree::setTree( "{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null,$80:null,$90:null,$100$10:{$95:{$93:null},_:{$97:null}}}", id() ); + int unused = 0; + ASSERT_EQUALS( 13, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + BSONObj k = BSON( "" << bigNumString( 0x100, 0x10 ) ); +// dump(); + ASSERT( unindex( k ) ); +// dump(); + ASSERT_EQUALS( 12, bt()->fullValidate( dl(), order(), &unused, true ) ); + ASSERT_EQUALS( 0, unused ); + ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords ); + ArtificialTree::checkStructure( "{$80:{$10:null,$20:null,$30:null,$40:null,$50:null,$60:null,$70:null},_:{$90:null,$97:{$93:null,$95:null}}}", id() ); + } + }; + class All : public Suite { public: - All() : Suite( "btree" ){ + All() : Suite( "btree" ) { } - - void setupTests(){ + + void setupTests() { add< Create >(); add< SimpleInsertDelete >(); add< SplitRightHeavyBucket >(); @@ -380,9 +1634,77 @@ namespace BtreeTests { add< MissingLocate >(); add< MissingLocateMultiBucket >(); add< SERVER983 >(); - add< ReuseUnused >(); + add< DontReuseUnused >(); add< PackUnused >(); add< DontDropReferenceKey >(); + add< MergeBucketsLeft >(); + add< MergeBucketsRight >(); +// add< MergeBucketsHead >(); + add< MergeBucketsDontReplaceHead >(); + add< MergeBucketsDelInternal >(); + add< MergeBucketsRightNull >(); + add< DontMergeSingleBucket >(); + add< ParentMergeNonRightToLeft >(); + add< ParentMergeNonRightToRight >(); + add< CantMergeRightNoMerge >(); + add< CantMergeLeftNoMerge >(); + add< MergeOption >(); + add< ForceMergeLeft >(); + add< ForceMergeRight >(); + add< RecursiveMerge >(); + add< RecursiveMergeRightBucket >(); + add< RecursiveMergeDoubleRightBucket >(); + add< MergeSizeJustRightRight >(); + add< MergeSizeJustRightLeft >(); + add< MergeSizeRight >(); + add< MergeSizeLeft >(); + add< NoMergeBelowMarkRight >(); + add< NoMergeBelowMarkLeft >(); + add< MergeSizeRightTooBig >(); + add< MergeSizeLeftTooBig >(); + add< BalanceOneLeftToRight >(); + add< BalanceOneRightToLeft >(); + add< BalanceThreeLeftToRight >(); + add< BalanceThreeRightToLeft >(); + add< BalanceSingleParentKey >(); + add< PackEmpty >(); + add< PackedDataSizeEmpty >(); + add< BalanceSingleParentKeyPackParent >(); + add< BalanceSplitParent >(); + add< EvenRebalanceLeft >(); + add< EvenRebalanceLeftCusp >(); + add< EvenRebalanceRight >(); + add< EvenRebalanceRightCusp >(); + add< EvenRebalanceCenter >(); + add< OddRebalanceLeft >(); + add< OddRebalanceRight >(); + add< OddRebalanceCenter >(); + add< RebalanceEmptyRight >(); + add< RebalanceEmptyLeft >(); + add< NoMoveAtLowWaterMarkRight >(); + add< MoveBelowLowWaterMarkRight >(); + add< NoMoveAtLowWaterMarkLeft >(); + add< MoveBelowLowWaterMarkLeft >(); + add< PreferBalanceLeft >(); + add< PreferBalanceRight >(); + add< RecursiveMergeThenBalance >(); + add< MergeRightEmpty >(); + add< MergeMinRightEmpty >(); + add< 
MergeLeftEmpty >(); + add< MergeMinLeftEmpty >(); + add< BalanceRightEmpty >(); + add< BalanceLeftEmpty >(); + add< DelEmptyNoNeighbors >(); + add< DelEmptyEmptyNeighbors >(); + add< DelInternal >(); + add< DelInternalReplaceWithUnused >(); + add< DelInternalReplaceRight >(); + add< DelInternalPromoteKey >(); + add< DelInternalPromoteRightKey >(); + add< DelInternalReplacementPrevNonNull >(); + add< DelInternalReplacementNextNonNull >(); + add< DelInternalSplitPromoteLeft >(); + add< DelInternalSplitPromoteRight >(); } } myall; } diff --git a/dbtests/clienttests.cpp b/dbtests/clienttests.cpp index 58287e9..f51b765 100644 --- a/dbtests/clienttests.cpp +++ b/dbtests/clienttests.cpp @@ -20,40 +20,40 @@ #include "../client/dbclient.h" #include "dbtests.h" #include "../db/concurrency.h" - + namespace ClientTests { - + class Base { public: - - Base( string coll ){ + + Base( string coll ) { _ns = (string)"test." + coll; } - - virtual ~Base(){ + + virtual ~Base() { db.dropCollection( _ns ); } - - const char * ns(){ return _ns.c_str(); } - + + const char * ns() { return _ns.c_str(); } + string _ns; DBDirectClient db; }; - + class DropIndex : public Base { public: - DropIndex() : Base( "dropindex" ){} - void run(){ + DropIndex() : Base( "dropindex" ) {} + void run() { db.insert( ns() , BSON( "x" << 2 ) ); ASSERT_EQUALS( 1 , db.getIndexes( ns() )->itcount() ); - + db.ensureIndex( ns() , BSON( "x" << 1 ) ); ASSERT_EQUALS( 2 , db.getIndexes( ns() )->itcount() ); - + db.dropIndex( ns() , BSON( "x" << 1 ) ); ASSERT_EQUALS( 1 , db.getIndexes( ns() )->itcount() ); - + db.ensureIndex( ns() , BSON( "x" << 1 ) ); ASSERT_EQUALS( 2 , db.getIndexes( ns() )->itcount() ); @@ -61,18 +61,18 @@ namespace ClientTests { ASSERT_EQUALS( 1 , db.getIndexes( ns() )->itcount() ); } }; - + class ReIndex : public Base { public: - ReIndex() : Base( "reindex" ){} - void run(){ - + ReIndex() : Base( "reindex" ) {} + void run() { + db.insert( ns() , BSON( "x" << 2 ) ); ASSERT_EQUALS( 1 , db.getIndexes( ns() )->itcount() ); - + db.ensureIndex( ns() , BSON( "x" << 1 ) ); ASSERT_EQUALS( 2 , db.getIndexes( ns() )->itcount() ); - + db.reIndex( ns() ); ASSERT_EQUALS( 2 , db.getIndexes( ns() )->itcount() ); } @@ -81,15 +81,15 @@ namespace ClientTests { class ReIndex2 : public Base { public: - ReIndex2() : Base( "reindex2" ){} - void run(){ - + ReIndex2() : Base( "reindex2" ) {} + void run() { + db.insert( ns() , BSON( "x" << 2 ) ); ASSERT_EQUALS( 1 , db.getIndexes( ns() )->itcount() ); - + db.ensureIndex( ns() , BSON( "x" << 1 ) ); ASSERT_EQUALS( 2 , db.getIndexes( ns() )->itcount() ); - + BSONObj out; ASSERT( db.runCommand( "test" , BSON( "reIndex" << "reindex2" ) , out ) ); ASSERT_EQUALS( 2 , out["nIndexes"].number() ); @@ -106,7 +106,7 @@ namespace ClientTests { for( int i = 0; i < 1111; ++i ) db.insert( ns(), BSON( "a" << i << "b" << longs ) ); db.ensureIndex( ns(), BSON( "a" << 1 << "b" << 1 ) ); - + auto_ptr< DBClientCursor > c = db.query( ns(), Query().sort( BSON( "a" << 1 << "b" << 1 ) ) ); ASSERT_EQUALS( 1111, c->itcount() ); } @@ -161,20 +161,37 @@ namespace ClientTests { ASSERT( db.runCommand( "unittests", BSON( "collstats" << "clienttests.create" ), info ) ); } }; + + class ConnectionStringTests { + public: + void run() { + { + ConnectionString s( "a/b,c,d" , ConnectionString::SET ); + ASSERT_EQUALS( ConnectionString::SET , s.type() ); + ASSERT_EQUALS( "a" , s.getSetName() ); + vector v = s.getServers(); + ASSERT_EQUALS( 3U , v.size() ); + ASSERT_EQUALS( "b" , v[0].host() ); + ASSERT_EQUALS( "c" , v[1].host() ); + 
ASSERT_EQUALS( "d" , v[2].host() ); + } + } + }; class All : public Suite { public: - All() : Suite( "client" ){ + All() : Suite( "client" ) { } - void setupTests(){ + void setupTests() { add(); add(); add(); add(); add(); add(); + add(); } - + } all; } diff --git a/dbtests/commandtests.cpp b/dbtests/commandtests.cpp index fa0014d..fa6204d 100644 --- a/dbtests/commandtests.cpp +++ b/dbtests/commandtests.cpp @@ -23,19 +23,19 @@ using namespace mongo; namespace CommandTests { // one namespace per command - namespace FileMD5{ + namespace FileMD5 { struct Base { - Base(){ + Base() { db.dropCollection(ns()); db.ensureIndex(ns(), BSON( "files_id" << 1 << "n" << 1 )); } const char* ns() { return "test.fs.chunks"; } - + DBDirectClient db; }; struct Type0 : Base { - void run(){ + void run() { { BSONObjBuilder b; b.genOID(); @@ -58,8 +58,8 @@ namespace CommandTests { ASSERT_EQUALS( string("5eb63bbbe01eeed093cb22bb8f5acdc3") , result["md5"].valuestr() ); } }; - struct Type2 : Base{ - void run(){ + struct Type2 : Base { + void run() { { BSONObjBuilder b; b.genOID(); @@ -86,13 +86,13 @@ namespace CommandTests { class All : public Suite { public: - All() : Suite( "commands" ){ + All() : Suite( "commands" ) { } - void setupTests(){ + void setupTests() { add< FileMD5::Type0 >(); add< FileMD5::Type2 >(); } - + } all; } diff --git a/dbtests/cursortests.cpp b/dbtests/cursortests.cpp index 954c8b0..ddd7b03 100644 --- a/dbtests/cursortests.cpp +++ b/dbtests/cursortests.cpp @@ -25,12 +25,12 @@ #include "dbtests.h" namespace CursorTests { - + namespace BtreeCursorTests { // The ranges expressed in these tests are impossible given our query // syntax, so going to do them a hacky way. - + class Base { protected: FieldRangeVector *vec( int *vals, int len, int direction = 1 ) { @@ -40,7 +40,8 @@ namespace CursorTests { FieldRangeSet s2( "", _objs.back() ); if ( i == 0 ) { s.range( "a" ) = s2.range( "a" ); - } else { + } + else { s.range( "a" ) |= s2.range( "a" ); } } @@ -49,7 +50,7 @@ namespace CursorTests { private: vector< BSONObj > _objs; }; - + class MultiRange : public Base { public: void run() { @@ -103,7 +104,7 @@ namespace CursorTests { ASSERT( !c.ok() ); } }; - + class MultiRangeReverse : public Base { public: void run() { @@ -129,7 +130,7 @@ namespace CursorTests { ASSERT( !c.ok() ); } }; - + class Base2 { public: virtual ~Base2() { _c.dropCollection( ns() ); } @@ -167,7 +168,7 @@ namespace CursorTests { dblock _lk; vector< BSONObj > _objs; }; - + class EqEq : public Base2 { public: void run() { @@ -194,7 +195,7 @@ namespace CursorTests { check( BSON( "a" << 4 << "b" << BSON( "$gte" << 1 << "$lte" << 10 ) ) ); } virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } - }; + }; class EqIn : public Base2 { public: @@ -210,7 +211,7 @@ namespace CursorTests { check( BSON( "a" << 4 << "b" << BSON( "$in" << BSON_ARRAY( 5 << 6 << 11 ) ) ) ); } virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } - }; + }; class RangeEq : public Base2 { public: @@ -227,7 +228,7 @@ namespace CursorTests { check( BSON( "a" << BSON( "$gte" << 1 << "$lte" << 10 ) << "b" << 4 ) ); } virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } - }; + }; class RangeIn : public Base2 { public: @@ -244,15 +245,15 @@ namespace CursorTests { check( BSON( "a" << BSON( "$gte" << 1 << "$lte" << 10 ) << "b" << BSON( "$in" << BSON_ARRAY( 4 << 6 ) ) ) ); } virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); } - }; - + }; + } // namespace BtreeCursorTests - + class All : public Suite { public: - 
All() : Suite( "cursor" ){} - - void setupTests(){ + All() : Suite( "cursor" ) {} + + void setupTests() { add< BtreeCursorTests::MultiRange >(); add< BtreeCursorTests::MultiRangeGap >(); add< BtreeCursorTests::MultiRangeReverse >(); diff --git a/dbtests/d_chunk_manager_tests.cpp b/dbtests/d_chunk_manager_tests.cpp new file mode 100644 index 0000000..bcfe9fa --- /dev/null +++ b/dbtests/d_chunk_manager_tests.cpp @@ -0,0 +1,467 @@ +//@file d_chunk_manager_tests.cpp : s/d_chunk_manager.{h,cpp} tests + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" +#include "dbtests.h" + +#include "../s/d_chunk_manager.h" + +namespace { + + class BasicTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 ) << + "unique" << false ); + + // single-chunk collection + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_MinKey" << + "ns" << "test.foo" << + "min" << BSON( "a" << MINKEY ) << + "max" << BSON( "a" << MAXKEY ) ) ); + + ShardChunkManager s ( collection , chunks ); + + BSONObj k1 = BSON( "a" << MINKEY ); + ASSERT( s.belongsToMe( k1 ) ); + BSONObj k2 = BSON( "a" << MAXKEY ); + ASSERT( ! s.belongsToMe( k2 ) ); + BSONObj k3 = BSON( "a" << 1 << "b" << 2 ); + ASSERT( s.belongsToMe( k3 ) ); + } + }; + + class BasicCompoundTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1) << + "unique" << false ); + + // single-chunk collection + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_MinKeyb_MinKey" << + "ns" << "test.foo" << + "min" << BSON( "a" << MINKEY << "b" << MINKEY ) << + "max" << BSON( "a" << MAXKEY << "b" << MAXKEY ) ) ); + + ShardChunkManager s ( collection , chunks ); + + BSONObj k1 = BSON( "a" << MINKEY << "b" << MINKEY ); + ASSERT( s.belongsToMe( k1 ) ); + BSONObj k2 = BSON( "a" << MAXKEY << "b" << MAXKEY ); + ASSERT( ! 
s.belongsToMe( k2 ) ); + BSONObj k3 = BSON( "a" << MINKEY << "b" << 10 ); + ASSERT( s.belongsToMe( k3 ) ); + BSONObj k4 = BSON( "a" << 10 << "b" << 20 ); + ASSERT( s.belongsToMe( k4 ) ); + } + }; + + class RangeTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "x.y" << + "dropped" << false << + "key" << BSON( "a" << 1 ) << + "unique" << false ); + + // 3-chunk collection, 2 of them being contiguous + // [min->10) , [10->20) , , [30->max) + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "x.y-a_MinKey" << + "ns" << "x.y" << + "min" << BSON( "a" << MINKEY ) << + "max" << BSON( "a" << 10 ) ) << + BSON( "_id" << "x.y-a_10" << + "ns" << "x.y" << + "min" << BSON( "a" << 10 ) << + "max" << BSON( "a" << 20 ) ) << + BSON( "_id" << "x.y-a_30" << + "ns" << "x.y" << + "min" << BSON( "a" << 30 ) << + "max" << BSON( "a" << MAXKEY ) ) ); + + ShardChunkManager s ( collection , chunks ); + + BSONObj k1 = BSON( "a" << 5 ); + ASSERT( s.belongsToMe( k1 ) ); + BSONObj k2 = BSON( "a" << 10 ); + ASSERT( s.belongsToMe( k2 ) ); + BSONObj k3 = BSON( "a" << 25 ); + ASSERT( ! s.belongsToMe( k3 ) ); + BSONObj k4 = BSON( "a" << 30 ); + ASSERT( s.belongsToMe( k4 ) ); + BSONObj k5 = BSON( "a" << 40 ); + ASSERT( s.belongsToMe( k5 ) ); + } + }; + + class GetNextTests { + public: + void run() { + + BSONObj collection = BSON( "_id" << "x.y" << + "dropped" << false << + "key" << BSON( "a" << 1 ) << + "unique" << false ); + // empty collection + BSONArray chunks1 = BSONArray(); + ShardChunkManager s1( collection , chunks1 ); + + BSONObj empty; + BSONObj arbitraryKey = BSON( "a" << 10 ); + BSONObj foundMin, foundMax; + + ASSERT( s1.getNextChunk( empty , &foundMin , &foundMax ) ); + ASSERT( foundMin.isEmpty() ); + ASSERT( foundMax.isEmpty() ); + ASSERT( s1.getNextChunk( arbitraryKey , &foundMin , &foundMax ) ); + ASSERT( foundMin.isEmpty() ); + ASSERT( foundMax.isEmpty() ); + + // single-chunk collection + // [10->20] + BSONObj key_a10 = BSON( "a" << 10 ); + BSONObj key_a20 = BSON( "a" << 20 ); + BSONArray chunks2 = BSON_ARRAY( BSON( "_id" << "x.y-a_10" << + "ns" << "x.y" << + "min" << key_a10 << + "max" << key_a20 ) ); + ShardChunkManager s2( collection , chunks2 ); + ASSERT( s2.getNextChunk( empty , &foundMin , &foundMax ) ); + ASSERT( foundMin.woCompare( key_a10 ) == 0 ); + ASSERT( foundMax.woCompare( key_a20 ) == 0 ); + + // 3-chunk collection, 2 of them being contiguous + // [min->10) , [10->20) , , [30->max) + BSONObj key_a30 = BSON( "a" << 30 ); + BSONObj key_min = BSON( "a" << MINKEY ); + BSONObj key_max = BSON( "a" << MAXKEY ); + BSONArray chunks3 = BSON_ARRAY( BSON( "_id" << "x.y-a_MinKey" << + "ns" << "x.y" << + "min" << key_min << + "max" << key_a10 ) << + BSON( "_id" << "x.y-a_10" << + "ns" << "x.y" << + "min" << key_a10 << + "max" << key_a20 ) << + BSON( "_id" << "x.y-a_30" << + "ns" << "x.y" << + "min" << key_a30 << + "max" << key_max ) ); + ShardChunkManager s3( collection , chunks3 ); + ASSERT( ! s3.getNextChunk( empty , &foundMin , &foundMax ) ); // not eof + ASSERT( foundMin.woCompare( key_min ) == 0 ); + ASSERT( foundMax.woCompare( key_a10 ) == 0 ); + ASSERT( ! 
s3.getNextChunk( key_a10 , &foundMin , &foundMax ) ); + ASSERT( foundMin.woCompare( key_a30 ) == 0 ); + ASSERT( foundMax.woCompare( key_max ) == 0 ); + ASSERT( s3.getNextChunk( key_a30 , &foundMin , &foundMax ) ); + } + }; + + class DeletedTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << "true" ); + + BSONArray chunks = BSONArray(); + + ASSERT_EXCEPTION( ShardChunkManager s ( collection , chunks ) , UserException ); + } + }; + + class ClonePlusTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1 ) << + "unique" << false ); + // 1-chunk collection + // [10,0-20,0) + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_MinKey" << + "ns" << "test.foo" << + "min" << BSON( "a" << 10 << "b" << 0 ) << + "max" << BSON( "a" << 20 << "b" << 0 ) ) ); + + ShardChunkManager s ( collection , chunks ); + + // new chunk [20,0-30,0) + BSONObj min = BSON( "a" << 20 << "b" << 0 ); + BSONObj max = BSON( "a" << 30 << "b" << 0 ); + ShardChunkManagerPtr cloned( s.clonePlus( min , max , 1 /* TODO test version */ ) ); + + BSONObj k1 = BSON( "a" << 5 << "b" << 0 ); + ASSERT( ! cloned->belongsToMe( k1 ) ); + BSONObj k2 = BSON( "a" << 20 << "b" << 0 ); + ASSERT( cloned->belongsToMe( k2 ) ); + BSONObj k3 = BSON( "a" << 25 << "b" << 0 ); + ASSERT( cloned->belongsToMe( k3 ) ); + BSONObj k4 = BSON( "a" << 30 << "b" << 0 ); + ASSERT( ! cloned->belongsToMe( k4 ) ); + } + }; + + class ClonePlusExceptionTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1 ) << + "unique" << false ); + // 1-chunk collection + // [10,0-20,0) + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_MinKey" << + "ns" << "test.foo" << + "min" << BSON( "a" << 10 << "b" << 0 ) << + "max" << BSON( "a" << 20 << "b" << 0 ) ) ); + + ShardChunkManager s ( collection , chunks ); + + // [15,0-25,0) overlaps [10,0-20,0) + BSONObj min = BSON( "a" << 15 << "b" << 0 ); + BSONObj max = BSON( "a" << 25 << "b" << 0 ); + ASSERT_EXCEPTION( s.clonePlus ( min , max , 1 /* TODO test version */ ) , UserException ); + } + }; + + class CloneMinusTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "x.y" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1 ) << + "unique" << false ); + + // 2-chunk collection + // [10,0->20,0) , , [30,0->40,0) + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "x.y-a_10b_0" << + "ns" << "x.y" << + "min" << BSON( "a" << 10 << "b" << 0 ) << + "max" << BSON( "a" << 20 << "b" << 0 ) ) << + BSON( "_id" << "x.y-a_30b_0" << + "ns" << "x.y" << + "min" << BSON( "a" << 30 << "b" << 0 ) << + "max" << BSON( "a" << 40 << "b" << 0 ) ) ); + + ShardChunkManager s ( collection , chunks ); + + // deleting chunk [10,0-20,0) + BSONObj min = BSON( "a" << 10 << "b" << 0 ); + BSONObj max = BSON( "a" << 20 << "b" << 0 ); + ShardChunkManagerPtr cloned( s.cloneMinus( min , max , 1 /* TODO test version */ ) ); + + BSONObj k1 = BSON( "a" << 5 << "b" << 0 ); + ASSERT( ! cloned->belongsToMe( k1 ) ); + BSONObj k2 = BSON( "a" << 15 << "b" << 0 ); + ASSERT( ! cloned->belongsToMe( k2 ) ); + BSONObj k3 = BSON( "a" << 30 << "b" << 0 ); + ASSERT( cloned->belongsToMe( k3 ) ); + BSONObj k4 = BSON( "a" << 35 << "b" << 0 ); + ASSERT( cloned->belongsToMe( k4 ) ); + BSONObj k5 = BSON( "a" << 40 << "b" << 0 ); + ASSERT( ! 
cloned->belongsToMe( k5 ) ); + } + }; + + class CloneMinusExceptionTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "x.y" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1 ) << + "unique" << false ); + + // 2-chunk collection + // [10,0->20,0) , , [30,0->40,0) + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "x.y-a_10b_0" << + "ns" << "x.y" << + "min" << BSON( "a" << 10 << "b" << 0 ) << + "max" << BSON( "a" << 20 << "b" << 0 ) ) << + BSON( "_id" << "x.y-a_30b_0" << + "ns" << "x.y" << + "min" << BSON( "a" << 30 << "b" << 0 ) << + "max" << BSON( "a" << 40 << "b" << 0 ) ) ); + + ShardChunkManager s ( collection , chunks ); + + // deleting non-existing chunk [25,0-28,0) + BSONObj min1 = BSON( "a" << 25 << "b" << 0 ); + BSONObj max1 = BSON( "a" << 28 << "b" << 0 ); + ASSERT_EXCEPTION( s.cloneMinus( min1 , max1 , 1 /* TODO test version */ ) , UserException ); + + + // deletin an overlapping range (not exactly a chunk) [15,0-25,0) + BSONObj min2 = BSON( "a" << 15 << "b" << 0 ); + BSONObj max2 = BSON( "a" << 25 << "b" << 0 ); + ASSERT_EXCEPTION( s.cloneMinus( min2 , max2 , 1 /* TODO test version */ ) , UserException ); + } + }; + + class CloneSplitTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1 ) << + "unique" << false ); + // 1-chunk collection + // [10,0-20,0) + BSONObj min = BSON( "a" << 10 << "b" << 0 ); + BSONObj max = BSON( "a" << 20 << "b" << 0 ); + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_MinKey" + << "ns" << "test.foo" + << "min" << min + << "max" << max ) ); + + ShardChunkManager s ( collection , chunks ); + + BSONObj split1 = BSON( "a" << 15 << "b" << 0 ); + BSONObj split2 = BSON( "a" << 18 << "b" << 0 ); + vector splitKeys; + splitKeys.push_back( split1 ); + splitKeys.push_back( split2 ); + ShardChunkVersion version( 1 , 99 ); // first chunk 1|99 , second 1|100 + ShardChunkManagerPtr cloned( s.cloneSplit( min , max , splitKeys , version ) ); + + version.incMinor(); /* second chunk 1|100, first split point */ + version.incMinor(); /* third chunk 1|101, second split point */ + ASSERT_EQUALS( cloned->getVersion() , version /* 1|101 */ ); + ASSERT_EQUALS( s.getNumChunks() , 1u ); + ASSERT_EQUALS( cloned->getNumChunks() , 3u ); + ASSERT( cloned->belongsToMe( min ) ); + ASSERT( cloned->belongsToMe( split1 ) ); + ASSERT( cloned->belongsToMe( split2 ) ); + ASSERT( ! 
cloned->belongsToMe( max ) ); + } + }; + + class CloneSplitExceptionTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 << "b" << 1 ) << + "unique" << false ); + // 1-chunk collection + // [10,0-20,0) + BSONObj min = BSON( "a" << 10 << "b" << 0 ); + BSONObj max = BSON( "a" << 20 << "b" << 0 ); + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_MinKey" + << "ns" << "test.foo" + << "min" << min + << "max" << max ) ); + + ShardChunkManager s ( collection , chunks ); + + BSONObj badSplit = BSON( "a" << 5 << "b" << 0 ); + vector splitKeys; + splitKeys.push_back( badSplit ); + ASSERT_EXCEPTION( s.cloneSplit( min , max , splitKeys , ShardChunkVersion( 1 ) ) , UserException ); + + BSONObj badMax = BSON( "a" << 25 << "b" << 0 ); + BSONObj split = BSON( "a" << 15 << "b" << 0 ); + splitKeys.clear(); + splitKeys.push_back( split ); + ASSERT_EXCEPTION( s.cloneSplit( min , badMax, splitKeys , ShardChunkVersion( 1 ) ) , UserException ); + } + }; + + class EmptyShardTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 ) << + "unique" << false ); + + // no chunks on this shard + BSONArray chunks; + + // shard can have zero chunks for an existing collection + // version should be 0, though + ShardChunkManager s( collection , chunks ); + ASSERT_EQUALS( s.getVersion() , ShardChunkVersion( 0 ) ); + ASSERT_EQUALS( s.getNumChunks() , 0u ); + } + }; + + class LastChunkTests { + public: + void run() { + BSONObj collection = BSON( "_id" << "test.foo" << + "dropped" << false << + "key" << BSON( "a" << 1 ) << + "unique" << false ); + + // 1-chunk collection + // [10->20) + BSONArray chunks = BSON_ARRAY( BSON( "_id" << "test.foo-a_10" << + "ns" << "test.foo" << + "min" << BSON( "a" << 10 ) << + "max" << BSON( "a" << 20 ) ) ); + + ShardChunkManager s( collection , chunks ); + BSONObj min = BSON( "a" << 10 ); + BSONObj max = BSON( "a" << 20 ); + + // if we remove the only chunk, the only version accepted is 0 + ShardChunkVersion nonZero = 99; + ASSERT_EXCEPTION( s.cloneMinus( min , max , nonZero ) , UserException ); + ShardChunkManagerPtr empty( s.cloneMinus( min , max , 0 ) ); + ASSERT_EQUALS( empty->getVersion() , ShardChunkVersion( 0 ) ); + ASSERT_EQUALS( empty->getNumChunks() , 0u ); + BSONObj k = BSON( "a" << 15 << "b" << 0 ); + ASSERT( ! 
empty->belongsToMe( k ) ); + + // we can add a chunk to an empty manager + // version should be provided + ASSERT_EXCEPTION( empty->clonePlus( min , max , 0 ) , UserException ); + ShardChunkManagerPtr cloned( empty->clonePlus( min , max , nonZero ) ); + ASSERT_EQUALS( cloned->getVersion(), nonZero ); + ASSERT_EQUALS( cloned->getNumChunks() , 1u ); + ASSERT( cloned->belongsToMe( k ) ); + } + }; + + class ShardChunkManagerSuite : public Suite { + public: + ShardChunkManagerSuite() : Suite ( "shard_chunk_manager" ) {} + + void setupTests() { + add< BasicTests >(); + add< BasicCompoundTests >(); + add< RangeTests >(); + add< GetNextTests >(); + add< DeletedTests >(); + add< ClonePlusTests >(); + add< ClonePlusExceptionTests >(); + add< CloneMinusTests >(); + add< CloneMinusExceptionTests >(); + add< CloneSplitTests >(); + add< CloneSplitExceptionTests >(); + add< EmptyShardTests >(); + add< LastChunkTests >(); + } + } shardChunkManagerSuite; + +} // anonymous namespace diff --git a/dbtests/dbtests.cpp b/dbtests/dbtests.cpp index 195a1d1..8ede08d 100644 --- a/dbtests/dbtests.cpp +++ b/dbtests/dbtests.cpp @@ -1,4 +1,4 @@ -// dbtests.cpp : Runs db unit tests. +// #file dbtests.cpp : Runs db unit tests. // /** @@ -18,11 +18,9 @@ */ #include "pch.h" - #include "dbtests.h" int main( int argc, char** argv ) { static StaticObserver StaticObserver; return Suite::run(argc, argv, "/tmp/unittest"); } - diff --git a/dbtests/directclienttests.cpp b/dbtests/directclienttests.cpp new file mode 100644 index 0000000..204bf92 --- /dev/null +++ b/dbtests/directclienttests.cpp @@ -0,0 +1,80 @@ +/** @file directclienttests.cpp +*/ + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "pch.h" +#include "../db/query.h" +#include "../db/db.h" +#include "../db/instance.h" +#include "../db/json.h" +#include "../db/lasterror.h" +#include "../db/update.h" +#include "../util/timer.h" +#include "dbtests.h" + +namespace DirectClientTests { + + class ClientBase { + public: + // NOTE: Not bothering to backup the old error record. 
+ ClientBase() { mongo::lastError.reset( new LastError() ); } + virtual ~ClientBase() { } + protected: + static bool error() { + return !_client.getPrevError().getField( "err" ).isNull(); + } + DBDirectClient &client() const { return _client; } + private: + static DBDirectClient _client; + }; + DBDirectClient ClientBase::_client; + + const char *ns = "a.b"; + + class Capped : public ClientBase { + public: + virtual void run() { + for( int pass=0; pass < 3; pass++ ) { + client().createCollection(ns, 1024 * 1024, true, 999); + for( int j =0; j < pass*3; j++ ) + client().insert(ns, BSON("x" << j)); + + // test truncation of a capped collection + if( pass ) { + BSONObj info; + BSONObj cmd = BSON( "captrunc" << "b" << "n" << 1 << "inc" << true ); + cout << cmd.toString() << endl; + bool ok = client().runCommand("a", cmd, info); + cout << info.toString() << endl; + assert(ok); + } + + assert( client().dropCollection(ns) ); + } + } + }; + + class All : public Suite { + public: + All() : Suite( "directclient" ) { + } + void setupTests() { + add< Capped >(); + } + } myall; +} diff --git a/dbtests/framework.cpp b/dbtests/framework.cpp index e624211..c92c8d6 100644 --- a/dbtests/framework.cpp +++ b/dbtests/framework.cpp @@ -25,6 +25,7 @@ #include "framework.h" #include "../util/file_allocator.h" +#include "../db/dur.h" #ifndef _WIN32 #include @@ -34,7 +35,7 @@ namespace po = boost::program_options; namespace mongo { - + CmdLine cmdLine; namespace regression { @@ -46,21 +47,21 @@ namespace mongo { Result( string name ) : _name( name ) , _rc(0) , _tests(0) , _fails(0) , _asserts(0) { } - string toString(){ + string toString() { stringstream ss; char result[128]; sprintf(result, "%-20s | tests: %4d | fails: %4d | assert calls: %6d\n", _name.c_str(), _tests, _fails, _asserts); ss << result; - for ( list::iterator i=_messages.begin(); i!=_messages.end(); i++ ){ + for ( list::iterator i=_messages.begin(); i!=_messages.end(); i++ ) { ss << "\t" << *i << '\n'; } - + return ss.str(); } - int rc(){ + int rc() { return _rc; } @@ -77,7 +78,7 @@ namespace mongo { Result * Result::cur = 0; - Result * Suite::run( const string& filter ){ + Result * Suite::run( const string& filter ) { tlogLevel = -1; log(1) << "\t about to setupTests" << endl; @@ -90,53 +91,53 @@ namespace mongo { /* see note in SavedContext */ //writelock lk(""); - for ( list::iterator i=_tests.begin(); i!=_tests.end(); i++ ){ + for ( list::iterator i=_tests.begin(); i!=_tests.end(); i++ ) { TestCase * tc = *i; - if ( filter.size() && tc->getName().find( filter ) == string::npos ){ + if ( filter.size() && tc->getName().find( filter ) == string::npos ) { log(1) << "\t skipping test: " << tc->getName() << " because doesn't match filter" << endl; continue; } r->_tests++; - + bool passes = false; - + log(1) << "\t going to run test: " << tc->getName() << endl; - + stringstream err; err << tc->getName() << "\t"; - + try { tc->run(); passes = true; } - catch ( MyAssertionException * ae ){ + catch ( MyAssertionException * ae ) { err << ae->ss.str(); delete( ae ); } - catch ( std::exception& e ){ + catch ( std::exception& e ) { err << " exception: " << e.what(); } - catch ( int x ){ + catch ( int x ) { err << " caught int : " << x << endl; } - catch ( ... ){ + catch ( ... ) { cerr << "unknown exception in test: " << tc->getName() << endl; } - - if ( ! passes ){ + + if ( ! 
passes ) { string s = err.str(); log() << "FAIL: " << s << endl; r->_fails++; r->_messages.push_back( s ); - } + } } - + if ( r->_fails ) r->_rc = 17; log(1) << "\t DONE running tests" << endl; - + return r; } @@ -155,20 +156,23 @@ namespace mongo { po::positional_options_description positional_options; shell_options.add_options() - ("help,h", "show this usage information") - ("dbpath", po::value(&dbpathSpec)->default_value(default_dbpath), - "db data path for this test run. NOTE: the contents of this " - "directory will be overwritten if it already exists") - ("debug", "run tests with verbose output") - ("list,l", "list available test suites") - ("filter,f" , po::value() , "string substring filter on test name" ) - ("verbose,v", "verbose") - ("seed", po::value(&seed), "random number seed") - ; - + ("help,h", "show this usage information") + ("dbpath", po::value(&dbpathSpec)->default_value(default_dbpath), + "db data path for this test run. NOTE: the contents of this " + "directory will be overwritten if it already exists") + ("debug", "run tests with verbose output") + ("list,l", "list available test suites") + ("bigfiles", "use big datafiles instead of smallfiles which is the default") + ("filter,f" , po::value() , "string substring filter on test name" ) + ("verbose,v", "verbose") + ("dur", "enable journaling") + ("nodur", "disable journaling (currently the default)") + ("seed", po::value(&seed), "random number seed") + ; + hidden_options.add_options() - ("suites", po::value< vector >(), "test suites to run") - ; + ("suites", po::value< vector >(), "test suites to run") + ; positional_options.add("suites", -1); @@ -185,7 +189,8 @@ namespace mongo { positional(positional_options). style(command_line_style).run(), params); po::notify(params); - } catch (po::error &e) { + } + catch (po::error &e) { cout << "ERROR: " << e.what() << endl << endl; show_help_text(argv[0], shell_options); return EXIT_BADOPTIONS; @@ -196,6 +201,13 @@ namespace mongo { return EXIT_CLEAN; } + if( params.count("nodur") ) { + cmdLine.dur = false; + } + if( params.count("dur") || cmdLine.dur ) { + cmdLine.dur = true; + } + if (params.count("debug") || params.count("verbose") ) { logLevel = 1; } @@ -217,18 +229,25 @@ namespace mongo { } boost::filesystem::directory_iterator end_iter; for (boost::filesystem::directory_iterator dir_iter(p); - dir_iter != end_iter; ++dir_iter) { + dir_iter != end_iter; ++dir_iter) { boost::filesystem::remove_all(*dir_iter); } - } else { + } + else { boost::filesystem::create_directory(p); } string dbpathString = p.native_directory_string(); dbpath = dbpathString.c_str(); - + cmdLine.prealloc = false; + + // dbtest defaults to smallfiles cmdLine.smallfiles = true; + if( params.count("bigfiles") ) { + cmdLine.dur = true; + } + cmdLine.oplogSize = 10 * 1024 * 1024; Client::initThread("testsuite"); acquirePathLock(); @@ -236,32 +255,39 @@ namespace mongo { srand( (unsigned) seed ); printGitVersion(); printSysInfo(); - out() << "random seed: " << seed << endl; + log() << "random seed: " << seed << endl; - theFileAllocator().start(); + FileAllocator::get()->start(); vector suites; if (params.count("suites")) { suites = params["suites"].as< vector >(); } - + string filter = ""; - if ( params.count( "filter" ) ){ + if ( params.count( "filter" ) ) { filter = params["filter"].as(); } + dur::startup(); + + if( debug && cmdLine.dur ) { + cout << "setting cmdLine.durOptions=8" << endl; + cmdLine.durOptions = 8; + } + int ret = run(suites,filter); #if !defined(_WIN32) && !defined(__sunos__) flock( 
lockFile, LOCK_UN ); #endif - + cc().shutdown(); dbexit( (ExitCode)ret ); // so everything shuts down cleanly return ret; } - int Suite::run( vector suites , const string& filter ){ + int Suite::run( vector suites , const string& filter ) { for ( unsigned int i = 0; i < suites.size(); i++ ) { if ( _suites->find( suites[i] ) == _suites->end() ) { cout << "invalid test [" << suites[i] << "], use --list to see valid names" << endl; @@ -277,7 +303,7 @@ namespace mongo { list results; - for ( list::iterator i=torun.begin(); i!=torun.end(); i++ ){ + for ( list::iterator i=torun.begin(); i!=torun.end(); i++ ) { string name = *i; Suite * s = (*_suites)[name]; assert( s ); @@ -298,12 +324,12 @@ namespace mongo { int fails = 0; int asserts = 0; - for ( list::iterator i=results.begin(); i!=results.end(); i++ ){ + for ( list::iterator i=results.begin(); i!=results.end(); i++ ) { Result * r = *i; cout << r->toString(); if ( abs( r->rc() ) > abs( rc ) ) rc = r->rc(); - + tests += r->_tests; fails += r->_fails; asserts += r->_asserts; @@ -313,13 +339,13 @@ namespace mongo { totals._tests = tests; totals._fails = fails; totals._asserts = asserts; - + cout << totals.toString(); // includes endl return rc; } - void Suite::registerSuite( string name , Suite * s ){ + void Suite::registerSuite( string name , Suite * s ) { if ( ! _suites ) _suites = new map(); Suite*& m = (*_suites)[name]; @@ -327,37 +353,37 @@ namespace mongo { m = s; } - void assert_pass(){ + void assert_pass() { Result::cur->_asserts++; } - void assert_fail( const char * exp , const char * file , unsigned line ){ + void assert_fail( const char * exp , const char * file , unsigned line ) { Result::cur->_asserts++; - + MyAssertionException * e = new MyAssertionException(); e->ss << "ASSERT FAILED! " << file << ":" << line << endl; throw e; } - void fail( const char * exp , const char * file , unsigned line ){ + void fail( const char * exp , const char * file , unsigned line ) { assert(0); } - MyAssertionException * MyAsserts::getBase(){ + MyAssertionException * MyAsserts::getBase() { MyAssertionException * e = new MyAssertionException(); e->ss << _file << ":" << _line << " " << _aexp << " != " << _bexp << " "; return e; } - - void MyAsserts::printLocation(){ + + void MyAsserts::printLocation() { log() << _file << ":" << _line << " " << _aexp << " != " << _bexp << " "; } - void MyAsserts::_gotAssert(){ + void MyAsserts::_gotAssert() { Result::cur->_asserts++; } } - void setupSignals(){} + void setupSignals( bool inFork ) {} } diff --git a/dbtests/framework.h b/dbtests/framework.h index bec14a2..29ba58b 100644 --- a/dbtests/framework.h +++ b/dbtests/framework.h @@ -49,7 +49,7 @@ namespace mongo { class TestCase { public: - virtual ~TestCase(){} + virtual ~TestCase() {} virtual void run() = 0; virtual string getName() = 0; }; @@ -57,15 +57,15 @@ namespace mongo { template< class T > class TestHolderBase : public TestCase { public: - TestHolderBase(){} - virtual ~TestHolderBase(){} - virtual void run(){ + TestHolderBase() {} + virtual ~TestHolderBase() {} + virtual void run() { auto_ptr t; t.reset( create() ); t->run(); } virtual T * create() = 0; - virtual string getName(){ + virtual string getName() { return demangleName( typeid(T) ); } }; @@ -73,7 +73,7 @@ namespace mongo { template< class T > class TestHolder0 : public TestHolderBase { public: - virtual T * create(){ + virtual T * create() { return new T(); } }; @@ -81,8 +81,8 @@ namespace mongo { template< class T , typename A > class TestHolder1 : public TestHolderBase { public: - 
TestHolder1( const A& a ) : _a(a){} - virtual T * create(){ + TestHolder1( const A& a ) : _a(a) {} + virtual T * create() { return new T( _a ); } const A& _a; @@ -90,25 +90,25 @@ namespace mongo { class Suite { public: - Suite( string name ) : _name( name ){ + Suite( string name ) : _name( name ) { registerSuite( name , this ); _ran = 0; } virtual ~Suite() { - if ( _ran ){ + if ( _ran ) { DBDirectClient c; c.dropDatabase( "unittests" ); } } template - void add(){ + void add() { _tests.push_back( new TestHolder0() ); } template - void add( const A& a ){ + void add( const A& a ) { _tests.push_back( new TestHolder1(a) ); } @@ -137,7 +137,7 @@ namespace mongo { class MyAssertionException : boost::noncopyable { public: - MyAssertionException(){ + MyAssertionException() { ss << "assertion: "; } stringstream ss; @@ -148,32 +148,32 @@ namespace mongo { class MyAsserts { public: MyAsserts( const char * aexp , const char * bexp , const char * file , unsigned line ) - : _aexp( aexp ) , _bexp( bexp ) , _file( file ) , _line( line ){ + : _aexp( aexp ) , _bexp( bexp ) , _file( file ) , _line( line ) { } - + template - void ae( A a , B b ){ + void ae( A a , B b ) { _gotAssert(); if ( a == b ) return; - + printLocation(); - + MyAssertionException * e = getBase(); e->ss << a << " != " << b << endl; log() << e->ss.str() << endl; throw e; } - + template - void nae( A a , B b ){ + void nae( A a , B b ) { _gotAssert(); if ( a != b ) return; - + printLocation(); - + MyAssertionException * e = getBase(); e->ss << a << " == " << b << endl; log() << e->ss.str() << endl; @@ -182,13 +182,13 @@ namespace mongo { void printLocation(); - + private: - + void _gotAssert(); - + MyAssertionException * getBase(); - + string _aexp; string _bexp; string _file; diff --git a/dbtests/histogram_test.cpp b/dbtests/histogram_test.cpp index 5a8970d..e9cbb5b 100644 --- a/dbtests/histogram_test.cpp +++ b/dbtests/histogram_test.cpp @@ -25,9 +25,9 @@ namespace mongo { using mongo::Histogram; - class BoundariesInit{ + class BoundariesInit { public: - void run(){ + void run() { Histogram::Options opts; opts.numBuckets = 3; opts.bucketSize = 10; @@ -45,9 +45,9 @@ namespace mongo { } }; - class BoundariesExponential{ + class BoundariesExponential { public: - void run(){ + void run() { Histogram::Options opts; opts.numBuckets = 4; opts.bucketSize = 125; @@ -57,13 +57,13 @@ namespace mongo { ASSERT_EQUALS( h.getBoundary( 0 ), 125u ); ASSERT_EQUALS( h.getBoundary( 1 ), 250u ); ASSERT_EQUALS( h.getBoundary( 2 ), 500u ); - ASSERT_EQUALS( h.getBoundary( 3 ), numeric_limits::max() ); + ASSERT_EQUALS( h.getBoundary( 3 ), numeric_limits::max() ); } }; - class BoundariesFind{ + class BoundariesFind { public: - void run(){ + void run() { Histogram::Options opts; opts.numBuckets = 3; opts.bucketSize = 10; @@ -81,14 +81,14 @@ namespace mongo { class HistogramSuite : public Suite { public: - HistogramSuite() : Suite( "histogram" ){} + HistogramSuite() : Suite( "histogram" ) {} - void setupTests(){ + void setupTests() { add< BoundariesInit >(); add< BoundariesExponential >(); add< BoundariesFind >(); // TODO: complete the test suite - } + } } histogramSuite; } // anonymous namespace diff --git a/dbtests/jsobjtests.cpp b/dbtests/jsobjtests.cpp index ea7606f..6804d71 100644 --- a/dbtests/jsobjtests.cpp +++ b/dbtests/jsobjtests.cpp @@ -150,7 +150,7 @@ namespace JsobjTests { class MultiKeySortOrder : public Base { public: - void run(){ + void run() { ASSERT( BSON( "x" << "a" ).woCompare( BSON( "x" << "b" ) ) < 0 ); ASSERT( BSON( "x" << "b" ).woCompare( BSON( 
"x" << "a" ) ) > 0 ); @@ -255,9 +255,9 @@ namespace JsobjTests { } }; - class AsTempObj{ + class AsTempObj { public: - void run(){ + void run() { { BSONObjBuilder bb; bb << "a" << 1; @@ -267,7 +267,7 @@ namespace JsobjTests { ASSERT(tmp.hasField("a")); ASSERT(!tmp.hasField("b")); ASSERT(tmp == BSON("a" << 1)); - + bb << "b" << 2; BSONObj obj = bb.obj(); ASSERT_EQUALS(obj.objsize() , 4+(1+2+4)+(1+2+4)+1); @@ -285,7 +285,7 @@ namespace JsobjTests { ASSERT(tmp.hasField("a")); ASSERT(!tmp.hasField("b")); ASSERT(tmp == BSON("a" << BSON("$gt" << 1))); - + bb << "b" << LT << 2; BSONObj obj = bb.obj(); ASSERT(obj.objsize() == 4+(1+2+(4+1+4+4+1))+(1+2+(4+1+4+4+1))+1); @@ -293,7 +293,7 @@ namespace JsobjTests { ASSERT(obj.hasField("a")); ASSERT(obj.hasField("b")); ASSERT(obj == BSON("a" << BSON("$gt" << 1) - << "b" << BSON("$lt" << 2))); + << "b" << BSON("$lt" << 2))); } { BSONObjBuilder bb(32); @@ -304,10 +304,10 @@ namespace JsobjTests { ASSERT(tmp.hasField("a")); ASSERT(!tmp.hasField("b")); ASSERT(tmp == BSON("a" << 1)); - + //force a realloc BSONArrayBuilder arr; - for (int i=0; i < 10000; i++){ + for (int i=0; i < 10000; i++) { arr << i; } bb << "b" << arr.arr(); @@ -319,8 +319,8 @@ namespace JsobjTests { } }; - struct AppendIntOrLL{ - void run(){ + struct AppendIntOrLL { + void run() { const long long billion = 1000*1000*1000; BSONObjBuilder b; b.appendIntOrLL("i1", 1); @@ -362,16 +362,16 @@ namespace JsobjTests { }; struct AppendNumber { - void run(){ + void run() { BSONObjBuilder b; b.appendNumber( "a" , 5 ); b.appendNumber( "b" , 5.5 ); b.appendNumber( "c" , (1024LL*1024*1024)-1 ); b.appendNumber( "d" , (1024LL*1024*1024*1024)-1 ); b.appendNumber( "e" , 1024LL*1024*1024*1024*1024*1024 ); - + BSONObj o = b.obj(); - + ASSERT( o["a"].type() == NumberInt ); ASSERT( o["b"].type() == NumberDouble ); ASSERT( o["c"].type() == NumberInt ); @@ -380,7 +380,7 @@ namespace JsobjTests { } }; - + class ToStringArray { public: void run() { @@ -391,28 +391,28 @@ namespace JsobjTests { class ToStringNumber { public: - - void run(){ + + void run() { BSONObjBuilder b; b.append( "a" , (int)4 ); b.append( "b" , (double)5 ); b.append( "c" , (long long)6 ); - + b.append( "d" , 123.456789123456789123456789123456789 ); b.append( "e" , 123456789.123456789123456789123456789 ); b.append( "f" , 1234567891234567891234.56789123456789 ); b.append( "g" , -123.456 ); - + BSONObj x = b.obj(); ASSERT_EQUALS( "4", x["a"].toString( false , true ) ); ASSERT_EQUALS( "5.0", x["b"].toString( false , true ) ); - ASSERT_EQUALS( "6", x["c"].toString( false , true ) ); + ASSERT_EQUALS( "6", x["c"].toString( false , true ) ); ASSERT_EQUALS( "123.4567891234568" , x["d"].toString( false , true ) ); ASSERT_EQUALS( "123456789.1234568" , x["e"].toString( false , true ) ); // ASSERT_EQUALS( "1.234567891234568e+21" , x["f"].toString( false , true ) ); // windows and *nix are different - TODO, work around for test or not bother? 
- + ASSERT_EQUALS( "-123.456" , x["g"].toString( false , true ) ); } @@ -442,6 +442,46 @@ namespace JsobjTests { }; + class AppendAs { + public: + void run() { + BSONObjBuilder b; + { + BSONObj foo = BSON( "foo" << 1 ); + b.appendAs( foo.firstElement(), "bar" ); + } + ASSERT_EQUALS( BSON( "bar" << 1 ), b.done() ); + } + }; + + class ArrayAppendAs { + public: + void run() { + BSONArrayBuilder b; + { + BSONObj foo = BSON( "foo" << 1 ); + b.appendAs( foo.firstElement(), "3" ); + } + BSONArray a = b.arr(); + BSONObj expected = BSON( "3" << 1 ); + ASSERT_EQUALS( expected.firstElement(), a[ 3 ] ); + ASSERT_EQUALS( 4, a.nFields() ); + } + }; + + class GetField { + public: + void run(){ + BSONObj o = BSON( "a" << 1 << + "b" << BSON( "a" << 2 ) << + "c" << BSON_ARRAY( BSON( "a" << 3 ) << BSON( "a" << 4 ) ) ); + ASSERT_EQUALS( 1 , o.getFieldDotted( "a" ).numberInt() ); + ASSERT_EQUALS( 2 , o.getFieldDotted( "b.a" ).numberInt() ); + ASSERT_EQUALS( 3 , o.getFieldDotted( "c.0.a" ).numberInt() ); + ASSERT_EQUALS( 4 , o.getFieldDotted( "c.1.a" ).numberInt() ); + } + }; + namespace Validation { class Base { @@ -691,12 +731,12 @@ namespace JsobjTests { a.valid(); BSONObj b = fromjson( "{\"one\":2, \"two\":5, \"three\": {}," - "\"four\": { \"five\": { \"six\" : 11 } }," - "\"seven\": [ \"a\", \"bb\", \"ccc\", 5 ]," - "\"eight\": Dbref( \"rrr\", \"01234567890123456789aaaa\" )," - "\"_id\": ObjectId( \"deadbeefdeadbeefdeadbeef\" )," - "\"nine\": { \"$binary\": \"abc=\", \"$type\": \"00\" }," - "\"ten\": Date( 44 ), \"eleven\": /foooooo/i }" ); + "\"four\": { \"five\": { \"six\" : 11 } }," + "\"seven\": [ \"a\", \"bb\", \"ccc\", 5 ]," + "\"eight\": Dbref( \"rrr\", \"01234567890123456789aaaa\" )," + "\"_id\": ObjectId( \"deadbeefdeadbeefdeadbeef\" )," + "\"nine\": { \"$binary\": \"abc=\", \"$type\": \"00\" }," + "\"ten\": Date( 44 ), \"eleven\": /foooooo/i }" ); fuzz( b ); b.valid(); } @@ -723,7 +763,7 @@ namespace JsobjTests { class init1 { public: - void run(){ + void run() { OID a; OID b; @@ -736,7 +776,7 @@ namespace JsobjTests { class initParse1 { public: - void run(){ + void run() { OID a; OID b; @@ -750,7 +790,7 @@ namespace JsobjTests { class append { public: - void run(){ + void run() { BSONObjBuilder b; b.appendOID( "a" , 0 ); b.appendOID( "b" , 0 , false ); @@ -766,18 +806,18 @@ namespace JsobjTests { class increasing { public: - BSONObj g(){ + BSONObj g() { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); return b.obj(); } - void run(){ + void run() { BSONObj a = g(); BSONObj b = g(); - + ASSERT( a.woCompare( b ) < 0 ); - - // yes, there is a 1/1000 chance this won't increase time(0) + + // yes, there is a 1/1000 chance this won't increase time(0) // and therefore inaccurately say the function is behaving // buf if its broken, it will fail 999/1000, so i think that's good enough sleepsecs( 1 ); @@ -788,7 +828,7 @@ namespace JsobjTests { class ToDate { public: - void run(){ + void run() { OID oid; { @@ -812,7 +852,7 @@ namespace JsobjTests { class FromDate { public: - void run(){ + void run() { OID min, oid, max; Date_t now = jsTime(); oid.init(); // slight chance this has different time. If its a problem, can change. 
@@ -890,26 +930,26 @@ namespace JsobjTests { class LabelMulti : public LabelBase { BSONObj expected() { return BSON( "z" << "q" - << "a" << BSON( "$gt" << 1 << "$lte" << "x" ) - << "b" << BSON( "$ne" << 1 << "$ne" << "f" << "$ne" << 22.3 ) - << "x" << "p" ); + << "a" << BSON( "$gt" << 1 << "$lte" << "x" ) + << "b" << BSON( "$ne" << 1 << "$ne" << "f" << "$ne" << 22.3 ) + << "x" << "p" ); } BSONObj actual() { return BSON( "z" << "q" - << "a" << GT << 1 << LTE << "x" - << "b" << NE << 1 << NE << "f" << NE << 22.3 - << "x" << "p" ); + << "a" << GT << 1 << LTE << "x" + << "b" << NE << 1 << NE << "f" << NE << 22.3 + << "x" << "p" ); } }; class LabelishOr : public LabelBase { BSONObj expected() { return BSON( "$or" << BSON_ARRAY( - BSON("a" << BSON( "$gt" << 1 << "$lte" << "x" )) - << BSON("b" << BSON( "$ne" << 1 << "$ne" << "f" << "$ne" << 22.3 )) - << BSON("x" << "p" ))); + BSON("a" << BSON( "$gt" << 1 << "$lte" << "x" )) + << BSON("b" << BSON( "$ne" << 1 << "$ne" << "f" << "$ne" << 22.3 )) + << BSON("x" << "p" ))); } BSONObj actual() { - return OR( BSON( "a" << GT << 1 << LTE << "x"), + return OR( BSON( "a" << GT << 1 << LTE << "x"), BSON( "b" << NE << 1 << NE << "f" << NE << 22.3), BSON( "x" << "p" ) ); } @@ -925,7 +965,7 @@ namespace JsobjTests { class ElementAppend { public: - void run(){ + void run() { BSONObj a = BSON( "a" << 17 ); BSONObj b = BSON( "b" << a["a"] ); ASSERT_EQUALS( NumberInt , a["a"].type() ); @@ -998,23 +1038,39 @@ namespace JsobjTests { } }; + class MinMaxKeyBuilder { + public: + void run() { + BSONObj min = BSON( "a" << MINKEY ); + BSONObj max = BSON( "b" << MAXKEY ); + + ASSERT( min.valid() ); + ASSERT( max.valid() ); + + BSONElement minElement = min["a"]; + BSONElement maxElement = max["b"]; + ASSERT( minElement.type() == MinKey ); + ASSERT( maxElement.type() == MaxKey ); + } + }; + class MinMaxElementTest { public: - BSONObj min( int t ){ + BSONObj min( int t ) { BSONObjBuilder b; b.appendMinForType( "a" , t ); return b.obj(); } - BSONObj max( int t ){ + BSONObj max( int t ) { BSONObjBuilder b; b.appendMaxForType( "a" , t ); return b.obj(); } - void run(){ - for ( int t=1; t i = sorter.iterator(); int num=0; - while ( i->more() ){ + while ( i->more() ) { pair p = i->next(); if ( num == 0 ) assert( p.first["x"].number() == 2 ); - else if ( num <= 2 ){ + else if ( num <= 2 ) { assert( p.first["x"].number() == 5 ); } else if ( num == 3 ) @@ -1117,15 +1170,15 @@ namespace JsobjTests { ASSERT( 0 ); num++; } - - + + ASSERT_EQUALS( 0 , sorter.numFiles() ); } }; class Basic2 { public: - void run(){ + void run() { BSONObjExternalSorter sorter( BSONObj() , 10 ); sorter.add( BSON( "x" << 10 ) , 5 , 11 ); sorter.add( BSON( "x" << 2 ) , 3 , 1 ); @@ -1133,18 +1186,18 @@ namespace JsobjTests { sorter.add( BSON( "x" << 5 ) , 7 , 1 ); sorter.sort(); - + auto_ptr i = sorter.iterator(); int num=0; - while ( i->more() ){ + while ( i->more() ) { pair p = i->next(); - if ( num == 0 ){ + if ( num == 0 ) { assert( p.first["x"].number() == 2 ); ASSERT_EQUALS( p.second.toString() , "3:1" ); } else if ( num <= 2 ) assert( p.first["x"].number() == 5 ); - else if ( num == 3 ){ + else if ( num == 3 ) { assert( p.first["x"].number() == 10 ); ASSERT_EQUALS( p.second.toString() , "5:b" ); } @@ -1158,7 +1211,7 @@ namespace JsobjTests { class Basic3 { public: - void run(){ + void run() { BSONObjExternalSorter sorter( BSONObj() , 10 ); sorter.sort(); @@ -1171,23 +1224,23 @@ namespace JsobjTests { class ByDiskLock { public: - void run(){ + void run() { BSONObjExternalSorter sorter; sorter.add( BSON( "x" 
<< 10 ) , 5 , 4); sorter.add( BSON( "x" << 2 ) , 3 , 0 ); sorter.add( BSON( "x" << 5 ) , 6 , 2 ); sorter.add( BSON( "x" << 5 ) , 7 , 3 ); sorter.add( BSON( "x" << 5 ) , 2 , 1 ); - + sorter.sort(); auto_ptr i = sorter.iterator(); int num=0; - while ( i->more() ){ + while ( i->more() ) { pair p = i->next(); if ( num == 0 ) assert( p.first["x"].number() == 2 ); - else if ( num <= 3 ){ + else if ( num <= 3 ) { assert( p.first["x"].number() == 5 ); } else if ( num == 4 ) @@ -1205,9 +1258,9 @@ namespace JsobjTests { class Big1 { public: - void run(){ + void run() { BSONObjExternalSorter sorter( BSONObj() , 2000 ); - for ( int i=0; i<10000; i++ ){ + for ( int i=0; i<10000; i++ ) { sorter.add( BSON( "x" << rand() % 10000 ) , 5 , i ); } @@ -1216,7 +1269,7 @@ namespace JsobjTests { auto_ptr i = sorter.iterator(); int num=0; double prev = 0; - while ( i->more() ){ + while ( i->more() ) { pair p = i->next(); num++; double cur = p.first["x"].number(); @@ -1226,22 +1279,22 @@ namespace JsobjTests { assert( num == 10000 ); } }; - + class Big2 { public: - void run(){ + void run() { const int total = 100000; BSONObjExternalSorter sorter( BSONObj() , total * 2 ); - for ( int i=0; i i = sorter.iterator(); int num=0; double prev = 0; - while ( i->more() ){ + while ( i->more() ) { pair p = i->next(); num++; double cur = p.first["x"].number(); @@ -1255,21 +1308,21 @@ namespace JsobjTests { class D1 { public: - void run(){ - + void run() { + BSONObjBuilder b; b.appendNull(""); BSONObj x = b.obj(); - + BSONObjExternalSorter sorter; sorter.add(x, DiskLoc(3,7)); sorter.add(x, DiskLoc(4,7)); sorter.add(x, DiskLoc(2,7)); sorter.add(x, DiskLoc(1,7)); sorter.add(x, DiskLoc(3,77)); - + sorter.sort(); - + auto_ptr i = sorter.iterator(); while( i->more() ) { BSONObjExternalSorter::Data d = i->next(); @@ -1280,14 +1333,14 @@ namespace JsobjTests { } }; } - + class CompatBSON { public: - + #define JSONBSONTEST(j,s,m) ASSERT_EQUALS( fromjson( j ).objsize() , s ); ASSERT_EQUALS( fromjson( j ).md5() , m ); #define RAWBSONTEST(j,s,m) ASSERT_EQUALS( j.objsize() , s ); ASSERT_EQUALS( j.md5() , m ); - void run(){ + void run() { JSONBSONTEST( "{ 'x' : true }" , 9 , "6fe24623e4efc5cf07f027f9c66b5456" ); JSONBSONTEST( "{ 'x' : null }" , 8 , "12d43430ff6729af501faf0638e68888" ); @@ -1297,20 +1350,20 @@ namespace JsobjTests { JSONBSONTEST( "{ 'a' : { 'b' : 1.1 } }" , 24 , "31887a4b9d55cd9f17752d6a8a45d51f" ); JSONBSONTEST( "{ 'x' : 5.2 , 'y' : { 'a' : 'eliot' , b : true } , 'z' : null }" , 44 , "b3de8a0739ab329e7aea138d87235205" ); JSONBSONTEST( "{ 'x' : 5.2 , 'y' : [ 'a' , 'eliot' , 'b' , true ] , 'z' : null }" , 62 , "cb7bad5697714ba0cbf51d113b6a0ee8" ); - + RAWBSONTEST( BSON( "x" << 4 ) , 12 , "d1ed8dbf79b78fa215e2ded74548d89d" ); - + } }; - + class CompareDottedFieldNamesTest { public: - void t( FieldCompareResult res , const string& l , const string& r ){ + void t( FieldCompareResult res , const string& l , const string& r ) { ASSERT_EQUALS( res , compareDottedFieldNames( l , r ) ); ASSERT_EQUALS( -1 * res , compareDottedFieldNames( r , l ) ); } - - void run(){ + + void run() { t( SAME , "x" , "x" ); t( SAME , "x.a" , "x.a" ); t( LEFT_BEFORE , "a" , "b" ); @@ -1320,13 +1373,13 @@ namespace JsobjTests { } }; - struct NestedDottedConversions{ - void t(const BSONObj& nest, const BSONObj& dot){ + struct NestedDottedConversions { + void t(const BSONObj& nest, const BSONObj& dot) { ASSERT_EQUALS( nested2dotted(nest), dot); ASSERT_EQUALS( nest, dotted2nested(dot)); } - void run(){ + void run() { t( BSON("a" << BSON("b" << 1)), BSON("a.b" 
<< 1) ); t( BSON("a" << BSON("b" << 1 << "c" << 1)), BSON("a.b" << 1 << "a.c" << 1) ); t( BSON("a" << BSON("b" << 1 << "c" << 1) << "d" << 1), BSON("a.b" << 1 << "a.c" << 1 << "d" << 1) ); @@ -1334,8 +1387,8 @@ namespace JsobjTests { } }; - struct BSONArrayBuilderTest{ - void run(){ + struct BSONArrayBuilderTest { + void run() { int i = 0; BSONObjBuilder objb; BSONArrayBuilder arrb; @@ -1374,13 +1427,13 @@ namespace JsobjTests { ASSERT_EQUALS(o["arr2"].type(), Array); } }; - - struct ArrayMacroTest{ - void run(){ + + struct ArrayMacroTest { + void run() { BSONArray arr = BSON_ARRAY( "hello" << 1 << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) ); BSONObj obj = BSON( "0" << "hello" - << "1" << 1 - << "2" << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) ); + << "1" << 1 + << "2" << BSON( "foo" << BSON_ARRAY( "bar" << "baz" << "qux" ) ) ); ASSERT_EQUALS(arr, obj); ASSERT_EQUALS(arr["2"].type(), Object); @@ -1390,25 +1443,25 @@ namespace JsobjTests { class NumberParsing { public: - void run(){ + void run() { BSONObjBuilder a; BSONObjBuilder b; a.append( "a" , (int)1 ); ASSERT( b.appendAsNumber( "a" , "1" ) ); - + a.append( "b" , 1.1 ); ASSERT( b.appendAsNumber( "b" , "1.1" ) ); a.append( "c" , (int)-1 ); ASSERT( b.appendAsNumber( "c" , "-1" ) ); - + a.append( "d" , -1.1 ); ASSERT( b.appendAsNumber( "d" , "-1.1" ) ); a.append( "e" , (long long)32131231231232313LL ); ASSERT( b.appendAsNumber( "e" , "32131231231232313" ) ); - + ASSERT( ! b.appendAsNumber( "f" , "zz" ) ); ASSERT( ! b.appendAsNumber( "f" , "5zz" ) ); ASSERT( ! b.appendAsNumber( "f" , "zz5" ) ); @@ -1416,10 +1469,10 @@ namespace JsobjTests { ASSERT_EQUALS( a.obj() , b.obj() ); } }; - + class bson2settest { public: - void run(){ + void run() { BSONObj o = BSON( "z" << 1 << "a" << 2 << "m" << 3 << "c" << 4 ); BSONObjIteratorSorted i( o ); stringstream ss; @@ -1429,7 +1482,7 @@ namespace JsobjTests { { Timer t; - for ( int i=0; i<10000; i++ ){ + for ( int i=0; i<10000; i++ ) { BSONObjIteratorSorted j( o ); int l = 0; while ( j.more() ) @@ -1444,22 +1497,22 @@ namespace JsobjTests { class checkForStorageTests { public: - - void good( string s ){ + + void good( string s ) { BSONObj o = fromjson( s ); if ( o.okForStorage() ) return; throw UserException( 12528 , (string)"should be ok for storage:" + s ); } - void bad( string s ){ + void bad( string s ) { BSONObj o = fromjson( s ); if ( ! 
o.okForStorage() ) return; throw UserException( 12529 , (string)"should NOT be ok for storage:" + s ); } - void run(){ + void run() { good( "{x:1}" ); bad( "{'x.y':1}" ); @@ -1470,7 +1523,7 @@ namespace JsobjTests { class InvalidIDFind { public: - void run(){ + void run() { BSONObj x = BSON( "_id" << 5 << "t" << 2 ); { char * crap = (char*)malloc( x.objsize() ); @@ -1479,7 +1532,7 @@ namespace JsobjTests { ASSERT_EQUALS( x , y ); free( crap ); } - + { char * crap = (char*)malloc( x.objsize() ); memcpy( crap , x.objdata() , x.objsize() ); @@ -1490,21 +1543,21 @@ namespace JsobjTests { BSONObj y( crap , false ); state = 1; } - catch ( std::exception& e ){ + catch ( std::exception& e ) { state = 2; ASSERT( strstr( e.what() , "_id: 5" ) > 0 ); } free( crap ); ASSERT_EQUALS( 2 , state ); } - - + + } }; class ElementSetTest { public: - void run(){ + void run() { BSONObj x = BSON( "a" << 1 << "b" << 1 << "c" << 2 ); BSONElement a = x["a"]; BSONElement b = x["b"]; @@ -1512,7 +1565,7 @@ namespace JsobjTests { cout << "c: " << c << endl; ASSERT( a.woCompare( b ) != 0 ); ASSERT( a.woCompare( b , false ) == 0 ); - + BSONElementSet s; s.insert( a ); ASSERT_EQUALS( 1U , s.size() ); @@ -1523,8 +1576,8 @@ namespace JsobjTests { ASSERT( s.find( a ) != s.end() ); ASSERT( s.find( b ) != s.end() ); ASSERT( s.find( c ) == s.end() ); - - + + s.insert( c ); ASSERT_EQUALS( 2U , s.size() ); @@ -1536,12 +1589,22 @@ namespace JsobjTests { ASSERT( s.count( a ) ); ASSERT( s.count( b ) ); ASSERT( s.count( c ) ); + + { + BSONElementSet x; + BSONObj o = fromjson( "{ 'a' : [ 1 , 2 , 1 ] }" ); + BSONObjIterator i( o["a"].embeddedObjectUserCheck() ); + while ( i.more() ) { + x.insert( i.next() ); + } + ASSERT_EQUALS( 2U , x.size() ); + } } }; class EmbeddedNumbers { public: - void run(){ + void run() { BSONObj x = BSON( "a" << BSON( "b" << 1 ) ); BSONObj y = BSON( "a" << BSON( "b" << 1.0 ) ); ASSERT_EQUALS( x , y ); @@ -1551,12 +1614,12 @@ namespace JsobjTests { class BuilderPartialItearte { public: - void run(){ + void run() { { BSONObjBuilder b; b.append( "x" , 1 ); b.append( "y" , 2 ); - + BSONObjIterator i = b.iterator(); ASSERT( i.more() ); ASSERT_EQUALS( 1 , i.next().numberInt() ); @@ -1577,13 +1640,13 @@ namespace JsobjTests { ASSERT_EQUALS( BSON( "x" << 1 << "y" << 2 << "z" << 3 ) , b.obj() ); } - + } }; class BSONFieldTests { public: - void run(){ + void run() { { BSONField x("x"); BSONObj o = BSON( x << 5 ); @@ -1610,11 +1673,11 @@ namespace JsobjTests { class BSONForEachTest { public: - void run(){ + void run() { BSONObj obj = BSON("a" << 1 << "a" << 2 << "a" << 3); - + int count = 0; - BSONForEach(e, obj){ + BSONForEach(e, obj) { ASSERT_EQUALS( e.fieldName() , string("a") ); count += e.Int(); } @@ -1625,7 +1688,7 @@ namespace JsobjTests { class StringDataTest { public: - void run(){ + void run() { StringData a( string( "aaa" ) ); ASSERT_EQUALS( 3u , a.size() ); @@ -1645,8 +1708,8 @@ namespace JsobjTests { class CompareOps { public: - void run(){ - + void run() { + BSONObj a = BSON("a"<<1); BSONObj b = BSON("a"<<1); BSONObj c = BSON("a"<<2); @@ -1657,7 +1720,7 @@ namespace JsobjTests { ASSERT( ! ( a < b ) ); ASSERT( a <= b ); ASSERT( a < c ); - + ASSERT( f > d ); ASSERT( f >= e ); ASSERT( ! 
( f > e ) ); @@ -1666,12 +1729,12 @@ namespace JsobjTests { class HashingTest { public: - void run(){ + void run() { int N = 100000; - BSONObj x = BSON( "name" << "eliot was here" + BSONObj x = BSON( "name" << "eliot was here" << "x" << 5 << "asdasdasdas" << "asldkasldjasldjasldjlasjdlasjdlasdasdasdasdasdasdasd" ); - + { Timer t; for ( int i=0; i(); add< BSONElementBasic >(); add< BSONObjTests::Create >(); @@ -1724,6 +1787,10 @@ namespace JsobjTests { add< BSONObjTests::ToStringArray >(); add< BSONObjTests::ToStringNumber >(); add< BSONObjTests::NullString >(); + add< BSONObjTests::AppendAs >(); + add< BSONObjTests::ArrayAppendAs >(); + add< BSONObjTests::GetField >(); + add< BSONObjTests::Validation::BadType >(); add< BSONObjTests::Validation::EooBeforeEnd >(); add< BSONObjTests::Validation::Undefined >(); @@ -1771,16 +1838,13 @@ namespace JsobjTests { add< ValueStreamTests::LabelishOr >(); add< ValueStreamTests::Unallowed >(); add< ValueStreamTests::ElementAppend >(); - add< SubObjectBuilder >(); - add< DateBuilder >(); - add< DateNowBuilder >(); - add< TimeTBuilder >(); add< ValueStreamTests::Unallowed >(); add< ValueStreamTests::ElementAppend >(); add< SubObjectBuilder >(); add< DateBuilder >(); add< DateNowBuilder >(); add< TimeTBuilder >(); + add< MinMaxKeyBuilder >(); add< MinMaxElementTest >(); add< ComparatorTest >(); add< ExtractFieldsTest >(); @@ -1810,6 +1874,6 @@ namespace JsobjTests { add< HashingTest >(); } } myall; - + } // namespace JsobjTests diff --git a/dbtests/jsontests.cpp b/dbtests/jsontests.cpp index 990558e..b630523 100644 --- a/dbtests/jsontests.cpp +++ b/dbtests/jsontests.cpp @@ -205,11 +205,11 @@ namespace JsonTests { b.appendDBRef( "a", "namespace", oid ); BSONObj built = b.done(); ASSERT_EQUALS( "{ \"a\" : { \"$ref\" : \"namespace\", \"$id\" : \"ffffffffffffffffffffffff\" } }", - built.jsonString( Strict ) ); + built.jsonString( Strict ) ); ASSERT_EQUALS( "{ \"a\" : { \"$ref\" : \"namespace\", \"$id\" : \"ffffffffffffffffffffffff\" } }", - built.jsonString( JS ) ); + built.jsonString( JS ) ); ASSERT_EQUALS( "{ \"a\" : Dbref( \"namespace\", \"ffffffffffffffffffffffff\" ) }", - built.jsonString( TenGen ) ); + built.jsonString( TenGen ) ); } }; @@ -221,7 +221,7 @@ namespace JsonTests { BSONObjBuilder b; b.appendDBRef( "a", "namespace", oid ); ASSERT_EQUALS( "{ \"a\" : { \"$ref\" : \"namespace\", \"$id\" : \"000000000000000000000000\" } }", - b.done().jsonString( Strict ) ); + b.done().jsonString( Strict ) ); } }; @@ -234,9 +234,9 @@ namespace JsonTests { b.appendOID( "a", &oid ); BSONObj built = b.done(); ASSERT_EQUALS( "{ \"a\" : { \"$oid\" : \"ffffffffffffffffffffffff\" } }", - built.jsonString( Strict ) ); + built.jsonString( Strict ) ); ASSERT_EQUALS( "{ \"a\" : ObjectId( \"ffffffffffffffffffffffff\" ) }", - built.jsonString( TenGen ) ); + built.jsonString( TenGen ) ); } }; @@ -258,12 +258,12 @@ namespace JsonTests { BSONObjBuilder c; c.appendBinData( "a", 2, BinDataGeneral, z ); ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YWI=\", \"$type\" : \"00\" } }", - c.done().jsonString( Strict ) ); + c.done().jsonString( Strict ) ); BSONObjBuilder d; d.appendBinData( "a", 1, BinDataGeneral, z ); ASSERT_EQUALS( "{ \"a\" : { \"$binary\" : \"YQ==\", \"$type\" : \"00\" } }", - d.done().jsonString( Strict ) ); + d.done().jsonString( Strict ) ); } }; @@ -295,7 +295,7 @@ namespace JsonTests { b.appendRegex( "a", "abc", "i" ); BSONObj built = b.done(); ASSERT_EQUALS( "{ \"a\" : { \"$regex\" : \"abc\", \"$options\" : \"i\" } }", - built.jsonString( Strict ) ); + 
built.jsonString( Strict ) ); ASSERT_EQUALS( "{ \"a\" : /abc/i }", built.jsonString( TenGen ) ); ASSERT_EQUALS( "{ \"a\" : /abc/i }", built.jsonString( JS ) ); } @@ -308,7 +308,7 @@ namespace JsonTests { b.appendRegex( "a", "/\"", "i" ); BSONObj built = b.done(); ASSERT_EQUALS( "{ \"a\" : { \"$regex\" : \"/\\\"\", \"$options\" : \"i\" } }", - built.jsonString( Strict ) ); + built.jsonString( Strict ) ); ASSERT_EQUALS( "{ \"a\" : /\\/\\\"/i }", built.jsonString( TenGen ) ); ASSERT_EQUALS( "{ \"a\" : /\\/\\\"/i }", built.jsonString( JS ) ); } @@ -321,7 +321,7 @@ namespace JsonTests { b.appendRegex( "a", "z", "abcgimx" ); BSONObj built = b.done(); ASSERT_EQUALS( "{ \"a\" : { \"$regex\" : \"z\", \"$options\" : \"abcgimx\" } }", - built.jsonString( Strict ) ); + built.jsonString( Strict ) ); ASSERT_EQUALS( "{ \"a\" : /z/gim }", built.jsonString( TenGen ) ); ASSERT_EQUALS( "{ \"a\" : /z/gim }", built.jsonString( JS ) ); } @@ -329,17 +329,17 @@ namespace JsonTests { class CodeTests { public: - void run(){ + void run() { BSONObjBuilder b; b.appendCode( "x" , "function(){ return 1; }" ); BSONObj o = b.obj(); ASSERT_EQUALS( "{ \"x\" : function(){ return 1; } }" , o.jsonString() ); } }; - + class TimestampTests { public: - void run(){ + void run() { BSONObjBuilder b; b.appendTimestamp( "x" , 4000 , 10 ); BSONObj o = b.obj(); @@ -349,7 +349,7 @@ namespace JsonTests { class NullString { public: - void run(){ + void run() { BSONObjBuilder b; b.append( "x" , "a\0b" , 4 ); BSONObj o = b.obj(); @@ -359,7 +359,7 @@ namespace JsonTests { class AllTypes { public: - void run(){ + void run() { OID oid; oid.init(); @@ -384,12 +384,12 @@ namespace JsonTests { b.appendTimestamp( "s" , 123123123123123LL ); b.append( "t" , 12321312312LL ); b.appendMaxKey( "u" ); - + BSONObj o = b.obj(); cout << o.jsonString() << endl; } }; - + } // namespace JsonStringTests namespace FromJsonTests { @@ -504,7 +504,7 @@ namespace JsonTests { virtual ~FancyNumber() {} void run() { ASSERT_EQUALS( int( 1000000 * bson().firstElement().number() ), - int( 1000000 * fromjson( json() ).firstElement().number() ) ); + int( 1000000 * fromjson( json() ).firstElement().number() ) ); } virtual BSONObj bson() const { BSONObjBuilder b; @@ -978,8 +978,8 @@ namespace JsonTests { }; class NumericTypes : public Base { - public: - void run(){ + public: + void run() { Base::run(); BSONObj o = fromjson(json()); @@ -990,12 +990,12 @@ namespace JsonTests { ASSERT(o["long"].numberLong() == 9223372036854775807ll); } - + virtual BSONObj bson() const { return BSON( "int" << 123 - << "long" << 9223372036854775807ll // 2**63 - 1 - << "double" << 3.14 - ); + << "long" << 9223372036854775807ll // 2**63 - 1 + << "double" << 3.14 + ); } virtual string json() const { return "{ \"int\": 123, \"long\": 9223372036854775807, \"double\": 3.14 }"; @@ -1003,8 +1003,8 @@ namespace JsonTests { }; class NegativeNumericTypes : public Base { - public: - void run(){ + public: + void run() { Base::run(); BSONObj o = fromjson(json()); @@ -1015,12 +1015,12 @@ namespace JsonTests { ASSERT(o["long"].numberLong() == -9223372036854775807ll); } - + virtual BSONObj bson() const { return BSON( "int" << -123 - << "long" << -9223372036854775807ll // -1 * (2**63 - 1) - << "double" << -3.14 - ); + << "long" << -9223372036854775807ll // -1 * (2**63 - 1) + << "double" << -3.14 + ); } virtual string json() const { return "{ \"int\": -123, \"long\": -9223372036854775807, \"double\": -3.14 }"; @@ -1029,8 +1029,8 @@ namespace JsonTests { class EmbeddedDatesBase : public Base { public: - - virtual void 
run(){ + + virtual void run() { BSONObj o = fromjson( json() ); ASSERT_EQUALS( 3 , (o["time.valid"].type()) ); BSONObj e = o["time.valid"].embeddedObjectUserCheck(); @@ -1038,7 +1038,7 @@ namespace JsonTests { ASSERT_EQUALS( 9 , e["$lt"].type() ); Base::run(); } - + BSONObj bson() const { BSONObjBuilder e; e.appendDate( "$gt" , 1257829200000LL ); @@ -1082,10 +1082,10 @@ namespace JsonTests { class All : public Suite { public: - All() : Suite( "json" ){ + All() : Suite( "json" ) { } - void setupTests(){ + void setupTests() { add< JsonStringTests::Empty >(); add< JsonStringTests::SingleStringMember >(); add< JsonStringTests::EscapedCharacters >(); @@ -1116,7 +1116,7 @@ namespace JsonTests { add< JsonStringTests::TimestampTests >(); add< JsonStringTests::NullString >(); add< JsonStringTests::AllTypes >(); - + add< FromJsonTests::Empty >(); add< FromJsonTests::EmptyWithSpace >(); add< FromJsonTests::SingleString >(); diff --git a/dbtests/jstests.cpp b/dbtests/jstests.cpp index a9d9db8..c33b200 100644 --- a/dbtests/jstests.cpp +++ b/dbtests/jstests.cpp @@ -1,4 +1,4 @@ -// javajstests.cpp +// javajstests.cpp // /** @@ -22,15 +22,16 @@ #include "../pch.h" #include "../scripting/engine.h" +#include "../util/timer.h" #include "dbtests.h" namespace mongo { - bool dbEval(const char *ns, BSONObj& cmd, BSONObjBuilder& result, string& errmsg); + bool dbEval(const string& dbName , BSONObj& cmd, BSONObjBuilder& result, string& errmsg); } // namespace mongo namespace JSTests { - + class Fundamental { public: void run() { @@ -42,26 +43,26 @@ namespace JSTests { globalScriptEngine->runTest(); } }; - + class BasicScope { public: - void run(){ + void run() { auto_ptr s; s.reset( globalScriptEngine->newScope() ); s->setNumber( "x" , 5 ); ASSERT( 5 == s->getNumber( "x" ) ); - + s->setNumber( "x" , 1.67 ); ASSERT( 1.67 == s->getNumber( "x" ) ); s->setString( "s" , "eliot was here" ); ASSERT( "eliot was here" == s->getString( "s" ) ); - + s->setBoolean( "b" , true ); ASSERT( s->getBoolean( "b" ) ); - if ( 0 ){ + if ( 0 ) { s->setBoolean( "b" , false ); ASSERT( ! s->getBoolean( "b" ) ); } @@ -70,12 +71,12 @@ namespace JSTests { class ResetScope { public: - void run(){ + void run() { // Not worrying about this for now SERVER-446. /* auto_ptr s; s.reset( globalScriptEngine->newScope() ); - + s->setBoolean( "x" , true ); ASSERT( s->getBoolean( "x" ) ); @@ -84,36 +85,36 @@ namespace JSTests { */ } }; - + class FalseTests { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); ASSERT( ! s->getBoolean( "x" ) ); - + s->setString( "z" , "" ); ASSERT( ! s->getBoolean( "z" ) ); - - + + delete s ; } }; class SimpleFunctions { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); s->invoke( "x=5;" , BSONObj() ); ASSERT( 5 == s->getNumber( "x" ) ); - + s->invoke( "return 17;" , BSONObj() ); ASSERT( 17 == s->getNumber( "return" ) ); - + s->invoke( "function(){ return 17; }" , BSONObj() ); ASSERT( 17 == s->getNumber( "return" ) ); - + s->setNumber( "x" , 1.76 ); s->invoke( "return x == 1.76; " , BSONObj() ); ASSERT( s->getBoolean( "return" ) ); @@ -121,7 +122,7 @@ namespace JSTests { s->setNumber( "x" , 1.76 ); s->invoke( "return x == 1.79; " , BSONObj() ); ASSERT( ! 
s->getBoolean( "return" ) ); - + s->invoke( "function( z ){ return 5 + z; }" , BSON( "" << 11 ) ); ASSERT_EQUALS( 16 , s->getNumber( "return" ) ); @@ -131,9 +132,9 @@ namespace JSTests { class ObjectMapping { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - + BSONObj o = BSON( "x" << 17 << "y" << "eliot" << "z" << "sara" ); s->setObject( "blah" , o ); @@ -154,7 +155,7 @@ namespace JSTests { s->invoke( "this.z == 'asara';" , BSONObj() ); ASSERT_EQUALS( false , s->getBoolean( "return" ) ); - + s->invoke( "return this.x == 17;" , BSONObj() ); ASSERT_EQUALS( true , s->getBoolean( "return" ) ); @@ -169,28 +170,28 @@ namespace JSTests { s->invoke( "function (){ return this.x == 17; }" , BSONObj() ); ASSERT_EQUALS( true , s->getBoolean( "return" ) ); - + s->invoke( "function z(){ return this.x == 18; }" , BSONObj() ); ASSERT_EQUALS( false , s->getBoolean( "return" ) ); s->invoke( "function (){ this.x == 17; }" , BSONObj() ); ASSERT_EQUALS( false , s->getBoolean( "return" ) ); - + s->invoke( "function z(){ this.x == 18; }" , BSONObj() ); ASSERT_EQUALS( false , s->getBoolean( "return" ) ); s->invoke( "x = 5; for( ; x <10; x++){ a = 1; }" , BSONObj() ); ASSERT_EQUALS( 10 , s->getNumber( "x" ) ); - + delete s; } }; class ObjectDecoding { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - + s->invoke( "z = { num : 1 };" , BSONObj() ); BSONObj out = s->getObject( "z" ); ASSERT_EQUALS( 1 , out["num"].number() ); @@ -200,43 +201,43 @@ namespace JSTests { out = s->getObject( "z" ); ASSERT_EQUALS( (string)"eliot" , out["x"].valuestr() ); ASSERT_EQUALS( 1 , out.nFields() ); - + BSONObj o = BSON( "x" << 17 ); - s->setObject( "blah" , o ); + s->setObject( "blah" , o ); out = s->getObject( "blah" ); ASSERT_EQUALS( 17 , out["x"].number() ); - + delete s; } }; - + class JSOIDTests { public: - void run(){ + void run() { #ifdef MOZJS Scope * s = globalScriptEngine->newScope(); - + s->localConnect( "blah" ); - + s->invoke( "z = { _id : new ObjectId() , a : 123 };" , BSONObj() ); BSONObj out = s->getObject( "z" ); ASSERT_EQUALS( 123 , out["a"].number() ); ASSERT_EQUALS( jstOID , out["_id"].type() ); - + OID save = out["_id"].__oid(); - + s->setObject( "a" , out ); - - s->invoke( "y = { _id : a._id , a : 124 };" , BSONObj() ); + + s->invoke( "y = { _id : a._id , a : 124 };" , BSONObj() ); out = s->getObject( "y" ); ASSERT_EQUALS( 124 , out["a"].number() ); - ASSERT_EQUALS( jstOID , out["_id"].type() ); + ASSERT_EQUALS( jstOID , out["_id"].type() ); ASSERT_EQUALS( out["_id"].__oid().str() , save.str() ); - s->invoke( "y = { _id : new ObjectId( a._id ) , a : 125 };" , BSONObj() ); + s->invoke( "y = { _id : new ObjectId( a._id ) , a : 125 };" , BSONObj() ); out = s->getObject( "y" ); ASSERT_EQUALS( 125 , out["a"].number() ); - ASSERT_EQUALS( jstOID , out["_id"].type() ); + ASSERT_EQUALS( jstOID , out["_id"].type() ); ASSERT_EQUALS( out["_id"].__oid().str() , save.str() ); delete s; @@ -267,9 +268,9 @@ namespace JSTests { class ObjectModReadonlyTests { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - + BSONObj o = BSON( "x" << 17 << "y" << "eliot" << "z" << "sara" << "zz" << BSONObj() ); s->setObject( "blah" , o , true ); @@ -288,16 +289,16 @@ namespace JSTests { s->setObject( "blah.zz", BSON( "a" << 19 ) ); out = s->getObject( "blah" ); ASSERT( out["zz"].embeddedObject()["a"].eoo() ); - + s->invoke( "delete blah['x']" , BSONObj() ); out = s->getObject( "blah" ); ASSERT( !out["x"].eoo() ); - + // read-only object itself 
can be overwritten s->invoke( "blah = {}", BSONObj() ); out = s->getObject( "blah" ); ASSERT( out.isEmpty() ); - + // test array - can't implement this in v8 // o = fromjson( "{a:[1,2,3]}" ); // s->setObject( "blah", o, true ); @@ -307,45 +308,47 @@ namespace JSTests { // out = s->getObject( "blah" ); // ASSERT_EQUALS( 1.0, out[ "a" ].embeddedObject()[ 0 ].number() ); // ASSERT_EQUALS( 3.0, out[ "a" ].embeddedObject()[ 2 ].number() ); - + delete s; } }; class OtherJSTypes { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - - { // date + + { + // date BSONObj o; - { + { BSONObjBuilder b; b.appendDate( "d" , 123456789 ); o = b.obj(); } s->setObject( "x" , o ); - + s->invoke( "return x.d.getTime() != 12;" , BSONObj() ); ASSERT_EQUALS( true, s->getBoolean( "return" ) ); - + s->invoke( "z = x.d.getTime();" , BSONObj() ); ASSERT_EQUALS( 123456789 , s->getNumber( "z" ) ); - + s->invoke( "z = { z : x.d }" , BSONObj() ); BSONObj out = s->getObject( "z" ); ASSERT( out["z"].type() == Date ); } - { // regex + { + // regex BSONObj o; - { + { BSONObjBuilder b; b.appendRegex( "r" , "^a" , "i" ); o = b.obj(); } s->setObject( "x" , o ); - + s->invoke( "z = x.r.test( 'b' );" , BSONObj() ); ASSERT_EQUALS( false , s->getBoolean( "z" ) ); @@ -362,26 +365,26 @@ namespace JSTests { ASSERT_EQUALS( (string)"i" , out["a"].regexFlags() ); } - + // array { BSONObj o = fromjson( "{r:[1,2,3]}" ); - s->setObject( "x", o, false ); + s->setObject( "x", o, false ); BSONObj out = s->getObject( "x" ); ASSERT_EQUALS( Array, out.firstElement().type() ); - s->setObject( "x", o, true ); + s->setObject( "x", o, true ); out = s->getObject( "x" ); ASSERT_EQUALS( Array, out.firstElement().type() ); } - + delete s; } }; class SpecialDBTypes { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); BSONObjBuilder b; @@ -389,7 +392,7 @@ namespace JSTests { b.appendMinKey( "b" ); b.appendMaxKey( "c" ); b.appendTimestamp( "d" , 1234000 , 9876 ); - + { BSONObj t = b.done(); @@ -398,7 +401,7 @@ namespace JSTests { } s->setObject( "z" , b.obj() ); - + ASSERT( s->invoke( "y = { a : z.a , b : z.b , c : z.c , d: z.d }" , BSONObj() ) == 0 ); BSONObj out = s->getObject( "y" ); @@ -414,14 +417,14 @@ namespace JSTests { delete s; } }; - + class TypeConservation { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - + // -- A -- - + BSONObj o; { BSONObjBuilder b ; @@ -431,7 +434,7 @@ namespace JSTests { } ASSERT_EQUALS( NumberInt , o["a"].type() ); ASSERT_EQUALS( NumberDouble , o["b"].type() ); - + s->setObject( "z" , o ); s->invoke( "return z" , BSONObj() ); BSONObj out = s->getObject( "return" ); @@ -442,7 +445,7 @@ namespace JSTests { ASSERT_EQUALS( NumberInt , out["a"].type() ); // -- B -- - + { BSONObjBuilder b ; b.append( "a" , (int)5 ); @@ -459,31 +462,31 @@ namespace JSTests { ASSERT_EQUALS( NumberDouble , out["b"].type() ); ASSERT_EQUALS( NumberInt , out["a"].type() ); - + // -- C -- - + { BSONObjBuilder b ; - + { BSONObjBuilder c; c.append( "0" , 5.5 ); c.append( "1" , 6 ); b.appendArray( "a" , c.obj() ); } - + o = b.obj(); } - + ASSERT_EQUALS( NumberDouble , o["a"].embeddedObjectUserCheck()["0"].type() ); ASSERT_EQUALS( NumberInt , o["a"].embeddedObjectUserCheck()["1"].type() ); - + s->setObject( "z" , o , false ); out = s->getObject( "z" ); ASSERT_EQUALS( NumberDouble , out["a"].embeddedObjectUserCheck()["0"].type() ); ASSERT_EQUALS( NumberInt , out["a"].embeddedObjectUserCheck()["1"].type() ); - + s->invokeSafe( "z.z = 5;" , BSONObj() ); out = 
s->getObject( "z" ); ASSERT_EQUALS( 5 , out["z"].number() ); @@ -493,9 +496,9 @@ namespace JSTests { // Eliot says I don't have to worry about this case - + // // -- D -- -// +// // o = fromjson( "{a:3.0,b:4.5}" ); // ASSERT_EQUALS( NumberDouble , o["a"].type() ); // ASSERT_EQUALS( NumberDouble , o["b"].type() ); @@ -505,20 +508,20 @@ namespace JSTests { // out = s->getObject( "return" ); // ASSERT_EQUALS( 3 , out["a"].number() ); // ASSERT_EQUALS( 4.5 , out["b"].number() ); -// +// // ASSERT_EQUALS( NumberDouble , out["b"].type() ); // ASSERT_EQUALS( NumberDouble , out["a"].type() ); -// - +// + delete s; } - + }; - + class NumberLong { public: void run() { - Scope * s = globalScriptEngine->newScope(); + auto_ptr s( globalScriptEngine->newScope() ); s->localConnect( "blah" ); BSONObjBuilder b; long long val = (long long)( 0xbabadeadbeefbaddULL ); @@ -527,7 +530,7 @@ namespace JSTests { s->setObject( "a", in ); BSONObj out = s->getObject( "a" ); ASSERT_EQUALS( mongo::NumberLong, out.firstElement().type() ); - + ASSERT( s->exec( "printjson( a ); b = {b:a.a}", "foo", false, true, false ) ); out = s->getObject( "b" ); ASSERT_EQUALS( mongo::NumberLong, out.firstElement().type() ); @@ -537,7 +540,7 @@ namespace JSTests { cout << out.toString() << endl; ASSERT_EQUALS( val, out.firstElement().numberLong() ); } - + ASSERT( s->exec( "c = {c:a.a.toString()}", "foo", false, true, false ) ); out = s->getObject( "c" ); stringstream ss; @@ -552,12 +555,12 @@ namespace JSTests { ASSERT( s->exec( "e = {e:a.a.floatApprox}", "foo", false, true, false ) ); out = s->getObject( "e" ); ASSERT_EQUALS( NumberDouble, out.firstElement().type() ); - ASSERT_EQUALS( double( val ), out.firstElement().number() ); + ASSERT_EQUALS( double( val ), out.firstElement().number() ); ASSERT( s->exec( "f = {f:a.a.top}", "foo", false, true, false ) ); out = s->getObject( "f" ); ASSERT( NumberDouble == out.firstElement().type() || NumberInt == out.firstElement().type() ); - + s->setObject( "z", BSON( "z" << (long long)( 4 ) ) ); ASSERT( s->exec( "y = {y:z.z.top}", "foo", false, true, false ) ); out = s->getObject( "y" ); @@ -566,36 +569,64 @@ namespace JSTests { ASSERT( s->exec( "x = {x:z.z.floatApprox}", "foo", false, true, false ) ); out = s->getObject( "x" ); ASSERT( NumberDouble == out.firstElement().type() || NumberInt == out.firstElement().type() ); - ASSERT_EQUALS( double( 4 ), out.firstElement().number() ); + ASSERT_EQUALS( double( 4 ), out.firstElement().number() ); ASSERT( s->exec( "w = {w:z.z}", "foo", false, true, false ) ); out = s->getObject( "w" ); ASSERT_EQUALS( mongo::NumberLong, out.firstElement().type() ); - ASSERT_EQUALS( 4, out.firstElement().numberLong() ); - + ASSERT_EQUALS( 4, out.firstElement().numberLong() ); + } }; - + + class NumberLong2 { + public: + void run() { + auto_ptr s( globalScriptEngine->newScope() ); + s->localConnect( "blah" ); + + BSONObj in; + { + BSONObjBuilder b; + b.append( "a" , 5 ); + b.append( "b" , (long long)5 ); + b.append( "c" , (long long)pow( 2.0, 29 ) ); + b.append( "d" , (long long)pow( 2.0, 30 ) ); + b.append( "e" , (long long)pow( 2.0, 31 ) ); + b.append( "f" , (long long)pow( 2.0, 45 ) ); + in = b.obj(); + } + s->setObject( "a" , in ); + + ASSERT( s->exec( "x = tojson( a ); " ,"foo" , false , true , false ) ); + string outString = s->getString( "x" ); + + ASSERT( s->exec( (string)"y = " + outString , "foo2" , false , true , false ) ); + BSONObj out = s->getObject( "y" ); + ASSERT_EQUALS( in , out ); + } + }; + class WeirdObjects { public: - BSONObj build( int depth ){ + 
BSONObj build( int depth ) { BSONObjBuilder b; b.append( "0" , depth ); if ( depth > 0 ) b.appendArray( "1" , build( depth - 1 ) ); return b.obj(); } - - void run(){ + + void run() { Scope * s = globalScriptEngine->newScope(); s->localConnect( "blah" ); - - for ( int i=5; i<100 ; i += 10 ){ + + for ( int i=5; i<100 ; i += 10 ) { s->setObject( "a" , build(i) , false ); s->invokeSafe( "tojson( a )" , BSONObj() ); - + s->setObject( "a" , build(5) , true ); s->invokeSafe( "tojson( a )" , BSONObj() ); } @@ -609,11 +640,12 @@ namespace JSTests { BSONObj cmd; BSONObjBuilder result; string errmsg; - dbEval( "", cmd, result, errmsg); + dbEval( "test", cmd, result, errmsg); + assert(0); } DBDirectClient client; - + class Utf8Check { public: Utf8Check() { reset(); } @@ -638,7 +670,7 @@ namespace JSTests { } void reset() { client.dropCollection( ns() ); - } + } static const char *ns() { return "unittest.jstests.utf8check"; } }; @@ -654,13 +686,13 @@ namespace JSTests { private: void reset() { client.dropCollection( ns() ); - } + } static const char *ns() { return "unittest.jstests.longutf8string"; } }; class InvalidUTF8Check { public: - void run(){ + void run() { if( !globalScriptEngine->utf8Ok() ) return; @@ -676,24 +708,24 @@ namespace JSTests { crap[2] = (char) 128; crap[3] = 17; crap[4] = 0; - + BSONObjBuilder bb; bb.append( "x" , crap ); b = bb.obj(); } - + //cout << "ELIOT: " << b.jsonString() << endl; s->setThis( &b ); // its ok if this is handled by js, just can't create a c++ exception - s->invoke( "x=this.x.length;" , BSONObj() ); + s->invoke( "x=this.x.length;" , BSONObj() ); } }; - + class CodeTests { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - + { BSONObjBuilder b; b.append( "a" , 1 ); @@ -702,10 +734,10 @@ namespace JSTests { b.appendCodeWScope( "d" , "function(){ out.d = 13 + bleh; }" , BSON( "bleh" << 5 ) ); s->setObject( "foo" , b.obj() ); } - + s->invokeSafe( "out = {}; out.a = foo.a; foo.b(); foo.c();" , BSONObj() ); BSONObj out = s->getObject( "out" ); - + ASSERT_EQUALS( 1 , out["a"].number() ); ASSERT_EQUALS( 11 , out["b"].number() ); ASSERT_EQUALS( 12 , out["c"].number() ); @@ -714,7 +746,7 @@ namespace JSTests { //s->invokeSafe( "foo.d() " , BSONObj() ); //out = s->getObject( "out" ); //ASSERT_EQUALS( 18 , out["d"].number() ); - + delete s; } @@ -722,19 +754,19 @@ namespace JSTests { class DBRefTest { public: - DBRefTest(){ + DBRefTest() { _a = "unittest.dbref.a"; _b = "unittest.dbref.b"; reset(); } - ~DBRefTest(){ + ~DBRefTest() { //reset(); } - - void run(){ + + void run() { client.insert( _a , BSON( "a" << "17" ) ); - + { BSONObj fromA = client.findOne( _a , BSONObj() ); assert( fromA.valid() ); @@ -744,28 +776,28 @@ namespace JSTests { b.appendDBRef( "c" , "dbref.a" , fromA["_id"].__oid() ); client.insert( _b , b.obj() ); } - + ASSERT( client.eval( "unittest" , "x = db.dbref.b.findOne(); assert.eq( 17 , x.c.fetch().a , 'ref working' );" ) ); - + // BSON DBRef <=> JS DBPointer ASSERT( client.eval( "unittest", "x = db.dbref.b.findOne(); db.dbref.b.drop(); x.c = new DBPointer( x.c.ns, x.c.id ); db.dbref.b.insert( x );" ) ); ASSERT_EQUALS( DBRef, client.findOne( "unittest.dbref.b", "" )[ "c" ].type() ); - + // BSON Object <=> JS DBRef ASSERT( client.eval( "unittest", "x = db.dbref.b.findOne(); db.dbref.b.drop(); x.c = new DBRef( x.c.ns, x.c.id ); db.dbref.b.insert( x );" ) ); ASSERT_EQUALS( Object, client.findOne( "unittest.dbref.b", "" )[ "c" ].type() ); ASSERT_EQUALS( string( "dbref.a" ), client.findOne( "unittest.dbref.b", "" )[ "c" 
].embeddedObject().getStringField( "$ref" ) ); } - - void reset(){ + + void reset() { client.dropCollection( _a ); client.dropCollection( _b ); } - + const char * _a; const char * _b; }; - + class InformalDBRef { public: void run() { @@ -775,20 +807,20 @@ namespace JSTests { client.insert( ns(), BSON( "r" << BSON( "$ref" << "jstests.informaldbref" << "$id" << obj["_id"].__oid() << "foo" << "bar" ) ) ); obj = client.findOne( ns(), BSONObj() ); ASSERT_EQUALS( "bar", obj[ "r" ].embeddedObject()[ "foo" ].str() ); - + ASSERT( client.eval( "unittest", "x = db.jstests.informaldbref.findOne(); y = { r:x.r }; db.jstests.informaldbref.drop(); y.r[ \"a\" ] = \"b\"; db.jstests.informaldbref.save( y );" ) ); obj = client.findOne( ns(), BSONObj() ); - ASSERT_EQUALS( "bar", obj[ "r" ].embeddedObject()[ "foo" ].str() ); - ASSERT_EQUALS( "b", obj[ "r" ].embeddedObject()[ "a" ].str() ); + ASSERT_EQUALS( "bar", obj[ "r" ].embeddedObject()[ "foo" ].str() ); + ASSERT_EQUALS( "b", obj[ "r" ].embeddedObject()[ "a" ].str() ); } private: static const char *ns() { return "unittest.jstests.informaldbref"; } }; - + class BinDataType { public: - - void pp( const char * s , BSONElement e ){ + + void pp( const char * s , BSONElement e ) { int len; const char * data = e.binData( len ); cout << s << ":" << e.binDataType() << "\t" << len << endl; @@ -798,12 +830,12 @@ namespace JSTests { cout << endl; } - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); s->localConnect( "asd" ); const char * foo = "asdas\0asdasd"; const char * base64 = "YXNkYXMAYXNkYXNk"; - + BSONObj in; { BSONObjBuilder b; @@ -812,10 +844,10 @@ namespace JSTests { in = b.obj(); s->setObject( "x" , in ); } - + s->invokeSafe( "myb = x.b; print( myb ); printjson( myb );" , BSONObj() ); s->invokeSafe( "y = { c : myb };" , BSONObj() ); - + BSONObj out = s->getObject( "y" ); ASSERT_EQUALS( BinData , out["c"].type() ); // pp( "in " , in["b"] ); @@ -827,14 +859,14 @@ namespace JSTests { stringstream expected; expected << "BinData(" << BinDataGeneral << ",\"" << base64 << "\")"; ASSERT_EQUALS( expected.str(), s->getString( "q" ) ); - + stringstream scriptBuilder; scriptBuilder << "z = { c : new BinData( " << BinDataGeneral << ", \"" << base64 << "\" ) };"; string script = scriptBuilder.str(); s->invokeSafe( script.c_str(), BSONObj() ); out = s->getObject( "z" ); // pp( "out" , out["c"] ); - ASSERT_EQUALS( 0 , in["b"].woCompare( out["c"] , false ) ); + ASSERT_EQUALS( 0 , in["b"].woCompare( out["c"] , false ) ); s->invokeSafe( "a = { f: new BinData( 128, \"\" ) };", BSONObj() ); out = s->getObject( "a" ); @@ -842,16 +874,16 @@ namespace JSTests { out[ "f" ].binData( len ); ASSERT_EQUALS( 0, len ); ASSERT_EQUALS( 128, out[ "f" ].binDataType() ); - + delete s; } }; class VarTests { public: - void run(){ + void run() { Scope * s = globalScriptEngine->newScope(); - + ASSERT( s->exec( "a = 5;" , "a" , false , true , false ) ); ASSERT_EQUALS( 5 , s->getNumber("a" ) ); @@ -863,19 +895,19 @@ namespace JSTests { class Speed1 { public: - void run(){ + void run() { BSONObj start = BSON( "x" << 5 ); BSONObj empty; auto_ptr s; s.reset( globalScriptEngine->newScope() ); - + ScriptingFunction f = s->createFunction( "return this.x + 6;" ); s->setThis( &start ); - + Timer t; double n = 0; - for ( ; n < 100000; n++ ){ + for ( ; n < 100000; n++ ) { s->invoke( f , empty ); ASSERT_EQUALS( 11 , s->getNumber( "return" ) ); } @@ -885,10 +917,10 @@ namespace JSTests { class ScopeOut { public: - void run(){ + void run() { auto_ptr s; s.reset( 
globalScriptEngine->newScope() ); - + s->invokeSafe( "x = 5;" , BSONObj() ); { BSONObjBuilder b; @@ -910,18 +942,39 @@ namespace JSTests { } }; + class RenameTest { + public: + void run() { + auto_ptr s; + s.reset( globalScriptEngine->newScope() ); + + s->setNumber( "x" , 5 ); + ASSERT_EQUALS( 5 , s->getNumber( "x" ) ); + ASSERT_EQUALS( Undefined , s->type( "y" ) ); + + s->rename( "x" , "y" ); + ASSERT_EQUALS( 5 , s->getNumber( "y" ) ); + ASSERT_EQUALS( Undefined , s->type( "x" ) ); + + s->rename( "y" , "x" ); + ASSERT_EQUALS( 5 , s->getNumber( "x" ) ); + ASSERT_EQUALS( Undefined , s->type( "y" ) ); + } + }; + + class All : public Suite { public: All() : Suite( "js" ) { } - - void setupTests(){ + + void setupTests() { add< Fundamental >(); add< BasicScope >(); add< ResetScope >(); add< FalseTests >(); add< SimpleFunctions >(); - + add< ObjectMapping >(); add< ObjectDecoding >(); add< JSOIDTests >(); @@ -931,15 +984,17 @@ namespace JSTests { add< SpecialDBTypes >(); add< TypeConservation >(); add< NumberLong >(); - + add< NumberLong2 >(); + add< RenameTest >(); + add< WeirdObjects >(); add< CodeTests >(); add< DBRefTest >(); add< InformalDBRef >(); add< BinDataType >(); - + add< VarTests >(); - + add< Speed1 >(); add< InvalidUTF8Check >(); @@ -949,6 +1004,6 @@ namespace JSTests { add< ScopeOut >(); } } myall; - + } // namespace JavaJSTests diff --git a/dbtests/matchertests.cpp b/dbtests/matchertests.cpp index 696c924..380b8b8 100644 --- a/dbtests/matchertests.cpp +++ b/dbtests/matchertests.cpp @@ -18,12 +18,15 @@ */ #include "pch.h" -#include "../db/matcher.h" +#include "../util/timer.h" +#include "../db/matcher.h" #include "../db/json.h" #include "dbtests.h" + + namespace MatcherTests { class Basic { @@ -34,26 +37,26 @@ namespace MatcherTests { ASSERT( m.matches( fromjson( "{\"a\":\"b\"}" ) ) ); } }; - + class DoubleEqual { public: void run() { BSONObj query = fromjson( "{\"a\":5}" ); Matcher m( query ); - ASSERT( m.matches( fromjson( "{\"a\":5}" ) ) ); + ASSERT( m.matches( fromjson( "{\"a\":5}" ) ) ); } }; - + class MixedNumericEqual { public: void run() { BSONObjBuilder query; query.append( "a", 5 ); Matcher m( query.done() ); - ASSERT( m.matches( fromjson( "{\"a\":5}" ) ) ); - } + ASSERT( m.matches( fromjson( "{\"a\":5}" ) ) ); + } }; - + class MixedNumericGt { public: void run() { @@ -62,16 +65,16 @@ namespace MatcherTests { BSONObjBuilder b; b.append( "a", 5 ); ASSERT( m.matches( b.done() ) ); - } + } }; - + class MixedNumericIN { public: - void run(){ + void run() { BSONObj query = fromjson( "{ a : { $in : [4,6] } }" ); ASSERT_EQUALS( 4 , query["a"].embeddedObject()["$in"].embeddedObject()["0"].number() ); ASSERT_EQUALS( NumberInt , query["a"].embeddedObject()["$in"].embeddedObject()["0"].type() ); - + Matcher m( query ); { @@ -92,19 +95,19 @@ namespace MatcherTests { b.append( "a" , 4 ); ASSERT( m.matches( b.done() ) ); } - + } }; class MixedNumericEmbedded { public: - void run(){ + void run() { Matcher m( BSON( "a" << BSON( "x" << 1 ) ) ); ASSERT( m.matches( BSON( "a" << BSON( "x" << 1 ) ) ) ); ASSERT( m.matches( BSON( "a" << BSON( "x" << 1.0 ) ) ) ); } }; - + class Size { public: void run() { @@ -113,16 +116,38 @@ namespace MatcherTests { ASSERT( !m.matches( fromjson( "{a:[1,2,3]}" ) ) ); ASSERT( !m.matches( fromjson( "{a:[1,2,3,'a','b']}" ) ) ); ASSERT( !m.matches( fromjson( "{a:[[1,2,3,4]]}" ) ) ); - } + } + }; + + + class TimingBase { + public: + long time( const BSONObj& patt , const BSONObj& obj ) { + Matcher m( patt ); + Timer t; + for ( int i=0; i<10000; i++ ) { + ASSERT( 
m.matches( obj ) ); + } + return t.millis(); + } + }; + + class AllTiming : public TimingBase { + public: + void run() { + long normal = time( BSON( "x" << 5 ) , BSON( "x" << 5 ) ); + long all = time( BSON( "x" << BSON( "$all" << BSON_ARRAY( 5 ) ) ) , BSON( "x" << 5 ) ); + + cout << "normal: " << normal << " all: " << all << endl; + } }; - class All : public Suite { public: - All() : Suite( "matcher" ){ + All() : Suite( "matcher" ) { } - - void setupTests(){ + + void setupTests() { add< Basic >(); add< DoubleEqual >(); add< MixedNumericEqual >(); @@ -130,8 +155,9 @@ namespace MatcherTests { add< MixedNumericIN >(); add< Size >(); add< MixedNumericEmbedded >(); + add< AllTiming >(); } } dball; - + } // namespace MatcherTests diff --git a/dbtests/mmaptests.cpp b/dbtests/mmaptests.cpp new file mode 100644 index 0000000..7fb6eee --- /dev/null +++ b/dbtests/mmaptests.cpp @@ -0,0 +1,219 @@ +// @file mmaptests.cpp + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "pch.h" +#include "../db/mongommf.h" +#include "../util/timer.h" +#include "dbtests.h" + +namespace MMapTests { + + class LeakTest { + const string fn; + const int optOld; + public: + LeakTest() : + fn( (path(dbpath) / "testfile.map").string() ), optOld(cmdLine.durOptions) + { + cmdLine.durOptions = 0; // DurParanoid doesn't make sense with this test + } + ~LeakTest() { + cmdLine.durOptions = optOld; + try { boost::filesystem::remove(fn); } + catch(...) { } + } + void run() { + + try { boost::filesystem::remove(fn); } + catch(...) { } + + writelock lk; + + { + MongoMMF f; + unsigned long long len = 256 * 1024 * 1024; + assert( f.create(fn, len, /*sequential*/false) ); + { + char *p = (char *) f.getView(); + assert(p); + // write something to the private view as a test + if( cmdLine.dur ) + MemoryMappedFile::makeWritable(p, 6); + strcpy(p, "hello"); + } + if( cmdLine.dur ) { + char *w = (char *) f.view_write(); + strcpy(w + 6, "world"); + } + MongoFileFinder ff; + ASSERT( ff.findByPath(fn) ); + ASSERT( ff.findByPath("asdf") == 0 ); + } + { + MongoFileFinder ff; + ASSERT( ff.findByPath(fn) == 0 ); + } + + int N = 10000; +#if !defined(_WIN32) && !defined(__linux__) + // seems this test is slow on OS X. + N = 100; +#endif + + // we make a lot here -- if we were leaking, presumably it would fail doing this many. 
+ Timer t; + for( int i = 0; i < N; i++ ) { + MongoMMF f; + assert( f.open(fn, i%4==1) ); + { + char *p = (char *) f.getView(); + assert(p); + if( cmdLine.dur ) + MemoryMappedFile::makeWritable(p, 4); + strcpy(p, "zzz"); + } + if( cmdLine.dur ) { + char *w = (char *) f.view_write(); + if( i % 2 == 0 ) + ++(*w); + assert( w[6] == 'w' ); + } + } + if( t.millis() > 10000 ) { + log() << "warning: MMap LeakTest is unusually slow N:" << N << ' ' << t.millis() << "ms" << endl; + } + + } + }; + + class All : public Suite { + public: + All() : Suite( "mmap" ) {} + void setupTests() { + add< LeakTest >(); + } + } myall; + +#if 0 + + class CopyOnWriteSpeedTest { + public: + void run() { + + string fn = "/tmp/testfile.map"; + boost::filesystem::remove(fn); + + MemoryMappedFile f; + char *p = (char *) f.create(fn, 1024 * 1024 * 1024, true); + assert(p); + strcpy(p, "hello"); + + { + void *x = f.testGetCopyOnWriteView(); + Timer tt; + for( int i = 11; i < 1000000000; i++ ) + p[i] = 'z'; + cout << "fill 1GB time: " << tt.millis() << "ms" << endl; + f.testCloseCopyOnWriteView(x); + } + + /* test a lot of view/unviews */ + { + Timer t; + + char *q; + for( int i = 0; i < 1000; i++ ) { + q = (char *) f.testGetCopyOnWriteView(); + assert( q ); + if( i == 999 ) { + strcpy(q+2, "there"); + } + f.testCloseCopyOnWriteView(q); + } + + cout << "view unview: " << t.millis() << "ms" << endl; + } + + f.flush(true); + + /* plain old mmaped writes */ + { + Timer t; + for( int i = 0; i < 10; i++ ) { + memset(p+100, 'c', 200 * 1024 * 1024); + } + cout << "traditional writes: " << t.millis() << "ms" << endl; + } + + f.flush(true); + + /* test doing some writes */ + { + Timer t; + char *q = (char *) f.testGetCopyOnWriteView(); + for( int i = 0; i < 10; i++ ) { + assert( q ); + memset(q+100, 'c', 200 * 1024 * 1024); + } + f.testCloseCopyOnWriteView(q); + + cout << "inc style some writes: " << t.millis() << "ms" << endl; + } + + /* test doing some writes */ + { + Timer t; + for( int i = 0; i < 10; i++ ) { + char *q = (char *) f.testGetCopyOnWriteView(); + assert( q ); + memset(q+100, 'c', 200 * 1024 * 1024); + f.testCloseCopyOnWriteView(q); + } + + cout << "some writes: " << t.millis() << "ms" << endl; + } + + /* more granular */ + { + Timer t; + for( int i = 0; i < 100; i++ ) { + char *q = (char *) f.testGetCopyOnWriteView(); + assert( q ); + memset(q+100, 'c', 20 * 1024 * 1024); + f.testCloseCopyOnWriteView(q); + } + + cout << "more granular some writes: " << t.millis() << "ms" << endl; + } + + p[10] = 0; + cout << p << endl; + } + }; + + class All : public Suite { + public: + All() : Suite( "mmap" ) {} + void setupTests() { + add< CopyOnWriteSpeedTest >(); + } + } myall; + +#endif + +} diff --git a/dbtests/mockdbclient.h b/dbtests/mockdbclient.h index 9119075..fda0963 100644 --- a/dbtests/mockdbclient.h +++ b/dbtests/mockdbclient.h @@ -64,8 +64,8 @@ public: virtual void afterCommand() {} }; DirectDBClientConnection( ReplPair *rp, ConnectionCallback *cc = 0 ) : - rp_( rp ), - cc_( cc ) { + rp_( rp ), + cc_( cc ) { } virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) { BSONObj c = query.obj.copy(); diff --git a/dbtests/namespacetests.cpp b/dbtests/namespacetests.cpp index ca051fe..c2be0b0 100644 --- a/dbtests/namespacetests.cpp +++ b/dbtests/namespacetests.cpp @@ -32,7 +32,7 @@ namespace NamespaceTests { dblock lk; Client::Context _context; public: - Base() : _context(ns()){ + Base() : _context(ns()) { } virtual ~Base() { if ( id_.info.isNull() ) @@ -323,7 
+323,7 @@ namespace NamespaceTests { return k.obj(); } }; - + class ArraySubobjectSingleMissing : public Base { public: void run() { @@ -336,7 +336,7 @@ namespace NamespaceTests { elts.push_back( simpleBC( i ) ); BSONObjBuilder b; b.append( "a", elts ); - + BSONObjSetDefaultOrder keys; id().getKeysFromObject( b.done(), keys ); checkSize( 4, keys ); @@ -353,7 +353,7 @@ namespace NamespaceTests { return aDotB(); } }; - + class ArraySubobjectMissing : public Base { public: void run() { @@ -376,7 +376,7 @@ namespace NamespaceTests { return aDotB(); } }; - + class MissingField : public Base { public: void run() { @@ -391,7 +391,7 @@ namespace NamespaceTests { return BSON( "a" << 1 ); } }; - + class SubobjectMissing : public Base { public: void run() { @@ -406,12 +406,12 @@ namespace NamespaceTests { return aDotB(); } }; - + class CompoundMissing : public Base { public: - void run(){ + void run() { create(); - + { BSONObjSetDefaultOrder keys; id().getKeysFromObject( fromjson( "{x:'a',y:'b'}" ) , keys ); @@ -428,16 +428,16 @@ namespace NamespaceTests { b.appendNull( "" ); assertEquals( b.obj() , *keys.begin() ); } - + } private: virtual BSONObj key() const { return BSON( "x" << 1 << "y" << 1 ); } - + }; - + class ArraySubelementComplex : public Base { public: void run() { @@ -508,17 +508,17 @@ namespace NamespaceTests { return aDotB(); } }; - + class EmptyArray : Base { public: - void run(){ + void run() { create(); BSONObjSetDefaultOrder keys; id().getKeysFromObject( fromjson( "{a:[1,2]}" ), keys ); checkSize(2, keys ); keys.clear(); - + id().getKeysFromObject( fromjson( "{a:[1]}" ), keys ); checkSize(1, keys ); keys.clear(); @@ -535,14 +535,14 @@ namespace NamespaceTests { class MultiEmptyArray : Base { public: - void run(){ + void run() { create(); BSONObjSetDefaultOrder keys; id().getKeysFromObject( fromjson( "{a:1,b:[1,2]}" ), keys ); checkSize(2, keys ); keys.clear(); - + id().getKeysFromObject( fromjson( "{a:1,b:[1]}" ), keys ); checkSize(1, keys ); keys.clear(); @@ -551,7 +551,7 @@ namespace NamespaceTests { //cout << "YO : " << *(keys.begin()) << endl; checkSize(1, keys ); keys.clear(); - + id().getKeysFromObject( fromjson( "{a:1,b:[]}" ), keys ); checkSize(1, keys ); //cout << "YO : " << *(keys.begin()) << endl; @@ -600,11 +600,11 @@ namespace NamespaceTests { if ( fileNo == -1 ) continue; for ( int j = i.ext()->firstRecord.getOfs(); j != DiskLoc::NullOfs; - j = DiskLoc( fileNo, j ).rec()->nextOfs ) { + j = DiskLoc( fileNo, j ).rec()->nextOfs ) { ++count; } } - ASSERT_EQUALS( count, nsd()->nrecords ); + ASSERT_EQUALS( count, nsd()->stats.nrecords ); return count; } int nExtents() const { @@ -620,7 +620,7 @@ namespace NamespaceTests { return ns_; } NamespaceDetails *nsd() const { - return nsdetails( ns() ); + return nsdetails( ns() )->writingWithExtra(); } static BSONObj bigObj() { string as( 187, 'a' ); @@ -700,7 +700,7 @@ namespace NamespaceTests { } }; - /* test NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc loc) + /* test NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc loc) */ class TruncateCapped : public Base { virtual string spec() const { @@ -737,9 +737,9 @@ namespace NamespaceTests { } DiskLoc d = l[6]; - long long n = nsd->nrecords; + long long n = nsd->stats.nrecords; nsd->cappedTruncateAfter(ns(), d, false); - ASSERT_EQUALS( nsd->nrecords , n-1 ); + ASSERT_EQUALS( nsd->stats.nrecords , n-1 ); { ForwardCappedCursor c(nsd); @@ -770,7 +770,7 @@ namespace NamespaceTests { void run() { create(); nsd()->deletedList[ 2 ] = 
nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted; - nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted = DiskLoc(); + nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted.drec()->nextDeleted.writing() = DiskLoc(); nsd()->cappedLastDelRecLastExtent().Null(); NamespaceDetails *d = nsd(); zero( &d->capExtent ); @@ -820,15 +820,15 @@ namespace NamespaceTests { ASSERT_EQUALS( 496U, sizeof( NamespaceDetails ) ); } }; - + } // namespace NamespaceDetailsTests class All : public Suite { public: - All() : Suite( "namespace" ){ + All() : Suite( "namespace" ) { } - void setupTests(){ + void setupTests() { add< IndexDetailsTests::Create >(); add< IndexDetailsTests::GetKeysFromObjectSimple >(); add< IndexDetailsTests::GetKeysFromObjectDotted >(); diff --git a/dbtests/pairingtests.cpp b/dbtests/pairingtests.cpp index 68d4c0e..9cca548 100644 --- a/dbtests/pairingtests.cpp +++ b/dbtests/pairingtests.cpp @@ -37,7 +37,7 @@ namespace PairingTests { ~Base() { pairSync = backup; dblock lk; - Helpers::emptyCollection( "local.pair.sync" ); + Helpers::emptyCollection( "local.pair.sync" ); if ( pairSync->initialSyncCompleted() ) { // save to db pairSync->setInitialSyncCompleted(); @@ -63,7 +63,7 @@ namespace PairingTests { private: static void init() { dblock lk; - Helpers::emptyCollection( "local.pair.sync" ); + Helpers::emptyCollection( "local.pair.sync" ); if ( synced != 0 && notSynced != 0 ) return; notSynced = new PairSync(); @@ -71,7 +71,7 @@ namespace PairingTests { synced = new PairSync(); synced->init(); synced->setInitialSyncCompleted(); - Helpers::emptyCollection( "local.pair.sync" ); + Helpers::emptyCollection( "local.pair.sync" ); } PairSync *backup; static PairSync *synced; @@ -199,24 +199,24 @@ namespace PairingTests { TestableReplPair rp4( true, fromjson( "{ok:1,you_are:1}" ) ); rp4.arbitrate(); - ASSERT( rp4.state == ReplPair::State_Master ); + ASSERT( rp4.state == ReplPair::State_Master ); TestableReplPair rp5( true, fromjson( "{ok:1,you_are:0}" ) ); rp5.arbitrate(); - ASSERT( rp5.state == ReplPair::State_Slave ); + ASSERT( rp5.state == ReplPair::State_Slave ); TestableReplPair rp6( true, fromjson( "{ok:1,you_are:-1}" ) ); rp6.arbitrate(); // unchanged from initial value - ASSERT( rp6.state == ReplPair::State_Negotiating ); + ASSERT( rp6.state == ReplPair::State_Negotiating ); } private: class TestableReplPair : public ReplPair { public: TestableReplPair( bool connect, const BSONObj &one ) : - ReplPair( "a", "z" ), - connect_( connect ), - one_( one ) { + ReplPair( "a", "z" ), + connect_( connect ), + one_( one ) { } virtual DBClientConnection *newClientConnection() const { @@ -326,10 +326,10 @@ namespace PairingTests { class All : public Suite { public: - All() : Suite( "pairing" ){ + All() : Suite( "pairing" ) { } - - void setupTests(){ + + void setupTests() { add< ReplPairTests::Create >(); add< ReplPairTests::Dominant >(); add< ReplPairTests::SetMaster >(); diff --git a/dbtests/pdfiletests.cpp b/dbtests/pdfiletests.cpp index 7e92783..2844fc4 100644 --- a/dbtests/pdfiletests.cpp +++ b/dbtests/pdfiletests.cpp @@ -31,7 +31,7 @@ namespace PdfileTests { class Base { public: - Base() : _context( ns() ){ + Base() : _context( ns() ) { } virtual ~Base() { if ( !nsd() ) @@ -71,6 +71,7 @@ namespace PdfileTests { BSONObj o = b.done(); int len = o.objsize(); Extent *e = ext.ext(); + e = getDur().writing(e); int ofs; if ( e->lastRecord.isNull() ) ofs = ext.getOfs() + ( e->_extentData - (char *)e ); @@ -78,6 +79,7 @@ namespace PdfileTests { ofs 
= e->lastRecord.getOfs() + e->lastRecord.rec()->lengthWithHeaders; DiskLoc dl( ext.a(), ofs ); Record *r = dl.rec(); + r = (Record*) getDur().writingPtr(r, Record::HeaderSize + len); r->lengthWithHeaders = Record::HeaderSize + len; r->extentOfs = e->myLoc.getOfs(); r->nextOfs = DiskLoc::NullOfs; @@ -86,7 +88,7 @@ namespace PdfileTests { if ( e->firstRecord.isNull() ) e->firstRecord = dl; else - e->lastRecord.rec()->nextOfs = ofs; + getDur().writingInt(e->lastRecord.rec()->nextOfs) = ofs; e->lastRecord = dl; return dl; } @@ -110,7 +112,7 @@ namespace PdfileTests { class EmptyLooped : public Base { virtual void prepare() { - nsd()->capFirstNewRecord = DiskLoc(); + nsd()->writingWithExtra()->capFirstNewRecord = DiskLoc(); } virtual int count() const { return 0; @@ -119,7 +121,7 @@ namespace PdfileTests { class EmptyMultiExtentLooped : public Base { virtual void prepare() { - nsd()->capFirstNewRecord = DiskLoc(); + nsd()->writingWithExtra()->capFirstNewRecord = DiskLoc(); } virtual int count() const { return 0; @@ -131,7 +133,7 @@ namespace PdfileTests { class Single : public Base { virtual void prepare() { - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 0 ); + nsd()->writingWithExtra()->capFirstNewRecord = insert( nsd()->capExtent, 0 ); } virtual int count() const { return 1; @@ -140,7 +142,8 @@ namespace PdfileTests { class NewCapFirst : public Base { virtual void prepare() { - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 0 ); + DiskLoc x = insert( nsd()->capExtent, 0 ); + nsd()->writingWithExtra()->capFirstNewRecord = x; insert( nsd()->capExtent, 1 ); } virtual int count() const { @@ -151,7 +154,7 @@ namespace PdfileTests { class NewCapLast : public Base { virtual void prepare() { insert( nsd()->capExtent, 0 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 1 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 1 ); } virtual int count() const { return 2; @@ -161,7 +164,7 @@ namespace PdfileTests { class NewCapMiddle : public Base { virtual void prepare() { insert( nsd()->capExtent, 0 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 1 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 1 ); insert( nsd()->capExtent, 2 ); } virtual int count() const { @@ -173,7 +176,7 @@ namespace PdfileTests { virtual void prepare() { insert( nsd()->capExtent, 0 ); insert( nsd()->lastExtent, 1 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 2 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 2 ); insert( nsd()->capExtent, 3 ); } virtual int count() const { @@ -186,10 +189,10 @@ namespace PdfileTests { class LastExtent : public Base { virtual void prepare() { - nsd()->capExtent = nsd()->lastExtent; + nsd()->capExtent.writing() = nsd()->lastExtent; insert( nsd()->capExtent, 0 ); insert( nsd()->firstExtent, 1 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 2 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 2 ); insert( nsd()->capExtent, 3 ); } virtual int count() const { @@ -202,11 +205,11 @@ namespace PdfileTests { class MidExtent : public Base { virtual void prepare() { - nsd()->capExtent = nsd()->firstExtent.ext()->xnext; + nsd()->capExtent.writing() = nsd()->firstExtent.ext()->xnext; insert( nsd()->capExtent, 0 ); insert( nsd()->lastExtent, 1 ); insert( nsd()->firstExtent, 2 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 3 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 3 ); insert( nsd()->capExtent, 4 ); } virtual int count() const { @@ -219,10 +222,10 @@ 
namespace PdfileTests { class AloneInExtent : public Base { virtual void prepare() { - nsd()->capExtent = nsd()->firstExtent.ext()->xnext; + nsd()->capExtent.writing() = nsd()->firstExtent.ext()->xnext; insert( nsd()->lastExtent, 0 ); insert( nsd()->firstExtent, 1 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 2 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 2 ); } virtual int count() const { return 3; @@ -234,10 +237,10 @@ namespace PdfileTests { class FirstInExtent : public Base { virtual void prepare() { - nsd()->capExtent = nsd()->firstExtent.ext()->xnext; + nsd()->capExtent.writing() = nsd()->firstExtent.ext()->xnext; insert( nsd()->lastExtent, 0 ); insert( nsd()->firstExtent, 1 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 2 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 2 ); insert( nsd()->capExtent, 3 ); } virtual int count() const { @@ -250,11 +253,11 @@ namespace PdfileTests { class LastInExtent : public Base { virtual void prepare() { - nsd()->capExtent = nsd()->firstExtent.ext()->xnext; + nsd()->capExtent.writing() = nsd()->firstExtent.ext()->xnext; insert( nsd()->capExtent, 0 ); insert( nsd()->lastExtent, 1 ); insert( nsd()->firstExtent, 2 ); - nsd()->capFirstNewRecord = insert( nsd()->capExtent, 3 ); + nsd()->capFirstNewRecord.writing() = insert( nsd()->capExtent, 3 ); } virtual int count() const { return 4; @@ -265,11 +268,11 @@ namespace PdfileTests { }; } // namespace ScanCapped - + namespace Insert { class Base { public: - Base() : _context( ns() ){ + Base() : _context( ns() ) { } virtual ~Base() { if ( !nsd() ) @@ -288,7 +291,7 @@ namespace PdfileTests { dblock lk_; Client::Context _context; }; - + class UpdateDate : public Base { public: void run() { @@ -301,12 +304,86 @@ namespace PdfileTests { } }; } // namespace Insert - + + class ExtentSizing { + public: + struct SmallFilesControl { + SmallFilesControl() { + old = cmdLine.smallfiles; + cmdLine.smallfiles = false; + } + ~SmallFilesControl() { + cmdLine.smallfiles = old; + } + bool old; + }; + void run() { + SmallFilesControl c; + // test that no matter what we start with, we always get to max extent size + for ( int obj=16; objaddAFile( big , false ); + cout << f->length() << ' ' << n << endl; + if ( f->length() == l ) + break; + l = f->length(); + } + + int start = d->numFiles(); + for ( int i=0; iallocExtent( c1.c_str() , d->getFile( i )->getHeader()->unusedLength , false ); + ASSERT_EQUALS( start , d->numFiles() ); + + { + DBDirectClient db; + db.dropDatabase( dbname ); + } + } + }; + + class All : public Suite { public: - All() : Suite( "pdfile" ){} - - void setupTests(){ + All() : Suite( "pdfile" ) {} + + void setupTests() { add< ScanCapped::Empty >(); add< ScanCapped::EmptyLooped >(); add< ScanCapped::EmptyMultiExtentLooped >(); @@ -321,6 +398,8 @@ namespace PdfileTests { add< ScanCapped::FirstInExtent >(); add< ScanCapped::LastInExtent >(); add< Insert::UpdateDate >(); + add< ExtentSizing >(); + add< ExtentAllocOrder >(); } } myall; diff --git a/dbtests/perf/btreeperf.cpp b/dbtests/perf/btreeperf.cpp new file mode 100644 index 0000000..7d68d8f --- /dev/null +++ b/dbtests/perf/btreeperf.cpp @@ -0,0 +1,442 @@ +// btreeperf.cpp + +/* Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Performance timing and space utilization testing for btree indexes. + */ + +#include <iostream> + +#include <boost/random/bernoulli_distribution.hpp> +#include <boost/random/geometric_distribution.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_int.hpp> +#include <boost/random/variate_generator.hpp> + +#include "client/dbclient.h" +#include "../../util/timer.h" + +using namespace std; +using namespace mongo; +using namespace boost; + +const char *ns = "test.btreeperf"; +const char *db = "test"; +const char *index_collection = "btreeperf.$_id_"; + +// This random number generator has a much larger period than the default +// generator and is half as fast as the default. Given that we intend to +// generate large numbers of documents and will utilize more than one random +// sample per document, choosing this generator seems like a worthwhile tradeoff. +mt19937 randomNumberGenerator; + +/** + * An interface for generating documents to be inserted and document specs for + * remove requests. + */ +class InsertAndRemoveStrategy { +public: + virtual ~InsertAndRemoveStrategy() {} + virtual BSONObj insertObj() = 0; + virtual BSONObj removeObj() = 0; +protected: + /** + * Helper functions for converting a sample value to a sample object with + * specified _id, to be inserted or removed. + */ + + template< class T > + BSONObj insertObjWithVal( const T &val ) { + BSONObjBuilder b; + b.append( "_id", val ); + return b.obj(); + } + template< class T > + BSONObj removeObjWithVal( const T &val ) { + BSONObjBuilder b; + b.append( "_id", val ); + return b.obj(); + } +}; + +/** + * Manages a set of elements of type T. Supports inserting unique elements and + * sampling a random element without replacement. + * + * TODO In the contexts where this class is currently used, duplicate keys are + * either impossible or highly unlikely. And an occasional duplicate value will + * not much affect the procedure by which a random element is chosen. We could + * stop checking for duplicates in push(), eliminate _set from the implementation, + * and potentially improve performance and memory requirements somewhat. + */ +template< class T > +class SetSampler { +public: + /** @param val Insert this value in the set if not already present. */ + void push( const T& val ) { + if ( _set.insert( val ).second ) { + _vector.push_back( val ); + } + } + /** @return a random element removed from the set */ + T pull() { + if ( _vector.size() == 0 ) { + return T(); + } + uniform_int< size_t > sizeRange( 0, _vector.size() - 1 ); + variate_generator< mt19937&, uniform_int< size_t > > sizeGenerator( randomNumberGenerator, sizeRange ); + size_t toRemove = sizeGenerator(); + T val = _vector[ toRemove ]; + // Replace the random element with the last element, then remove the + // last element. + _vector[ toRemove ] = _vector.back(); + _vector.pop_back(); + _set.erase( val ); + return val; + } +private: + vector< T > _vector; + set< T > _set; +}; + +/** + * Tracks values that have been specified for insertion by the derived class's + * implementation of insertVal() and selects uniformly from among values that + * have been inserted but not yet removed for the next value to remove.
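A minimal usage sketch of the SetSampler defined above (the element type and values here are arbitrary, chosen only for illustration): push() records distinct values and pull() returns one of them uniformly at random, without replacement.

    SetSampler< long long > sampler;
    sampler.push( 10 );                  // recorded
    sampler.push( 10 );                  // duplicate, ignored by push()
    sampler.push( 42 );
    long long first = sampler.pull();    // 10 or 42, each with probability 1/2
    long long second = sampler.pull();   // the value not returned first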
+ * + * The implementation is probabilistically sound, but may be resource intensive + * and slow due to the use of a SetSampler. + */ +template< class T > +class InsertAndUniformRemoveStrategy : public InsertAndRemoveStrategy { +public: + virtual BSONObj insertObj() { + T val = insertVal(); + _sampler.push( val ); + return insertObjWithVal( val ); + } + virtual BSONObj removeObj() { return removeObjWithVal( _sampler.pull() ); } +protected: + /** @return value to insert. This is the only function a derived class need implement. */ + virtual T insertVal() = 0; +private: + SetSampler< T > _sampler; +}; + +/** + * The derived class supplies keys to be inserted and removed. The key removal + * strategy is similar to the strategy for selecting a random element described + * in the MongoDB cookbook: the first key in the collection greater than or + * equal to the supplied removal key is removed. This allows selecting an + * existing key for removal without the overhead required by a SetSampler. + * + * While this ranged selection strategy can work well for selecting a random + * element, there are some theoretical and empirically observed shortcomings + * when the strategy is applied to removing nodes for btree performance measurement: + * 1 The likelihood that a given key is removed is proportional to the difference + * in value between it and the previous key. Because key deletion increases + * the difference in value between adjacent keys, neighboring keys will be + * more likely to be deleted than they would be in a true uniform distribution. + * 2 MongoDB 1.6 uses 'unused' nodes in the btree implementation. With a ranged + * removal strategy, those nodes must be traversed to find a node available + * for removal. + * 3 Ranged removal was observed to be biased against the balancing policy of + * MongoDB 1.7 in some cases, in terms of storage size. This may be a + * consequence of point 1 above. + * 4 Ranged removal was observed to be significantly biased against the btree + * implementation in MongoDB 1.6 in terms of performance. This is likely a + * consequence of point 2 above. + * 5 In some cases the biases described above were not evident in tests lasting + * several minutes, but were evident in tests lasting several hours.
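A small worked example of the ranged removal described above (the key values are invented): if the collection currently holds _id keys 3, 7 and 20 and the strategy draws 5 as its removal value, rangedRemoveObjWithVal() produces the spec sketched below, and a single-document remove then deletes _id 7, the first key greater than or equal to 5.

    BSONObj spec = BSON( "_id" << BSON( "$gte" << 5 ) );   // equivalent to { _id: { $gte: 5 } }
    // conn.remove( ns, spec, true );                      // justOne = true, as in InsertAndRemoveRunner below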
+ */ +template< class T > +class InsertAndRangedRemoveStrategy : public InsertAndRemoveStrategy { +public: + virtual BSONObj insertObj() { return insertObjWithVal( insertVal() ); } + virtual BSONObj removeObj() { return rangedRemoveObjWithVal( removeVal() ); } +protected: + /** Small likelihood that this removal spec will not match any document */ + template< class U > + BSONObj rangedRemoveObjWithVal( const U &val ) { + BSONObjBuilder b1; + BSONObjBuilder b2( b1.subobjStart( "_id" ) ); + b2.append( "$gte", val ); + b2.done(); + return b1.obj(); + } + virtual T insertVal() = 0; + virtual T removeVal() = 0; +}; + +/** + * Integer Keys + * Uniform Inserts + * Uniform Removes + */ +class UniformInsertRangedUniformRemoveInteger : public InsertAndRangedRemoveStrategy< long long > { +public: + UniformInsertRangedUniformRemoveInteger() : + _uniform_int( 0ULL, ~0ULL ), + _nextLongLong( randomNumberGenerator, _uniform_int ) { + } + /** Small likelihood of duplicates */ + virtual long long insertVal() { return _nextLongLong(); } + virtual long long removeVal() { return _nextLongLong(); } +private: + uniform_int< unsigned long long > _uniform_int; + variate_generator< mt19937&, uniform_int< unsigned long long > > _nextLongLong; +}; + +class UniformInsertUniformRemoveInteger : public InsertAndUniformRemoveStrategy< long long > { +public: + virtual long long insertVal() { return _gen.insertVal(); } +private: + UniformInsertRangedUniformRemoveInteger _gen; +}; + +/** + * String Keys + * Uniform Inserts + * Uniform Removes + */ +class UniformInsertRangedUniformRemoveString : public InsertAndRangedRemoveStrategy< string > { +public: + UniformInsertRangedUniformRemoveString() : + _geometric_distribution( 0.9 ), + _nextLength( randomNumberGenerator, _geometric_distribution ), + _uniform_char( 'a', 'z' ), + _nextChar( randomNumberGenerator, _uniform_char ) { + } + /** Small likelihood of duplicates */ + virtual string insertVal() { return nextString(); } + virtual string removeVal() { return nextString(); } +private: + string nextString() { + // The longer the minimum string length, the lower the likelihood of duplicates + int len = _nextLength() + 5; + len = len > 100 ? 100 : len; + string ret( len, 'x' ); + for( int i = 0; i < len; ++i ) { + ret[ i ] = _nextChar(); + } + return ret; + } + geometric_distribution<> _geometric_distribution; + variate_generator< mt19937&, geometric_distribution<> > _nextLength; + uniform_int< char > _uniform_char; + variate_generator< mt19937&, uniform_int< char > > _nextChar; +}; + +class UniformInsertUniformRemoveString : public InsertAndUniformRemoveStrategy< string > { +public: + virtual string insertVal() { return _gen.insertVal(); } +private: + UniformInsertRangedUniformRemoveString _gen; +}; + +/** + * OID Keys + * Increasing Inserts + * Uniform Removes + */ +class IncreasingInsertRangedUniformRemoveOID : public InsertAndRangedRemoveStrategy< OID > { +public: + IncreasingInsertRangedUniformRemoveOID() : + _max( -1 ) { + } + virtual OID insertVal() { return oidFromULL( ++_max ); } + virtual OID removeVal() { + uniform_int< unsigned long long > distribution( 0, _max > 0 ? 
_max : 0 ); + variate_generator< mt19937&, uniform_int< unsigned long long > > generator( randomNumberGenerator, distribution ); + return oidFromULL( generator() ); + } +private: + static OID oidFromULL( unsigned long long val ) { + val = __builtin_bswap64( val ); + OID oid; + oid.clear(); + memcpy( (char*)&oid + 4, &val, 8 ); + return oid; + } + long long _max; +}; + +class IncreasingInsertUniformRemoveOID : public InsertAndUniformRemoveStrategy< OID > { +public: + virtual OID insertVal() { return _gen.insertVal(); } +private: + IncreasingInsertRangedUniformRemoveOID _gen; +}; + +/** + * Integer Keys + * Increasing Inserts + * Increasing Removes (on remove, the lowest key is always removed) + */ +class IncreasingInsertIncreasingRemoveInteger : public InsertAndRemoveStrategy { +public: + IncreasingInsertIncreasingRemoveInteger() : + // Start with a large value so data type will be preserved if we round + // trip through json. + _min( 1LL << 32 ), + _max( 1LL << 32 ) { + } + virtual BSONObj insertObj() { return insertObjWithVal( ++_max ); } + virtual BSONObj removeObj() { return removeObjWithVal( _min < _max ? ++_min : _min ); } +private: + long long _min; + long long _max; +}; + +/** Generate a random boolean value. */ +class BernoulliGenerator { +public: + /** + * @param excessFalsePercent This specifies the desired rate of false values + * vs true values. If we want false to be 5% more likely than true, we + * specify 5 for this argument. + */ + BernoulliGenerator( int excessFalsePercent ) : + _bernoulli_distribution( 1.0 / ( 2.0 + excessFalsePercent / 100.0 ) ), + _generator( randomNumberGenerator, _bernoulli_distribution ) { + } + bool operator()() { return _generator(); } +private: + bernoulli_distribution<> _bernoulli_distribution; + variate_generator< mt19937&, bernoulli_distribution<> > _generator; +}; + +/** Runs a strategy on a connection, with specified mix of inserts and removes. */ +class InsertAndRemoveRunner { +public: + InsertAndRemoveRunner( DBClientConnection &conn, InsertAndRemoveStrategy &strategy, int excessInsertPercent ) : + _conn( conn ), + _strategy( strategy ), + _nextOpTypeRemove( excessInsertPercent ) { + } + void writeOne() { + if ( _nextOpTypeRemove() ) { + _conn.remove( ns, _strategy.removeObj(), true ); + } + else { + _conn.insert( ns, _strategy.insertObj() ); + } + } +private: + DBClientConnection &_conn; + InsertAndRemoveStrategy &_strategy; + BernoulliGenerator _nextOpTypeRemove; +}; + +/** + * Writes a test script to cout based on a strategy and specified mix of inserts + * and removes. The script can be subsequently executed by InsertAndRemoveRunner. + * Script generation is intended for strategies that are memory or cpu intensive + * and might either divert resources from a mongod instance being analyzed on the + * same machine or fail to generate requests as quickly as the mongod might + * accept them. + * The script contains one line per operation. Each line begins + * with a letter indicating the operation type, followed by a space. Next + * follows the json representation of a document for the specified operation + * type. 
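For illustration, two lines such a generated script might contain (the key values are made up): 'i' marks an insert and 'r' a remove, each followed by the JSON form of the operation document.

    i { "_id" : 5902130560128389 }
    r { "_id" : { "$gte" : 3044705383948526 } }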
+ */ +class InsertAndRemoveScriptGenerator { +public: + InsertAndRemoveScriptGenerator( InsertAndRemoveStrategy &strategy, int excessInsertPercent ) : + _strategy( strategy ), + _nextOpTypeRemove( excessInsertPercent ) { + } + void writeOne() { + if ( _nextOpTypeRemove() ) { + cout << "r " << _strategy.removeObj().jsonString() << endl; + } + else { + cout << "i " << _strategy.insertObj().jsonString() << endl; + } + } +private: + InsertAndRemoveStrategy &_strategy; + BernoulliGenerator _nextOpTypeRemove; +}; + +/** + * Run a test script from cin that was generated by + * InsertAndRemoveScriptGenerator. Running the script is intended to be + * lightweight in terms of memory and cpu usage, and fast. + */ +class InsertAndRemoveScriptRunner { +public: + InsertAndRemoveScriptRunner( DBClientConnection &conn ) : + _conn( conn ) { + } + void writeOne() { + cin.getline( _buf, 1024 ); + BSONObj val = fromjson( _buf + 2 ); + if ( _buf[ 0 ] == 'r' ) { + _conn.remove( ns, val, true ); + } + else { + _conn.insert( ns, val ); + } + } +private: + DBClientConnection &_conn; + char _buf[ 1024 ]; +}; + +int main( int argc, const char **argv ) { + + DBClientConnection conn; + conn.connect( "127.0.0.1:27017" ); + conn.dropCollection( ns ); + +// UniformInsertRangedUniformRemoveInteger strategy; +// UniformInsertUniformRemoveInteger strategy; +// UniformInsertRangedUniformRemoveString strategy; +// UniformInsertUniformRemoveString strategy; +// IncreasingInsertRangedUniformRemoveOID strategy; +// IncreasingInsertUniformRemoveOID strategy; +// IncreasingInsertIncreasingRemoveInteger strategy; +// InsertAndRemoveScriptGenerator runner( strategy, 5 ); + InsertAndRemoveScriptRunner runner( conn ); + + Timer t; + BSONObj statsCmd = BSON( "collstats" << index_collection ); + + // Print header, unless we are generating a script (in that case, comment this out). + cout << "ops,milliseconds,docs,totalBucketSize" << endl; + + long long i = 0; + long long n = 10000000000; + while( i < n ) { + runner.writeOne(); + // Print statistics, unless we are generating a script (in that case, comment this out). + // The stats collection requests below provide regular read operations, + // ensuring we are caught up with the progress being made by the mongod + // under analysis. + if ( ++i % 50000 == 0 ) { + // The total number of documents present. + long long docs = conn.count( ns ); + BSONObj result; + conn.runCommand( db, statsCmd, result ); + // The total number of bytes used for all allocated 8K buckets of the + // btree. 
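    // For scale (the count here is hypothetical): a stats result reporting 1,000,000
    // buckets corresponds to 1,000,000 * 8192 = 8,192,000,000 bytes, roughly 8.2 GB
    // of allocated btree storage.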
+ long long totalBucketSize = result.getField( "count" ).numberLong() * 8192; + cout << i << ',' << t.millis() << ',' << docs << ',' << totalBucketSize << endl; + } + } +} diff --git a/dbtests/perf/perftest.cpp b/dbtests/perf/perftest.cpp index f86a1c3..ef03551 100644 --- a/dbtests/perf/perftest.cpp +++ b/dbtests/perf/perftest.cpp @@ -74,14 +74,14 @@ public: << "}" << endl; } ~Runner() { - theFileAllocator().waitUntilFinished(); + FileAllocator::get()->waitUntilFinished(); client_->dropDatabase( testDb< T >().c_str() ); } }; class RunnerSuite : public Suite { public: - RunnerSuite( string name ) : Suite( name ){} + RunnerSuite( string name ) : Suite( name ) {} protected: template< class T > void add() { @@ -168,9 +168,9 @@ namespace Insert { class All : public RunnerSuite { public: - All() : RunnerSuite( "insert" ){} + All() : RunnerSuite( "insert" ) {} - void setupTests(){ + void setupTests() { add< IdIndex >(); add< TwoIndex >(); add< TenIndex >(); @@ -252,8 +252,8 @@ namespace Update { class All : public RunnerSuite { public: - All() : RunnerSuite( "update" ){} - void setupTests(){ + All() : RunnerSuite( "update" ) {} + void setupTests() { add< Smaller >(); add< Bigger >(); add< Inc >(); @@ -266,33 +266,33 @@ namespace Update { namespace BSON { const char *sample = - "{\"one\":2, \"two\":5, \"three\": {}," - "\"four\": { \"five\": { \"six\" : 11 } }," - "\"seven\": [ \"a\", \"bb\", \"ccc\", 5 ]," - "\"eight\": Dbref( \"rrr\", \"01234567890123456789aaaa\" )," - "\"_id\": ObjectId( \"deadbeefdeadbeefdeadbeef\" )," - "\"nine\": { \"$binary\": \"abc=\", \"$type\": \"02\" }," - "\"ten\": Date( 44 ), \"eleven\": /foooooo/i }"; + "{\"one\":2, \"two\":5, \"three\": {}," + "\"four\": { \"five\": { \"six\" : 11 } }," + "\"seven\": [ \"a\", \"bb\", \"ccc\", 5 ]," + "\"eight\": Dbref( \"rrr\", \"01234567890123456789aaaa\" )," + "\"_id\": ObjectId( \"deadbeefdeadbeefdeadbeef\" )," + "\"nine\": { \"$binary\": \"abc=\", \"$type\": \"02\" }," + "\"ten\": Date( 44 ), \"eleven\": /foooooo/i }"; const char *shopwikiSample = - "{ '_id' : '289780-80f85380b5c1d4a0ad75d1217673a4a2' , 'site_id' : 289780 , 'title'" - ": 'Jubilee - Margaret Walker' , 'image_url' : 'http://www.heartlanddigsandfinds.c" - "om/store/graphics/Product_Graphics/Product_8679.jpg' , 'url' : 'http://www.heartla" - "nddigsandfinds.com/store/store_product_detail.cfm?Product_ID=8679&Category_ID=2&Su" - "b_Category_ID=910' , 'url_hash' : 3450626119933116345 , 'last_update' : null , '" - "features' : { '$imagePrefetchDate' : '2008Aug30 22:39' , '$image.color.rgb' : '5a7" - "574' , 'Price' : '$10.99' , 'Description' : 'Author--s 1st Novel. A Houghton Miffl" - "in Literary Fellowship Award novel by the esteemed poet and novelist who has demon" - "strated a lifelong commitment to the heritage of black culture. An acclaimed story" - "of Vyry, a negro slave during the 19th Century, facing the biggest challenge of h" - "er lifetime - that of gaining her freedom, fighting for all the things she had nev" - "er known before. The author, great-granddaughter of Vyry, reveals what the Civil W" - "ar in America meant to the Negroes. 
Slavery W' , '$priceHistory-1' : '2008Dec03 $1" - "0.99' , 'Brand' : 'Walker' , '$brands_in_title' : 'Walker' , '--path' : '//HTML[1]" - "/BODY[1]/TABLE[1]/TR[1]/TD[1]/P[1]/TABLE[1]/TR[1]/TD[1]/TABLE[1]/TR[2]/TD[2]/TABLE" - "[1]/TR[1]/TD[1]/P[1]/TABLE[1]/TR[1]' , '~location' : 'en_US' , '$crawled' : '2009J" - "an11 03:22' , '$priceHistory-2' : '2008Nov15 $10.99' , '$priceHistory-0' : '2008De" - "c24 $10.99'}}"; + "{ '_id' : '289780-80f85380b5c1d4a0ad75d1217673a4a2' , 'site_id' : 289780 , 'title'" + ": 'Jubilee - Margaret Walker' , 'image_url' : 'http://www.heartlanddigsandfinds.c" + "om/store/graphics/Product_Graphics/Product_8679.jpg' , 'url' : 'http://www.heartla" + "nddigsandfinds.com/store/store_product_detail.cfm?Product_ID=8679&Category_ID=2&Su" + "b_Category_ID=910' , 'url_hash' : 3450626119933116345 , 'last_update' : null , '" + "features' : { '$imagePrefetchDate' : '2008Aug30 22:39' , '$image.color.rgb' : '5a7" + "574' , 'Price' : '$10.99' , 'Description' : 'Author--s 1st Novel. A Houghton Miffl" + "in Literary Fellowship Award novel by the esteemed poet and novelist who has demon" + "strated a lifelong commitment to the heritage of black culture. An acclaimed story" + "of Vyry, a negro slave during the 19th Century, facing the biggest challenge of h" + "er lifetime - that of gaining her freedom, fighting for all the things she had nev" + "er known before. The author, great-granddaughter of Vyry, reveals what the Civil W" + "ar in America meant to the Negroes. Slavery W' , '$priceHistory-1' : '2008Dec03 $1" + "0.99' , 'Brand' : 'Walker' , '$brands_in_title' : 'Walker' , '--path' : '//HTML[1]" + "/BODY[1]/TABLE[1]/TR[1]/TD[1]/P[1]/TABLE[1]/TR[1]/TD[1]/TABLE[1]/TR[2]/TD[2]/TABLE" + "[1]/TR[1]/TD[1]/P[1]/TABLE[1]/TR[1]' , '~location' : 'en_US' , '$crawled' : '2009J" + "an11 03:22' , '$priceHistory-2' : '2008Nov15 $10.99' , '$priceHistory-0' : '2008De" + "c24 $10.99'}}"; class Parse { public: @@ -332,8 +332,8 @@ namespace BSON { class All : public RunnerSuite { public: - All() : RunnerSuite( "bson" ){} - void setupTests(){ + All() : RunnerSuite( "bson" ) {} + void setupTests() { add< Parse >(); add< ShopwikiParse >(); add< Json >(); @@ -402,8 +402,8 @@ namespace Index { class All : public RunnerSuite { public: - All() : RunnerSuite( "index" ){} - void setupTests(){ + All() : RunnerSuite( "index" ) {} + void setupTests() { add< Int >(); add< ObjectId >(); add< String >(); @@ -435,7 +435,7 @@ namespace QueryTests { } void run() { client_->findOne( ns_.c_str(), - QUERY( "a" << "b" ).hint( BSON( "_id" << 1 ) ) ); + QUERY( "a" << "b" ).hint( BSON( "_id" << 1 ) ) ); } string ns_; }; @@ -465,7 +465,7 @@ namespace QueryTests { } void run() { auto_ptr< DBClientCursor > c = - client_->query( ns_.c_str(), Query( BSONObj() ).sort( BSON( "_id" << 1 ) ) ); + client_->query( ns_.c_str(), Query( BSONObj() ).sort( BSON( "_id" << 1 ) ) ); int i = 0; for( ; c->more(); c->nextSafe(), ++i ); ASSERT_EQUALS( 50000, i ); @@ -481,7 +481,7 @@ namespace QueryTests { } void run() { auto_ptr< DBClientCursor > c = - client_->query( ns_.c_str(), Query( BSONObj() ).sort( BSON( "_id" << 1 ) ) ); + client_->query( ns_.c_str(), Query( BSONObj() ).sort( BSON( "_id" << 1 ) ) ); int i = 0; for( ; c->more(); c->nextSafe(), ++i ); ASSERT_EQUALS( 50000, i ); @@ -541,8 +541,8 @@ namespace QueryTests { class All : public RunnerSuite { public: - All() : RunnerSuite( "query" ){} - void setupTests(){ + All() : RunnerSuite( "query" ) {} + void setupTests() { add< NoMatch >(); add< NoMatchIndex >(); add< NoMatchLong >(); @@ -602,8 
+602,8 @@ namespace Count { class All : public RunnerSuite { public: - All() : RunnerSuite( "count" ){} - void setupTests(){ + All() : RunnerSuite( "count" ) {} + void setupTests() { add< Count >(); add< CountIndex >(); add< CountSimpleIndex >(); @@ -677,8 +677,8 @@ namespace Plan { class All : public RunnerSuite { public: - All() : RunnerSuite("plan" ){} - void setupTests(){ + All() : RunnerSuite("plan" ) {} + void setupTests() { add< Hint >(); add< Sort >(); add< Query >(); diff --git a/dbtests/perftests.cpp b/dbtests/perftests.cpp new file mode 100644 index 0000000..182595c --- /dev/null +++ b/dbtests/perftests.cpp @@ -0,0 +1,336 @@ +/** @file perftests.cpp : unit tests relating to performance + + The idea herein is tests that run fast and can be part of the normal CI suite. So no tests herein that take + a long time to run. Obviously we need those too, but they will be separate. + + These tests use DBDirectClient; they are a bit white-boxish. +*/ + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "pch.h" +#include "../db/query.h" +#include "../db/db.h" +#include "../db/instance.h" +#include "../db/json.h" +#include "../db/lasterror.h" +#include "../db/update.h" +#include "../db/taskqueue.h" +#include "../util/timer.h" +#include "dbtests.h" +#include "../db/dur_stats.h" + +namespace PerfTests { + typedef DBDirectClient DBClientType; + //typedef DBClientConnection DBClientType; + + class ClientBase { + public: + // NOTE: Not bothering to backup the old error record. + ClientBase() { + //_client.connect("localhost"); + mongo::lastError.reset( new LastError() ); + } + virtual ~ClientBase() { + //mongo::lastError.release(); + } + protected: + static void insert( const char *ns, BSONObj o ) { + _client.insert( ns, o ); + } + static void update( const char *ns, BSONObj q, BSONObj o, bool upsert = 0 ) { + _client.update( ns, Query( q ), o, upsert ); + } + static bool error() { + return !_client.getPrevError().getField( "err" ).isNull(); + } + DBClientBase &client() const { return _client; } + private: + static DBClientType _client; + }; + DBClientType ClientBase::_client; + + // todo: use a couple threads. not a very good test yet.
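The TaskQueueTest below exercises the deferred-work pattern of ../db/taskqueue.h: items are queued with defer() while the write lock is held and are later executed in a batch when invoke() runs the item type's static go(). A compressed sketch of that pattern (the Counter type and the value 7 are invented for illustration):

    struct Counter {
        int val;
        static int total;
        static void go( const Counter &c ) { total += c.val; }   // run once for each deferred item
    };
    int Counter::total = 0;

    TaskQueue<Counter> q;
    {
        writelock lk;              // the test defers while holding the write lock
        Counter c; c.val = 7;
        q.defer( c );              // queued, not yet executed
    }
    q.invoke();                    // drains the queue, calling Counter::go for each entry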
+ class TaskQueueTest { + static int tot; + struct V { + int val; + static void go(const V &v) { tot += v.val; } + }; + public: + void run() { + tot = 0; + TaskQueue<V> d; + int x = 0; + for( int i = 0; i < 100; i++ ) { + if( i % 30 == 0 ) + d.invoke(); + + x += i; + writelock lk; + V v; + v.val = i; + d.defer(v); + } + d.invoke(); + assert( x == tot ); + } + }; + int TaskQueueTest::tot; + + class CappedTest : public ClientBase { + }; + + class B : public ClientBase { + string _ns; + protected: + const char *ns() { return _ns.c_str(); } + virtual void prep() = 0; + + virtual void timed() = 0; + + // optional 2nd test phase to be timed separately + // return name of it + virtual const char * timed2() { return 0; } + + virtual void post() { } + virtual string name() = 0; + virtual unsigned long long expectation() = 0; + virtual int howLongMillis() { return 5000; } + public: + void say(unsigned long long n, int ms, string s) { + cout << setw(36) << left << s << ' ' << right << setw(7) << n*1000/ms << "/sec " << setw(4) << ms << "ms" << endl; + cout << dur::stats.curr->_asObj().toString() << endl; + } + void run() { + _ns = string("perftest.") + name(); + client().dropCollection(ns()); + + prep(); + + int hlm = howLongMillis(); + + dur::stats._intervalMicros = 0; // no auto rotate + dur::stats.curr->reset(); + Timer t; + unsigned long long n = 0; + const unsigned Batch = 50; + do { + unsigned i; + for( i = 0; i < Batch; i++ ) + timed(); + n += i; + } + while( t.millis() < hlm ); + client().getLastError(); // block until all ops are finished + int ms = t.millis(); + say(n, ms, name()); + + if( n < expectation() ) { + cout << "\ntest " << name() << " seems slow n:" << n << " ops/sec but expect greater than:" << expectation() << endl; + cout << endl; + } + + { + const char *test2name = timed2(); + if( test2name ) { + dur::stats.curr->reset(); + Timer t; + unsigned long long n = 0; + while( 1 ) { + unsigned i; + for( i = 0; i < Batch; i++ ) + timed2(); + n += i; + if( t.millis() > hlm ) + break; + } + int ms = t.millis(); + say(n, ms, test2name); + } + } + } + }; + + class InsertDup : public B { + const BSONObj o; + public: + InsertDup() : o( BSON("_id" << 1) ) { } // dup keys + string name() { + return "insert duplicate _ids"; + } + void prep() { + client().insert( ns(), o ); + } + void timed() { + client().insert( ns(), o ); + } + void post() { + assert( client().count(ns()) == 1 ); + } + unsigned long long expectation() { return 1000; } + }; + + class Insert1 : public InsertDup { + const BSONObj x; + public: + Insert1() : x( BSON("x" << 99) ) { } + string name() { return "insert simple"; } + void timed() { + client().insert( ns(), x ); + } + void post() { + assert( client().count(ns()) > 100 ); + } + unsigned long long expectation() { return 1000; } + }; + + class InsertBig : public InsertDup { + BSONObj x; + virtual int howLongMillis() { + if( sizeof(void*) == 4 ) + return 1000; // could exceed mmapping if run too long, as this function adds a lot of data fast + return 5000; + } + public: + InsertBig() { + char buf[200000]; + BSONObjBuilder b; + b.append("x", 99); + b.appendBinData("bin", 200000, (BinDataType) 129, buf); + x = b.obj(); + } + string name() { return "insert big"; } + void timed() { + client().insert( ns(), x ); + } + unsigned long long expectation() { return 20; } + }; + + class InsertRandom : public B { + public: + string name() { return "random inserts"; } + void prep() { + client().insert( ns(), BSONObj() ); + client().ensureIndex(ns(), BSON("x"<<1)); + } + void timed() { + int x =
rand(); + BSONObj y = BSON("x" << x << "y" << rand() << "z" << 33); + client().insert(ns(), y); + } + void post() { + } + unsigned long long expectation() { return 1000; } + }; + + /** upserts about 32k records and then keeps updating them + 2 indexes + */ + class Update1 : public B { + public: + static int rand() { + return std::rand() & 0x7fff; + } + string name() { return "random upserts"; } + void prep() { + client().insert( ns(), BSONObj() ); + client().ensureIndex(ns(), BSON("x"<<1)); + } + void timed() { + int x = rand(); + BSONObj q = BSON("x" << x); + BSONObj y = BSON("x" << x << "y" << rand() << "z" << 33); + client().update(ns(), q, y, /*upsert*/true); + } + + const char * timed2() { + static BSONObj I = BSON( "$inc" << BSON( "y" << 1 ) ); + + // test some $inc's + + int x = rand(); + BSONObj q = BSON("x" << x); + client().update(ns(), q, I); + + return "inc"; + } + + void post() { + } + unsigned long long expectation() { return 1000; } + }; + + template <typename T> + class MoreIndexes : public T { + public: + string name() { return T::name() + " with more indexes"; } + void prep() { + T::prep(); + this->client().ensureIndex(this->ns(), BSON("y"<<1)); + this->client().ensureIndex(this->ns(), BSON("z"<<1)); + } + }; + + void t() { + for( int i = 0; i < 20; i++ ) { + sleepmillis(21); + string fn = "/tmp/t1"; + MongoMMF f; + unsigned long long len = 1 * 1024 * 1024; + assert( f.create(fn, len, /*sequential*/rand()%2==0) ); + { + char *p = (char *) f.getView(); + assert(p); + // write something to the private view as a test + strcpy(p, "hello"); + } + if( cmdLine.dur ) { + char *w = (char *) f.view_write(); + strcpy(w + 6, "world"); + } + MongoFileFinder ff; + ASSERT( ff.findByPath(fn) ); + } + } + + class All : public Suite { + public: + All() : Suite( "perf" ) + { + } + ~All() { + } + Result * run( const string& filter ) { + boost::thread a(t); + Result * res = Suite::run(filter); + a.join(); + return res; + } + + void setupTests() { + add< TaskQueueTest >(); + add< InsertDup >(); + add< Insert1 >(); + add< InsertRandom >(); + add< MoreIndexes<InsertRandom> >(); + add< Update1 >(); + add< MoreIndexes<Update1> >(); + add< InsertBig >(); + } + } myall; +} diff --git a/dbtests/queryoptimizertests.cpp b/dbtests/queryoptimizertests.cpp index f5d1155..acf9217 100644 --- a/dbtests/queryoptimizertests.cpp +++ b/dbtests/queryoptimizertests.cpp @@ -27,12 +27,12 @@ namespace mongo { extern BSONObj id_obj; - void runQuery(Message& m, QueryMessage& q, Message &response ){ + void runQuery(Message& m, QueryMessage& q, Message &response ) { CurOp op( &(cc()) ); op.ensureStarted(); runQuery( m , q , op, response ); } - void runQuery(Message& m, QueryMessage& q ){ + void runQuery(Message& m, QueryMessage& q ) { Message response; runQuery( m, q, response ); } @@ -64,14 +64,14 @@ namespace QueryOptimizerTests { } } }; - + class NumericBase : public Base { public: - NumericBase(){ + NumericBase() { o = BSON( "min" << -numeric_limits<double>::max() << "max" << numeric_limits<double>::max() ); } - + virtual BSONElement lower() { return o["min"]; } virtual BSONElement upper() { return o["max"]; } private: @@ -81,7 +81,7 @@ namespace QueryOptimizerTests { class Empty : public Base { virtual BSONObj query() { return BSONObj(); } }; - + class Eq : public Base { public: Eq() : o_( BSON( "a" << 1 ) ) {} @@ -94,7 +94,7 @@ namespace QueryOptimizerTests { class DupEq : public Eq { public: virtual BSONObj query() { return BSON( "a" << 1 << "b" << 2 << "a" << 1 ); } - }; + }; class Lt : public NumericBase { public: @@ -103,13 +103,13 @@ namespace
QueryOptimizerTests { virtual BSONElement upper() { return o_.firstElement(); } virtual bool upperInclusive() { return false; } BSONObj o_; - }; + }; class Lte : public Lt { - virtual BSONObj query() { return BSON( "a" << LTE << 1 ); } + virtual BSONObj query() { return BSON( "a" << LTE << 1 ); } virtual bool upperInclusive() { return true; } }; - + class Gt : public NumericBase { public: Gt() : o_( BSON( "-" << 1 ) ) {} @@ -117,23 +117,23 @@ namespace QueryOptimizerTests { virtual BSONElement lower() { return o_.firstElement(); } virtual bool lowerInclusive() { return false; } BSONObj o_; - }; - + }; + class Gte : public Gt { - virtual BSONObj query() { return BSON( "a" << GTE << 1 ); } + virtual BSONObj query() { return BSON( "a" << GTE << 1 ); } virtual bool lowerInclusive() { return true; } }; - + class TwoLt : public Lt { - virtual BSONObj query() { return BSON( "a" << LT << 1 << LT << 5 ); } + virtual BSONObj query() { return BSON( "a" << LT << 1 << LT << 5 ); } }; class TwoGt : public Gt { - virtual BSONObj query() { return BSON( "a" << GT << 0 << GT << 1 ); } - }; + virtual BSONObj query() { return BSON( "a" << GT << 0 << GT << 1 ); } + }; class EqGte : public Eq { - virtual BSONObj query() { return BSON( "a" << 1 << "a" << GTE << 1 ); } + virtual BSONObj query() { return BSON( "a" << 1 << "a" << GTE << 1 ); } }; class EqGteInvalid { @@ -142,7 +142,7 @@ namespace QueryOptimizerTests { FieldRangeSet fbs( "ns", BSON( "a" << 1 << "a" << GTE << 2 ) ); ASSERT( !fbs.matchPossible() ); } - }; + }; struct RegexBase : Base { void run() { //need to only look at first interval @@ -166,7 +166,7 @@ namespace QueryOptimizerTests { virtual BSONElement upper() { return o2_.firstElement(); } virtual bool upperInclusive() { return false; } BSONObj o1_, o2_; - }; + }; class RegexObj : public RegexBase { public: @@ -177,7 +177,7 @@ namespace QueryOptimizerTests { virtual bool upperInclusive() { return false; } BSONObj o1_, o2_; }; - + class UnhelpfulRegex : public RegexBase { public: UnhelpfulRegex() { @@ -191,13 +191,13 @@ namespace QueryOptimizerTests { BSONObjBuilder b; b.appendRegex( "a", "abc" ); return b.obj(); - } + } virtual BSONElement lower() { return limits["lower"]; } virtual BSONElement upper() { return limits["upper"]; } virtual bool upperInclusive() { return false; } BSONObj limits; }; - + class In : public Base { public: In() : o1_( BSON( "-" << -3 ) ), o2_( BSON( "-" << 44 ) ) {} @@ -219,7 +219,7 @@ namespace QueryOptimizerTests { virtual BSONElement upper() { return o2_.firstElement(); } BSONObj o1_, o2_; }; - + class Equality { public: void run() { @@ -237,7 +237,7 @@ namespace QueryOptimizerTests { ASSERT( !s6.range( "a" ).equality() ); } }; - + class SimplifiedQuery { public: void run() { @@ -251,7 +251,7 @@ namespace QueryOptimizerTests { ASSERT( !simple.getObjectField( "e" ).woCompare( fromjson( "{$gte:0,$lte:10}" ) ) ); } }; - + class QueryPatternTest { public: void run() { @@ -277,14 +277,14 @@ namespace QueryOptimizerTests { return FieldRangeSet( "", query ).pattern( sort ); } }; - + class NoWhere { public: void run() { ASSERT_EQUALS( 0, FieldRangeSet( "ns", BSON( "$where" << 1 ) ).nNontrivialRanges() ); } }; - + class Numeric { public: void run() { @@ -311,29 +311,39 @@ namespace QueryOptimizerTests { ASSERT( f.range( "a" ).max().woCompare( BSON( "a" << 3.0 ).firstElement(), false ) == 0 ); } }; - - class MultiBound { - public: - void run() { + + class UnionBound { + public: + void run() { + FieldRangeSet frs( "", fromjson( "{a:{$gt:1,$lt:9},b:{$gt:9,$lt:12}}" ) ); + 
FieldRange ret = frs.range( "a" ); + ret |= frs.range( "b" ); + ASSERT_EQUALS( 2U, ret.intervals().size() ); + } + }; + + class MultiBound { + public: + void run() { FieldRangeSet frs1( "", fromjson( "{a:{$in:[1,3,5,7,9]}}" ) ); FieldRangeSet frs2( "", fromjson( "{a:{$in:[2,3,5,8,9]}}" ) ); - FieldRange fr1 = frs1.range( "a" ); - FieldRange fr2 = frs2.range( "a" ); - fr1 &= fr2; + FieldRange fr1 = frs1.range( "a" ); + FieldRange fr2 = frs2.range( "a" ); + fr1 &= fr2; ASSERT( fr1.min().woCompare( BSON( "a" << 3.0 ).firstElement(), false ) == 0 ); ASSERT( fr1.max().woCompare( BSON( "a" << 9.0 ).firstElement(), false ) == 0 ); - vector< FieldInterval > intervals = fr1.intervals(); - vector< FieldInterval >::const_iterator j = intervals.begin(); - double expected[] = { 3, 5, 9 }; - for( int i = 0; i < 3; ++i, ++j ) { - ASSERT_EQUALS( expected[ i ], j->_lower._bound.number() ); - ASSERT( j->_lower._inclusive ); - ASSERT( j->_lower == j->_upper ); - } - ASSERT( j == intervals.end() ); - } - }; - + vector< FieldInterval > intervals = fr1.intervals(); + vector< FieldInterval >::const_iterator j = intervals.begin(); + double expected[] = { 3, 5, 9 }; + for( int i = 0; i < 3; ++i, ++j ) { + ASSERT_EQUALS( expected[ i ], j->_lower._bound.number() ); + ASSERT( j->_lower._inclusive ); + ASSERT( j->_lower == j->_upper ); + } + ASSERT( j == intervals.end() ); + } + }; + class DiffBase { public: virtual ~DiffBase() {} @@ -341,7 +351,7 @@ namespace QueryOptimizerTests { FieldRangeSet frs( "", fromjson( obj().toString() ) ); FieldRange ret = frs.range( "a" ); ret -= frs.range( "b" ); - check( ret ); + check( ret ); } protected: void check( const FieldRange &fr ) { @@ -366,7 +376,7 @@ namespace QueryOptimizerTests { class TwoRangeBase : public DiffBase { public: TwoRangeBase( string obj, int low, int high, bool lowI, bool highI ) - : _obj( obj ) { + : _obj( obj ) { _n[ 0 ] = low; _n[ 1 ] = high; _b[ 0 ] = lowI; @@ -381,7 +391,7 @@ namespace QueryOptimizerTests { int _n[ 2 ]; bool _b[ 2 ]; }; - + struct Diff1 : public TwoRangeBase { Diff1() : TwoRangeBase( "{a:{$gt:1,$lt:2},b:{$gt:3,$lt:4}}", 1, 2, false, false ) {} }; @@ -389,7 +399,7 @@ namespace QueryOptimizerTests { struct Diff2 : public TwoRangeBase { Diff2() : TwoRangeBase( "{a:{$gt:1,$lt:2},b:{$gt:2,$lt:4}}", 1, 2, false, false ) {} }; - + struct Diff3 : public TwoRangeBase { Diff3() : TwoRangeBase( "{a:{$gt:1,$lte:2},b:{$gt:2,$lt:4}}", 1, 2, false, true ) {} }; @@ -397,11 +407,11 @@ namespace QueryOptimizerTests { struct Diff4 : public TwoRangeBase { Diff4() : TwoRangeBase( "{a:{$gt:1,$lt:2},b:{$gte:2,$lt:4}}", 1, 2, false, false) {} }; - + struct Diff5 : public TwoRangeBase { Diff5() : TwoRangeBase( "{a:{$gt:1,$lte:2},b:{$gte:2,$lt:4}}", 1, 2, false, false) {} }; - + struct Diff6 : public TwoRangeBase { Diff6() : TwoRangeBase( "{a:{$gt:1,$lte:3},b:{$gte:2,$lt:4}}", 1, 2, false, false) {} }; @@ -409,7 +419,7 @@ namespace QueryOptimizerTests { struct Diff7 : public TwoRangeBase { Diff7() : TwoRangeBase( "{a:{$gt:1,$lte:3},b:{$gt:2,$lt:4}}", 1, 2, false, true) {} }; - + struct Diff8 : public TwoRangeBase { Diff8() : TwoRangeBase( "{a:{$gt:1,$lt:4},b:{$gt:2,$lt:4}}", 1, 2, false, true) {} }; @@ -420,22 +430,45 @@ namespace QueryOptimizerTests { struct Diff10 : public TwoRangeBase { Diff10() : TwoRangeBase( "{a:{$gt:1,$lte:4},b:{$gt:2,$lte:4}}", 1, 2, false, true) {} - }; - - struct Diff11 : public TwoRangeBase { - Diff11() : TwoRangeBase( "{a:{$gt:1,$lte:4},b:{$gt:2,$lt:4}}", 1, 4, false, true) {} }; - struct Diff12 : public TwoRangeBase { - Diff12() : 
TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:2,$lt:4}}", 1, 5, false, false) {} + class SplitRangeBase : public DiffBase { + public: + SplitRangeBase( string obj, int low1, bool low1I, int high1, bool high1I, int low2, bool low2I, int high2, bool high2I ) + : _obj( obj ) { + _n[ 0 ] = low1; + _n[ 1 ] = high1; + _n[ 2 ] = low2; + _n[ 3 ] = high2; + _b[ 0 ] = low1I; + _b[ 1 ] = high1I; + _b[ 2 ] = low2I; + _b[ 3 ] = high2I; + } + private: + virtual unsigned len() const { return 2; } + virtual const int *nums() const { return _n; } + virtual const bool *incs() const { return _b; } + virtual BSONObj obj() const { return fromjson( _obj ); } + string _obj; + int _n[ 4 ]; + bool _b[ 4 ]; + }; + + struct Diff11 : public SplitRangeBase { + Diff11() : SplitRangeBase( "{a:{$gt:1,$lte:4},b:{$gt:2,$lt:4}}", 1, false, 2, true, 4, true, 4, true) {} + }; + + struct Diff12 : public SplitRangeBase { + Diff12() : SplitRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:2,$lt:4}}", 1, false, 2, true, 4, true, 5, false) {} }; - + struct Diff13 : public TwoRangeBase { Diff13() : TwoRangeBase( "{a:{$gt:1,$lt:5},b:{$gt:1,$lt:4}}", 4, 5, true, false) {} }; - - struct Diff14 : public TwoRangeBase { - Diff14() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:{$gt:1,$lt:4}}", 1, 5, true, false) {} + + struct Diff14 : public SplitRangeBase { + Diff14() : SplitRangeBase( "{a:{$gte:1,$lt:5},b:{$gt:1,$lt:4}}", 1, true, 1, true, 4, true, 5, false) {} }; struct Diff15 : public TwoRangeBase { @@ -481,7 +514,7 @@ namespace QueryOptimizerTests { struct Diff25 : public TwoRangeBase { Diff25() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:0}", 1, 5, true, true) {} }; - + struct Diff26 : public TwoRangeBase { Diff26() : TwoRangeBase( "{a:{$gt:1,$lte:5},b:1}", 1, 5, false, true) {} }; @@ -490,14 +523,14 @@ namespace QueryOptimizerTests { Diff27() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:1}", 1, 5, false, true) {} }; - struct Diff28 : public TwoRangeBase { - Diff28() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:3}", 1, 5, true, true) {} + struct Diff28 : public SplitRangeBase { + Diff28() : SplitRangeBase( "{a:{$gte:1,$lte:5},b:3}", 1, true, 3, false, 3, false, 5, true) {} }; struct Diff29 : public TwoRangeBase { Diff29() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:5}", 1, 5, true, false) {} }; - + struct Diff30 : public TwoRangeBase { Diff30() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:5}", 1, 5, true, false) {} }; @@ -505,7 +538,7 @@ namespace QueryOptimizerTests { struct Diff31 : public TwoRangeBase { Diff31() : TwoRangeBase( "{a:{$gte:1,$lt:5},b:6}", 1, 5, true, false) {} }; - + struct Diff32 : public TwoRangeBase { Diff32() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:6}", 1, 5, true, true) {} }; @@ -513,7 +546,7 @@ namespace QueryOptimizerTests { class EmptyBase : public DiffBase { public: EmptyBase( string obj ) - : _obj( obj ) {} + : _obj( obj ) {} private: virtual unsigned len() const { return 0; } virtual const int *nums() const { return 0; } @@ -521,7 +554,7 @@ namespace QueryOptimizerTests { virtual BSONObj obj() const { return fromjson( _obj ); } string _obj; }; - + struct Diff33 : public EmptyBase { Diff33() : EmptyBase( "{a:{$gte:1,$lte:5},b:{$gt:0,$lt:6}}" ) {} }; @@ -553,7 +586,7 @@ namespace QueryOptimizerTests { struct Diff40 : public EmptyBase { Diff40() : EmptyBase( "{a:{$gt:1,$lte:5},b:{$gt:0,$lte:5}}" ) {} }; - + struct Diff41 : public TwoRangeBase { Diff41() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:0,$lt:5}}", 5, 5, true, true ) {} }; @@ -606,8 +639,8 @@ namespace QueryOptimizerTests { Diff53() : EmptyBase( "{a:{$gte:1,$lt:5},b:{$gte:1,$lte:5}}" ) {} }; - struct Diff54 : 
public TwoRangeBase { - Diff54() : TwoRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:1,$lt:5}}", 1, 5, true, true ) {} + struct Diff54 : public SplitRangeBase { + Diff54() : SplitRangeBase( "{a:{$gte:1,$lte:5},b:{$gt:1,$lt:5}}", 1, true, 1, true, 5, true, 5, true ) {} }; struct Diff55 : public TwoRangeBase { @@ -621,7 +654,7 @@ namespace QueryOptimizerTests { struct Diff57 : public EmptyBase { Diff57() : EmptyBase( "{a:{$gte:1,$lte:5},b:{$gte:1,$lte:5}}" ) {} }; - + struct Diff58 : public TwoRangeBase { Diff58() : TwoRangeBase( "{a:1,b:{$gt:1,$lt:5}}", 1, 1, true, true ) {} }; @@ -645,7 +678,11 @@ namespace QueryOptimizerTests { struct Diff63 : public EmptyBase { Diff63() : EmptyBase( "{a:5,b:5}" ) {} }; - + + struct Diff64 : public TwoRangeBase { + Diff64() : TwoRangeBase( "{a:{$gte:1,$lte:2},b:{$gt:0,$lte:1}}", 1, 2, false, true ) {} + }; + class DiffMulti1 : public DiffBase { public: void run() { @@ -656,12 +693,12 @@ namespace QueryOptimizerTests { other |= frs.range( "d" ); other |= frs.range( "e" ); ret -= other; - check( ret ); + check( ret ); } protected: - virtual unsigned len() const { return 1; } - virtual const int *nums() const { static int n[] = { 2, 7 }; return n; } - virtual const bool *incs() const { static bool b[] = { true, true }; return b; } + virtual unsigned len() const { return 3; } + virtual const int *nums() const { static int n[] = { 2, 3, 3, 4, 5, 7 }; return n; } + virtual const bool *incs() const { static bool b[] = { true, false, false, true, true, true }; return b; } virtual BSONObj obj() const { return BSONObj(); } }; @@ -675,7 +712,7 @@ namespace QueryOptimizerTests { ret |= frs.range( "d" ); ret |= frs.range( "e" ); ret -= mask; - check( ret ); + check( ret ); } protected: virtual unsigned len() const { return 2; } @@ -683,7 +720,7 @@ namespace QueryOptimizerTests { virtual const bool *incs() const { static bool b[] = { false, true, true, false }; return b; } virtual BSONObj obj() const { return BSONObj(); } }; - + class SetIntersect { public: void run() { @@ -693,9 +730,9 @@ namespace QueryOptimizerTests { ASSERT_EQUALS( fromjson( "{a:1,b:5,c:7,d:{$gte:8,$lte:9},e:10}" ), frs1.simplifiedQuery( BSONObj() ) ); } }; - + } // namespace FieldRangeTests - + namespace QueryPlanTests { class Base { public: @@ -742,23 +779,25 @@ namespace QueryOptimizerTests { static DBDirectClient client_; }; DBDirectClient Base::client_; - + // There's a limit of 10 indexes total, make sure not to exceed this in a given test. 
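For readability of the tests that follow, here is a sketch of what a typical constructor call expands to once the FBS and FBS2 helpers defined just below are substituted (the query { a: 4 } is only an example): each macro builds a fresh FieldRangeSet over the same query, parks it in the corresponding FieldRangeSet_GLOBAL auto_ptr so it outlives the expression, and yields a reference that is passed to QueryPlan alongside the query and order specs.

    // QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << 4 ) ), FBS2( BSON( "a" << 4 ) ),
    //              BSON( "a" << 4 ), BSONObj() );
    // ...is roughly equivalent to:
    // FieldRangeSet_GLOBAL.reset( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) );
    // FieldRangeSet_GLOBAL2.reset( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) );
    // QueryPlan p( nsd(), INDEXNO( "a" << 1 ), *FieldRangeSet_GLOBAL, *FieldRangeSet_GLOBAL2,
    //              BSON( "a" << 4 ), BSONObj() );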
#define INDEXNO(x) nsd()->idxNo( *this->index( BSON(x) ) ) #define INDEX(x) this->index( BSON(x) ) auto_ptr< FieldRangeSet > FieldRangeSet_GLOBAL; #define FBS(x) ( FieldRangeSet_GLOBAL.reset( new FieldRangeSet( ns(), x ) ), *FieldRangeSet_GLOBAL ) - + auto_ptr< FieldRangeSet > FieldRangeSet_GLOBAL2; +#define FBS2(x) ( FieldRangeSet_GLOBAL2.reset( new FieldRangeSet( ns(), x ) ), *FieldRangeSet_GLOBAL2 ) + class NoIndex : public Base { public: void run() { - QueryPlan p( nsd(), -1, FBS( BSONObj() ), BSONObj(), BSONObj() ); + QueryPlan p( nsd(), -1, FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSONObj() ); ASSERT( !p.optimal() ); ASSERT( !p.scanAndOrderRequired() ); ASSERT( !p.exactKeyMatch() ); } }; - + class SimpleOrder : public Base { public: void run() { @@ -768,43 +807,43 @@ namespace QueryOptimizerTests { BSONObjBuilder b2; b2.appendMaxKey( "" ); BSONObj end = b2.obj(); - - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p.scanAndOrderRequired() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << 1 ) ); ASSERT( !p2.scanAndOrderRequired() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "b" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "b" << 1 ) ); ASSERT( p3.scanAndOrderRequired() ); ASSERT( !startKey( p3 ).woCompare( start ) ); ASSERT( !endKey( p3 ).woCompare( end ) ); } }; - + class MoreIndexThanNeeded : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); - ASSERT( !p.scanAndOrderRequired() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + ASSERT( !p.scanAndOrderRequired() ); } }; - + class IndexSigns : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << -1 ) , FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); - ASSERT( !p.scanAndOrderRequired() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << -1 ) , FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); + ASSERT( !p.scanAndOrderRequired() ); ASSERT_EQUALS( 1, p.direction() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); - ASSERT( p2.scanAndOrderRequired() ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); + ASSERT( p2.scanAndOrderRequired() ); ASSERT_EQUALS( 0, p2.direction() ); - QueryPlan p3( nsd(), indexno( id_obj ), FBS( BSONObj() ), BSONObj(), BSON( "_id" << 1 ) ); + QueryPlan p3( nsd(), indexno( id_obj ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "_id" << 1 ) ); ASSERT( !p3.scanAndOrderRequired() ); ASSERT_EQUALS( 1, p3.direction() ); - } + } }; - + class IndexReverse : public Base { public: void run() { @@ -816,18 +855,18 @@ namespace QueryOptimizerTests { b2.appendMaxKey( "" ); b2.appendMinKey( "" ); BSONObj end = b2.obj(); - QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ),FBS( 
BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); - ASSERT( !p.scanAndOrderRequired() ); + QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ),FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 << "b" << -1 ) ); + ASSERT( !p.scanAndOrderRequired() ); ASSERT_EQUALS( -1, p.direction() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << -1 << "b" << -1 ) ); - ASSERT( !p2.scanAndOrderRequired() ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << -1 << "b" << -1 ) ); + ASSERT( !p2.scanAndOrderRequired() ); ASSERT_EQUALS( -1, p2.direction() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << -1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << -1 << "b" << -1 ) ); - ASSERT( p3.scanAndOrderRequired() ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << -1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << -1 << "b" << -1 ) ); + ASSERT( p3.scanAndOrderRequired() ); ASSERT_EQUALS( 0, p3.direction() ); - } + } }; class NoOrder : public Base { @@ -841,143 +880,143 @@ namespace QueryOptimizerTests { b2.append( "", 3 ); b2.appendMaxKey( "" ); BSONObj end = b2.obj(); - QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), BSON( "a" << 3 ), BSONObj() ); - ASSERT( !p.scanAndOrderRequired() ); + QueryPlan p( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), FBS2( BSON( "a" << 3 ) ), BSON( "a" << 3 ), BSONObj() ); + ASSERT( !p.scanAndOrderRequired() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - QueryPlan p2( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), BSON( "a" << 3 ), BSONObj() ); - ASSERT( !p2.scanAndOrderRequired() ); + QueryPlan p2( nsd(), INDEXNO( "a" << -1 << "b" << 1 ), FBS( BSON( "a" << 3 ) ), FBS2( BSON( "a" << 3 ) ), BSON( "a" << 3 ), BSONObj() ); + ASSERT( !p2.scanAndOrderRequired() ); ASSERT( !startKey( p ).woCompare( start ) ); ASSERT( !endKey( p ).woCompare( end ) ); - } + } }; - + class EqualWithOrder : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 4 ) ), BSON( "a" << 4 ), BSON( "b" << 1 ) ); - ASSERT( !p.scanAndOrderRequired() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "b" << 4 ) ), BSON( "b" << 4 ), BSON( "a" << 1 << "c" << 1 ) ); - ASSERT( !p2.scanAndOrderRequired() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 4 ) ), BSON( "b" << 4 ), BSON( "a" << 1 << "c" << 1 ) ); - ASSERT( p3.scanAndOrderRequired() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 4 ) ), FBS2( BSON( "a" << 4 ) ), BSON( "a" << 4 ), BSON( "b" << 1 ) ); + ASSERT( !p.scanAndOrderRequired() ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "b" << 4 ) ), FBS2( BSON( "b" << 4 ) ), BSON( "b" << 4 ), BSON( "a" << 1 << "c" << 1 ) ); + ASSERT( !p2.scanAndOrderRequired() ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 4 ) ), FBS2( BSON( "b" << 4 ) ), BSON( "b" << 4 ), BSON( "a" << 1 << "c" << 1 ) ); + ASSERT( p3.scanAndOrderRequired() ); } }; - + class Optimal : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), 
BSON( "a" << 1 ) ); ASSERT( p.optimal() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( p2.optimal() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSON( "a" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), FBS2( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSON( "a" << 1 ) ); ASSERT( p3.optimal() ); - QueryPlan p4( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSON( "a" << 1 ) ); + QueryPlan p4( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), FBS2( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSON( "a" << 1 ) ); ASSERT( !p4.optimal() ); - QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSON( "b" << 1 ) ); + QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 ) ), FBS2( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSON( "b" << 1 ) ); ASSERT( p5.optimal() ); - QueryPlan p6( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSON( "b" << 1 ) ); + QueryPlan p6( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), FBS2( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSON( "b" << 1 ) ); ASSERT( !p6.optimal() ); - QueryPlan p7( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << 1 ) ), BSON( "a" << 1 << "b" << 1 ), BSON( "a" << 1 ) ); + QueryPlan p7( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << 1 ) ), FBS2( BSON( "a" << 1 << "b" << 1 ) ), BSON( "a" << 1 << "b" << 1 ), BSON( "a" << 1 ) ); ASSERT( p7.optimal() ); - QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSON( "a" << 1 ) ); + QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), FBS2( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSON( "a" << 1 ) ); ASSERT( p8.optimal() ); - QueryPlan p9( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSON( "a" << 1 ) ); + QueryPlan p9( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), FBS2( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSON( "a" << 1 ) ); ASSERT( p9.optimal() ); } }; - + class MoreOptimal : public Base { public: void run() { - QueryPlan p10( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSONObj() ); - ASSERT( p10.optimal() ); - QueryPlan p11( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSONObj() ); - ASSERT( p11.optimal() ); - QueryPlan p12( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), BSON( "a" << LT << 1 ), BSONObj() ); - ASSERT( p12.optimal() ); - QueryPlan p13( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), BSON( "a" << LT << 1 ), BSON( "a" << 1 ) ); - ASSERT( p13.optimal() ); + QueryPlan p10( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << 1 ) ), FBS2( BSON( "a" << 1 ) ), BSON( "a" << 1 ), BSONObj() ); + ASSERT( p10.optimal() ); + QueryPlan p11( nsd(), INDEXNO( "a" << 1 << "b" << 1 << 
"c" << 1 ), FBS( BSON( "a" << 1 << "b" << LT << 1 ) ), FBS2( BSON( "a" << 1 << "b" << LT << 1 ) ), BSON( "a" << 1 << "b" << LT << 1 ), BSONObj() ); + ASSERT( p11.optimal() ); + QueryPlan p12( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), FBS2( BSON( "a" << LT << 1 ) ), BSON( "a" << LT << 1 ), BSONObj() ); + ASSERT( p12.optimal() ); + QueryPlan p13( nsd(), INDEXNO( "a" << 1 << "b" << 1 << "c" << 1 ), FBS( BSON( "a" << LT << 1 ) ), FBS2( BSON( "a" << LT << 1 ) ), BSON( "a" << LT << 1 ), BSON( "a" << 1 ) ); + ASSERT( p13.optimal() ); } }; - + class KeyMatch : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p.exactKeyMatch() ); - QueryPlan p2( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p2.exactKeyMatch() ); - QueryPlan p3( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSON( "b" << "z" ) ), BSON( "b" << "z" ), BSON( "a" << 1 ) ); + QueryPlan p3( nsd(), INDEXNO( "b" << 1 << "a" << 1 ), FBS( BSON( "b" << "z" ) ), FBS2( BSON( "b" << "z" ) ), BSON( "b" << "z" ), BSON( "a" << 1 ) ); ASSERT( !p3.exactKeyMatch() ); - QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), BSON( "c" << "y" << "b" << "z" ), BSON( "a" << 1 ) ); + QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), FBS2( BSON( "c" << "y" << "b" << "z" ) ), BSON( "c" << "y" << "b" << "z" ), BSON( "a" << 1 ) ); ASSERT( !p4.exactKeyMatch() ); - QueryPlan p5( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), BSON( "c" << "y" << "b" << "z" ), BSONObj() ); + QueryPlan p5( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << "y" << "b" << "z" ) ), FBS2( BSON( "c" << "y" << "b" << "z" ) ), BSON( "c" << "y" << "b" << "z" ), BSONObj() ); ASSERT( !p5.exactKeyMatch() ); - QueryPlan p6( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << LT << "y" << "b" << GT << "z" ) ), BSON( "c" << LT << "y" << "b" << GT << "z" ), BSONObj() ); + QueryPlan p6( nsd(), INDEXNO( "b" << 1 << "a" << 1 << "c" << 1 ), FBS( BSON( "c" << LT << "y" << "b" << GT << "z" ) ), FBS2( BSON( "c" << LT << "y" << "b" << GT << "z" ) ), BSON( "c" << LT << "y" << "b" << GT << "z" ), BSONObj() ); ASSERT( !p6.exactKeyMatch() ); - QueryPlan p7( nsd(), INDEXNO( "b" << 1 ), FBS( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); + QueryPlan p7( nsd(), INDEXNO( "b" << 1 ), FBS( BSONObj() ), FBS2( BSONObj() ), BSONObj(), BSON( "a" << 1 ) ); ASSERT( !p7.exactKeyMatch() ); - QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << "y" << "a" << "z" ) ), BSON( "b" << "y" << "a" << "z" ), BSONObj() ); + QueryPlan p8( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << "y" << "a" << "z" ) ), FBS2( BSON( "b" << "y" << "a" << "z" ) ), BSON( "b" << "y" << "a" << "z" ), BSONObj() ); ASSERT( p8.exactKeyMatch() ); - QueryPlan p9( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "z" ) ), BSON( "a" << "z" ), BSON( "a" << 1 ) ); + QueryPlan p9( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "z" ) ), FBS2( BSON( "a" << "z" ) ), BSON( "a" << "z" ), BSON( "a" << 1 ) ); ASSERT( p9.exactKeyMatch() ); } }; - + class 
MoreKeyMatch : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "r" << "b" << NE << "q" ) ), BSON( "a" << "r" << "b" << NE << "q" ), BSON( "a" << 1 ) ); - ASSERT( !p.exactKeyMatch() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "r" << "b" << NE << "q" ) ), FBS2( BSON( "a" << "r" << "b" << NE << "q" ) ), BSON( "a" << "r" << "b" << NE << "q" ), BSON( "a" << 1 ) ); + ASSERT( !p.exactKeyMatch() ); } }; - + class ExactKeyQueryTypes : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "b" ) ), BSON( "a" << "b" ), BSONObj() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << "b" ) ), FBS2( BSON( "a" << "b" ) ), BSON( "a" << "b" ), BSONObj() ); ASSERT( p.exactKeyMatch() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << 4 ) ), BSON( "a" << 4 ), BSONObj() ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << 4 ) ), FBS2( BSON( "a" << 4 ) ), BSON( "a" << 4 ), BSONObj() ); ASSERT( !p2.exactKeyMatch() ); - QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << BSON( "c" << "d" ) ) ), BSON( "a" << BSON( "c" << "d" ) ), BSONObj() ); + QueryPlan p3( nsd(), INDEXNO( "a" << 1 ), FBS( BSON( "a" << BSON( "c" << "d" ) ) ), FBS2( BSON( "a" << BSON( "c" << "d" ) ) ), BSON( "a" << BSON( "c" << "d" ) ), BSONObj() ); ASSERT( !p3.exactKeyMatch() ); BSONObjBuilder b; b.appendRegex( "a", "^ddd" ); BSONObj q = b.obj(); - QueryPlan p4( nsd(), INDEXNO( "a" << 1 ), FBS( q ), q, BSONObj() ); + QueryPlan p4( nsd(), INDEXNO( "a" << 1 ), FBS( q ), FBS2( q ), q, BSONObj() ); ASSERT( !p4.exactKeyMatch() ); - QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << "z" << "b" << 4 ) ), BSON( "a" << "z" << "b" << 4 ), BSONObj() ); + QueryPlan p5( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "a" << "z" << "b" << 4 ) ), FBS2( BSON( "a" << "z" << "b" << 4 ) ), BSON( "a" << "z" << "b" << 4 ), BSONObj() ); ASSERT( !p5.exactKeyMatch() ); } }; - + class Unhelpful : public Base { public: void run() { - QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSONObj() ); + QueryPlan p( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 ) ), FBS2( BSON( "b" << 1 ) ), BSON( "b" << 1 ), BSONObj() ); ASSERT( !p.range( "a" ).nontrivial() ); ASSERT( p.unhelpful() ); - QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), BSON( "b" << 1 << "c" << 1 ), BSON( "a" << 1 ) ); + QueryPlan p2( nsd(), INDEXNO( "a" << 1 << "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), FBS2( BSON( "b" << 1 << "c" << 1 ) ), BSON( "b" << 1 << "c" << 1 ), BSON( "a" << 1 ) ); ASSERT( !p2.scanAndOrderRequired() ); ASSERT( !p2.range( "a" ).nontrivial() ); ASSERT( !p2.unhelpful() ); - QueryPlan p3( nsd(), INDEXNO( "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), BSON( "b" << 1 << "c" << 1 ), BSONObj() ); + QueryPlan p3( nsd(), INDEXNO( "b" << 1 ), FBS( BSON( "b" << 1 << "c" << 1 ) ), FBS2( BSON( "b" << 1 << "c" << 1 ) ), BSON( "b" << 1 << "c" << 1 ), BSONObj() ); ASSERT( p3.range( "b" ).nontrivial() ); ASSERT( !p3.unhelpful() ); - QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "c" << 1 ), FBS( BSON( "c" << 1 << "d" << 1 ) ), BSON( "c" << 1 << "d" << 1 ), BSONObj() ); + QueryPlan p4( nsd(), INDEXNO( "b" << 1 << "c" << 1 ), FBS( BSON( "c" << 1 << "d" << 1 ) ), FBS2( BSON( "c" << 1 << "d" << 1 ) ), BSON( "c" << 1 << "d" << 1 ), BSONObj() ); ASSERT( !p4.range( "b" ).nontrivial() ); ASSERT( p4.unhelpful() ); } }; - + } // 
namespace QueryPlanTests namespace QueryPlanSetTests { class Base { public: - Base() : _context( ns() ){ + Base() : _context( ns() ) { string err; userCreateNS( ns(), BSONObj(), err, false ); } @@ -1000,7 +1039,7 @@ namespace QueryOptimizerTests { if ( fieldsToReturn ) fieldsToReturn->appendSelfToBufBuilder(b); toSend.setData(dbQuery, b.buf(), b.len()); - } + } protected: static const char *ns() { return "unittests.QueryPlanSetTests"; } static NamespaceDetails *nsd() { return nsdetails( ns() ); } @@ -1008,24 +1047,26 @@ namespace QueryOptimizerTests { dblock lk_; Client::Context _context; }; - + class NoIndexes : public Base { public: void run() { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 1, s.nPlans() ); } }; - + class Optimal : public Base { public: void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "b_2" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSONObj() ); - ASSERT_EQUALS( 1, s.nPlans() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSONObj() ); + ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -1035,7 +1076,8 @@ namespace QueryOptimizerTests { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 3, s.nPlans() ); } }; @@ -1046,11 +1088,12 @@ namespace QueryOptimizerTests { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSONObj() ) ); - QueryPlanSet s( ns(), frs, BSONObj(), BSONObj() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSONObj(), BSONObj() ); ASSERT_EQUALS( 1, s.nPlans() ); } }; - + class HintSpec : public Base { public: void run() { @@ -1059,8 +1102,9 @@ namespace QueryOptimizerTests { BSONObj b = BSON( "hint" << BSON( "a" << 1 ) ); BSONElement e = b.firstElement(); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); - ASSERT_EQUALS( 1, s.nPlans() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); + ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -1072,11 +1116,12 @@ namespace QueryOptimizerTests { BSONObj b = BSON( "hint" << "a_1" ); BSONElement e = b.firstElement(); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); - ASSERT_EQUALS( 1, s.nPlans() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); + ASSERT_EQUALS( 1, s.nPlans() ); } }; - + class NaturalHint : 
public Base { public: void run() { @@ -1085,8 +1130,9 @@ namespace QueryOptimizerTests { BSONObj b = BSON( "hint" << BSON( "$natural" << 1 ) ); BSONElement e = b.firstElement(); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); - ASSERT_EQUALS( 1, s.nPlans() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ); + ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -1096,7 +1142,8 @@ namespace QueryOptimizerTests { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "b_2" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "$natural" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 1 ), BSON( "$natural" << 1 ) ); ASSERT_EQUALS( 1, s.nPlans() ); } }; @@ -1107,11 +1154,12 @@ namespace QueryOptimizerTests { BSONObj b = BSON( "hint" << "a_1" ); BSONElement e = b.firstElement(); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 ) ) ); - ASSERT_EXCEPTION( QueryPlanSet s( ns(), frs, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ), - AssertionException ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + ASSERT_EXCEPTION( QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 1 ), BSON( "b" << 1 ), &e ), + AssertionException ); } }; - + class Count : public Base { public: void run() { @@ -1136,7 +1184,7 @@ namespace QueryOptimizerTests { ASSERT_EQUALS( 0, runCount( ns(), BSON( "query" << BSON( "a" << GT << 0 << LT << -1 ) ), err ) ); } }; - + class QueryMissingNs : public Base { public: QueryMissingNs() { log() << "querymissingns starts" << endl; } @@ -1154,25 +1202,27 @@ namespace QueryOptimizerTests { } }; - + class UnhelpfulIndex : public Base { public: void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 1 << "c" << 2 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 1 << "c" << 2 ), BSONObj() ); - ASSERT_EQUALS( 2, s.nPlans() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 1 << "c" << 2 ), BSONObj() ); + ASSERT_EQUALS( 2, s.nPlans() ); } - }; - + }; + class SingleException : public Base { public: void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" << 1 ), false, "b_1" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 3, s.nPlans() ); bool threw = false; auto_ptr< TestOp > t( new TestOp( true, threw ) ); @@ -1200,6 +1250,7 @@ namespace QueryOptimizerTests { return op; } virtual bool mayRecordPlan() const { return true; } + virtual long long nscanned() { return 0; } private: bool iThrow_; bool &threw_; @@ -1207,14 +1258,15 @@ namespace QueryOptimizerTests { mutable bool youThrow_; }; }; - + class AllException : public Base { public: void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); Helpers::ensureIndex( ns(), BSON( "b" 
<< 1 ), false, "b_1" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ASSERT_EQUALS( 3, s.nPlans() ); auto_ptr< TestOp > t( new TestOp() ); boost::shared_ptr< TestOp > done = s.runOp( *t ); @@ -1233,9 +1285,10 @@ namespace QueryOptimizerTests { return new TestOp(); } virtual bool mayRecordPlan() const { return true; } + virtual long long nscanned() { return 0; } }; }; - + class SaveGoodIndex : public Base { public: void run() { @@ -1249,7 +1302,7 @@ namespace QueryOptimizerTests { nPlans( 3 ); runQuery(); nPlans( 1 ); - + { DBDirectClient client; for( int i = 0; i < 34; ++i ) { @@ -1259,9 +1312,10 @@ namespace QueryOptimizerTests { } } nPlans( 3 ); - + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); NoRecordTestOp original; s.runOp( original ); nPlans( 3 ); @@ -1269,29 +1323,33 @@ namespace QueryOptimizerTests { BSONObj hint = fromjson( "{hint:{$natural:1}}" ); BSONElement hintElt = hint.firstElement(); auto_ptr< FieldRangeSet > frs2( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s2( ns(), frs2, BSON( "a" << 4 ), BSON( "b" << 1 ), &hintElt ); + auto_ptr< FieldRangeSet > frsOrig2( new FieldRangeSet( *frs2 ) ); + QueryPlanSet s2( ns(), frs2, frsOrig2, BSON( "a" << 4 ), BSON( "b" << 1 ), &hintElt ); TestOp newOriginal; s2.runOp( newOriginal ); nPlans( 3 ); auto_ptr< FieldRangeSet > frs3( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s3( ns(), frs3, BSON( "a" << 4 ), BSON( "b" << 1 << "c" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig3( new FieldRangeSet( *frs3 ) ); + QueryPlanSet s3( ns(), frs3, frsOrig3, BSON( "a" << 4 ), BSON( "b" << 1 << "c" << 1 ) ); TestOp newerOriginal; s3.runOp( newerOriginal ); - nPlans( 3 ); - + nPlans( 3 ); + runQuery(); nPlans( 1 ); } private: void nPlans( int n ) { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); - ASSERT_EQUALS( n, s.nPlans() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + ASSERT_EQUALS( n, s.nPlans() ); } void runQuery() { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); TestOp original; s.runOp( original ); } @@ -1305,33 +1363,37 @@ namespace QueryOptimizerTests { return new TestOp(); } virtual bool mayRecordPlan() const { return true; } + virtual long long nscanned() { return 0; } }; class NoRecordTestOp : public TestOp { virtual bool mayRecordPlan() const { return false; } virtual QueryOp *_createChild() const { return new NoRecordTestOp(); } }; - }; - + }; + class TryAllPlansOnErr : public Base { public: void run() { Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s( ns(), frs, BSON( "a" << 4 ), BSON( "b" << 1 
) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, BSON( "a" << 4 ), BSON( "b" << 1 ) ); ScanOnlyTestOp op; s.runOp( op ); ASSERT( fromjson( "{$natural:1}" ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( s.fbs().pattern( BSON( "b" << 1 ) ) ) ) == 0 ); ASSERT_EQUALS( 1, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( s.fbs().pattern( BSON( "b" << 1 ) ) ) ); - + auto_ptr< FieldRangeSet > frs2( new FieldRangeSet( ns(), BSON( "a" << 4 ) ) ); - QueryPlanSet s2( ns(), frs2, BSON( "a" << 4 ), BSON( "b" << 1 ) ); + auto_ptr< FieldRangeSet > frsOrig2( new FieldRangeSet( *frs2 ) ); + QueryPlanSet s2( ns(), frs2, frsOrig2, BSON( "a" << 4 ), BSON( "b" << 1 ) ); TestOp op2; ASSERT( s2.runOp( op2 )->complete() ); } private: class TestOp : public QueryOp { public: + TestOp() {} virtual void _init() {} virtual void next() { if ( qp().indexKey().firstElement().fieldName() == string( "$natural" ) ) @@ -1342,6 +1404,7 @@ namespace QueryOptimizerTests { return new TestOp(); } virtual bool mayRecordPlan() const { return true; } + virtual long long nscanned() { return 1; } }; class ScanOnlyTestOp : public TestOp { virtual void next() { @@ -1354,7 +1417,7 @@ namespace QueryOptimizerTests { } }; }; - + class FindOne : public Base { public: void run() { @@ -1362,12 +1425,12 @@ namespace QueryOptimizerTests { theDataFileMgr.insertWithObjMod( ns(), one ); BSONObj result; ASSERT( Helpers::findOne( ns(), BSON( "a" << 1 ), result ) ); - ASSERT_EXCEPTION( Helpers::findOne( ns(), BSON( "a" << 1 ), result, true ), AssertionException ); + ASSERT_EXCEPTION( Helpers::findOne( ns(), BSON( "a" << 1 ), result, true ), AssertionException ); Helpers::ensureIndex( ns(), BSON( "a" << 1 ), false, "a_1" ); - ASSERT( Helpers::findOne( ns(), BSON( "a" << 1 ), result, true ) ); + ASSERT( Helpers::findOne( ns(), BSON( "a" << 1 ), result, true ) ); } }; - + class Delete : public Base { public: void run() { @@ -1380,10 +1443,10 @@ namespace QueryOptimizerTests { theDataFileMgr.insertWithObjMod( ns(), one ); deleteObjects( ns(), BSON( "a" << 1 ), false ); ASSERT( BSON( "a" << 1 ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( FieldRangeSet( ns(), BSON( "a" << 1 ) ).pattern() ) ) == 0 ); - ASSERT_EQUALS( 2, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( FieldRangeSet( ns(), BSON( "a" << 1 ) ).pattern() ) ); + ASSERT_EQUALS( 1, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( FieldRangeSet( ns(), BSON( "a" << 1 ) ).pattern() ) ); } }; - + class DeleteOneScan : public Base { public: void run() { @@ -1410,7 +1473,7 @@ namespace QueryOptimizerTests { theDataFileMgr.insertWithObjMod( ns(), one ); theDataFileMgr.insertWithObjMod( ns(), two ); theDataFileMgr.insertWithObjMod( ns(), three ); - deleteObjects( ns(), BSON( "a" << GTE << 0 << "_id" << GT << 0 ), true ); + deleteObjects( ns(), BSON( "a" << GTE << 0 ), true ); for( boost::shared_ptr c = theDataFileMgr.findAll( ns() ); c->ok(); c->advance() ) ASSERT( 2 != c->current().getIntField( "_id" ) ); } @@ -1436,7 +1499,7 @@ namespace QueryOptimizerTests { runQuery( m, q); } ASSERT( BSON( "$natural" << 1 ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( FieldRangeSet( ns(), BSON( "b" << 0 << "a" << GTE << 0 ) ).pattern() ) ) == 0 ); - + Message m2; assembleRequest( ns(), QUERY( "b" << 99 << "a" << GTE << 0 ).obj, 2, 0, 0, 0, m2 ); { @@ -1444,11 +1507,11 @@ namespace QueryOptimizerTests { QueryMessage q(d); runQuery( m2, q); } - ASSERT( BSON( "a" 
<< 1 ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( FieldRangeSet( ns(), BSON( "b" << 0 << "a" << GTE << 0 ) ).pattern() ) ) == 0 ); - ASSERT_EQUALS( 2, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( FieldRangeSet( ns(), BSON( "b" << 0 << "a" << GTE << 0 ) ).pattern() ) ); + ASSERT( BSON( "a" << 1 ).woCompare( NamespaceDetailsTransient::_get( ns() ).indexForPattern( FieldRangeSet( ns(), BSON( "b" << 0 << "a" << GTE << 0 ) ).pattern() ) ) == 0 ); + ASSERT_EQUALS( 3, NamespaceDetailsTransient::_get( ns() ).nScannedForPattern( FieldRangeSet( ns(), BSON( "b" << 0 << "a" << GTE << 0 ) ).pattern() ) ); } }; - + class InQueryIntervals : public Base { public: void run() { @@ -1460,30 +1523,32 @@ namespace QueryOptimizerTests { BSONObj hint = fromjson( "{$hint:{a:1}}" ); BSONElement hintElt = hint.firstElement(); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ) ) ); - QueryPlanSet s( ns(), frs, fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj(), &hintElt ); - QueryPlan qp( nsd(), 1, s.fbs(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj(), &hintElt ); + QueryPlan qp( nsd(), 1, s.fbs(), s.originalFrs(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSONObj() ); boost::shared_ptr c = qp.newCursor(); double expected[] = { 2, 3, 6, 9 }; for( int i = 0; i < 4; ++i, c->advance() ) { ASSERT_EQUALS( expected[ i ], c->current().getField( "a" ).number() ); } ASSERT( !c->ok() ); - + // now check reverse { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ) ) ); - QueryPlanSet s( ns(), frs, fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ), &hintElt ); - QueryPlan qp( nsd(), 1, s.fbs(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ) ); + auto_ptr< FieldRangeSet > frsOrig( new FieldRangeSet( *frs ) ); + QueryPlanSet s( ns(), frs, frsOrig, fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ), &hintElt ); + QueryPlan qp( nsd(), 1, s.fbs(), s.originalFrs(), fromjson( "{a:{$in:[2,3,6,9,11]}}" ), BSON( "a" << -1 ) ); boost::shared_ptr c = qp.newCursor(); double expected[] = { 9, 6, 3, 2 }; for( int i = 0; i < 4; ++i, c->advance() ) { ASSERT_EQUALS( expected[ i ], c->current().getField( "a" ).number() ); } - ASSERT( !c->ok() ); + ASSERT( !c->ok() ); } } }; - + class EqualityThenIn : public Base { public: void run() { @@ -1494,8 +1559,8 @@ namespace QueryOptimizerTests { } BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" ); BSONElement hintElt = hint.firstElement(); - auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ) ) ); - QueryPlan qp( nsd(), 1, *frs, fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ) ) ); + QueryPlan qp( nsd(), 1, *frs, *frs, fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); boost::shared_ptr c = qp.newCursor(); double expected[] = { 2, 3, 6, 9 }; ASSERT( c->ok() ); @@ -1506,7 +1571,7 @@ namespace QueryOptimizerTests { ASSERT( !c->ok() ); } }; - + class NotEqualityThenIn : public Base { public: void run() { @@ -1518,7 +1583,7 @@ namespace QueryOptimizerTests { BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" ); BSONElement hintElt = hint.firstElement(); auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ) ) ); - QueryPlan qp( 
nsd(), 1, *frs, fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + QueryPlan qp( nsd(), 1, *frs, *frs, fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); boost::shared_ptr c = qp.newCursor(); int matches[] = { 2, 3, 6, 9 }; for( int i = 0; i < 4; ++i, c->advance() ) { @@ -1529,7 +1594,7 @@ namespace QueryOptimizerTests { }; } // namespace QueryPlanSetTests - + class Base { public: Base() : _ctx( ns() ) { @@ -1549,7 +1614,7 @@ namespace QueryOptimizerTests { dblock lk_; Client::Context _ctx; }; - + class BestGuess : public Base { public: void run() { @@ -1559,7 +1624,7 @@ namespace QueryOptimizerTests { theDataFileMgr.insertWithObjMod( ns(), temp ); temp = BSON( "b" << 1 ); theDataFileMgr.insertWithObjMod( ns(), temp ); - + boost::shared_ptr< Cursor > c = bestGuessCursor( ns(), BSON( "b" << 1 ), BSON( "a" << 1 ) ); ASSERT_EQUALS( string( "a" ), c->indexKeyPattern().firstElement().fieldName() ); c = bestGuessCursor( ns(), BSON( "a" << 1 ), BSON( "b" << 1 ) ); @@ -1568,22 +1633,22 @@ namespace QueryOptimizerTests { ASSERT_EQUALS( string( "a" ), m->sub_c()->indexKeyPattern().firstElement().fieldName() ); m = dynamic_pointer_cast< MultiCursor >( bestGuessCursor( ns(), fromjson( "{a:1,$or:[{y:1}]}" ), BSON( "b" << 1 ) ) ); ASSERT_EQUALS( string( "b" ), m->sub_c()->indexKeyPattern().firstElement().fieldName() ); - + FieldRangeSet frs( "ns", BSON( "a" << 1 ) ); { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); - NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( frs.pattern( BSON( "b" << 1 ) ), BSON( "a" << 1 ), 0 ); + NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( frs.pattern( BSON( "b" << 1 ) ), BSON( "a" << 1 ), 0 ); } m = dynamic_pointer_cast< MultiCursor >( bestGuessCursor( ns(), fromjson( "{a:1,$or:[{y:1}]}" ), BSON( "b" << 1 ) ) ); ASSERT_EQUALS( string( "b" ), m->sub_c()->indexKeyPattern().firstElement().fieldName() ); } }; - + class All : public Suite { public: - All() : Suite( "queryoptimizer" ){} - - void setupTests(){ + All() : Suite( "queryoptimizer" ) {} + + void setupTests() { add< FieldRangeTests::Empty >(); add< FieldRangeTests::Eq >(); add< FieldRangeTests::DupEq >(); @@ -1606,6 +1671,7 @@ namespace QueryOptimizerTests { add< FieldRangeTests::Numeric >(); add< FieldRangeTests::InLowerBound >(); add< FieldRangeTests::InUpperBound >(); + add< FieldRangeTests::UnionBound >(); add< FieldRangeTests::MultiBound >(); add< FieldRangeTests::Diff1 >(); add< FieldRangeTests::Diff2 >(); @@ -1670,6 +1736,7 @@ namespace QueryOptimizerTests { add< FieldRangeTests::Diff61 >(); add< FieldRangeTests::Diff62 >(); add< FieldRangeTests::Diff63 >(); + add< FieldRangeTests::Diff64 >(); add< FieldRangeTests::DiffMulti1 >(); add< FieldRangeTests::DiffMulti2 >(); add< FieldRangeTests::SetIntersect >(); @@ -1713,6 +1780,6 @@ namespace QueryOptimizerTests { add< BestGuess >(); } } myall; - + } // namespace QueryOptimizerTests diff --git a/dbtests/querytests.cpp b/dbtests/querytests.cpp index 31e1879..d008e4d 100644 --- a/dbtests/querytests.cpp +++ b/dbtests/querytests.cpp @@ -25,6 +25,8 @@ #include "../db/json.h" #include "../db/lasterror.h" +#include "../util/timer.h" + #include "dbtests.h" namespace mongo { @@ -37,7 +39,7 @@ namespace QueryTests { dblock lk; Client::Context _context; public: - Base() : _context( ns() ){ + Base() : _context( ns() ) { addIndex( fromjson( "{\"a\":1}" ) ); } ~Base() { @@ -48,7 +50,8 @@ namespace QueryTests { toDelete.push_back( c->currLoc() ); for( vector< DiskLoc >::iterator i = toDelete.begin(); i != 
toDelete.end(); ++i ) theDataFileMgr.deleteRecord( ns(), i->rec(), *i, false ); - } catch ( ... ) { + } + catch ( ... ) { FAIL( "Exception while cleaning up records" ); } } @@ -129,7 +132,7 @@ namespace QueryTests { ASSERT_EQUALS( 1, runCount( ns(), cmd, err ) ); } }; - + class FindOne : public Base { public: void run() { @@ -145,12 +148,11 @@ namespace QueryTests { class ClientBase { public: - // NOTE: Not bothering to backup the old error record. ClientBase() { mongo::lastError.reset( new LastError() ); } ~ClientBase() { - mongo::lastError.release(); + //mongo::lastError.release(); } protected: static void insert( const char *ns, BSONObj o ) { @@ -170,6 +172,9 @@ namespace QueryTests { class BoundedKey : public ClientBase { public: + ~BoundedKey() { + client().dropCollection( "unittests.querytests.BoundedKey" ); + } void run() { const char *ns = "unittests.querytests.BoundedKey"; insert( ns, BSON( "a" << 1 ) ); @@ -210,7 +215,7 @@ namespace QueryTests { client().dropCollection( ns ); } - void testLimit(int limit){ + void testLimit(int limit) { ASSERT_EQUALS(client().query( ns, BSONObj(), limit )->itcount(), limit); } void run() { @@ -285,7 +290,7 @@ namespace QueryTests { insert( ns, BSON( "a" << 0 ) ); c = client().query( ns, QUERY( "a" << 1 ).hint( BSON( "$natural" << 1 ) ), 2, 0, 0, QueryOption_CursorTailable ); ASSERT( 0 != c->getCursorId() ); - ASSERT( !c->isDead() ); + ASSERT( !c->isDead() ); } }; @@ -345,7 +350,7 @@ namespace QueryTests { ASSERT( !client().getLastError().empty() ); } }; - + class TailableQueryOnId : public ClientBase { public: ~TailableQueryOnId() { @@ -511,7 +516,13 @@ namespace QueryTests { static const char *ns() { return "unittests.querytests.AutoResetIndexCache"; } static const char *idxNs() { return "unittests.system.indexes"; } void index() const { ASSERT( !client().findOne( idxNs(), BSON( "name" << NE << "_id_" ) ).isEmpty() ); } - void noIndex() const { ASSERT( client().findOne( idxNs(), BSON( "name" << NE << "_id_" ) ).isEmpty() ); } + void noIndex() const { + BSONObj o = client().findOne( idxNs(), BSON( "name" << NE << "_id_" ) ); + if( !o.isEmpty() ) { + cout << o.toString() << endl; + ASSERT( false ); + } + } void checkIndex() { client().ensureIndex( ns(), BSON( "a" << 1 ) ); index(); @@ -598,8 +609,8 @@ namespace QueryTests { client().insert( ns, fromjson( "{a:[1,2,3]}" ) ); ASSERT( client().query( ns, Query( "{a:[1,2,3]}" ) )->more() ); client().ensureIndex( ns, BSON( "a" << 1 ) ); - ASSERT( client().query( ns, Query( "{a:{$in:[1,[1,2,3]]}}" ).hint( BSON( "a" << 1 ) ) )->more() ); - ASSERT( client().query( ns, Query( "{a:[1,2,3]}" ).hint( BSON( "a" << 1 ) ) )->more() ); // SERVER-146 + ASSERT( client().query( ns, Query( "{a:{$in:[1,[1,2,3]]}}" ).hint( BSON( "a" << 1 ) ) )->more() ); + ASSERT( client().query( ns, Query( "{a:[1,2,3]}" ).hint( BSON( "a" << 1 ) ) )->more() ); // SERVER-146 } }; @@ -613,7 +624,7 @@ namespace QueryTests { client().insert( ns, fromjson( "{a:[[1],2]}" ) ); check( "$natural" ); client().ensureIndex( ns, BSON( "a" << 1 ) ); - check( "a" ); // SERVER-146 + check( "a" ); // SERVER-146 } private: void check( const string &hintField ) { @@ -756,12 +767,12 @@ namespace QueryTests { class DifferentNumbers : public ClientBase { public: - ~DifferentNumbers(){ + ~DifferentNumbers() { client().dropCollection( "unittests.querytests.DifferentNumbers" ); } - void t( const char * ns ){ + void t( const char * ns ) { auto_ptr< DBClientCursor > cursor = client().query( ns, Query().sort( "7" ) ); - while ( cursor->more() ){ + while ( 
cursor->more() ) { BSONObj o = cursor->next(); assert( o.valid() ); //cout << " foo " << o << endl; @@ -782,37 +793,37 @@ namespace QueryTests { t(ns); } }; - + class CollectionBase : public ClientBase { public: - - CollectionBase( string leaf ){ + + CollectionBase( string leaf ) { _ns = "unittests.querytests."; _ns += leaf; client().dropCollection( ns() ); } - - virtual ~CollectionBase(){ + + virtual ~CollectionBase() { client().dropCollection( ns() ); } - - int count(){ + + int count() { return (int) client().count( ns() ); } - const char * ns(){ + const char * ns() { return _ns.c_str(); } - + private: string _ns; }; class SymbolStringSame : public CollectionBase { public: - SymbolStringSame() : CollectionBase( "symbolstringsame" ){} + SymbolStringSame() : CollectionBase( "symbolstringsame" ) {} - void run(){ + void run() { { BSONObjBuilder b; b.appendSymbol( "x" , "eliot" ); b.append( "z" , 17 ); client().insert( ns() , b.obj() ); } ASSERT_EQUALS( 17 , client().findOne( ns() , BSONObj() )["z"].number() ); { @@ -828,46 +839,46 @@ namespace QueryTests { class TailableCappedRaceCondition : public CollectionBase { public: - - TailableCappedRaceCondition() : CollectionBase( "tailablecappedrace" ){ + + TailableCappedRaceCondition() : CollectionBase( "tailablecappedrace" ) { client().dropCollection( ns() ); _n = 0; } - void run(){ + void run() { string err; - writelock lk(""); + writelock lk(""); Client::Context ctx( "unittests" ); ASSERT( userCreateNS( ns() , fromjson( "{ capped : true , size : 2000 }" ) , err , false ) ); - for ( int i=0; i<100; i++ ){ + for ( int i=0; i<100; i++ ) { insertNext(); ASSERT( count() < 45 ); } - + int a = count(); - + auto_ptr< DBClientCursor > c = client().query( ns() , QUERY( "i" << GT << 0 ).hint( BSON( "$natural" << 1 ) ), 0, 0, 0, QueryOption_CursorTailable ); int n=0; - while ( c->more() ){ + while ( c->more() ) { BSONObj z = c->next(); n++; } - + ASSERT_EQUALS( a , n ); insertNext(); ASSERT( c->more() ); - for ( int i=0; i<50; i++ ){ + for ( int i=0; i<50; i++ ) { insertNext(); } - while ( c->more() ){ c->next(); } + while ( c->more() ) { c->next(); } ASSERT( c->isDead() ); } - - void insertNext(){ + + void insertNext() { insert( ns() , BSON( "i" << _n++ ) ); } @@ -876,89 +887,71 @@ namespace QueryTests { class HelperTest : public CollectionBase { public: - - HelperTest() : CollectionBase( "helpertest" ){ + + HelperTest() : CollectionBase( "helpertest" ) { } - void run(){ + void run() { writelock lk(""); Client::Context ctx( "unittests" ); - - for ( int i=0; i<50; i++ ){ + + for ( int i=0; i<50; i++ ) { insert( ns() , BSON( "_id" << i << "x" << i * 2 ) ); } ASSERT_EQUALS( 50 , count() ); - + BSONObj res; ASSERT( Helpers::findOne( ns() , BSON( "_id" << 20 ) , res , true ) ); ASSERT_EQUALS( 40 , res["x"].numberInt() ); - + ASSERT( Helpers::findById( cc(), ns() , BSON( "_id" << 20 ) , res ) ); ASSERT_EQUALS( 40 , res["x"].numberInt() ); ASSERT( ! 
Helpers::findById( cc(), ns() , BSON( "_id" << 200 ) , res ) ); unsigned long long slow , fast; - + int n = 10000; { Timer t; - for ( int i=0; i i = Helpers::find( ns() ); - int n = 0; - while ( i->hasNext() ){ - BSONObj o = i->next(); - n++; - } - ASSERT_EQUALS( 50 , n ); - i = Helpers::find( ns() , BSON( "_id" << 20 ) ); - n = 0; - while ( i->hasNext() ){ - BSONObj o = i->next(); - n++; - } - ASSERT_EQUALS( 1 , n ); - } - } }; class HelperByIdTest : public CollectionBase { public: - - HelperByIdTest() : CollectionBase( "helpertestbyid" ){ + + HelperByIdTest() : CollectionBase( "helpertestbyid" ) { } - void run(){ + void run() { writelock lk(""); Client::Context ctx( "unittests" ); - for ( int i=0; i<1000; i++ ){ + for ( int i=0; i<1000; i++ ) { insert( ns() , BSON( "_id" << i << "x" << i * 2 ) ); } - for ( int i=0; i<1000; i+=2 ){ + for ( int i=0; i<1000; i+=2 ) { client_.remove( ns() , BSON( "_id" << i ) ); } - BSONObj res; - for ( int i=0; i<1000; i++ ){ + BSONObj res; + for ( int i=0; i<1000; i++ ) { bool found = Helpers::findById( cc(), ns() , BSON( "_id" << i ) , res ); ASSERT_EQUALS( i % 2 , int(found) ); } @@ -966,19 +959,19 @@ namespace QueryTests { } }; - class ClientCursorTest : public CollectionBase{ - ClientCursorTest() : CollectionBase( "clientcursortest" ){ + class ClientCursorTest : public CollectionBase { + ClientCursorTest() : CollectionBase( "clientcursortest" ) { } - void run(){ + void run() { writelock lk(""); Client::Context ctx( "unittests" ); - - for ( int i=0; i<1000; i++ ){ + + for ( int i=0; i<1000; i++ ) { insert( ns() , BSON( "_id" << i << "x" << i * 2 ) ); } - + } }; @@ -990,19 +983,19 @@ namespace QueryTests { ~FindingStart() { __findingStartInitialTimeout = _old; } - + void run() { BSONObj info; ASSERT( client().runCommand( "unittests", BSON( "create" << "querytests.findingstart" << "capped" << true << "size" << 1000 << "$nExtents" << 5 << "autoIndexId" << false ), info ) ); - + int i = 0; for( int oldCount = -1; - count() != oldCount; - oldCount = count(), client().insert( ns(), BSON( "ts" << i++ ) ) ); + count() != oldCount; + oldCount = count(), client().insert( ns(), BSON( "ts" << i++ ) ) ); for( int k = 0; k < 5; ++k ) { client().insert( ns(), BSON( "ts" << i++ ) ); - int min = client().query( ns(), Query().sort( BSON( "$natural" << 1 ) ) )->next()[ "ts" ].numberInt(); + int min = client().query( ns(), Query().sort( BSON( "$natural" << 1 ) ) )->next()[ "ts" ].numberInt(); for( int j = -1; j < i; ++j ) { auto_ptr< DBClientCursor > c = client().query( ns(), QUERY( "ts" << GTE << j ), 0, 0, 0, QueryOption_OplogReplay ); ASSERT( c->more() ); @@ -1012,7 +1005,7 @@ namespace QueryTests { } } } - + private: int _old; }; @@ -1025,17 +1018,19 @@ namespace QueryTests { ~FindingStartPartiallyFull() { __findingStartInitialTimeout = _old; } - + void run() { + unsigned startNumCursors = ClientCursor::numCursors(); + BSONObj info; ASSERT( client().runCommand( "unittests", BSON( "create" << "querytests.findingstart" << "capped" << true << "size" << 10000 << "$nExtents" << 5 << "autoIndexId" << false ), info ) ); - + int i = 0; for( ; i < 150; client().insert( ns(), BSON( "ts" << i++ ) ) ); - + for( int k = 0; k < 5; ++k ) { client().insert( ns(), BSON( "ts" << i++ ) ); - int min = client().query( ns(), Query().sort( BSON( "$natural" << 1 ) ) )->next()[ "ts" ].numberInt(); + int min = client().query( ns(), Query().sort( BSON( "$natural" << 1 ) ) )->next()[ "ts" ].numberInt(); for( int j = -1; j < i; ++j ) { auto_ptr< DBClientCursor > c = client().query( ns(), QUERY( 
"ts" << GTE << j ), 0, 0, 0, QueryOption_OplogReplay ); ASSERT( c->more() ); @@ -1044,13 +1039,15 @@ namespace QueryTests { ASSERT_EQUALS( ( j > min ? j : min ), next[ "ts" ].numberInt() ); } } + + ASSERT_EQUALS( startNumCursors, ClientCursor::numCursors() ); } - + private: int _old; }; - - + + class WhatsMyUri : public CollectionBase { public: WhatsMyUri() : CollectionBase( "whatsmyuri" ) {} @@ -1060,15 +1057,15 @@ namespace QueryTests { ASSERT_EQUALS( unknownAddress.toString(), result[ "you" ].str() ); } }; - + namespace parsedtests { class basic1 { public: - void _test( const BSONObj& in ){ + void _test( const BSONObj& in ) { ParsedQuery q( "a.b" , 5 , 6 , 9 , in , BSONObj() ); ASSERT_EQUALS( BSON( "x" << 5 ) , q.getFilter() ); } - void run(){ + void run() { _test( BSON( "x" << 5 ) ); _test( BSON( "query" << BSON( "x" << 5 ) ) ); _test( BSON( "$query" << BSON( "x" << 5 ) ) ); @@ -1090,23 +1087,23 @@ namespace QueryTests { namespace queryobjecttests { class names1 { public: - void run(){ + void run() { ASSERT_EQUALS( BSON( "x" << 1 ) , QUERY( "query" << BSON( "x" << 1 ) ).getFilter() ); ASSERT_EQUALS( BSON( "x" << 1 ) , QUERY( "$query" << BSON( "x" << 1 ) ).getFilter() ); } - + }; } class OrderingTest { public: - void run(){ + void run() { { Ordering o = Ordering::make( BSON( "a" << 1 << "b" << -1 << "c" << 1 ) ); ASSERT_EQUALS( 1 , o.get(0) ); ASSERT_EQUALS( -1 , o.get(1) ); ASSERT_EQUALS( 1 , o.get(2) ); - + ASSERT( ! o.descending( 1 ) ); ASSERT( o.descending( 1 << 1 ) ); ASSERT( ! o.descending( 1 << 2 ) ); @@ -1117,7 +1114,7 @@ namespace QueryTests { ASSERT_EQUALS( 1 , o.get(0) ); ASSERT_EQUALS( 1 , o.get(1) ); ASSERT_EQUALS( -1 , o.get(2) ); - + ASSERT( ! o.descending( 1 ) ); ASSERT( ! o.descending( 1 << 1 ) ); ASSERT( o.descending( 1 << 2 ) ); @@ -1126,12 +1123,100 @@ namespace QueryTests { } }; + namespace proj { // Projection tests + + class T1 { + public: + void run() { + + Projection m; + m.init( BSON( "a" << 1 ) ); + ASSERT_EQUALS( BSON( "a" << 5 ) , m.transform( BSON( "x" << 1 << "a" << 5 ) ) ); + } + }; + + class K1 { + public: + void run() { + + Projection m; + m.init( BSON( "a" << 1 ) ); + + scoped_ptr x( m.checkKey( BSON( "a" << 1 ) ) ); + ASSERT( ! x ); + + x.reset( m.checkKey( BSON( "a" << 1 << "_id" << 1 ) ) ); + ASSERT( x ); + + ASSERT_EQUALS( BSON( "a" << 5 << "_id" << 17 ) , + x->hydrate( BSON( "" << 5 << "" << 17 ) ) ); + + x.reset( m.checkKey( BSON( "a" << 1 << "x" << 1 << "_id" << 1 ) ) ); + ASSERT( x ); + + ASSERT_EQUALS( BSON( "a" << 5 << "_id" << 17 ) , + x->hydrate( BSON( "" << 5 << "" << 123 << "" << 17 ) ) ); + + } + }; + + class K2 { + public: + void run() { + + Projection m; + m.init( BSON( "a" << 1 << "_id" << 0 ) ); + + scoped_ptr x( m.checkKey( BSON( "a" << 1 ) ) ); + ASSERT( x ); + + ASSERT_EQUALS( BSON( "a" << 17 ) , + x->hydrate( BSON( "" << 17 ) ) ); + + x.reset( m.checkKey( BSON( "x" << 1 << "a" << 1 << "_id" << 1 ) ) ); + ASSERT( x ); + + ASSERT_EQUALS( BSON( "a" << 123 ) , + x->hydrate( BSON( "" << 5 << "" << 123 << "" << 17 ) ) ); + + } + }; + + + class K3 { + public: + void run() { + + { + Projection m; + m.init( BSON( "a" << 1 << "_id" << 0 ) ); + + scoped_ptr x( m.checkKey( BSON( "a" << 1 << "x.a" << 1 ) ) ); + ASSERT( x ); + } + + + { + // TODO: this is temporary SERVER-2104 + Projection m; + m.init( BSON( "x.a" << 1 << "_id" << 0 ) ); + + scoped_ptr x( m.checkKey( BSON( "a" << 1 << "x.a" << 1 ) ) ); + ASSERT( ! 
x ); + } + + } + }; + + + } + class All : public Suite { public: All() : Suite( "query" ) { } - void setupTests(){ + void setupTests() { add< CountBasic >(); add< CountQuery >(); add< CountFields >(); @@ -1176,14 +1261,19 @@ namespace QueryTests { add< FindingStart >(); add< FindingStartPartiallyFull >(); add< WhatsMyUri >(); - + add< parsedtests::basic1 >(); - + add< queryobjecttests::names1 >(); add< OrderingTest >(); + + add< proj::T1 >(); + add< proj::K1 >(); + add< proj::K2 >(); + add< proj::K3 >(); } } myall; - + } // namespace QueryTests diff --git a/dbtests/repltests.cpp b/dbtests/repltests.cpp index a190dc8..c6ffba2 100644 --- a/dbtests/repltests.cpp +++ b/dbtests/repltests.cpp @@ -34,13 +34,13 @@ namespace ReplTests { BSONObj f( const char *s ) { return fromjson( s ); - } - + } + class Base { dblock lk; Client::Context _context; public: - Base() : _context( ns() ){ + Base() : _context( ns() ) { replSettings.master = true; createOplog(); ensureHaveIdIndex( ns() ); @@ -50,7 +50,8 @@ namespace ReplTests { replSettings.master = false; deleteAll( ns() ); deleteAll( cllNS() ); - } catch ( ... ) { + } + catch ( ... ) { FAIL( "Exception while cleaning up test" ); } } @@ -63,7 +64,7 @@ namespace ReplTests { } DBDirectClient *client() const { return &client_; } BSONObj one( const BSONObj &query = BSONObj() ) const { - return client()->findOne( ns(), query ); + return client()->findOne( ns(), query ); } void checkOne( const BSONObj &o ) const { check( o, one( o ) ); @@ -78,11 +79,11 @@ namespace ReplTests { void check( const BSONObj &expected, const BSONObj &got ) const { if ( expected.woCompare( got ) ) { out() << "expected: " << expected.toString() - << ", got: " << got.toString() << endl; + << ", got: " << got.toString() << endl; } ASSERT_EQUALS( expected , got ); } - BSONObj oneOp() const { + BSONObj oneOp() const { return client()->findOne( cllNS(), BSONObj() ); } int count() const { @@ -131,7 +132,7 @@ namespace ReplTests { out() << "all for " << ns << endl; for(; c->ok(); c->advance() ) { out() << c->current().toString() << endl; - } + } } // These deletes don't get logged. 
static void deleteAll( const char *ns ) { @@ -143,7 +144,7 @@ namespace ReplTests { toDelete.push_back( c->currLoc() ); } for( vector< DiskLoc >::iterator i = toDelete.begin(); i != toDelete.end(); ++i ) { - theDataFileMgr.deleteRecord( ns, i->rec(), *i, true ); + theDataFileMgr.deleteRecord( ns, i->rec(), *i, true ); } } static void insert( const BSONObj &o, bool god = false ) { @@ -163,7 +164,7 @@ namespace ReplTests { static DBDirectClient client_; }; DBDirectClient Base::client_; - + class LogBasic : public Base { public: void run() { @@ -172,9 +173,9 @@ namespace ReplTests { ASSERT_EQUALS( 2, opCount() ); } }; - + namespace Idempotence { - + class Base : public ReplTests::Base { public: virtual ~Base() {} @@ -186,7 +187,7 @@ namespace ReplTests { applyAllOperations(); check(); ASSERT_EQUALS( nOps, opCount() ); - + reset(); applyAllOperations(); check(); @@ -200,7 +201,7 @@ namespace ReplTests { virtual void check() const = 0; virtual void reset() const = 0; }; - + class InsertTimestamp : public Base { public: void doIt() const { @@ -221,7 +222,7 @@ namespace ReplTests { private: mutable Date_t date_; }; - + class InsertAutoId : public Base { public: InsertAutoId() : o_( fromjson( "{\"a\":\"b\"}" ) ) {} @@ -248,12 +249,12 @@ namespace ReplTests { checkOne( o_ ); } }; - + class InsertTwo : public Base { public: - InsertTwo() : - o_( fromjson( "{'_id':1,a:'b'}" ) ), - t_( fromjson( "{'_id':2,c:'d'}" ) ) {} + InsertTwo() : + o_( fromjson( "{'_id':1,a:'b'}" ) ), + t_( fromjson( "{'_id':2,c:'d'}" ) ) {} void doIt() const { vector< BSONObj > v; v.push_back( o_ ); @@ -287,7 +288,7 @@ namespace ReplTests { deleteAll( ns() ); } private: - BSONObj o_; + BSONObj o_; }; class UpdateTimestamp : public Base { @@ -311,14 +312,14 @@ namespace ReplTests { private: mutable Date_t date_; }; - + class UpdateSameField : public Base { public: UpdateSameField() : - q_( fromjson( "{a:'b'}" ) ), - o1_( wid( "{a:'b'}" ) ), - o2_( wid( "{a:'b'}" ) ), - u_( fromjson( "{a:'c'}" ) ){} + q_( fromjson( "{a:'b'}" ) ), + o1_( wid( "{a:'b'}" ) ), + o2_( wid( "{a:'b'}" ) ), + u_( fromjson( "{a:'c'}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); } @@ -334,14 +335,14 @@ namespace ReplTests { } private: BSONObj q_, o1_, o2_, u_; - }; - + }; + class UpdateSameFieldWithId : public Base { public: UpdateSameFieldWithId() : - o_( fromjson( "{'_id':1,a:'b'}" ) ), - q_( fromjson( "{a:'b'}" ) ), - u_( fromjson( "{'_id':1,a:'c'}" ) ){} + o_( fromjson( "{'_id':1,a:'b'}" ) ), + q_( fromjson( "{a:'b'}" ) ), + u_( fromjson( "{'_id':1,a:'c'}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); } @@ -356,14 +357,14 @@ namespace ReplTests { insert( fromjson( "{'_id':2,a:'b'}" ) ); } private: - BSONObj o_, q_, u_; - }; + BSONObj o_, q_, u_; + }; class UpdateSameFieldExplicitId : public Base { public: UpdateSameFieldExplicitId() : - o_( fromjson( "{'_id':1,a:'b'}" ) ), - u_( fromjson( "{'_id':1,a:'c'}" ) ){} + o_( fromjson( "{'_id':1,a:'b'}" ) ), + u_( fromjson( "{'_id':1,a:'c'}" ) ) {} void doIt() const { client()->update( ns(), o_, u_ ); } @@ -376,46 +377,15 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, u_; - }; - - class UpdateId : public UpdateSameFieldExplicitId { - public: - UpdateId() { - o_ = fromjson( "{'_id':1}" ); - u_ = fromjson( "{'_id':2}" ); - } - }; - - class UpdateId2 : public ReplTests::Base { - public: - UpdateId2() : - o_( fromjson( "{'_id':1}" ) ), - u_( fromjson( "{'_id':2}" ) ){} - void run() { - deleteAll( ns() ); - insert( o_ ); - client()->update( ns(), o_, u_ ); - 
ASSERT_EQUALS( 1, count() ); - checkOne( u_ ); - - deleteAll( ns() ); - insert( o_ ); - insert( u_ ); // simulate non snapshot replication, then op application - applyAllOperations(); - ASSERT_EQUALS( 1, count() ); - checkOne( u_ ); - } - protected: - BSONObj o_, u_; + BSONObj o_, u_; }; class UpdateDifferentFieldExplicitId : public Base { public: UpdateDifferentFieldExplicitId() : - o_( fromjson( "{'_id':1,a:'b'}" ) ), - q_( fromjson( "{'_id':1}" ) ), - u_( fromjson( "{'_id':1,a:'c'}" ) ){} + o_( fromjson( "{'_id':1,a:'b'}" ) ), + q_( fromjson( "{'_id':1}" ) ), + u_( fromjson( "{'_id':1,a:'c'}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); } @@ -428,28 +398,28 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, q_, u_; - }; - + BSONObj o_, q_, u_; + }; + class UpsertUpdateNoMods : public UpdateDifferentFieldExplicitId { void doIt() const { client()->update( ns(), q_, u_, true ); } }; - + class UpsertInsertNoMods : public InsertAutoId { void doIt() const { client()->update( ns(), fromjson( "{a:'c'}" ), o_, true ); } }; - + class UpdateSet : public Base { public: UpdateSet() : - o_( fromjson( "{'_id':1,a:5}" ) ), - q_( fromjson( "{a:5}" ) ), - u_( fromjson( "{$set:{a:7}}" ) ), - ou_( fromjson( "{'_id':1,a:7}" ) ) {} + o_( fromjson( "{'_id':1,a:5}" ) ), + q_( fromjson( "{a:5}" ) ), + u_( fromjson( "{$set:{a:7}}" ) ), + ou_( fromjson( "{'_id':1,a:7}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); } @@ -462,16 +432,16 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; - + class UpdateInc : public Base { public: UpdateInc() : - o_( fromjson( "{'_id':1,a:5}" ) ), - q_( fromjson( "{a:5}" ) ), - u_( fromjson( "{$inc:{a:3}}" ) ), - ou_( fromjson( "{'_id':1,a:8}" ) ) {} + o_( fromjson( "{'_id':1,a:5}" ) ), + q_( fromjson( "{a:5}" ) ), + u_( fromjson( "{$inc:{a:3}}" ) ), + ou_( fromjson( "{'_id':1,a:8}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); } @@ -484,16 +454,16 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; class UpdateInc2 : public Base { public: UpdateInc2() : - o_( fromjson( "{'_id':1,a:5}" ) ), - q_( fromjson( "{a:5}" ) ), - u_( fromjson( "{$inc:{a:3},$set:{x:5}}" ) ), - ou_( fromjson( "{'_id':1,a:8,x:5}" ) ) {} + o_( fromjson( "{'_id':1,a:5}" ) ), + q_( fromjson( "{a:5}" ) ), + u_( fromjson( "{$inc:{a:3},$set:{x:5}}" ) ), + ou_( fromjson( "{'_id':1,a:8,x:5}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); } @@ -506,16 +476,16 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; - + class IncEmbedded : public Base { public: IncEmbedded() : - o_( fromjson( "{'_id':1,a:{b:3},b:{b:1}}" ) ), - q_( fromjson( "{'_id':1}" ) ), - u_( fromjson( "{$inc:{'a.b':1,'b.b':1}}" ) ), - ou_( fromjson( "{'_id':1,a:{b:4},b:{b:2}}" ) ) + o_( fromjson( "{'_id':1,a:{b:3},b:{b:1}}" ) ), + q_( fromjson( "{'_id':1}" ) ), + u_( fromjson( "{$inc:{'a.b':1,'b.b':1}}" ) ), + ou_( fromjson( "{'_id':1,a:{b:4},b:{b:2}}" ) ) {} void doIt() const { client()->update( ns(), q_, u_ ); @@ -529,16 +499,16 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; class IncCreates : public Base { public: IncCreates() : - o_( fromjson( "{'_id':1}" ) ), - q_( fromjson( "{'_id':1}" ) ), - u_( fromjson( "{$inc:{'a':1}}" ) ), - ou_( fromjson( "{'_id':1,a:1}") ) + o_( fromjson( "{'_id':1}" ) ), + q_( fromjson( "{'_id':1}" ) ), + u_( fromjson( 
"{$inc:{'a':1}}" ) ), + ou_( fromjson( "{'_id':1,a:1}") ) {} void doIt() const { client()->update( ns(), q_, u_ ); @@ -552,16 +522,16 @@ namespace ReplTests { insert( o_ ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; class UpsertInsertIdMod : public Base { public: UpsertInsertIdMod() : - q_( fromjson( "{'_id':5,a:4}" ) ), - u_( fromjson( "{$inc:{a:3}}" ) ), - ou_( fromjson( "{'_id':5,a:7}" ) ) {} + q_( fromjson( "{'_id':5,a:4}" ) ), + u_( fromjson( "{$inc:{a:3}}" ) ), + ou_( fromjson( "{'_id':5,a:7}" ) ) {} void doIt() const { client()->update( ns(), q_, u_, true ); } @@ -573,15 +543,15 @@ namespace ReplTests { deleteAll( ns() ); } protected: - BSONObj q_, u_, ou_; + BSONObj q_, u_, ou_; }; - + class UpsertInsertSet : public Base { public: UpsertInsertSet() : - q_( fromjson( "{a:5}" ) ), - u_( fromjson( "{$set:{a:7}}" ) ), - ou_( fromjson( "{a:7}" ) ) {} + q_( fromjson( "{a:5}" ) ), + u_( fromjson( "{$set:{a:7}}" ) ), + ou_( fromjson( "{a:7}" ) ) {} void doIt() const { client()->update( ns(), q_, u_, true ); } @@ -594,15 +564,15 @@ namespace ReplTests { insert( fromjson( "{'_id':7,a:7}" ) ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; - + class UpsertInsertInc : public Base { public: UpsertInsertInc() : - q_( fromjson( "{a:5}" ) ), - u_( fromjson( "{$inc:{a:3}}" ) ), - ou_( fromjson( "{a:8}" ) ) {} + q_( fromjson( "{a:5}" ) ), + u_( fromjson( "{$inc:{a:3}}" ) ), + ou_( fromjson( "{a:8}" ) ) {} void doIt() const { client()->update( ns(), q_, u_, true ); } @@ -614,38 +584,38 @@ namespace ReplTests { deleteAll( ns() ); } protected: - BSONObj o_, q_, u_, ou_; + BSONObj o_, q_, u_, ou_; }; - + class MultiInc : public Base { public: - + string s() const { stringstream ss; auto_ptr cc = client()->query( ns() , Query().sort( BSON( "_id" << 1 ) ) ); bool first = true; - while ( cc->more() ){ + while ( cc->more() ) { if ( first ) first = false; else ss << ","; - + BSONObj o = cc->next(); ss << o["x"].numberInt(); } return ss.str(); } - + void doIt() const { client()->insert( ns(), BSON( "_id" << 1 << "x" << 1 ) ); client()->insert( ns(), BSON( "_id" << 2 << "x" << 5 ) ); - + ASSERT_EQUALS( "1,5" , s() ); - + client()->update( ns() , BSON( "_id" << 1 ) , BSON( "$inc" << BSON( "x" << 1 ) ) ); ASSERT_EQUALS( "2,5" , s() ); - + client()->update( ns() , BSONObj() , BSON( "$inc" << BSON( "x" << 1 ) ) ); ASSERT_EQUALS( "3,5" , s() ); - + client()->update( ns() , BSONObj() , BSON( "$inc" << BSON( "x" << 1 ) ) , false , true ); check(); } @@ -653,18 +623,18 @@ namespace ReplTests { void check() const { ASSERT_EQUALS( "4,6" , s() ); } - + void reset() const { deleteAll( ns() ); } }; - + class UpdateWithoutPreexistingId : public Base { public: UpdateWithoutPreexistingId() : - o_( fromjson( "{a:5}" ) ), - u_( fromjson( "{a:5}" ) ), - ot_( fromjson( "{b:4}" ) ) {} + o_( fromjson( "{a:5}" ) ), + u_( fromjson( "{a:5}" ) ), + ot_( fromjson( "{b:4}" ) ) {} void doIt() const { client()->update( ns(), o_, u_ ); } @@ -679,15 +649,15 @@ namespace ReplTests { insert( o_, true ); } protected: - BSONObj o_, u_, ot_; - }; - + BSONObj o_, u_, ot_; + }; + class Remove : public Base { public: Remove() : - o1_( f( "{\"_id\":\"010101010101010101010101\",\"a\":\"b\"}" ) ), - o2_( f( "{\"_id\":\"010101010101010101010102\",\"a\":\"b\"}" ) ), - q_( f( "{\"a\":\"b\"}" ) ) {} + o1_( f( "{\"_id\":\"010101010101010101010101\",\"a\":\"b\"}" ) ), + o2_( f( "{\"_id\":\"010101010101010101010102\",\"a\":\"b\"}" ) ), + q_( f( "{\"a\":\"b\"}" ) ) {} void doIt() const { client()->remove( ns(), 
q_ ); } @@ -700,23 +670,23 @@ namespace ReplTests { insert( o2_ ); } protected: - BSONObj o1_, o2_, q_; + BSONObj o1_, o2_, q_; }; - + class RemoveOne : public Remove { void doIt() const { client()->remove( ns(), q_, true ); - } + } void check() const { ASSERT_EQUALS( 1, count() ); } }; - + class FailingUpdate : public Base { public: FailingUpdate() : - o_( fromjson( "{'_id':1,a:'b'}" ) ), - u_( fromjson( "{'_id':1,c:'d'}" ) ) {} + o_( fromjson( "{'_id':1,a:'b'}" ) ), + u_( fromjson( "{'_id':1,c:'d'}" ) ) {} void doIt() const { client()->update( ns(), o_, u_ ); client()->insert( ns(), o_ ); @@ -731,7 +701,7 @@ namespace ReplTests { protected: BSONObj o_, u_; }; - + class SetNumToStr : public Base { public: void doIt() const { @@ -746,7 +716,7 @@ namespace ReplTests { insert( BSON( "_id" << 0 << "a" << 4.0 ) ); } }; - + class Push : public Base { public: void doIt() const { @@ -760,9 +730,9 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4]}" ) ); - } + } }; - + class PushUpsert : public Base { public: void doIt() const { @@ -776,7 +746,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4]}" ) ); - } + } }; class MultiPush : public Base { @@ -792,7 +762,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4]}" ) ); - } + } }; class EmptyPush : public Base { @@ -808,13 +778,13 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0}" ) ); - } + } }; class PushAll : public Base { public: void doIt() const { - client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$pushAll:{a:[5.0,6.0]}}" ) ); + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$pushAll:{a:[5.0,6.0]}}" ) ); } using ReplTests::Base::check; void check() const { @@ -824,13 +794,13 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4]}" ) ); - } + } }; - + class PushAllUpsert : public Base { public: void doIt() const { - client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$pushAll:{a:[5.0,6.0]}}" ), true ); + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$pushAll:{a:[5.0,6.0]}}" ), true ); } using ReplTests::Base::check; void check() const { @@ -840,7 +810,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4]}" ) ); - } + } }; class EmptyPushAll : public Base { @@ -856,7 +826,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0}" ) ); - } + } }; class Pull : public Base { @@ -872,9 +842,9 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4,5]}" ) ); - } + } }; - + class PullNothing : public Base { public: void doIt() const { @@ -888,13 +858,13 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4,5]}" ) ); - } + } }; - + class PullAll : public Base { public: void doIt() const { - client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$pullAll:{a:[4,5]}}" ) ); + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$pullAll:{a:[4,5]}}" ) ); } using ReplTests::Base::check; void check() const { @@ -904,7 +874,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4,5,6]}" ) ); - } + } }; class Pop : public Base { @@ -920,7 +890,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4,5,6]}" ) ); - } + } }; class PopReverse : public 
Base { @@ -936,7 +906,7 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:[4,5,6]}" ) ); - } + } }; class BitOp : public Base { @@ -952,13 +922,78 @@ namespace ReplTests { void reset() const { deleteAll( ns() ); insert( fromjson( "{'_id':0,a:3}" ) ); - } + } + }; + + class Rename : public Base { + public: + void doIt() const { + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$rename:{a:'b'}}" ) ); + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$set:{a:50}}" ) ); + } + using ReplTests::Base::check; + void check() const { + ASSERT_EQUALS( 1, count() ); + check( BSON( "_id" << 0 << "a" << 50 << "b" << 3 ) , one( fromjson( "{'_id':0}" ) ) ); + } + void reset() const { + deleteAll( ns() ); + insert( fromjson( "{'_id':0,a:3}" ) ); + } + }; + + class RenameReplace : public Base { + public: + void doIt() const { + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$rename:{a:'b'}}" ) ); + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$set:{a:50}}" ) ); + } + using ReplTests::Base::check; + void check() const { + ASSERT_EQUALS( 1, count() ); + check( BSON( "_id" << 0 << "a" << 50 << "b" << 3 ) , one( fromjson( "{'_id':0}" ) ) ); + } + void reset() const { + deleteAll( ns() ); + insert( fromjson( "{'_id':0,a:3,b:100}" ) ); + } + }; + + class RenameOverwrite : public Base { + public: + void doIt() const { + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$rename:{a:'b'}}" ) ); + } + using ReplTests::Base::check; + void check() const { + ASSERT_EQUALS( 1, count() ); + check( BSON( "_id" << 0 << "b" << 3 << "z" << 1 ) , one( fromjson( "{'_id':0}" ) ) ); + } + void reset() const { + deleteAll( ns() ); + insert( fromjson( "{'_id':0,z:1,a:3}" ) ); + } + }; + + class NoRename : public Base { + public: + void doIt() const { + client()->update( ns(), BSON( "_id" << 0 ), fromjson( "{$rename:{c:'b'},$set:{z:1}}" ) ); + } + using ReplTests::Base::check; + void check() const { + ASSERT_EQUALS( 1, count() ); + check( BSON( "_id" << 0 << "a" << 3 << "z" << 1 ) , one( fromjson( "{'_id':0}" ) ) ); + } + void reset() const { + deleteAll( ns() ); + insert( fromjson( "{'_id':0,a:3}" ) ); + } }; - } // namespace Idempotence - + class DeleteOpIsIdBased : public Base { public: void run() { @@ -968,21 +1003,21 @@ namespace ReplTests { client()->remove( ns(), BSON( "a" << 10 ) ); ASSERT_EQUALS( 1U, client()->count( ns(), BSONObj() ) ); insert( BSON( "_id" << 0 << "a" << 11 ) ); - insert( BSON( "_id" << 2 << "a" << 10 ) ); + insert( BSON( "_id" << 2 << "a" << 10 ) ); insert( BSON( "_id" << 3 << "a" << 10 ) ); - + applyAllOperations(); ASSERT_EQUALS( 2U, client()->count( ns(), BSONObj() ) ); ASSERT( !one( BSON( "_id" << 1 ) ).isEmpty() ); ASSERT( !one( BSON( "_id" << 2 ) ).isEmpty() ); } }; - + class DbIdsTest { public: void run() { Client::Context ctx( "unittests.repltest.DbIdsTest" ); - + s_.reset( new DbIds( "local.temp.DbIdsTest" ) ); s_->reset(); check( false, false, false ); @@ -991,7 +1026,7 @@ namespace ReplTests { check( true, false, false ); s_->set( "a", BSON( "_id" << 4 ), false ); check( false, false, false ); - + s_->set( "b", BSON( "_id" << 4 ), true ); check( false, true, false ); s_->set( "b", BSON( "_id" << 4 ), false ); @@ -1009,7 +1044,7 @@ namespace ReplTests { s_->reset(); check( false, false, false ); - + s_->set( "a", BSON( "_id" << 4 ), true ); s_->set( "a", BSON( "_id" << 4 ), true ); check( true, false, false ); @@ -1020,17 +1055,17 @@ namespace ReplTests { void check( bool one, bool two, bool three ) { 
ASSERT_EQUALS( one, s_->get( "a", BSON( "_id" << 4 ) ) ); ASSERT_EQUALS( two, s_->get( "b", BSON( "_id" << 4 ) ) ); - ASSERT_EQUALS( three, s_->get( "a", BSON( "_id" << 5 ) ) ); + ASSERT_EQUALS( three, s_->get( "a", BSON( "_id" << 5 ) ) ); } dblock lk_; auto_ptr< DbIds > s_; }; - + class MemIdsTest { public: void run() { int n = sizeof( BSONObj ) + BSON( "_id" << 4 ).objsize(); - + s_.reset(); ASSERT_EQUALS( 0, s_.roughSize() ); ASSERT( !s_.get( "a", BSON( "_id" << 4 ) ) ); @@ -1057,7 +1092,7 @@ namespace ReplTests { public: void run() { Client::Context ctx( "unittests.repltests.IdTrackerTest" ); - + ASSERT( s_.inMem() ); s_.reset( 4 * sizeof( BSONObj ) - 1 ); s_.haveId( "a", BSON( "_id" << 0 ), true ); @@ -1069,34 +1104,34 @@ namespace ReplTests { s_.mayUpgradeStorage(); ASSERT( !s_.inMem() ); check(); - + s_.haveId( "a", BSON( "_id" << 1 ), false ); ASSERT( !s_.haveId( "a", BSON( "_id" << 1 ) ) ); s_.haveId( "a", BSON( "_id" << 1 ), true ); check(); - ASSERT( !s_.inMem() ); - + ASSERT( !s_.inMem() ); + s_.reset( 4 * sizeof( BSONObj ) - 1 ); s_.mayUpgradeStorage(); - ASSERT( s_.inMem() ); + ASSERT( s_.inMem() ); } private: void check() { ASSERT( s_.haveId( "a", BSON( "_id" << 0 ) ) ); ASSERT( s_.haveId( "a", BSON( "_id" << 1 ) ) ); ASSERT( s_.haveId( "b", BSON( "_id" << 0 ) ) ); - ASSERT( s_.haveModId( "b", BSON( "_id" << 0 ) ) ); + ASSERT( s_.haveModId( "b", BSON( "_id" << 0 ) ) ); } dblock lk_; IdTracker s_; }; - + class All : public Suite { public: - All() : Suite( "repl" ){ + All() : Suite( "repl" ) { } - - void setupTests(){ + + void setupTests() { add< LogBasic >(); add< Idempotence::InsertTimestamp >(); add< Idempotence::InsertAutoId >(); @@ -1107,8 +1142,6 @@ namespace ReplTests { add< Idempotence::UpdateSameField >(); add< Idempotence::UpdateSameFieldWithId >(); add< Idempotence::UpdateSameFieldExplicitId >(); - add< Idempotence::UpdateId >(); - add< Idempotence::UpdateId2 >(); add< Idempotence::UpdateDifferentFieldExplicitId >(); add< Idempotence::UpsertUpdateNoMods >(); add< Idempotence::UpsertInsertNoMods >(); @@ -1140,12 +1173,16 @@ namespace ReplTests { add< Idempotence::Pop >(); add< Idempotence::PopReverse >(); add< Idempotence::BitOp >(); + add< Idempotence::Rename >(); + add< Idempotence::RenameReplace >(); + add< Idempotence::RenameOverwrite >(); + add< Idempotence::NoRename >(); add< DeleteOpIsIdBased >(); add< DbIdsTest >(); add< MemIdsTest >(); add< IdTrackerTest >(); } } myall; - + } // namespace ReplTests diff --git a/dbtests/sharding.cpp b/dbtests/sharding.cpp index 2473366..19edd55 100644 --- a/dbtests/sharding.cpp +++ b/dbtests/sharding.cpp @@ -27,17 +27,17 @@ namespace ShardingTests { namespace serverandquerytests { class test1 { public: - void run(){ + void run() { ServerAndQuery a( "foo:1" , BSON( "a" << GT << 0 << LTE << 100 ) ); ServerAndQuery b( "foo:1" , BSON( "a" << GT << 200 << LTE << 1000 ) ); - + ASSERT( a < b ); ASSERT( ! 
( b < a ) ); set s; s.insert( a ); s.insert( b ); - + ASSERT_EQUALS( (unsigned int)2 , s.size() ); } }; @@ -45,12 +45,12 @@ namespace ShardingTests { class All : public Suite { public: - All() : Suite( "sharding" ){ + All() : Suite( "sharding" ) { } - void setupTests(){ + void setupTests() { add< serverandquerytests::test1 >(); } } myall; - + } diff --git a/dbtests/socktests.cpp b/dbtests/socktests.cpp index 267b1d6..5cd42f5 100644 --- a/dbtests/socktests.cpp +++ b/dbtests/socktests.cpp @@ -19,7 +19,6 @@ #include "pch.h" #include "../util/sock.h" - #include "dbtests.h" namespace SockTests { @@ -30,16 +29,20 @@ namespace SockTests { ASSERT_EQUALS( "127.0.0.1", hostbyname( "localhost" ) ); ASSERT_EQUALS( "127.0.0.1", hostbyname( "127.0.0.1" ) ); // ASSERT_EQUALS( "::1", hostbyname( "::1" ) ); // IPv6 disabled at runtime by default. + + HostAndPort h("asdfasdfasdf_no_such_host"); + // this fails uncomment when fixed. + ASSERT( !h.isSelf() ); } }; - + class All : public Suite { public: - All() : Suite( "sock" ){} - void setupTests(){ + All() : Suite( "sock" ) {} + void setupTests() { add< HostByName >(); } } myall; - + } // namespace SockTests diff --git a/dbtests/spin_lock_test.cpp b/dbtests/spin_lock_test.cpp index d053d61..01eb7b3 100644 --- a/dbtests/spin_lock_test.cpp +++ b/dbtests/spin_lock_test.cpp @@ -26,26 +26,26 @@ namespace { using mongo::SpinLock; - class LockTester{ + class LockTester { public: LockTester( SpinLock* spin, int* counter ) - : _spin(spin), _counter(counter), _requests(0){} + : _spin(spin), _counter(counter), _requests(0) {} - ~LockTester(){ + ~LockTester() { delete _t; } - void start( int increments ){ - _t = new boost::thread( boost::bind(&LockTester::test, this, increments) ); + void start( int increments ) { + _t = new boost::thread( boost::bind(&LockTester::test, this, increments) ); } - void join(){ + void join() { if ( _t ) _t->join(); } - int requests() const{ - return _requests; - } + int requests() const { + return _requests; + } private: SpinLock* _spin; // not owned here @@ -53,7 +53,7 @@ namespace { int _requests; boost::thread* _t; - void test( int increments ){ + void test( int increments ) { while ( increments-- > 0 ) { _spin->lock(); ++(*_counter); @@ -61,14 +61,14 @@ namespace { _spin->unlock(); } } - + LockTester( LockTester& ); LockTester& operator=( LockTester& ); }; - class ConcurrentIncs{ + class ConcurrentIncs { public: - void run(){ + void run() { #if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) @@ -77,37 +77,37 @@ namespace { const int threads = 64; const int incs = 10000; - LockTester* testers[threads]; - - for ( int i = 0; i < threads; i++ ){ - testers[i] = new LockTester( &spin, &counter ); - } - for ( int i = 0; i < threads; i++ ){ - testers[i]->start( incs ); - } - for ( int i = 0; i < threads; i++ ){ - testers[i]->join(); - ASSERT_EQUALS( testers[i]->requests(), incs ); - delete testers[i]; - } - - ASSERT_EQUALS( counter, threads*incs ); + LockTester* testers[threads]; + + for ( int i = 0; i < threads; i++ ) { + testers[i] = new LockTester( &spin, &counter ); + } + for ( int i = 0; i < threads; i++ ) { + testers[i]->start( incs ); + } + for ( int i = 0; i < threads; i++ ) { + testers[i]->join(); + ASSERT_EQUALS( testers[i]->requests(), incs ); + delete testers[i]; + } + + ASSERT_EQUALS( counter, threads*incs ); #else - // WARNING "TODO Missing spin lock in this platform." - ASSERT( true ); + // WARNING "TODO Missing spin lock in this platform." 
+ ASSERT( true ); + - #endif } }; - class SpinLockSuite : public Suite{ + class SpinLockSuite : public Suite { public: - SpinLockSuite() : Suite( "spinlock" ){} + SpinLockSuite() : Suite( "spinlock" ) {} - void setupTests(){ + void setupTests() { add< ConcurrentIncs >(); } } spinLockSuite; diff --git a/dbtests/test.vcproj b/dbtests/test.vcproj deleted file mode 100644 index c297d85..0000000 --- a/dbtests/test.vcproj +++ /dev/null @@ -1,1453 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj index d52278a..b80a730 100644 --- a/dbtests/test.vcxproj +++ b/dbtests/test.vcxproj @@ -68,7 +68,7 @@ $(SolutionDir)$(Configuration)\ $(Configuration)\ $(Configuration)\ - true + false true $(SolutionDir)$(Configuration)\ $(SolutionDir)$(Configuration)\ @@ -88,6 +88,10 @@ + ..;$(IncludePath) + ..;$(IncludePath) + ..;$(IncludePath) + ..;$(IncludePath) @@ -100,7 +104,7 @@ Use pch.h Level3 - EditAndContinue + ProgramDatabase 4355;4800;%(DisableSpecificWarnings) true @@ -112,13 +116,14 @@ true Console MachineX86 + true Disabled ..\..\js\src;..\pcre-7.4;C:\boost;\boost;%(AdditionalIncludeDirectories) - _UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) + _DURABLE;_UNICODE;UNICODE;SUPPORT_UCP;SUPPORT_UTF8;MONGO_EXPOSE_MACROS;OLDJS;STATIC_JS_API;XP_WIN;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;HAVE_CONFIG_H;PCRE_STATIC;%(PreprocessorDefinitions) EnableFastChecks MultiThreadedDebugDLL Use @@ -191,16 +196,17 @@ + + + + + + - - - - - @@ -244,6 +250,7 @@ + @@ -253,14 +260,28 @@ + + + + + + + + + + + + + + @@ -270,6 +291,7 @@ + @@ -510,7 +532,6 @@ - @@ -542,21 +563,29 @@ + + - + + NotUsing + + + + + @@ -564,7 +593,6 @@ - @@ -598,7 +626,6 @@ - @@ -610,8 +637,7 @@ - - + NotUsing @@ -624,14 +650,17 @@ + + + diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters index ba4c4af..c52f7f6 100755 --- a/dbtests/test.vcxproj.filters +++ b/dbtests/test.vcxproj.filters @@ -7,9 +7,6 @@ {0a50fb63-4ac3-4e30-a9d4-b0841878ee73} - - {eb2684bf-ca8d-4162-9313-56a81233c471} - {45dab36c-864e-45de-bb8e-cf1d87a2c4f6} @@ -44,15 +41,18 @@ {9320a670-3b28-471a-bf92-6c8d881a37a4} - - {4fff2dbf-30c4-4295-8db8-d513c1e36220} - 
{d499fdba-b256-4b12-af20-cdd1ae1addff} {353b6f01-1cab-4156-a576-bc75ab204776} + + {4fff2dbf-30c4-4295-8db8-d513c1e36220} + + + {c296d097-0d46-46ee-9097-f2df659d9596} + @@ -73,21 +73,6 @@ misc and third party\pcre - - storage related - - - storage related - - - storage related - - - storage related - - - storage related - client @@ -188,7 +173,7 @@ db\h - btree related + btree util\concurrency @@ -238,6 +223,27 @@ util\h + + dur + + + dur + + + dur + + + dur + + + dur + + + db + + + db + @@ -326,9 +332,6 @@ misc and third party\pcre - - storage related - client @@ -422,9 +425,6 @@ db\cpp - - db\cpp - db\cpp @@ -485,9 +485,6 @@ util\cpp - - util\cpp - util\cpp @@ -591,10 +588,10 @@ replsets - btree related + btree - btree related + btree db\cpp @@ -614,9 +611,6 @@ shard - - shard - util\concurrency @@ -698,6 +692,81 @@ db\cpp + + db\cpp + + + dbtests + + + scripting + + + db\cpp + + + db\cpp + + + dur + + + dur + + + dur + + + dur + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + db\cpp + + + util + + + db + + + util + + + dur + + + dur + + + client + + + dur + + + dbtests + + + dbtests + + + db\cpp + + + util\cpp + diff --git a/dbtests/threadedtests.cpp b/dbtests/threadedtests.cpp index af413cc..805b2d5 100644 --- a/dbtests/threadedtests.cpp +++ b/dbtests/threadedtests.cpp @@ -21,6 +21,7 @@ #include "../bson/util/atomic_int.h" #include "../util/concurrency/mvar.h" #include "../util/concurrency/thread_pool.h" +#include "../util/timer.h" #include #include @@ -29,34 +30,108 @@ namespace ThreadedTests { template - class ThreadedTest{ - public: - virtual void setup() {} //optional - virtual void subthread() = 0; - virtual void validate() = 0; + class ThreadedTest { + public: + virtual void setup() {} //optional + virtual void subthread() = 0; + virtual void validate() = 0; - static const int nthreads = nthreads_param; + static const int nthreads = nthreads_param; - void run(){ - setup(); + void run() { + setup(); + launch_subthreads(nthreads); + validate(); + } - launch_subthreads(nthreads); + virtual ~ThreadedTest() {}; // not necessary, but makes compilers happy - validate(); - } + private: + void launch_subthreads(int remaining) { + if (!remaining) return; - virtual ~ThreadedTest() {}; // not necessary, but makes compilers happy + boost::thread athread(boost::bind(&ThreadedTest::subthread, this)); - private: - void launch_subthreads(int remaining){ - if (!remaining) return; + launch_subthreads(remaining - 1); - boost::thread athread(boost::bind(&ThreadedTest::subthread, this)); - - launch_subthreads(remaining - 1); + athread.join(); + } + }; - athread.join(); + class MongoMutexTest : public ThreadedTest<135> { +#if defined(_DEBUG) + enum { N = 5000 }; +#else + enum { N = 40000 }; +#endif + MongoMutex *mm; + public: + void run() { + Timer t; + cout << "MongoMutexTest N:" << N << endl; + ThreadedTest<135>::run(); + cout << "MongoMutexTest " << t.millis() << "ms" << endl; + } + private: + virtual void setup() { + mm = new MongoMutex("MongoMutexTest"); + } + virtual void subthread() { + Client::initThread("mongomutextest"); + sleepmillis(0); + for( int i = 0; i < N; i++ ) { + if( i % 7 == 0 ) { + mm->lock_shared(); + mm->lock_shared(); + mm->unlock_shared(); + mm->unlock_shared(); + } + else if( i % 7 == 1 ) { + mm->lock_shared(); + ASSERT( mm->atLeastReadLocked() ); + mm->unlock_shared(); + } + else if( i % 7 == 2 ) { + mm->lock(); + ASSERT( mm->isWriteLocked() ); + mm->unlock(); + } + else if( i % 7 == 3 ) { + mm->lock(); + mm->lock_shared(); + ASSERT( mm->isWriteLocked() ); + 
mm->unlock_shared(); + mm->unlock(); + } + else if( i % 7 == 4 ) { + mm->lock(); + mm->releaseEarly(); + mm->unlock(); + } + else if( i % 7 == 5 ) { + if( mm->lock_try(1) ) { + mm->unlock(); + } + } + else if( i % 7 == 6 ) { + if( mm->lock_shared_try(0) ) { + mm->unlock_shared(); + } + } + else { + mm->lock_shared(); + mm->unlock_shared(); + } } + cc().shutdown(); + } + virtual void validate() { + ASSERT( !mm->atLeastReadLocked() ); + mm->lock(); + mm->unlock(); + mm->lock_shared(); + mm->unlock_shared(); + } }; // Tested with up to 30k threads @@ -64,13 +139,13 @@ namespace ThreadedTests { static const int iterations = 1000000; AtomicUInt target; - void subthread(){ - for(int i=0; i < iterations; i++){ + void subthread() { + for(int i=0; i < iterations; i++) { //target.x++; // verified to fail with this version target++; } } - void validate(){ + void validate() { ASSERT_EQUALS(target.x , unsigned(nthreads * iterations)); AtomicUInt u; @@ -80,6 +155,12 @@ namespace ThreadedTests { ASSERT_EQUALS(2u, u--); ASSERT_EQUALS(0u, --u); ASSERT_EQUALS(0u, u); + + u++; + ASSERT( u > 0 ); + + u--; + ASSERT( ! ( u > 0 ) ); } }; @@ -87,10 +168,10 @@ namespace ThreadedTests { static const int iterations = 10000; MVar target; - public: + public: MVarTest() : target(0) {} - void subthread(){ - for(int i=0; i < iterations; i++){ + void subthread() { + for(int i=0; i < iterations; i++) { int val = target.take(); #if BOOST_VERSION >= 103500 //increase chances of catching failure @@ -99,30 +180,30 @@ namespace ThreadedTests { target.put(val+1); } } - void validate(){ + void validate() { ASSERT_EQUALS(target.take() , nthreads * iterations); } }; - class ThreadPoolTest{ + class ThreadPoolTest { static const int iterations = 10000; static const int nThreads = 8; AtomicUInt counter; - void increment(int n){ - for (int i=0; i(); add< MVarTest >(); add< ThreadPoolTest >(); add< LockTest >(); + add< MongoMutexTest >(); } } myall; } diff --git a/dbtests/updatetests.cpp b/dbtests/updatetests.cpp index 17f861e..0f95a32 100644 --- a/dbtests/updatetests.cpp +++ b/dbtests/updatetests.cpp @@ -110,14 +110,14 @@ namespace UpdateTests { class PushAllNonArray : public Fail { void doIt() { - insert( ns(), fromjson( "{a:[1]}" ) ); + insert( ns(), fromjson( "{a:[1]}" ) ); update( ns(), BSONObj(), fromjson( "{$pushAll:{a:'d'}}" ) ); } }; class PullAllNonArray : public Fail { void doIt() { - insert( ns(), fromjson( "{a:[1]}" ) ); + insert( ns(), fromjson( "{a:[1]}" ) ); update( ns(), BSONObj(), fromjson( "{$pullAll:{a:'d'}}" ) ); } }; @@ -241,12 +241,12 @@ namespace UpdateTests { class MultiInc : public SetBase { public: - - string s(){ + + string s() { stringstream ss; auto_ptr cc = client().query( ns() , Query().sort( BSON( "_id" << 1 ) ) ); bool first = true; - while ( cc->more() ){ + while ( cc->more() ) { if ( first ) first = false; else ss << ","; @@ -255,11 +255,11 @@ namespace UpdateTests { } return ss.str(); } - - void run(){ + + void run() { client().insert( ns(), BSON( "_id" << 1 << "x" << 1 ) ); client().insert( ns(), BSON( "_id" << 2 << "x" << 5 ) ); - + ASSERT_EQUALS( "1,5" , s() ); client().update( ns() , BSON( "_id" << 1 ) , BSON( "$inc" << BSON( "x" << 1 ) ) ); @@ -270,7 +270,7 @@ namespace UpdateTests { client().update( ns() , BSONObj() , BSON( "$inc" << BSON( "x" << 1 ) ) , false , true ); ASSERT_EQUALS( "4,6" , s() ); - + } }; @@ -498,10 +498,10 @@ namespace UpdateTests { client().insert( ns(), BSON( "_id" << 55 << "i" << 5 ) ); client().update( ns(), BSON( "i" << 5 ), BSON( "i" << 6 ) ); ASSERT( 
!client().findOne( ns(), Query( BSON( "_id" << 55 ) ).hint - ( "{\"_id\":ObjectId(\"000000000000000000000000\")}" ) ).isEmpty() ); + ( "{\"_id\":ObjectId(\"000000000000000000000000\")}" ) ).isEmpty() ); } }; - + class CheckNoMods : public SetBase { public: void run() { @@ -509,7 +509,7 @@ namespace UpdateTests { ASSERT( error() ); } }; - + class UpdateMissingToNull : public SetBase { public: void run() { @@ -520,10 +520,10 @@ namespace UpdateTests { }; namespace ModSetTests { - + class internal1 { public: - void run(){ + void run() { BSONObj b = BSON( "$inc" << BSON( "x" << 1 << "a.b" << 1 ) ); ModSet m(b); @@ -532,7 +532,7 @@ namespace UpdateTests { ASSERT( ! m.haveModForField( "y" ) ); ASSERT( ! m.haveModForField( "a.c" ) ); ASSERT( ! m.haveModForField( "a" ) ); - + ASSERT( m.haveConflictingMod( "x" ) ); ASSERT( m.haveConflictingMod( "a" ) ); ASSERT( m.haveConflictingMod( "a.b" ) ); @@ -541,14 +541,14 @@ namespace UpdateTests { ASSERT( ! m.haveConflictingMod( "a.a" ) ); } }; - + class Base { public: - virtual ~Base(){} + virtual ~Base() {} + - - void test( BSONObj morig , BSONObj in , BSONObj wanted ){ + void test( BSONObj morig , BSONObj in , BSONObj wanted ) { BSONObj m = morig.copy(); ModSet set(m); @@ -556,20 +556,20 @@ namespace UpdateTests { ASSERT_EQUALS( wanted , out ); } }; - + class inc1 : public Base { public: - void run(){ + void run() { BSONObj m = BSON( "$inc" << BSON( "x" << 1 ) ); test( m , BSON( "x" << 5 ) , BSON( "x" << 6 ) ); test( m , BSON( "a" << 5 ) , BSON( "a" << 5 << "x" << 1 ) ); test( m , BSON( "z" << 5 ) , BSON( "x" << 1 << "z" << 5 ) ); } }; - + class inc2 : public Base { public: - void run(){ + void run() { BSONObj m = BSON( "$inc" << BSON( "a.b" << 1 ) ); test( m , BSONObj() , BSON( "a" << BSON( "b" << 1 ) ) ); test( m , BSON( "a" << BSON( "b" << 2 ) ) , BSON( "a" << BSON( "b" << 3 ) ) ); @@ -577,23 +577,23 @@ namespace UpdateTests { m = BSON( "$inc" << BSON( "a.b" << 1 << "a.c" << 1 ) ); test( m , BSONObj() , BSON( "a" << BSON( "b" << 1 << "c" << 1 ) ) ); - + } }; class set1 : public Base { public: - void run(){ + void run() { test( BSON( "$set" << BSON( "x" << 17 ) ) , BSONObj() , BSON( "x" << 17 ) ); test( BSON( "$set" << BSON( "x" << 17 ) ) , BSON( "x" << 5 ) , BSON( "x" << 17 ) ); test( BSON( "$set" << BSON( "x.a" << 17 ) ) , BSON( "z" << 5 ) , BSON( "x" << BSON( "a" << 17 )<< "z" << 5 ) ); } - }; - + }; + class push1 : public Base { public: - void run(){ + void run() { test( BSON( "$push" << BSON( "a" << 5 ) ) , fromjson( "{a:[1]}" ) , fromjson( "{a:[1,5]}" ) ); } }; @@ -602,33 +602,45 @@ namespace UpdateTests { namespace basic { class Base : public ClientBase { + protected: + virtual const char * ns() = 0; virtual void dotest() = 0; - - protected: - void test( const char* initial , const char* mod , const char* after ){ + void insert( const BSONObj& o ) { + client().insert( ns() , o ); + } + + void update( const BSONObj& m ) { + client().update( ns() , BSONObj() , m ); + } + + BSONObj findOne() { + return client().findOne( ns() , BSONObj() ); + } + + void test( const char* initial , const char* mod , const char* after ) { test( fromjson( initial ) , fromjson( mod ) , fromjson( after ) ); } - void test( const BSONObj& initial , const BSONObj& mod , const BSONObj& after ){ + void test( const BSONObj& initial , const BSONObj& mod , const BSONObj& after ) { client().dropCollection( ns() ); - client().insert( ns() , initial ); - client().update( ns() , BSONObj() , mod ); - ASSERT_EQUALS( after , client().findOne( ns(), BSONObj() )); + insert( initial ); + 
update( mod ); + ASSERT_EQUALS( after , findOne() ); client().dropCollection( ns() ); } public: - - Base(){} - virtual ~Base(){ + + Base() {} + virtual ~Base() { } - void run(){ + void run() { client().dropCollection( ns() ); - + dotest(); client().dropCollection( ns() ); @@ -640,98 +652,124 @@ namespace UpdateTests { virtual BSONObj mod() = 0; virtual BSONObj after() = 0; - void dotest(){ + void dotest() { test( initial() , mod() , after() ); } - + }; - + class inc1 : public SingleTest { - virtual BSONObj initial(){ + virtual BSONObj initial() { return BSON( "_id" << 1 << "x" << 1 ); } - virtual BSONObj mod(){ + virtual BSONObj mod() { return BSON( "$inc" << BSON( "x" << 2 ) ); } - virtual BSONObj after(){ + virtual BSONObj after() { return BSON( "_id" << 1 << "x" << 3 ); } - virtual const char * ns(){ + virtual const char * ns() { return "unittests.inc1"; } }; class inc2 : public SingleTest { - virtual BSONObj initial(){ + virtual BSONObj initial() { return BSON( "_id" << 1 << "x" << 1 ); } - virtual BSONObj mod(){ + virtual BSONObj mod() { return BSON( "$inc" << BSON( "x" << 2.5 ) ); } - virtual BSONObj after(){ + virtual BSONObj after() { return BSON( "_id" << 1 << "x" << 3.5 ); } - virtual const char * ns(){ + virtual const char * ns() { return "unittests.inc2"; } }; - + class inc3 : public SingleTest { - virtual BSONObj initial(){ + virtual BSONObj initial() { return BSON( "_id" << 1 << "x" << 537142123123LL ); } - virtual BSONObj mod(){ + virtual BSONObj mod() { return BSON( "$inc" << BSON( "x" << 2 ) ); } - virtual BSONObj after(){ + virtual BSONObj after() { return BSON( "_id" << 1 << "x" << 537142123125LL ); } - virtual const char * ns(){ - return "unittests.inc2"; + virtual const char * ns() { + return "unittests.inc3"; } }; class inc4 : public SingleTest { - virtual BSONObj initial(){ + virtual BSONObj initial() { return BSON( "_id" << 1 << "x" << 537142123123LL ); } - virtual BSONObj mod(){ + virtual BSONObj mod() { return BSON( "$inc" << BSON( "x" << 2LL ) ); } - virtual BSONObj after(){ + virtual BSONObj after() { return BSON( "_id" << 1 << "x" << 537142123125LL ); } - virtual const char * ns(){ - return "unittests.inc2"; + virtual const char * ns() { + return "unittests.inc4"; } }; class inc5 : public SingleTest { - virtual BSONObj initial(){ + virtual BSONObj initial() { return BSON( "_id" << 1 << "x" << 537142123123LL ); } - virtual BSONObj mod(){ + virtual BSONObj mod() { return BSON( "$inc" << BSON( "x" << 2.0 ) ); } - virtual BSONObj after(){ + virtual BSONObj after() { return BSON( "_id" << 1 << "x" << 537142123125LL ); } - virtual const char * ns(){ - return "unittests.inc2"; + virtual const char * ns() { + return "unittests.inc5"; } }; + class inc6 : public Base { + + virtual const char * ns() { + return "unittests.inc6"; + } + + + virtual BSONObj initial() { return BSONObj(); } + virtual BSONObj mod() { return BSONObj(); } + virtual BSONObj after() { return BSONObj(); } + + void dotest() { + client().insert( ns() , BSON( "x" << 5 ) ); + ASSERT( findOne()["x"].type() == NumberInt ); + long long start = 5; + long long max = numeric_limits::max(); + max *= 32; + + while ( start < max ) { + update( BSON( "$inc" << BSON( "x" << 500000 ) ) ); + start += 500000; + ASSERT_EQUALS( start , findOne()["x"].numberLong() ); // SERVER-2005 + } + + } + }; class bit1 : public Base { - const char * ns(){ + const char * ns() { return "unittests.bit1"; } - void dotest(){ + void dotest() { test( BSON( "_id" << 1 << "x" << 3 ) , BSON( "$bit" << BSON( "x" << BSON( "and" << 2 ) ) ) , 
BSON( "_id" << 1 << "x" << ( 3 & 2 ) ) ); test( BSON( "_id" << 1 << "x" << 1 ) , BSON( "$bit" << BSON( "x" << BSON( "or" << 4 ) ) ) , BSON( "_id" << 1 << "x" << ( 1 | 4 ) ) ); test( BSON( "_id" << 1 << "x" << 3 ) , BSON( "$bit" << BSON( "x" << BSON( "and" << 2 << "or" << 8 ) ) ) , BSON( "_id" << 1 << "x" << ( ( 3 & 2 ) | 8 ) ) ); @@ -739,21 +777,21 @@ namespace UpdateTests { } }; - + class unset : public Base { - const char * ns(){ + const char * ns() { return "unittests.unset"; } - void dotest(){ + void dotest() { test( "{_id:1,x:1}" , "{$unset:{x:1}}" , "{_id:1}" ); } }; class setswitchint : public Base { - const char * ns(){ + const char * ns() { return "unittests.int1"; } - void dotest(){ + void dotest() { test( BSON( "_id" << 1 << "x" << 1 ) , BSON( "$set" << BSON( "x" << 5.6 ) ) , BSON( "_id" << 1 << "x" << 5.6 ) ); test( BSON( "_id" << 1 << "x" << 5.6 ) , BSON( "$set" << BSON( "x" << 1 ) ) , BSON( "_id" << 1 << "x" << 1 ) ); } @@ -761,12 +799,12 @@ namespace UpdateTests { }; - + class All : public Suite { public: All() : Suite( "update" ) { } - void setupTests(){ + void setupTests() { add< ModId >(); add< ModNonmodMix >(); add< InvalidMod >(); @@ -815,18 +853,19 @@ namespace UpdateTests { add< PreserveIdWithIndex >(); add< CheckNoMods >(); add< UpdateMissingToNull >(); - + add< ModSetTests::internal1 >(); add< ModSetTests::inc1 >(); add< ModSetTests::inc2 >(); add< ModSetTests::set1 >(); add< ModSetTests::push1 >(); - + add< basic::inc1 >(); add< basic::inc2 >(); add< basic::inc3 >(); add< basic::inc4 >(); add< basic::inc5 >(); + add< basic::inc6 >(); add< basic::bit1 >(); add< basic::unset >(); add< basic::setswitchint >(); diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index c3b32b6..0000000 --- a/debian/changelog +++ /dev/null @@ -1,134 +0,0 @@ -mongodb (1.6.5) unstable; urgency=low - - * full change log http://jira.mongodb.org/browse/SERVER/fixforversion/10207 - - -- Richard Kreuter Tue, 7 Dec 2010 16:56:28 -0500 - -mongodb (1.6.4) unstable; urgency=low - - * replica_sets shell helpers - * sharding chunk safety, yielding during migrate cleanup - * full change log http://jira.mongodb.org/browse/SERVER/fixforversion/10191 - - -- Richard Kreuter Tue, 26 Oct 2010 16:56:28 -0500 - -mongodb (1.6.3) unstable; urgency=low - - * replica_sets slavedelay, rollback - * sharding optimization for larger than ram data sets - * full change log http://jira.mongodb.org/browse/SERVER/fixforversion/10190 - - -- Richard Kreuter Thu, 23 Sep 2010 16:56:28 -0500 - -mongodb (1.6.2) unstable; urgency=low - - * replica_sets some fixes - * sharding some fixes with rs - * full change log http://jira.mongodb.org/browse/SERVER/fixforversion/10187 - - -- Richard Kreuter Wed, 1 Sep 2010 16:56:28 -0500 - - -mongodb (1.6.1) unstable; urgency=low - - * replica_sets some fixes - * sharding some fixes with rs - * full change log http://jira.mongodb.org/browse/SERVER/fixforversion/10183 - - -- Richard Kreuter Tue, 17 Aug 2010 16:56:28 -0500 - -mongodb (1.6.0) unstable; urgency=low - - * sharding stable - * replica_sets stable - - -- Richard Kreuter Thu, 05 Aug 2010 16:56:28 -0500 - -mongodb (1.5.8) unstable; urgency=low - - * sharding lots of changes - * replica_sets lots of changes - - -- Richard Kreuter Tue, 03 Aug 2010 16:56:28 -0500 - -mongodb (1.5.7) unstable; urgency=low - - * sharding lots of changes - * replica_sets lots of changes - - -- Richard Kreuter Fri, 30 Jul 2010 16:56:28 -0500 - - -mongodb (1.5.6) unstable; urgency=low - - * sharding lots of changes, see 
http://jira.mongodb.org/browse/SERVER/fixforversion/10179 - - -- Richard Kreuter Sat, 24 Jul 2010 16:56:28 -0500 - -mongodb (1.5.5) unstable; urgency=low - - * sharding lots of changes, see http://jira.mongodb.org/browse/SERVER/fixforversion/10157 - - -- Richard Kreuter Fri, 16 Jul 2010 16:56:28 -0500 - -mongodb (1.5.4) unstable; urgency=low - - * sharding lots of changes, see http://jira.mongodb.org/browse/SERVER/fixforversion/10157 - - -- Richard Kreuter Fri, 2 Jul 2010 16:56:28 -0500 - -mongodb (1.5.3) unstable; urgency=low - - * sharding lots of changes, see http://jira.mongodb.org/browse/SERVER/fixforversion/10157 - - -- Richard Kreuter Thu, 17 Jun 2010 16:56:28 -0500 - -mongodb (1.5.2) unstable; urgency=low - - * sharding lots of changes, see http://jira.mongodb.org/browse/SERVER/fixforversion/10143 - - -- Richard Kreuter Wed, 27 May 2010 16:56:28 -0500 - -mongodb (1.5.1) unstable; urgency=low - - * sharding lots of changes, see http://jira.mongodb.org/browse/SERVER/fixforversion/10142 - - -- Richard Kreuter Wed, 3 May 2010 16:56:28 -0500 - -mongodb (1.5.0) unstable; urgency=low - - * replication w & real-time, see http://jira.mongodb.org/browse/SERVER/fixforversion/10125 - - -- Richard Kreuter Wed, 22 Mar 2010 16:56:28 -0500 - - -mongodb (1.3.5) unstable; urgency=low - - * bug fixes - - -- Richard Kreuter Wed, 22 Mar 2010 16:56:28 -0500 - -mongodb (1.3.4) unstable; urgency=low - - * bufg fixes - - -- Richard Kreuter Wed, 17 Mar 2010 16:56:28 -0500 - -mongodb (1.3.3) unstable; urgency=low - - * geo - - -- Richard Kreuter Fri, 05 Feb 2010 16:56:28 -0500 - -mongodb (1.3.2) unstable; urgency=low - - * munged debian files - - -- Richard Kreuter Fri, 05 Feb 2010 16:56:28 -0500 - -mongodb (1.3.1) unstable; urgency=low - - * Initial release - - -- Kristina Chodorow Tue, 07 Apr 2009 10:18:58 -0400 - diff --git a/debian/compat b/debian/compat deleted file mode 100644 index 7f8f011..0000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/debian/control b/debian/control deleted file mode 100644 index 2aef1c3..0000000 --- a/debian/control +++ /dev/null @@ -1,29 +0,0 @@ -Source: mongodb -Section: devel -Priority: optional -Maintainer: Richard Kreuter -Build-Depends: debhelper (>= 7), libpcre3, libpcre3-dev, scons, xulrunner-dev, libboost1.35-dev | libboost1.37-dev | libboost1.38-dev | libboost1.40-dev, libboost-thread1.35-dev | libboost-thread1.37-dev | libboost-thread1.38-dev | libboost-thread1.40-dev, libboost-filesystem1.35-dev | libboost-filesystem1.37-dev | libboost-filesystem1.38-dev | libboost-filesystem1.40-dev, libboost-program-options1.35-dev | libboost-program-options1.37-dev | libboost-program-options1.38-dev | libboost-program-options1.40-dev, libboost-date-time1.35-dev | libboost-date-time1.37-dev | libboost-date-time1.38-dev | libboost-date-time1.40-dev, libpcap-dev, libreadline-dev -Standards-Version: 3.8.0 -Homepage: http://www.mongodb.org - -Package: mongodb -Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends}, xulrunner-dev -Description: An object/document-oriented database - MongoDB is a high-performance, open source, schema-free - document-oriented data store that's easy to deploy, manage - and use. It's network accessible, written in C++ and offers - the following features : - . - * Collection oriented storage - easy storage of object- - style data - * Full index support, including on inner objects - * Query profiling - * Replication and fail-over support - * Efficient storage of binary data including large - objects (e.g. 
videos) - * Auto-sharding for cloud-level scalability (Q209) - . - High performance, scalability, and reasonable depth of - functionality are the goals for the project. - diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index 478c6f9..0000000 --- a/debian/copyright +++ /dev/null @@ -1,23 +0,0 @@ -This package was debianized by Kristina Chodorow on -Tue, 07 Apr 2009 10:18:58 -0400. - -It was downloaded from http://www.mongodb.org - -Upstream Authors: - - Eliot Horowitz - Dwight Merriman - Aaron Staple - Michael Dirolf - Kristina Chodorow - -Copyright: - - 2009 10gen - -License: - - AGPL - -The Debian packaging is (C) 2009, Kristina Chodorow and -is licensed under the AGPL, see `http://www.fsf.org/licensing/licenses/agpl-3.0.html'. diff --git a/debian/dirs b/debian/dirs deleted file mode 100644 index a7b6e78..0000000 --- a/debian/dirs +++ /dev/null @@ -1,3 +0,0 @@ -usr/bin -usr/sbin -var/lib/mongodb diff --git a/debian/init.d b/debian/init.d deleted file mode 100644 index 47a10a0..0000000 --- a/debian/init.d +++ /dev/null @@ -1,243 +0,0 @@ -#!/bin/sh -# -# init.d script with LSB support. -# -# Copyright (c) 2007 Javier Fernandez-Sanguino -# -# This is free software; you may redistribute it and/or modify -# it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2, -# or (at your option) any later version. -# -# This is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License with -# the Debian operating system, in /usr/share/common-licenses/GPL; if -# not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA -# -### BEGIN INIT INFO -# Provides: mongodb -# Required-Start: $network $local_fs $remote_fs -# Required-Stop: $network $local_fs $remote_fs -# Should-Start: $named -# Should-Stop: -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: An object/document-oriented database -# Description: MongoDB is a high-performance, open source, schema-free -# document-oriented data store that's easy to deploy, manage -# and use. It's network accessible, written in C++ and offers -# the following features: -# -# * Collection oriented storage - easy storage of object- -# style data -# * Full index support, including on inner objects -# * Query profiling -# * Replication and fail-over support -# * Efficient storage of binary data including large -# objects (e.g. videos) -# * Auto-sharding for cloud-level scalability (Q209) -# -# High performance, scalability, and reasonable depth of -# functionality are the goals for the project. -### END INIT INFO - -PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -DAEMON=/usr/bin/mongod -DESC=database - -# Default defaults. Can be overridden by the /etc/default/$NAME -NAME=mongodb -CONF=/etc/mongodb.conf -DATA=/var/lib/mongodb -LOGDIR=/var/log/mongodb -PIDFILE=/var/run/$NAME.pid -LOGFILE=$LOGDIR/$NAME.log # Server logfile -ENABLE_MONGODB=yes - -# Include mongodb defaults if available -if [ -f /etc/default/$NAME ] ; then - . /etc/default/$NAME -fi - -if test ! -x $DAEMON; then - echo "Could not find $DAEMON" - exit 0 -fi - -if test "x$ENABLE_MONGODB" != "xyes"; then - exit 0 -fi - -if test ! -x $DATA; then - mkdir $DATA || exit 0 -fi - -. 
/lib/lsb/init-functions - -STARTTIME=1 -DIETIME=10 # Time to wait for the server to die, in seconds - # If this value is set too low you might not - # let some servers to die gracefully and - # 'restart' will not work - -DAEMONUSER=${DAEMONUSER:-mongodb} -DAEMON_OPTS=${DAEMON_OPTS:-"--dbpath $DATA --logpath $LOGFILE run"} -DAEMON_OPTS="$DAEMON_OPTS --config $CONF" - -set -e - - -running_pid() { -# Check if a given process pid's cmdline matches a given name - pid=$1 - name=$2 - [ -z "$pid" ] && return 1 - [ ! -d /proc/$pid ] && return 1 - cmd=`cat /proc/$pid/cmdline | tr "\000" "\n"|head -n 1 |cut -d : -f 1` - # Is this the expected server - [ "$cmd" != "$name" ] && return 1 - return 0 -} - -running() { -# Check if the process is running looking at /proc -# (works for all users) - - # No pidfile, probably no daemon present - [ ! -f "$PIDFILE" ] && return 1 - pid=`cat $PIDFILE` - running_pid $pid $DAEMON || return 1 - return 0 -} - -start_server() { -# Start the process using the wrapper - start-stop-daemon --background --start --quiet --pidfile $PIDFILE \ - --make-pidfile --chuid $DAEMONUSER \ - --exec $DAEMON -- $DAEMON_OPTS - errcode=$? - return $errcode -} - -stop_server() { -# Stop the process using the wrapper - start-stop-daemon --stop --quiet --pidfile $PIDFILE \ - --user $DAEMONUSER \ - --exec $DAEMON - errcode=$? - return $errcode -} - -force_stop() { -# Force the process to die killing it manually - [ ! -e "$PIDFILE" ] && return - if running ; then - kill -15 $pid - # Is it really dead? - sleep "$DIETIME"s - if running ; then - kill -9 $pid - sleep "$DIETIME"s - if running ; then - echo "Cannot kill $NAME (pid=$pid)!" - exit 1 - fi - fi - fi - rm -f $PIDFILE -} - - -case "$1" in - start) - log_daemon_msg "Starting $DESC" "$NAME" - # Check if it's running first - if running ; then - log_progress_msg "apparently already running" - log_end_msg 0 - exit 0 - fi - if start_server ; then - # NOTE: Some servers might die some time after they start, - # this code will detect this issue if STARTTIME is set - # to a reasonable value - [ -n "$STARTTIME" ] && sleep $STARTTIME # Wait some time - if running ; then - # It's ok, the server started and is running - log_end_msg 0 - else - # It is not running after we did start - log_end_msg 1 - fi - else - # Either we could not start it - log_end_msg 1 - fi - ;; - stop) - log_daemon_msg "Stopping $DESC" "$NAME" - if running ; then - # Only stop the server if we see it running - errcode=0 - stop_server || errcode=$? - log_end_msg $errcode - else - # If it's not running don't do anything - log_progress_msg "apparently not running" - log_end_msg 0 - exit 0 - fi - ;; - force-stop) - # First try to stop gracefully the program - $0 stop - if running; then - # If it's still running try to kill it more forcefully - log_daemon_msg "Stopping (force) $DESC" "$NAME" - errcode=0 - force_stop || errcode=$? - log_end_msg $errcode - fi - ;; - restart|force-reload) - log_daemon_msg "Restarting $DESC" "$NAME" - errcode=0 - stop_server || errcode=$? - # Wait some sensible amount, some server need this - [ -n "$DIETIME" ] && sleep $DIETIME - start_server || errcode=$? - [ -n "$STARTTIME" ] && sleep $STARTTIME - running || errcode=$? - log_end_msg $errcode - ;; - status) - - log_daemon_msg "Checking status of $DESC" "$NAME" - if running ; then - log_progress_msg "running" - log_end_msg 0 - else - log_progress_msg "apparently not running" - log_end_msg 1 - exit 1 - fi - ;; - # MongoDB can't reload its configuration. 
- reload) - log_warning_msg "Reloading $NAME daemon: not implemented, as the daemon" - log_warning_msg "cannot re-read the config file (use restart)." - ;; - - *) - N=/etc/init.d/$NAME - echo "Usage: $N {start|stop|force-stop|restart|force-reload|status}" >&2 - exit 1 - ;; -esac - -exit 0 diff --git a/debian/lintian-overrides b/debian/lintian-overrides deleted file mode 100644 index c843e9e..0000000 --- a/debian/lintian-overrides +++ /dev/null @@ -1,11 +0,0 @@ -# Agreed with upstream, that redefining rpath is necessary as xulrunner used to -# change API without changing so-name -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongo /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongod /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongodump /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongoexport /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongofiles /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongoimport /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongorestore /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongos /usr/lib64/xulrunner-1.9.1 -mongodb: binary-or-shlib-defines-rpath ./usr/bin/mongosniff /usr/lib64/xulrunner-1.9.1 diff --git a/debian/mongo.1 b/debian/mongo.1 deleted file mode 100644 index 89f4881..0000000 --- a/debian/mongo.1 +++ /dev/null @@ -1,62 +0,0 @@ -.\" Documentation for the MongoDB shell -.TH MONGO "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongo \- the Mongo command\-line tool -.SH "SYNOPSIS" -\fBmongo [\fIOPTIONS\fR] [\fIDB_ADDRESS\fR] [\fIFILE+\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongo\fR -is a JavaScript shell (with GNU -readline -capabilities). It supports interactive and non\-interactive use. When used interactively, JavaScript can be used to query the database or perform any other function normally available with SpiderMonkey. Database output is displayed in JSON format. -.PP -If JavaScript files are specified on the command line, the shell will run non\-interactively, running each one in sequence and then exiting. -.SH "EXAMPLES" -.TP -.B mongo -start the shell, connecting to the server at localhost:27017 and using the test database -.TP -.B mongo foo -start the shell using the foo database at localhost:27017 -.TP -.B mongo 192.169.0.5/foo -start the shell using the foo database at 192.169.0.5:27017 -.TP -.B mongo 192.169.0.5:9999/foo -start the shell using the foo database at 192.169.0.5:9999 -.TP -.B mongo script1.js script2.js script3.js -run three scripts and exit -.SH "OPTIONS" -.TP -.B \-\-shell -run the shell after executing files -.TP -.B \-\-help -show usage information -.TP -.B \-\-host HOST -server to connect to (default HOST=localhost) -.TP -.B \-\-port PORT -port to connect to (default PORT=27017) -.TP -.B \-\-nodb -do not connect to mongod -.TP -.B \-\-eval SCRIPT -evaluate JavaScript -.TP -.B \-u USERNAME -specify user to log in as -.TP -.B \-pPASSWORD -specify password of user (notice there is no space) -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. 
-.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongod.1 b/debian/mongod.1 deleted file mode 100644 index 7b86359..0000000 --- a/debian/mongod.1 +++ /dev/null @@ -1,16 +0,0 @@ -.\" Documentation for the MongoDB shell -.TH MONGOD "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongod \- the Mongo Daemon -.SH "SYNOPSIS" -.SH "DESCRIPTION" -.PP -\fBmongod\fR -is a core MongoDB daemon. You are not supposed to call it directly, please refer to the wiki if necessary. -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Antonin Kral diff --git a/debian/mongodb.conf b/debian/mongodb.conf deleted file mode 100644 index 6a5de05..0000000 --- a/debian/mongodb.conf +++ /dev/null @@ -1,95 +0,0 @@ -# mongodb.conf - -# Where to store the data. - -# Note: if you run mongodb as a non-root user (recommended) you may -# need to create and set permissions for this directory manually, -# e.g., if the parent directory isn't mutable by the mongodb user. -dbpath=/var/lib/mongodb - -#where to log -logpath=/var/log/mongodb/mongodb.log - -logappend=true - -#port = 27017 - - - -# Enables periodic logging of CPU utilization and I/O wait -#cpu = true - -# Turn on/off security. Off is currently the default -#noauth = true -#auth = true - -# Verbose logging output. -#verbose = true - -# Inspect all client data for validity on receipt (useful for -# developing drivers) -#objcheck = true - -# Enable db quota management -#quota = true - -# Set oplogging level where n is -# 0=off (default) -# 1=W -# 2=R -# 3=both -# 7=W+some reads -#oplog = 0 - -# Diagnostic/debugging option -#nocursors = true - -# Ignore query hints -#nohints = true - -# Disable the HTTP interface (Defaults to localhost:27018). -#nohttpinterface = true - -# Turns off server-side scripting. This will result in greatly limited -# functionality -#noscripting = true - -# Turns off table scans. Any query that would do a table scan fails. -#notablescan = true - -# Disable data file preallocation. -#noprealloc = true - -# Specify .ns file size for new databases. -# nssize = - -# Accout token for Mongo monitoring server. -#mms-token = - -# Server name for Mongo monitoring server. -#mms-name = - -# Ping interval for Mongo monitoring server. -#mms-interval = - -# Replication Options - -# in replicated mongo databases, specify here whether this is a slave or master -#slave = true -#source = master.example.com -# Slave only: specify a single database to replicate -#only = master.example.com -# or -#master = true -#source = slave.example.com - -# Address of a server to pair with. -#pairwith = -# Address of arbiter server. -#arbiter = -# Automatically resync if slave data is stale -#autoresync -# Custom size for replication operation log. -#oplogSize = -# Size limit for in-memory storage of op ids. -#opIdMem = diff --git a/debian/mongodump.1 b/debian/mongodump.1 deleted file mode 100644 index 5cb33ce..0000000 --- a/debian/mongodump.1 +++ /dev/null @@ -1,36 +0,0 @@ -.\" Documentation for the MongoDB dump tool -.TH MONGODUMP "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongodump \- the Mongo dump tool -.SH "SYNOPSIS" -\fBmongodump [\fIOPTIONS\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongodump\fR -is a tool to output a binary representation of a database. It is mostly used for doing hot backups of a database. 
-.SH "OPTIONS" -.TP -.B \-\-help -show usage information -.TP -.B \-h, \-\-host HOST -server to connect to (default HOST=localhost) -.TP -.B \-d, \-\-db DATABASE -database to use -.TP -.B \-c, \-\-c COLLECTION -collection to use -.TP -.B \-o, \-\-out FILE -output file, if not specified, stdout is used -.TP -.B \-\-dbpath PATH -directly access mongod data files in this path, instead of connecting to a mongod instance -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongoexport.1 b/debian/mongoexport.1 deleted file mode 100644 index 1996b36..0000000 --- a/debian/mongoexport.1 +++ /dev/null @@ -1,51 +0,0 @@ -.\" Documentation for the MongoDB shell -.TH MONGOEXPORT "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongoexport \- the Mongo export tool -.SH "SYNOPSIS" -\fBmongoexport [\fIOPTIONS\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongoexport\fR -is a tool to export a MongoDB collection to either JSON or CSV. The query can be filtered or a list of fields to output can be given. -.PP -If the output is CSV, the fields must be specified in order. -.SH "EXAMPLES" -.TP -.B mongoexport -d test -c test1 --csv -f "name,num" -export documents from test.test1 in CSV format -.SH "OPTIONS" -.TP -.B \-\-help -show usage information -.TP -.B \-h, \-\-host HOST -server to connect to (default HOST=localhost) -.TP -.B \-d, \-\-db DATABASE -database to use -.TP -.B \-c, \-\-c COLLECTION -collection to use -.TP -.B \-q, \-\-query QUERY -query filter -.TP -.B \-f, \-\-fields FIELDS -comma\-separated list of field names -.TP -.B \-\-csv -export to CSV instead of JSON -.TP -.B \-o, \-\-out FILE -output file, if not specified, stdout is used -.TP -.B \-\-dbpath PATH -directly access mongod data files in this path, instead of connecting to a mongod instance -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongofiles.1 b/debian/mongofiles.1 deleted file mode 100644 index 4d7c0c5..0000000 --- a/debian/mongofiles.1 +++ /dev/null @@ -1,52 +0,0 @@ -.\" Documentation for the MongoDB dump tool -.TH MONGOFILES "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongofiles \- a simple GridFS interface -.SH "SYNOPSIS" -\fBmongofiles [\fIOPTIONS\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongofiles\fR -is used to list, get, and insert files in the database. -.SH "EXAMPLES" -.TP -.B mongofiles list -lists files in test.fs.files -.TP -.B mongofiles put README.txt -inserts the file README.txt into the collection test.fs.files -.TP -.B mongofiles get photo.jpg -retrieves photo.jpg from test.fs.files and saves it locally -.SH "OPTIONS" -.TP -.B \-\-help -show usage information -.TP -.B \-h, \-\-host HOST -mongo host to which to connect -.TP -.B \-d, \-\-db DB -database to use (default DB=test) -.TP -.B \-c, \-\-collection COLLECTION (default COLLECTION=fs.files) -collection to use -.TP -.B \-\-command [list\||\|search\||\|put\||\|get] -execute a command -.TP -.B \-\-file FILE -filename for get or put -.TP -.B list -list all files. takes an optional filename. 
the file has to start with the filename -.TP -.B search -search all files for something that contains the string -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongoimport.1 b/debian/mongoimport.1 deleted file mode 100644 index 4b6c3de..0000000 --- a/debian/mongoimport.1 +++ /dev/null @@ -1,63 +0,0 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH MONGOIMPORT "1" "January 2010" "10gen" "Mongo Database" -.SH "NAME" -mongoimport \- the Mongo import tool -.SH "SYNOPSIS" -\fBmongoimport [\fIOPTIONS\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongoimport\fR -is a tool to import a MongoDB collection from JSON, CSV, or TSV. The query can be filtered or a list of fields to input can be given. -.\".SH "EXAMPLES" -.\".TP -.\".B mongoimport -d test -c test1 --csv -f "name,num" -.\"import documents from test.test1 in CSV format -.SS "OPTIONS" -.TP -\fB\-\-help\fR -show usage information -.TP -.B \-h, \-\-host HOST -server to connect to (default HOST=localhost) -.TP -.B \-d, \-\-db DATABASE -database to use -.TP -.B \-c, \-\-c COLLECTION -collection to use (some commands) -.TP -.B \-\-dbpath PATH -directly access mongod data files in this path, -instead of connecting to a mongod instance -.TP -.B \-v, \-\-verbose -be more verbose (include multiple times for more -verbosity e.g. \fB\-vvvvv\fR) -.TP -.B \-f, \-\-fields NAMES -comma seperated list of field names e.g. \fB\-f\fR name,age -.TP -.B \-\-fieldFile FILE -file with fields names \- 1 per line -.TP -.B \-\-ignoreBlanks -if given, empty fields in csv and tsv will be ignored -.TP -.B \-\-type TYPE -type of file to import. default: json (json,csv,tsv) -.TP -.B \-\-file FILE -file to import from; if not specified stdin is used -.TP -.B \-\-drop -drop collection first -.TP -.B \-\-headerline -CSV,TSV only \- use first line as headers -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongorestore.1 b/debian/mongorestore.1 deleted file mode 100644 index 5f207b0..0000000 --- a/debian/mongorestore.1 +++ /dev/null @@ -1,36 +0,0 @@ -.\" Documentation for the MongoDB dump tool -.TH MONGORESTORE "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongorestore \- the Mongo restoration tool -.SH "SYNOPSIS" -\fBmongorestore [\fIOPTIONS\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongorestore\fR -is a tool to use the output from mongodump to restore a database. -.SH "OPTIONS" -.TP -.B \-\-help -show usage information -.TP -.B \-h, \-\-host HOST -server to connect to (default HOST=localhost) -.TP -.B \-d, \-\-db DATABASE -database to use -.TP -.B \-c, \-\-c COLLECTION -collection to use -.TP -.B \-\-dir PATH -directory from which to restore -.TP -.B \-\-dbpath PATH -directly access mongod data files in this path, instead of connecting to a mongod instance -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. 
-.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongos.1 b/debian/mongos.1 deleted file mode 100644 index 74d01c6..0000000 --- a/debian/mongos.1 +++ /dev/null @@ -1,39 +0,0 @@ -.\" Documentation for the MongoDB dump tool -.TH MONGOS "1" "June 2009" "10gen" "Mongo Database" -.SH "NAME" -mongos \- the Mongo sharding server -.SH "SYNOPSIS" -\fBmongos [\fIOPTIONS\fR]\fR -.SH "DESCRIPTION" -.PP -\fBmongos\fR -is used to setup, configure, and get information about sharded databases. -.SH "EXAMPLES" -.PP -.B ./mongod --port 9999 --dbpath /data/db/a # first server -.PP -.B ./mongod --port 9998 --dbpath /data/db/b # second server -.PP -.B ./mongos --configdb localhost:9999 # mongos -.PP -starts three servers to set up sharding -.SH "OPTIONS" -.TP -.B \-\-help -show usage information -.TP -.B \-\-port N -port on which to listen -.TP -.B \-\-configdb DATABASE+ -one or more databases to use as the configuration databases -.TP -.B \-v+ -verbosity -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Kristina Chodorow diff --git a/debian/mongosniff.1 b/debian/mongosniff.1 deleted file mode 100644 index b6f1063..0000000 --- a/debian/mongosniff.1 +++ /dev/null @@ -1,30 +0,0 @@ -.TH MONGOSNIFF "1" "Jan 2010" "10gen" "Mongo Database" -.SH "NAME" -mongosniff \- the Mongo packet analyzer -.SH "SYNOPSIS" -\fBmongosniff [\fIOPTIONS\fR] [\fI ...\fR] -.SH "DESCRIPTION" -.PP -\fBmongosniff\fR -is a analyzer tool for analyzing packets coming to your database. -.PP -.SH "OPTIONS" -.TP -.B \-\-forward -Forward all parsed request messages to mongod instance at specified host:port -.TP -.B \-\-source -Source of traffic to sniff, either a network interface or a file containing previously captured packets, in pcap format. If no source is specified, mongosniff will attempt to sniff from one of the machine's network interfaces. -.TP -.B \-\-help -print a short help message. -.TP -.B -These parameters are used to filter sniffing. By default, only port 27017 is sniffed. -.SH "COPYRIGHT" -.PP -Copyright 2007\-2009 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. -.SH "AUTHOR" -Antonin Kral diff --git a/debian/mongostat.1 b/debian/mongostat.1 deleted file mode 100644 index 5828104..0000000 --- a/debian/mongostat.1 +++ /dev/null @@ -1,39 +0,0 @@ -.\" Documentation for the MongoDB shell -.TH MONGOSTAT "15" "March 2010" "10gen" "Mongo Database" -.SH "NAME" -mongostat \- view statistics on a running mongod instance -.SH "SYNOPSIS" -\fBmongostat [\fIOPTIONS\fR] -.SH "DESCRIPTION" -.PP -\fBmongostat\fR -prints statistics on a running mongod instance. -.SH "OPTIONS" -.TP -.B \-\-help -show usage information -.TP -.B \-h, \-\-host HOST -mongo host to connect to (use "left,right" for pairs) -\" .TP -\" .B \-\-port PORT -\" port to connect to (default PORT=27017) -.TP -.B \-d, \-\-db ARG -db to use -.TP -.B \-c, \-\-collection ARG -collection to use (some commands) -.TP -.B \-u, \-\-username USERNAME -specify user to log in as -.TP -.B \-p, \-\-password PASSWORD -specify password of user (notice there is no space) -.SH "COPYRIGHT" -.PP -Copyright 2010 10gen -.SH "SEE ALSO" -For more information, please refer to the MongoDB wiki, available at http://www.mongodb.org. 
-.SH "AUTHOR" -Eliot Horowitz diff --git a/debian/postinst b/debian/postinst deleted file mode 100644 index 4d0e786..0000000 --- a/debian/postinst +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh -# postinst script for mongodb -# -# see: dh_installdeb(1) - -set -e - -# summary of how this script can be called: -# * `configure' -# * `abort-upgrade' -# * `abort-remove' `in-favour' -# -# * `abort-remove' -# * `abort-deconfigure' `in-favour' -# `removing' -# -# for details, see http://www.debian.org/doc/debian-policy/ or -# the debian-policy package - - -case "$1" in - configure) - # create a mongodb group and user - if ! grep -q mongodb /etc/passwd; then - adduser --system --no-create-home mongodb - addgroup --system mongodb - adduser mongodb mongodb - fi - - # create db -- note: this should agree with dbpath in mongodb.conf - mkdir -p /var/lib/mongodb - chown -R mongodb:mongodb /var/lib/mongodb - - # create logdir -- note: this should agree with logpath in mongodb.conf - mkdir -p /var/log/mongodb - chown -R mongodb:mongodb /var/log/mongodb - ;; - - abort-upgrade|abort-remove|abort-deconfigure) - ;; - - *) - echo "postinst called with unknown argument \`$1'" >&2 - exit 1 - ;; -esac - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. - -#DEBHELPER# - -exit 0 - - diff --git a/debian/postrm b/debian/postrm deleted file mode 100644 index 4bbb708..0000000 --- a/debian/postrm +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh -# postrm script for mongodb -# -# see: dh_installdeb(1) - -set -e - -# summary of how this script can be called: -# * `remove' -# * `purge' -# * `upgrade' -# * `failed-upgrade' -# * `abort-install' -# * `abort-install' -# * `abort-upgrade' -# * `disappear' -# -# for details, see http://www.debian.org/doc/debian-policy/ or -# the debian-policy package - - -case "$1" in - purge|remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) - ;; - - *) - echo "postrm called with unknown argument \`$1'" >&2 - exit 1 - ;; -esac - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. - -#DEBHELPER# - -exit 0 - - diff --git a/debian/prerm b/debian/prerm deleted file mode 100644 index 9507ade..0000000 --- a/debian/prerm +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh -# prerm script for mongodb -# -# see: dh_installdeb(1) - -set -e - -# summary of how this script can be called: -# * `remove' -# * `upgrade' -# * `failed-upgrade' -# * `remove' `in-favour' -# * `deconfigure' `in-favour' -# `removing' -# -# for details, see http://www.debian.org/doc/debian-policy/ or -# the debian-policy package - -echo "arg: $1" - -case "$1" in - remove|upgrade|deconfigure) - ;; - - failed-upgrade) - ;; - - *) - echo "prerm called with unknown argument \`$1'" >&2 - exit 1 - ;; -esac - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. - -#DEBHELPER# - -exit 0 - - diff --git a/debian/rules b/debian/rules deleted file mode 100644 index 2afdfdb..0000000 --- a/debian/rules +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/make -f -# -*- makefile -*- -# Sample debian/rules that uses debhelper. -# This file was originally written by Joey Hess and Craig Small. -# As a special exception, when this file is copied by dh-make into a -# dh-make output file, you may use that output file without restriction. -# This special exception was added by Craig Small in version 0.37 of dh-make. - -# Uncomment this to turn on verbose mode. 
-#export DH_VERBOSE=1 - - -configure: configure-stamp -configure-stamp: - dh_testdir - # Add here commands to configure the package. - - touch configure-stamp - - -build: build-stamp - -build-stamp: configure-stamp - dh_testdir - - # Add here commands to compile the package. - scons - #docbook-to-man debian/mongodb.sgml > mongodb.1 - ls debian/*.1 > debian/mongodb.manpages - - touch $@ - -clean: - dh_testdir - dh_testroot - rm -f build-stamp configure-stamp - - # FIXME: scons freaks out at the presence of target files - # under debian/mongodb. - #scons -c - rm -rf $(CURDIR)/debian/mongodb - rm -f config.log - rm -f mongo - rm -f mongod - rm -f mongoimportjson - rm -f mongoexport - rm -f mongorestore - rm -f mongodump - rm -f mongofiles - rm -f .sconsign.dblite - rm -f libmongoclient.a - rm -rf client/*.o - rm -rf tools/*.o - rm -rf shell/*.o - rm -rf .sconf_temp - rm -f buildscripts/*.pyc - rm -f *.pyc - rm -f buildinfo.cpp - dh_clean debian/files - -install: build - dh_testdir - dh_testroot - dh_prep - dh_installdirs - - scons --prefix=$(CURDIR)/debian/mongodb/usr install - mkdir -p $(CURDIR)/debian/mongodb/etc - cp $(CURDIR)/debian/mongodb.conf $(CURDIR)/debian/mongodb/etc/mongodb.conf - - mkdir -p $(CURDIR)/debian/mongodb/usr/share/lintian/overrides/ - install -m 644 $(CURDIR)/debian/lintian-overrides \ - $(CURDIR)/debian/mongodb/usr/share/lintian/overrides/mongodb - -# Build architecture-independent files here. -binary-indep: build install -# We have nothing to do by default. - -# Build architecture-dependent files here. -binary-arch: build install - dh_testdir - dh_testroot - dh_installchangelogs - dh_installdocs - dh_installexamples -# dh_install -# dh_installmenu -# dh_installdebconf -# dh_installlogrotate -# dh_installemacsen -# dh_installpam -# dh_installmime - dh_installinit -# dh_installinfo - dh_installman - dh_link - dh_strip - dh_compress - dh_fixperms - dh_installdeb - dh_shlibdeps - dh_gencontrol - dh_md5sums - dh_builddeb - -binary: binary-indep binary-arch -.PHONY: build clean binary-indep binary-arch binary install configure diff --git a/debian/watch b/debian/watch deleted file mode 100644 index 08ce42b..0000000 --- a/debian/watch +++ /dev/null @@ -1,10 +0,0 @@ -# Example watch control file for uscan -# Rename this file to "watch" and then you can run the "uscan" command -# to check for upstream updates and more. 
-# See uscan(1) for format - -# Compulsory line, this is a version 3 file -version=3 - -# examine a Webserver directory -http://downloads.mongodb.org/linux/mongodb-linux-(.*)\.tar\.gz diff --git a/distsrc/client/SConstruct b/distsrc/client/SConstruct index 8a8bae9..a97699e 100644 --- a/distsrc/client/SConstruct +++ b/distsrc/client/SConstruct @@ -1,6 +1,7 @@ import os +# options AddOption( "--extrapath", dest="extrapath", type="string", @@ -8,6 +9,15 @@ AddOption( "--extrapath", action="store", help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" ) +AddOption( "--prefix", + dest="prefix", + type="string", + nargs=1, + action="store", + default="/usr/local", + help="installation root" ) + + env = Environment() def addExtraLibs( s ): @@ -36,6 +46,7 @@ elif "linux2" == os.sys.platform: if nix: env.Append( CPPFLAGS=" -O3" ) + env.Append( LIBS=["pthread"] ) if linux: env.Append( LINKFLAGS=" -Wl,--as-needed -Wl,-zdefs " ) @@ -44,18 +55,37 @@ conf = Configure(env) for lib in boostLibs: if not conf.CheckLib("boost_%s-mt" % lib): conf.CheckLib("boost_%s" % lib) + +dirs = [ "" , "bson/" , "bson/util/" , + "client/" , "s/" , "shell/" , + "db/" , + "scripting/" , + "util/" , "util/concurrency/" , "util/mongoutils/" ] + allClientFiles = [] -allClientFiles += Glob( "mongo/*.cpp" ) -allClientFiles += Glob( "mongo/client/*.cpp" ) -allClientFiles += Glob( "mongo/s/*.cpp" ) -allClientFiles += Glob( "mongo/shell/*.cpp" ) -allClientFiles += Glob( "mongo/db/*.cpp" ) -allClientFiles += Glob( "mongo/scripting/*.cpp" ) -allClientFiles += Glob( "mongo/util/*.cpp" ) +for x in dirs: + allClientFiles += Glob( "mongo/" + x + "*.cpp" ) allClientFiles += Glob( "mongo/util/*.c" ) -env.SharedLibrary( "mongoclient" , allClientFiles ) -env.Library( "mongoclient" , allClientFiles ) +libs = [] +libs += env.SharedLibrary( "mongoclient" , allClientFiles ) +libs += env.Library( "mongoclient" , allClientFiles ) + +# install + +prefix = GetOption( "prefix" ) + +for x in libs: + env.Install( prefix + "/lib/" , str(x) ) + +for x in dirs: + x = "mongo/" + x + env.Install( prefix + "/include/" + x , Glob( x + "*.h" ) ) + +env.Alias( "install" , prefix ) + + +# example setup clientTests = [] clientEnv = env.Clone(); diff --git a/doxygenConfig b/doxygenConfig index 9d4bbfb..fcf10e7 100644 --- a/doxygenConfig +++ b/doxygenConfig @@ -3,7 +3,7 @@ #--------------------------------------------------------------------------- DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = MongoDB -PROJECT_NUMBER = 1.6.6-pre- +PROJECT_NUMBER = 1.8.0 OUTPUT_DIRECTORY = docs/doxygen CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English @@ -101,7 +101,7 @@ WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- -INPUT = client db/jsobj.h db/json.h bson +INPUT = client db/jsobj.h db/json.h bson util INPUT_ENCODING = UTF-8 FILE_PATTERNS = *.c \ *.cc \ diff --git a/jstests/_tst.js b/jstests/_tst.js new file mode 100644 index 0000000..f208164 --- /dev/null +++ b/jstests/_tst.js @@ -0,0 +1,41 @@ +/* a general testing framework (helpers) for us in the jstests/ + + to use, from your test file: + testname="mytestname"; + load("jstests/_tst.js"); +*/ + +if( typeof tst == "undefined" ) { + tst = {} + + tst.log = function (optional_msg) { + print("\n\nstep " + ++this._step + " " + (optional_msg || "")); + } + + tst.success = function () { + print(testname + " SUCCESS"); + } + + /* diff files a 
and b, returning the difference (empty str if no difference) */ + tst.diff = function(a, b) { + function reSlash(s) { + var x = s; + if (_isWindows()) { + while (1) { + var y = x.replace('/', '\\'); + if (y == x) + break; + x = y; + } + } + return x; + } + a = reSlash(a); + b = reSlash(b); + print("diff " + a + " " + b); + return run("diff", a, b); + } +} + +print(testname + " BEGIN"); +tst._step = 0; diff --git a/jstests/apitest_db.js b/jstests/apitest_db.js index f54879c..c734d67 100644 --- a/jstests/apitest_db.js +++ b/jstests/apitest_db.js @@ -70,3 +70,8 @@ assert( asserted, "should have asserted" ); dd( "g" ); + + +assert.eq( "foo" , db.getSisterDB( "foo" ).getName() ) +assert.eq( "foo" , db.getSiblingDB( "foo" ).getName() ) + diff --git a/jstests/array4.js b/jstests/array4.js new file mode 100644 index 0000000..1053e16 --- /dev/null +++ b/jstests/array4.js @@ -0,0 +1,30 @@ + +t = db.array4; +t.drop(); + +t.insert({"a": ["1", "2", "3"]}); +t.insert({"a" : ["2", "1"]}); + +var x = {'a.0' : /1/}; + +assert.eq(t.count(x), 1); + +assert.eq(t.findOne(x).a[0], 1); +assert.eq(t.findOne(x).a[1], 2); + +t.drop(); + +t.insert({"a" : {"0" : "1"}}); +t.insert({"a" : ["2", "1"]}); + +assert.eq(t.count(x), 1); +assert.eq(t.findOne(x).a[0], 1); + +t.drop(); + +t.insert({"a" : ["0", "1", "2", "3", "4", "5", "6", "1", "1", "1", "2", "3", "2", "1"]}); +t.insert({"a" : ["2", "1"]}); + +x = {"a.12" : /2/}; +assert.eq(t.count(x), 1); +assert.eq(t.findOne(x).a[0], 0); diff --git a/jstests/arrayfind3.js b/jstests/arrayfind3.js new file mode 100644 index 0000000..60da713 --- /dev/null +++ b/jstests/arrayfind3.js @@ -0,0 +1,21 @@ + +t = db.arrayfind3; +t.drop() + +t.save({a:[1,2]}) +t.save({a:[1, 2, 6]}) +t.save({a:[1, 4, 6]}) + + +assert.eq( 2 , t.find( {a:{$gte:3, $lte: 5}} ).itcount() , "A1" ) +assert.eq( 1 , t.find( {a:{$elemMatch:{$gte:3, $lte: 5}}} ).itcount() , "A2" ) + +t.ensureIndex( { a : 1 } ) + +printjson( t.find( {a:{$gte:3, $lte: 5}} ).explain() ); + +//assert.eq( 2 , t.find( {a:{$gte:3, $lte: 5}} ).itcount() , "B1" ); // SERVER-1264 +assert.eq( 1 , t.find( {a:{$elemMatch:{$gte:3, $lte: 5}}} ).itcount() , "B2" ) + + + diff --git a/jstests/auth/auth1.js b/jstests/auth/auth1.js index 6fc6dc5..2f2a1b4 100644 --- a/jstests/auth/auth1.js +++ b/jstests/auth/auth1.js @@ -68,6 +68,6 @@ if ( db.runCommand( "features" ).readlock ){ initial: { count: 0 } }; - assert.throws( function() { return t.group( p ) }, "write reduce didn't fail" ); + assert.throws( function() { return t.group( p ) }, null , "write reduce didn't fail" ); } diff --git a/jstests/basic3.js b/jstests/basic3.js index 2deee2b..4488865 100644 --- a/jstests/basic3.js +++ b/jstests/basic3.js @@ -3,14 +3,13 @@ t = db.getCollection( "foo_basic3" ); t.find( { "a.b" : 1 } ).toArray(); -ok = false; +ok = true; try{ t.save( { "a.b" : 5 } ); ok = false; } catch ( e ){ - ok = true; } assert( ok , ". in names aren't allowed doesn't work" ); @@ -19,6 +18,33 @@ try{ ok = false; } catch ( e ){ - ok = true; } assert( ok , ". in embedded names aren't allowed doesn't work" ); + +// following tests make sure update keys are checked +t.save({"a": 0,"b": 1}) +try { + t.update({"a": 0}, {"b.b": 1}); + ok = false; +} catch (e) {} +assert( ok , "must deny '.' in key of update" ); + +// upsert with embedded doc +try { + t.update({"a": 10}, {"b": { "c.c" : 1 }}, true); + ok = false; +} catch (e) {} +assert( ok , "must deny '.' 
in key of update" ); + +// if it is a modifier, it should still go through +t.update({"a": 0}, {$set: { "c.c": 1}}) +t.update({"a": 0}, {$inc: { "c.c": 1}}) + +// edge cases +try { + t.update({"a": 0}, {"": { "c.c": 1}}) + ok = false; +} catch (e) {} +assert( ok , "must deny '.' in key of update" ); +t.update({"a": 0}, {}) + diff --git a/jstests/big_object1.js b/jstests/big_object1.js new file mode 100644 index 0000000..be841e0 --- /dev/null +++ b/jstests/big_object1.js @@ -0,0 +1,46 @@ + +t = db.big_object1 +t.drop(); + +if ( db.adminCommand( "buildinfo" ).bits == 64 ){ + + s = "" + while ( s.length < 850 * 1024 ){ + s += "x"; + } + + x = 0; + while ( true ){ + n = { _id : x , a : [] } + for ( i=0; i<14+x; i++ ) + n.a.push( s ) + try { + t.insert( n ) + o = n + } + catch ( e ){ + break; + } + + if ( db.getLastError() != null ) + break; + x++; + } + + printjson( t.stats(1024*1024) ) + + assert.lt( 15 * 1024 * 1024 , Object.bsonsize( o ) , "A1" ) + assert.gt( 17 * 1024 * 1024 , Object.bsonsize( o ) , "A2" ) + + assert.eq( x , t.count() , "A3" ) + + for ( i=0; i min ) { + // 'n' is the number of documents to remove - we must account for the + // possibility that 'inc' will be true, and avoid removing all documents + // from the collection in that case, as removing all documents is not + // allowed by 'captrunc' var n = Random.randInt( count - min - 1 ); // 0 <= x <= count - min - 1 var inc = Random.rand() > 0.5; debug( count + " " + n + " " + inc ); @@ -58,10 +78,13 @@ function doTest() { } count -= n; max -= n; + // Validate the remaining documents. checkOrder( max - 1 ); } } +// Repeatedly add up to 'oldMax' documents and then truncate the newest +// documents. Newer documents take up more space than older documents. for( var i = 0; i < 10; ++i ) { doTest(); } @@ -77,6 +100,8 @@ db.capped6.drop(); db._dbCommand( { create: "capped6", capped: true, size: 1000, $nExtents: 11, autoIndexId: false } ); tzz = db.capped6; +// Same test as above, but now the newer documents take less space than the +// older documents instead of more. for( var i = 0; i < 10; ++i ) { doTest(); } diff --git a/jstests/capped7.js b/jstests/capped7.js index ecb689e..693828d 100644 --- a/jstests/capped7.js +++ b/jstests/capped7.js @@ -1,3 +1,5 @@ +// Test NamespaceDetails::emptyCappedCollection via 'emptycapped' command + Random.setRandomSeed(); db.capped7.drop(); @@ -8,6 +10,10 @@ var ten = new Array( 11 ).toString().replace( /,/g, "-" ); count = 0; +/** + * Insert new documents until the capped collection loops and the document + * count doesn't increase on insert. + */ function insertUntilFull() { count = tzz.count(); var j = 0; @@ -23,21 +29,27 @@ while( 1 ) { insertUntilFull(); +// oldCount == count before empty oldCount = count; assert.eq.automsg( "11", "tzz.stats().numExtents" ); + +// oldSize == size before empty var oldSize = tzz.stats().storageSize; assert.commandWorked( db._dbCommand( { emptycapped: "capped7" } ) ); +// check that collection storage parameters are the same after empty assert.eq.automsg( "11", "tzz.stats().numExtents" ); assert.eq.automsg( "oldSize", "tzz.stats().storageSize" ); +// check that the collection is empty after empty assert.eq.automsg( "0", "tzz.find().itcount()" ); assert.eq.automsg( "0", "tzz.count()" ); +// check that we can reuse the empty collection, inserting as many documents +// as we were able to the first time through. 
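// Aside: a minimal, self-contained sketch of the emptycapped pattern this file
// exercises, using a hypothetical collection name ("capped7_sketch") rather than
// the test's own collection: fill a small capped collection, empty it with the
// test-only 'emptycapped' command, and confirm the documents are gone while the
// storage size is unchanged.
var sketch = db.capped7_sketch;
sketch.drop();
db._dbCommand( { create: "capped7_sketch", capped: true, size: 1000 } );
for ( var s = 0; s < 100; ++s ) sketch.save( { i: s } );
var sketchSize = sketch.stats().storageSize;
assert.commandWorked( db._dbCommand( { emptycapped: "capped7_sketch" } ) );
assert.eq( 0, sketch.count(), "sketch: empty after emptycapped" );
assert.eq( sketchSize, sketch.stats().storageSize, "sketch: storageSize unchanged" );
sketch.drop();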
insertUntilFull(); - assert.eq.automsg( "oldCount", "count" ); assert.eq.automsg( "oldCount", "tzz.find().itcount()" ); assert.eq.automsg( "oldCount", "tzz.count()" ); @@ -47,12 +59,16 @@ var oldSize = tzz.stats().storageSize; assert.commandWorked( db._dbCommand( { emptycapped: "capped7" } ) ); +// check that the collection storage parameters are unchanged after another empty assert.eq.automsg( "11", "tzz.stats().numExtents" ); assert.eq.automsg( "oldSize", "tzz.stats().storageSize" ); +// insert an arbitrary number of documents var total = Random.randInt( 2000 ); for( var j = 1; j <= total; ++j ) { tzz.save( {i:ten,j:j} ); + // occasionally check that only the oldest documents are removed to make room + // for the newest documents if ( Random.rand() > 0.95 ) { assert.automsg( "j >= tzz.count()" ); assert.eq.automsg( "tzz.count()", "tzz.find().itcount()" ); @@ -62,6 +78,7 @@ for( var j = 1; j <= total; ++j ) { while( c.hasNext() ) { assert.eq.automsg( "c.next().j", "k--" ); } + // check the same thing with a reverse iterator as well var c = tzz.find().sort( {$natural:1} ); assert.automsg( "c.hasNext()" ); while( c.hasNext() ) { diff --git a/jstests/capped8.js b/jstests/capped8.js new file mode 100644 index 0000000..cce0eec --- /dev/null +++ b/jstests/capped8.js @@ -0,0 +1,86 @@ +// Test NamespaceDetails::cappedTruncateAfter with empty extents + +Random.setRandomSeed(); + +t = db.jstests_capped8; + +function debug( x ) { +// printjson( x ); +} + +/** Generate an object with a string field of specified length */ +function obj( size ) { + return {a:new Array( size + 1 ).toString()};; +} + +function withinOne( a, b ) { + assert( Math.abs( a - b ) <= 1, "not within one: " + a + ", " + b ) +} + +/** + * Insert enough documents of the given size spec that the collection will + * contain only documents having this size spec. + */ +function insertMany( size ) { + // Add some variability, as the precise number can trigger different cases. + n = 250 + Random.randInt( 10 ); + for( i = 0; i < n; ++i ) { + t.save( obj( size ) ); + debug( t.count() ); + } +} + +/** + * Insert some documents in such a way that there may be an empty extent, then + * truncate the capped collection. + */ +function insertAndTruncate( first ) { + myInitialCount = t.count(); + // Insert enough documents to make the capped allocation loop over. + insertMany( 50 ); + myFiftyCount = t.count(); + // Insert documents that are too big to fit in the smaller extents. + insertMany( 2000 ); + myTwokCount = t.count(); + if ( first ) { + initialCount = myInitialCount; + fiftyCount = myFiftyCount; + twokCount = myTwokCount; + // Sanity checks for collection count + assert( fiftyCount > initialCount ); + assert( fiftyCount > twokCount ); + } else { + // Check that we are able to insert roughly the same number of documents + // after truncating. The exact values are slightly variable as a result + // of the capped allocation algorithm. + withinOne( initialCount, myInitialCount ); + withinOne( fiftyCount, myFiftyCount ); + withinOne( twokCount, myTwokCount ); + } + count = t.count(); + // Check that we can truncate the collection successfully. 
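    // The command below removes the 'count - 1' newest documents via the test-only
    // 'captrunc' command, leaving a single document behind; 'inc:false' is passed
    // because (per the capped6.js comments above) captrunc may not remove every
    // document in the collection.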
+ assert.commandWorked( db.runCommand( { captrunc:"jstests_capped8", n:count - 1, inc:false } ) ); +} + +/** Test truncating and subsequent inserts */ +function testTruncate() { + insertAndTruncate( true ); + insertAndTruncate( false ); + insertAndTruncate( false ); +} + +t.drop(); +db._dbCommand( { create:"jstests_capped8", capped: true, $nExtents: [ 10000, 10000, 1000 ] } ); +testTruncate(); + +t.drop(); +db._dbCommand( { create:"jstests_capped8", capped: true, $nExtents: [ 10000, 1000, 1000 ] } ); +testTruncate(); + +t.drop(); +db._dbCommand( { create:"jstests_capped8", capped: true, $nExtents: [ 10000, 1000 ] } ); +testTruncate(); + +t.drop(); +db._dbCommand( { create:"jstests_capped8", capped: true, $nExtents: [ 10000 ] } ); +testTruncate(); diff --git a/jstests/check_shard_index.js b/jstests/check_shard_index.js new file mode 100644 index 0000000..a5a1fc1 --- /dev/null +++ b/jstests/check_shard_index.js @@ -0,0 +1,45 @@ +// ------------------------- +// CHECKSHARDINGINDEX TEST UTILS +// ------------------------- + +f = db.jstests_shardingindex; +f.drop(); + + +// ------------------------- +// Case 1: all entries filled or empty should make a valid index +// + +f.drop(); +f.ensureIndex( { x: 1 , y: 1 } ); +assert.eq( 0 , f.count() , "1. initial count should be zero" ); + +res = db.runCommand( { checkShardingIndex: "test.jstests_shardingindex" , keyPattern: {x:1, y:1} , force: true }); +assert.eq( true , res.ok, "1a" ); + +f.save( { x: 1 , y : 1 } ); +assert.eq( 1 , f.count() , "1. count after initial insert should be 1" ); +res = db.runCommand( { checkShardingIndex: "test.jstests_shardingindex" , keyPattern: {x:1, y:1} , force: true }); +assert.eq( true , res.ok , "1b" ); + + +// ------------------------- +// Case 2: entry with null values would make an index unsuitable +// + +f.drop(); +f.ensureIndex( { x: 1 , y: 1 } ); +assert.eq( 0 , f.count() , "2. initial count should be zero" ); + +f.save( { x: 1 , y : 1 } ); +f.save( { x: null , y : 1 } ); + +res = db.runCommand( { checkShardingIndex: "test.jstests_shardingindex" , keyPattern: {x:1, y:1} , force: true }); +assert.eq( true , res.ok , "2a " + tojson(res) ); + +f.save( { y: 2 } ); +assert.eq( 3 , f.count() , "2. count after initial insert should be 3" ); +res = db.runCommand( { checkShardingIndex: "test.jstests_shardingindex" , keyPattern: {x:1, y:1} , force: true }); +assert.eq( false , res.ok , "2b " + tojson(res) ); + +print("PASSED"); diff --git a/jstests/conc_update.js b/jstests/conc_update.js deleted file mode 100644 index ac70861..0000000 --- a/jstests/conc_update.js +++ /dev/null @@ -1,45 +0,0 @@ -// db = db.getSisterDB("concurrency") -// db.dropDatabase(); -// -// NRECORDS=10*1024*1024 // this needs to be relatively big so that -// // the update() will take a while. -// -// print("loading data (will take a while; progress msg every 1024*1024 documents)") -// for (i=0; i<(10*1024*1024); i++) { -// db.conc.insert({x:i}) -// if ((i%(1024*1024))==0) -// print("loaded " + i/(1024*1024) + " mibi-records") -// } -// -// print("making an index (will take a while)") -// db.conc.ensureIndex({x:1}) -// -// var c1=db.conc.count({x:{$lt:NRECORDS}}) -// // this is just a flag that the child will toggle when it's done. 
-// db.concflag.update({}, {inprog:true}, true) -// -// updater=startParallelShell("db=db.getSisterDB('concurrency');\ -// db.conc.update({}, {$inc:{x: "+NRECORDS+"}}, false, true);\ -// print(db.getLastError());\ -// db.concflag.update({},{inprog:false})"); -// -// querycount=0; -// decrements=0; -// misses=0 -// while (1) { -// if (db.concflag.findOne().inprog) { -// c2=db.conc.count({x:{$lt:10*1024*1024}}) -// print(c2) -// querycount++; -// if (c2 0; } ).sort( { _id : -1 } ).limit(n).itcount() - end = new Date() + var start = null; + var ex = null; + var num = null; + var end = null; + try { + start = new Date() + ex = t.find(function () { num = 2; for (var x = 0; x < 1000; x++) num += 2; return num > 0; }).sort({ _id: -1 }).explain() + num = ex.n + end = new Date() + } + catch (e) { + print("cursora.js FAIL " + e); + join(); + throw e; + } + join() - print( "num: " + num + " time:" + ( end.getTime() - start.getTime() ) ) - assert.eq( 0 , t.count() , "after remove" ) + //print( "cursora.js num: " + num + " time:" + ( end.getTime() - start.getTime() ) ) + assert.eq( 0 , t.count() , "after remove: " + tojson( ex ) ) + // assert.lt( 0 , ex.nYields , "not enough yields : " + tojson( ex ) ); // TODO make this more reliable so cen re-enable assert if ( n == num ) - print( "warning: shouldn't have counted all n: " + n + " num: " + num ); + print( "cursora.js warning: shouldn't have counted all n: " + n + " num: " + num ); } run( 1500 ) run( 5000 ) - run( 1500 , true ) run( 5000 , true ) - - +print("cursora.js SUCCESS") diff --git a/jstests/datasize3.js b/jstests/datasize3.js index d45f34b..df79e6d 100644 --- a/jstests/datasize3.js +++ b/jstests/datasize3.js @@ -22,10 +22,12 @@ t.ensureIndex( { x : 1 } ) for ( i=2; i<100; i++ ) t.insert( { x : i } ) -a = run( { min : { x : 20 } , max : { x : 50 } } ) -b = run( { min : { x : 20 } , max : { x : 50 } , estimate : true } ) +a = run( { min : { x : 20 } , max : { x : 50 } } ).size +b = run( { min : { x : 20 } , max : { x : 50 } , estimate : true } ).size -assert.eq( a.size , b.size ); +ratio = Math.min( a , b ) / Math.max( a , b ); + +assert.lt( 0.97 , ratio , "sizes not equal a: " + a + " b: " + b ); diff --git a/jstests/dbcase.js b/jstests/dbcase.js index d76b739..21854d8 100644 --- a/jstests/dbcase.js +++ b/jstests/dbcase.js @@ -1,4 +1,6 @@ +/* +TODO SERVER-2111 a = db.getSisterDB( "dbcasetest_dbnamea" ) b = db.getSisterDB( "dbcasetest_dbnameA" ) @@ -19,5 +21,5 @@ a.dropDatabase(); b.dropDatabase(); print( db.getMongo().getDBNames() ) - +*/ diff --git a/jstests/disk/directoryperdb.js b/jstests/disk/directoryperdb.js index 90a1f03..3b65bd0 100644 --- a/jstests/disk/directoryperdb.js +++ b/jstests/disk/directoryperdb.js @@ -9,7 +9,7 @@ db[ baseName ].save( {} ); assert.eq( 1, db[ baseName ].count() , "A : " + tojson( db[baseName].find().toArray() ) ); checkDir = function( dir ) { - db.runCommand( {fsync:1} ); + db.adminCommand( {fsync:1} ); files = listFiles( dir ); found = false; for( f in files ) { @@ -60,3 +60,5 @@ assert( m.getDBs().totalSize > 0, "bad size calc" ); db.dropDatabase(); files = listFiles( dbpath ); files.forEach( function( f ) { assert( !new RegExp( baseName ).test( f.name ), "drop database - dir not cleared" ); } ); + +print("SUCCESS directoryperdb.js"); diff --git a/jstests/disk/diskfull.js b/jstests/disk/diskfull.js index 6cbcbb7..26b707d 100644 --- a/jstests/disk/diskfull.js +++ b/jstests/disk/diskfull.js @@ -1,19 +1,25 @@ doIt = false; +dbpath = "/data/db/diskfulltest"; + files = listFiles( "/data/db" ); for ( i in files ) { 
- if ( files[ i ].name == "/data/db/diskfulltest" ) { + if ( files[ i ].name == dbpath ) { doIt = true; } } if ( !doIt ) { - print( "path /data/db/diskfulltest/ missing, skipping diskfull test" ); + print( "path " + dbpath + " missing, skipping diskfull test" ); doIt = false; } if ( doIt ) { + // Clear dbpath without removing and recreating diskfulltest directory, as resetDbpath does + files = listFiles( dbpath ); + files.forEach( function( x ) { removeFile( x.name ) } ); + port = allocatePorts( 1 )[ 0 ]; - m = startMongoProgram( "mongod", "--port", port, "--dbpath", "/data/db/diskfulltest", "--nohttpinterface", "--bind_ip", "127.0.0.1" ); + m = startMongoProgram( "mongod", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); c = m.getDB( "diskfulltest" ).getCollection( "diskfulltest" ) c.save( { a: 6 } ); assert.soon( function() { return rawMongoProgramOutput().match( /file allocation failure/ ); }, "didn't see 'file allocation failure'" ); diff --git a/jstests/disk/killall.js b/jstests/disk/killall.js new file mode 100644 index 0000000..a1487bb --- /dev/null +++ b/jstests/disk/killall.js @@ -0,0 +1,42 @@ +// running ops should be killed +// dropped collection should be ok after restart + +if ( typeof _threadInject == "undefined" ) { // don't run in v8 mode - SERVER-2076 + +port = allocatePorts( 1 )[ 0 ] + +var baseName = "jstests_disk_killall"; + +var m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--nohttpinterface" ); + +m.getDB( "test" ).getCollection( baseName ).save( {} ); +m.getDB( "test" ).getLastError(); + +s1 = startParallelShell( "db." + baseName + ".count( { $where: function() { while( 1 ) { ; } } } )", port ); +sleep( 1000 ); + +s2 = startParallelShell( "db." + baseName + ".drop()", port ); +sleep( 1000 ); + +/** + * 12 == mongod's exit code on interrupt (eg standard kill) + * stopMongod sends a standard kill signal to mongod, then waits for mongod to stop. If mongod doesn't stop + * in a reasonable amount of time, stopMongod sends kill -9 and in that case will not return 12. We're checking + * in this assert that mongod will stop quickly even while evaling an infinite loop in server side js. 
+ * + * 14 is sometimes returned instead due to SERVER-2184 + */ +exitCode = stopMongod( port ); +assert( exitCode == 12 || exitCode == 14, "got unexpected exitCode: " + exitCode ); + +s1(); +s2(); + +var m = startMongoProgram( "mongod", "--port", port, "--dbpath", "/data/db/" + baseName ); + +m.getDB( "test" ).getCollection( baseName ).stats(); +m.getDB( "test" ).getCollection( baseName ).drop(); + +stopMongod( port ); + +} \ No newline at end of file diff --git a/jstests/disk/preallocate.js b/jstests/disk/preallocate.js index d772fbb..4f35866 100644 --- a/jstests/disk/preallocate.js +++ b/jstests/disk/preallocate.js @@ -2,7 +2,7 @@ port = allocatePorts( 1 )[ 0 ]; -var baseName = "jstests_preallocate2"; +var baseName = "jstests_preallocate"; var m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName ); @@ -10,7 +10,11 @@ assert.eq( 0, m.getDBs().totalSize ); m.getDB( baseName ).createCollection( baseName + "1" ); -assert.soon( function() { return m.getDBs().totalSize > 100000000; }, "expected second file to bring total size over 100MB" ); +expectedMB = 100; +if ( m.getDB( baseName ).serverBits() < 64 ) + expectedMB /= 4; + +assert.soon( function() { return m.getDBs().totalSize > expectedMB * 1000000; }, "\n\n\nFAIL preallocate.js expected second file to bring total size over " + expectedMB + "MB" ); stopMongod( port ); diff --git a/jstests/disk/preallocate2.js b/jstests/disk/preallocate2.js index ee9382c..9b2159f 100644 --- a/jstests/disk/preallocate2.js +++ b/jstests/disk/preallocate2.js @@ -8,4 +8,8 @@ var m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName ); m.getDB( baseName )[ baseName ].save( {i:1} ); -assert.soon( function() { return m.getDBs().totalSize > 100000000; }, "expected second file to bring total size over 100MB" ); \ No newline at end of file +expectedMB = 100; +if ( m.getDB( baseName ).serverBits() < 64 ) + expectedMB /= 4; + +assert.soon( function() { return m.getDBs().totalSize > expectedMB * 1000000; }, "\n\n\nFAIL preallocate.js expected second file to bring total size over " + expectedMB + "MB" ); diff --git a/jstests/disk/preallocate_directoryperdb.js b/jstests/disk/preallocate_directoryperdb.js new file mode 100644 index 0000000..fd92aaf --- /dev/null +++ b/jstests/disk/preallocate_directoryperdb.js @@ -0,0 +1,50 @@ +/** + * Test for SERVER-2417 - should not preallocate a database file while we are + * dropping its directory in directoryperdb mode. + */ + +var baseDir = "jstests_disk_preallocate_directoryperdb"; +var baseName = "preallocate_directoryperdb" +var baseName2 = "preallocate_directoryperdb2" +var baseName3 = "preallocate_directoryperdb3" +port = allocatePorts( 1 )[ 0 ]; +dbpath = "/data/db/" + baseDir + "/"; + +function checkDb2DirAbsent() { + files = listFiles( dbpath ); +// printjson( files ); + for( var f in files ) { + var name = files[ f ].name; + assert.eq( -1, name.indexOf( dbpath + baseName2 ), "baseName2 dir still present" ); + } +} + +var m = startMongod( "--smallfiles", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); +db = m.getDB( baseName ); +db2 = m.getDB( baseName2 ); +c = db[ baseName ]; +c2 = db2[ baseName2 ]; +big = new Array( 5000 ).toString(); +for( var i = 0; i < 3000; ++i ) { + c.save( { b:big } ); + c2.save( { b:big } ); + db.getLastError(); +} + +// Due to our write pattern, we expect db2's .3 file to be queued up in the file +// allocator behind db's .3 file at the time db2 is dropped. 
This will +// (incorrectly) cause db2's dir to be recreated until SERVER-2417 is fixed. +db2.dropDatabase(); + +checkDb2DirAbsent(); + +db.dropDatabase(); + +// Try writing a new database, to ensure file allocator is still working. +db3 = m.getDB( baseName3 ); +c3 = db[ baseName3 ]; +c3.save( {} ); +assert( !db3.getLastError() ); +assert.eq( 1, c3.count() ); + +checkDb2DirAbsent(); diff --git a/jstests/distinct1.js b/jstests/distinct1.js index 433e051..5e47400 100644 --- a/jstests/distinct1.js +++ b/jstests/distinct1.js @@ -2,6 +2,8 @@ t = db.distinct1; t.drop(); +assert.eq( 0 , t.distinct( "a" ).length , "test empty" ); + t.save( { a : 1 } ) t.save( { a : 2 } ) t.save( { a : 2 } ) diff --git a/jstests/distinct_array1.js b/jstests/distinct_array1.js index 0d41b80..f654dba 100644 --- a/jstests/distinct_array1.js +++ b/jstests/distinct_array1.js @@ -21,4 +21,5 @@ t.save( { a : [] , c : 12 } ); t.save( { a : { b : "z"} , c : 12 } ); res = t.distinct( "a.b" ); +res.sort() assert.eq( "a,b,c,d,e,f,z" , res.toString() , "B1" ); diff --git a/jstests/distinct_index1.js b/jstests/distinct_index1.js new file mode 100644 index 0000000..8677457 --- /dev/null +++ b/jstests/distinct_index1.js @@ -0,0 +1,50 @@ + +t = db.distinct_index1 +t.drop(); + +function r( x ){ + return Math.floor( Math.sqrt( x * 123123 ) ) % 10; +} + +function d( k , q ){ + return t.runCommand( "distinct" , { key : k , query : q || {} } ) +} + +for ( i=0; i<1000; i++ ){ + o = { a : r(i*5) , b : r(i) }; + t.insert( o ); +} + +x = d( "a" ); +assert.eq( 1000 , x.stats.n , "AA1" ) +assert.eq( 1000 , x.stats.nscanned , "AA2" ) +assert.eq( 1000 , x.stats.nscannedObjects , "AA3" ) + +x = d( "a" , { a : { $gt : 5 } } ); +assert.eq( 398 , x.stats.n , "AB1" ) +assert.eq( 1000 , x.stats.nscanned , "AB2" ) +assert.eq( 1000 , x.stats.nscannedObjects , "AB3" ) + +x = d( "b" , { a : { $gt : 5 } } ); +assert.eq( 398 , x.stats.n , "AC1" ) +assert.eq( 1000 , x.stats.nscanned , "AC2" ) +assert.eq( 1000 , x.stats.nscannedObjects , "AC3" ) + + + +t.ensureIndex( { a : 1 } ) + +x = d( "a" ); +assert.eq( 1000 , x.stats.n , "BA1" ) +assert.eq( 1000 , x.stats.nscanned , "BA2" ) +assert.eq( 0 , x.stats.nscannedObjects , "BA3" ) + +x = d( "a" , { a : { $gt : 5 } } ); +assert.eq( 398 , x.stats.n , "BB1" ) +assert.eq( 398 , x.stats.nscanned , "BB2" ) +assert.eq( 0 , x.stats.nscannedObjects , "BB3" ) + +x = d( "b" , { a : { $gt : 5 } } ); +assert.eq( 398 , x.stats.n , "BC1" ) +assert.eq( 398 , x.stats.nscanned , "BC2" ) +assert.eq( 398 , x.stats.nscannedObjects , "BC3" ) diff --git a/jstests/distinct_index2.js b/jstests/distinct_index2.js new file mode 100644 index 0000000..2ba65f9 --- /dev/null +++ b/jstests/distinct_index2.js @@ -0,0 +1,35 @@ +t = db.distinct_index2; +t.drop(); + +t.ensureIndex( { a : 1 , b : 1 } ) +t.ensureIndex( { c : 1 } ) + +function x(){ + return Math.floor( Math.random() * 10 ); +} + +for ( i=0; i<2000; i++ ){ + t.insert( { a : x() , b : x() , c : x() } ) +} + +correct = [] +for ( i=0; i<10; i++ ) + correct.push( i ) + +function check( field ){ + res = t.distinct( field ) + res = res.sort() + assert.eq( correct , res , "check: " + field ); + + if ( field != "a" ){ + res = t.distinct( field , { a : 1 } ) + res = res.sort() + assert.eq( correct , res , "check 2: " + field ); + } +} + +check( "a" ) +check( "b" ) +check( "c" ) + + diff --git a/jstests/drop2.js b/jstests/drop2.js new file mode 100644 index 0000000..fa239fd --- /dev/null +++ b/jstests/drop2.js @@ -0,0 +1,43 @@ +t = db.jstests_drop2; +t.drop(); + +function debug( x ) { +// 
printjson( x ); +} + +t.save( {} ); +db.getLastError(); + +function op( drop ) { + p = db.currentOp().inprog; + debug( p ); + for ( var i in p ) { + var o = p[ i ]; + if ( drop ) { + if ( o.active && o.query && o.query.drop && o.query.drop == "jstests_drop2" ) { + return o.opid; + } + } else { + if ( o.active && o.query && o.query.query && o.query.query.$where && o.ns == "test.jstests_drop2" ) { + return o.opid; + } + } + } + return null; +} + +s1 = startParallelShell( "db.jstests_drop2.count( { $where: function() { while( 1 ) { ; } } } )" ); +countOp = null; +assert.soon( function() { countOp = op( false ); return countOp; } ); + +s2 = startParallelShell( "db.jstests_drop2.drop()" ); +dropOp = null; +assert.soon( function() { dropOp = op( true ); return dropOp; } ); + +db.killOp( dropOp ); +db.killOp( countOp ); + +s1(); +s2(); + +t.drop(); // in SERVER-1818, this fails diff --git a/jstests/dropIndex.js b/jstests/dropIndex.js deleted file mode 100644 index a6e5f46..0000000 --- a/jstests/dropIndex.js +++ /dev/null @@ -1,16 +0,0 @@ - -t = db.dropIndex; -t.drop(); - -t.insert( { _id : 1 , a : 2 , b : 3 } ); -assert.eq( 1 , t.getIndexes().length , "A1" ); - -t.ensureIndex( { a : 1 } ); -t.ensureIndex( { b : 1 } ); -assert.eq( 3 , t.getIndexes().length , "A2" ); - -x = db._dbCommand( { dropIndexes: t.getName() , index : t._genIndexName( { a : 1 } ) } ); -assert.eq( 2 , t.getIndexes().length , "B1" ); - -x = db._dbCommand( { dropIndexes: t.getName() , index : { b : 1 } } ) -assert.eq( 1 , t.getIndexes().length , "B2" ); diff --git a/jstests/drop_index.js b/jstests/drop_index.js new file mode 100644 index 0000000..a6e5f46 --- /dev/null +++ b/jstests/drop_index.js @@ -0,0 +1,16 @@ + +t = db.dropIndex; +t.drop(); + +t.insert( { _id : 1 , a : 2 , b : 3 } ); +assert.eq( 1 , t.getIndexes().length , "A1" ); + +t.ensureIndex( { a : 1 } ); +t.ensureIndex( { b : 1 } ); +assert.eq( 3 , t.getIndexes().length , "A2" ); + +x = db._dbCommand( { dropIndexes: t.getName() , index : t._genIndexName( { a : 1 } ) } ); +assert.eq( 2 , t.getIndexes().length , "B1" ); + +x = db._dbCommand( { dropIndexes: t.getName() , index : { b : 1 } } ) +assert.eq( 1 , t.getIndexes().length , "B2" ); diff --git a/jstests/dur/a_quick.js b/jstests/dur/a_quick.js new file mode 100755 index 0000000..f703f3f --- /dev/null +++ b/jstests/dur/a_quick.js @@ -0,0 +1,123 @@ +/* quick.js + test durability + this file should always run quickly + other tests can be slow +*/ + +testname = "a_quick"; +load("jstests/_tst.js"); + +function checkNoJournalFiles(path, pass) { + var files = listFiles(path); + if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) { + if (pass == null) { + // wait a bit longer for mongod to potentially finish if it is still running. 
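            // first pass only: 'pass' is null on the initial call, so we retry
            // exactly once, passing pass=1 so a second failure reports immediately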
+ sleep(10000); + return checkNoJournalFiles(path, 1); + } + print("\n\n\n"); + print("FAIL path:" + path); + print("unexpected files:"); + printjson(files); + assert(false, "FAIL a journal/lsn file is present which is unexpected"); + } +} + +// directories +var path1 = "/data/db/quicknodur"; +var path2 = "/data/db/quickdur"; + +// non-durable version +tst.log("start mongod without dur"); +var conn = startMongodEmpty("--port", 30000, "--dbpath", path1, "--nodur"); +tst.log("without dur work"); +var d = conn.getDB("test"); +d.foo.insert({ _id:123 }); +d.getLastError(); +tst.log("stop without dur"); +stopMongod(30000); + +// durable version +tst.log("start mongod with dur"); +conn = startMongodEmpty("--port", 30001, "--dbpath", path2, "--dur", "--durOptions", 8); +tst.log("with dur work"); +d = conn.getDB("test"); +d.foo.insert({ _id: 123 }); +d.getLastError(); // wait + +// we could actually do getlasterror fsync:1 now, but maybe this is agood +// as it will assure that commits happen on a timely basis. a bunch of the other dur/*js +// tests use fsync +tst.log("sleep a bit for a group commit"); +sleep(8000); + +// kill the process hard +tst.log("kill -9 mongod"); +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard + +// we will force removal of a datafile to be sure we can recreate everything +// without it being present. +removeFile(path2 + "/test.0"); + +// for that to work, we can't skip anything though: +removeFile(path2 + "/journal/lsn"); + +// with the file deleted, we MUST start from the beginning of the journal. +// thus this check to be careful +var files = listFiles(path2 + "/journal/"); +if (files.some(function (f) { return f.name.indexOf("lsn") >= 0; })) { + print("\n\n\n"); + print(path2); + printjson(files); + assert(false, "a journal/lsn file is present which will make this test potentially fail."); +} + +// restart and recover +tst.log("restart and recover"); +conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--durOptions", 9); +tst.log("check data results"); +d = conn.getDB("test"); + +var countOk = (d.foo.count() == 1); +if (!countOk) { + print("\n\n\na_quick.js FAIL count " + d.foo.count() + " is wrong\n\n\n"); + // keep going - want to see if the diff matches. if so the sleep() above was too short? 
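    // countOk is deliberately not asserted here; it is re-checked by the assert at
    // the end of this file, after the .ns/.0 diffs have been printed, so a count
    // mismatch still shows whether the data files match.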
+} + +tst.log("stop"); +stopMongod(30002); + +// at this point, after clean shutdown, there should be no journal files +tst.log("check no journal files"); +checkNoJournalFiles(path2 + "/journal"); + +tst.log("check data matches"); +var diff = tst.diff(path1 + "/test.ns", path2 + "/test.ns"); +print("diff of .ns files returns:" + diff); + +function showfiles() { + print("\n\nERROR: files for dur and nodur do not match"); + print(path1 + " files:"); + printjson(listFiles(path1)); + print(path2 + " files:"); + printjson(listFiles(path2)); + print(); +} + +if (diff != "") { + showfiles(); + assert(diff == "", "error test.ns files differ"); +} + +diff = tst.diff(path1 + "/test.0", path2 + "/test.0"); +print("diff of .0 files returns:" + diff); +if (diff != "") { + showfiles(); + assert(diff == "", "error test.0 files differ"); +} + +assert(countOk, "a_quick.js document count after recovery was not the expected value"); + +tst.success(); diff --git a/jstests/dur/closeall.js b/jstests/dur/closeall.js new file mode 100644 index 0000000..f169f06 --- /dev/null +++ b/jstests/dur/closeall.js @@ -0,0 +1,80 @@ +// testing closealldatabases concurrency +// this is also a test of recoverFromYield() as that will get exercised by the update + +function f() { + var variant = (new Date()) % 4; + var path = "/data/db/closeall"; + var path2 = "/data/db/closeall_slave"; + var ourdb = "closealltest"; + + print("closeall.js start mongod variant:" + variant); + var options = (new Date()-0)%2==0 ? 8 : 0; + print("closeall.js --durOptions " + options); + var N = 1000; + if (options) + N = 300; + + // use replication to exercise that code too with a close, and also to test local.sources with a close + var conn = startMongodEmpty("--port", 30001, "--dbpath", path, "--dur", "--durOptions", options, "--master", "--oplogSize", 64); + var connSlave = startMongodEmpty("--port", 30002, "--dbpath", path2, "--dur", "--durOptions", options, "--slave", "--source", "localhost:30001"); + + var slave = connSlave.getDB(ourdb); + + // we'll use two connections to make a little parallelism + var db1 = conn.getDB(ourdb); + var db2 = new Mongo(db1.getMongo().host).getDB(ourdb); + + print("closeall.js run test"); + + for( var i = 0; i < N; i++ ) { + db1.foo.insert({x:1}); // this does wait for a return code so we will get some parallelism + if( i % 7 == 0 ) + db1.foo.insert({x:99, y:2}); + if( i % 49 == 0 ) + db1.foo.update({ x: 99 }, { a: 1, b: 2, c: 3, d: 4 }); + if (i % 100 == 0) + db1.foo.find(); + if( i == 800 ) + db1.foo.ensureIndex({ x: 1 }); + var res = null; + try { + if( variant == 1 ) + sleep(0); + else if( variant == 2 ) + sleep(1); + else if( variant == 3 && i % 10 == 0 ) + print(i); + res = db2.adminCommand("closeAllDatabases"); + } + catch (e) { + sleep(5000); // sleeping a little makes console output order prettier + print("\n\n\nFAIL closeall.js closeAllDatabases command invocation threw an exception. i:" + i); + try { + print("getlasterror:"); + printjson(db2.getLastErrorObj()); + print("trying one more closealldatabases:"); + res = db2.adminCommand("closeAllDatabases"); + printjson(res); + } + catch (e) { + print("got another exception : " + e); + } + print("\n\n\n"); + // sleep a little to capture possible mongod output? + sleep(2000); + throw e; + } + assert( res.ok, "closeAllDatabases res.ok=false"); + } + + print("closeall.js end test loop. 
slave.foo.count:"); + print(slave.foo.count()); + + print("closeall.js shutting down servers"); + stopMongod(30002); + stopMongod(30001); +} + +f(); +sleep(500); +print("SUCCESS closeall.js"); diff --git a/jstests/dur/diskfull.js b/jstests/dur/diskfull.js new file mode 100644 index 0000000..da45c20 --- /dev/null +++ b/jstests/dur/diskfull.js @@ -0,0 +1,136 @@ +/** Test running out of disk space with durability enabled */ + +startPath = "/data/db/diskfulltest"; +recoverPath = "/data/db/dur_diskfull"; + +doIt = false; +files = listFiles( "/data/db" ); +for ( i in files ) { + if ( files[ i ].name == startPath ) { + doIt = true; + } +} + +if ( !doIt ) { + print( "path " + startPath + " missing, skipping diskfull test" ); + doIt = false; +} + +function checkNoJournalFiles(path, pass) { + var files = listFiles(path); + if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) { + if (pass == null) { + // wait a bit longer for mongod to potentially finish if it is still running. + sleep(10000); + return checkNoJournalFiles(path, 1); + } + print("\n\n\n"); + print("FAIL path:" + path); + print("unexpected files:"); + printjson(files); + assert(false, "FAIL a journal/lsn file is present which is unexpected"); + } +} + +/** Clear dbpath without removing and recreating diskfulltest directory, as resetDbpath does */ +function clear() { + files = listFiles( startPath ); + files.forEach( function( x ) { removeFile( x.name ) } ); +} + +function log(str) { + print(); + if(str) + print(testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +function work() { + log("work"); + try { + var d = conn.getDB("test"); + + big = new Array( 5000 ).toString(); + for( i = 0; i < 10000; ++i ) { + d.foo.insert( { _id:i, b:big } ); + } + + d.getLastError(); + } catch ( e ) { + print( e ); + raise( e ); + } finally { + log("endwork"); + } +} + +function verify() { + log("verify"); + var d = conn.getDB("test"); + c = d.foo.count(); + v = d.foo.validate(); + // not much we can guarantee about the writes, just validate when possible + if ( c != 0 && !v.valid ) { + printjson( v ); + print( c ); + assert( v.valid ); + assert.gt( c, 0 ); + } +} + +function runFirstMongodAndFillDisk() { + log(); + + clear(); + conn = startMongodNoReset("--port", 30001, "--dbpath", startPath, "--dur", "--smallfiles", "--durOptions", 8, "--noprealloc"); + + assert.throws( work, null, "no exception thrown when exceeding disk capacity" ); + waitMongoProgramOnPort( 30001 ); + + // the above wait doesn't work on windows + sleep(5000); +} + +function runSecondMongdAndRecover() { + // restart and recover + log(); + conn = startMongodNoReset("--port", 30003, "--dbpath", startPath, "--dur", "--smallfiles", "--durOptions", 8, "--noprealloc"); + verify(); + + log("stop"); + stopMongod(30003); + + // stopMongod seems to be asynchronous (hmmm) so we sleep here. + sleep(5000); + + // at this point, after clean shutdown, there should be no journal files + log("check no journal files"); + checkNoJournalFiles(startPath + "/journal/"); + + log(); +} + +function someWritesInJournal() { + runFirstMongodAndFillDisk(); + runSecondMongdAndRecover(); +} + +function noWritesInJournal() { + // It is too difficult to consistently trigger cases where there are no existing journal files due to lack of disk space, but + // if we were to test this case we would need to manualy remove the lock file. 
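// (the commented-out removeFile below names the file in question; presumably a
// stale mongod.lock with no journal files to recover from would otherwise block
// the restart on the unclean dbpath)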
+// removeFile( startPath + "/mongod.lock" ); +} + +if ( doIt ) { + + var testname = "dur_diskfull"; + var step = 1; + var conn = null; + + someWritesInJournal(); + noWritesInJournal(); + + print(testname + " SUCCESS"); + +} \ No newline at end of file diff --git a/jstests/dur/dropdb.js b/jstests/dur/dropdb.js new file mode 100644 index 0000000..7f82cd7 --- /dev/null +++ b/jstests/dur/dropdb.js @@ -0,0 +1,163 @@ +/* durability test dropping a database +*/ + +var debugging = false; +var testname = "dropdb"; +var step = 1; +var conn = null; + +function checkNoJournalFiles(path, pass) { + var files = listFiles(path); + if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) { + if (pass == null) { + // wait a bit longer for mongod to potentially finish if it is still running. + sleep(10000); + return checkNoJournalFiles(path, 1); + } + print("\n\n\n"); + print("FAIL path:" + path); + print("unexpected files:"); + printjson(files); + assert(false, "FAIL a journal/lsn file is present which is unexpected"); + } +} + +function runDiff(a, b) { + function reSlash(s) { + var x = s; + if (_isWindows()) { + while (1) { + var y = x.replace('/', '\\'); + if (y == x) + break; + x = y; + } + } + return x; + } + a = reSlash(a); + b = reSlash(b); + print("diff " + a + " " + b); + return run("diff", a, b); +} + +function log(str) { + if (str) + print("\n" + testname + " step " + step++ + " " + str); + else + print("\n" + testname + " step " + step++); +} + +// if you do inserts here, you will want to set _id. otherwise they won't match on different +// runs so we can't do a binary diff of the resulting files to check they are consistent. +function work() { + log("work (add data, drop database)"); + + var e = conn.getDB("teste"); + e.foo.insert({ _id: 99 }); + + var d = conn.getDB("test"); + d.foo.insert({ _id: 3, x: 22 }); + d.bar.insert({ _id: 3, x: 22 }); + + d.dropDatabase(); + + d.foo.insert({ _id: 100 }); + + // assure writes applied in case we kill -9 on return from this function + assert(d.runCommand({ getlasterror: 1, fsync: 1 }).ok, "getlasterror not ok"); +} + +function verify() { + log("verify"); + var d = conn.getDB("test"); + var count = d.foo.count(); + if (count != 1) { + print("going to fail, count mismatch in verify()"); + sleep(10000); // easier to read the output this way + print("\n\n\ndropdb.js FAIL test.foo.count() should be 1 but is : " + count); + print(d.foo.count() + "\n\n\n"); + assert(false); + } + assert(d.foo.findOne()._id == 100, "100"); + + print("dropdb.js teste.foo.findOne:"); + printjson(conn.getDB("teste").foo.findOne()); + + var teste = conn.getDB("teste"); + print("dropdb count " + teste.foo.count()); + assert(teste.foo.findOne()._id == 99, "teste"); + +} + +if (debugging) { + // mongod already running in debugger + conn = db.getMongo(); + work(); + verify(); + sleep(30000); + quit(); +} + +// directories +var path1 = "/data/db/" + testname + "nodur"; +var path2 = "/data/db/" + testname + "dur"; + +// non-durable version +log("mongod nodur"); +conn = startMongodEmpty("--port", 30000, "--dbpath", path1, "--nodur", "--smallfiles"); +work(); +verify(); +stopMongod(30000); + +// durable version +log("mongod dur"); +conn = startMongodEmpty("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); +work(); +verify(); + +// kill the process hard +log("kill 9"); +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard + +// we will force removal of a datafile to be sure we can recreate 
everything. +removeFile(path2 + "/test.0"); +// the trick above is only valid if journals haven't rotated out, and also if lsn isn't skipping +removeFile(path2 + "/lsn"); + +log("restart and recover"); +conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 9); + +log("verify after recovery"); +verify(); + +log("stop mongod 30002"); +stopMongod(30002); +sleep(5000); + +// at this point, after clean shutdown, there should be no journal files +log("check no journal files"); +checkNoJournalFiles(path2 + "/journal"); + +log("check data matches ns"); +var diff = runDiff(path1 + "/test.ns", path2 + "/test.ns"); +if (diff != "") { + print("\n\n\nDIFFERS\n"); + print(diff); +} +assert(diff == "", "error test.ns files differ"); + +log("check data matches .0"); +diff = runDiff(path1 + "/test.0", path2 + "/test.0"); +if (diff != "") { + print("\n\n\nDIFFERS\n"); + print(diff); +} +assert(diff == "", "error test.0 files differ"); + +log("check data matches done"); + +print(testname + " SUCCESS"); + diff --git a/jstests/dur/dur1.js b/jstests/dur/dur1.js new file mode 100755 index 0000000..4c8f1bf --- /dev/null +++ b/jstests/dur/dur1.js @@ -0,0 +1,154 @@ +/* + test durability +*/ + +var debugging = false; +var testname = "dur1"; +var step = 1; +var conn = null; + +function checkNoJournalFiles(path, pass) { + var files = listFiles(path); + if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) { + if (pass == null) { + // wait a bit longer for mongod to potentially finish if it is still running. + sleep(10000); + return checkNoJournalFiles(path, 1); + } + print("\n\n\n"); + print("FAIL path:" + path); + print("unexpected files:"); + printjson(files); + assert(false, "FAIL a journal/lsn file is present which is unexpected"); + } +} + +function runDiff(a, b) { + function reSlash(s) { + var x = s; + if (_isWindows()) { + while (1) { + var y = x.replace('/', '\\'); + if (y == x) + break; + x = y; + } + } + return x; + } + a = reSlash(a); + b = reSlash(b); + print("diff " + a + " " + b); + return run("diff", a, b); +} + +function log(str) { + print(); + if(str) + print(testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +// if you do inserts here, you will want to set _id. otherwise they won't match on different +// runs so we can't do a binary diff of the resulting files to check they are consistent. +function work() { + log("work"); + var d = conn.getDB("test"); + d.foo.insert({ _id: 3, x: 22 }); + d.foo.insert({ _id: 4, x: 22 }); + d.a.insert({ _id: 3, x: 22, y: [1, 2, 3] }); + d.a.insert({ _id: 4, x: 22, y: [1, 2, 3] }); + d.a.update({ _id: 4 }, { $inc: { x: 1} }); + + // try building an index. 
however, be careful as object id's in system.indexes would vary, so we do it manually: + d.system.indexes.insert({ _id: 99, ns: "test.a", key: { x: 1 }, name: "x_1", v: 0 }); + +// d.a.update({ _id: 4 }, { $inc: { x: 1} }); +// d.a.reIndex(); + + // assure writes applied in case we kill -9 on return from this function + d.getLastError(); + + log("endwork"); + return d; +} + +function verify() { + log("verify"); + var d = conn.getDB("test"); + var ct = d.foo.count(); + if (ct != 2) { + print("\n\n\nFAIL dur1.js count is wrong in verify(): " + ct + "\n\n\n"); + assert(ct == 2); + } +} + +if( debugging ) { + // mongod already running in debugger + conn = db.getMongo(); + work(); + sleep(30000); + quit(); +} + +log(); + +// directories +var path1 = "/data/db/" + testname+"nodur"; +var path2 = "/data/db/" + testname+"dur"; + +// non-durable version +log(); +conn = startMongodEmpty("--port", 30000, "--dbpath", path1, "--nodur", "--smallfiles"); +work(); +stopMongod(30000); + +// durable version +log(); +conn = startMongodEmpty("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); +work(); + +// wait for group commit. +printjson(conn.getDB('admin').runCommand({getlasterror:1, fsync:1})); + +// kill the process hard +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard + +// restart and recover +log(); +conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); +verify(); + +log("stop"); +stopMongod(30002); + +// stopMongod seems to be asynchronous (hmmm) so we sleep here. +sleep(5000); + +// at this point, after clean shutdown, there should be no journal files +log("check no journal files"); +checkNoJournalFiles(path2 + "/journal"); + +log("check data matches ns"); +var diff = runDiff(path1 + "/test.ns", path2 + "/test.ns"); +if (diff != "") { + print("\n\n\nDIFFERS\n"); + print(diff); +} +assert(diff == "", "error test.ns files differ"); + +log("check data matches .0"); +var diff = runDiff(path1 + "/test.0", path2 + "/test.0"); +if (diff != "") { + print("\n\n\nDIFFERS\n"); + print(diff); +} +assert(diff == "", "error test.0 files differ"); + +log("check data matches done"); + +print(testname + " SUCCESS"); + diff --git a/jstests/dur/dur2.js b/jstests/dur/dur2.js new file mode 100644 index 0000000..dd0ab0f --- /dev/null +++ b/jstests/dur/dur2.js @@ -0,0 +1,92 @@ +/* test durability + runs mongod, kill -9's, recovers +*/ + +var debugging = false; +var testname = "dur2"; +var step = 1; +var conn = null; + +var start = new Date(); +function howLongSecs() { + return (new Date() - start) / 1000; +} + +function log(str) { + if(str) + print("\n" + testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +function verify() { + log("verify"); + var d = conn.getDB("test"); + var mycount = d.foo.count(); + //print("count:" + mycount); + assert(mycount>2, "count wrong"); +} + +function work() { + log("work"); + x = 'x'; while(x.length < 1024) x+=x; + var d = conn.getDB("test"); + d.foo.drop(); + d.foo.insert({}); + + // go long enough we will have time to kill it later during recovery + var j = 2; + var MaxTime = 15; + if (Math.random() < 0.1) { + print("dur2.js DOING A LONGER (120 sec) PASS - if an error, try long pass to replicate"); + MaxTime = 120; + } + while (1) { + d.foo.insert({ _id: j, z: x }); + d.foo.update({ _id: j }, { $inc: { a: 1} }); + if (j % 25 == 0) + d.foo.remove({ _id: j }); + j++; + if( j % 3 == 0 ) + d.foo.update({ _id: j }, { 
$inc: { a: 1} }, true); + if (j % 10000 == 0) + print(j); + if (howLongSecs() > MaxTime) + break; + } + + verify(); + d.runCommand({ getLastError: 1, fsync: 1 }); +} + +if( debugging ) { + // mongod already running in debugger + print("DOING DEBUG MODE BEHAVIOR AS 'db' IS DEFINED -- RUN mongo --nodb FOR REGULAR TEST BEHAVIOR"); + conn = db.getMongo(); + work(); + sleep(30000); + quit(); +} + +// directories +var path = "/data/db/" + testname+"dur"; + +log("run mongod with --dur"); +conn = startMongodEmpty("--port", 30001, "--dbpath", path, "--dur", "--smallfiles", "--durOptions", /*DurParanoid*/8, "--master", "--oplogSize", 64); +work(); + +log("kill -9"); +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard +assert(listFiles(path + "/journal/").length > 0, "journal directory is unexpectantly empty after kill"); + +// restart and recover +log("restart mongod and recover"); +conn = startMongodNoReset("--port", 30002, "--dbpath", path, "--dur", "--smallfiles", "--durOptions", 8, "--master", "--oplogSize", 64); +verify(); + +log("stopping mongod 30002"); +stopMongod(30002); + +print(testname + " SUCCESS"); diff --git a/jstests/dur/lsn.js b/jstests/dur/lsn.js new file mode 100755 index 0000000..505d8f5 --- /dev/null +++ b/jstests/dur/lsn.js @@ -0,0 +1,126 @@ +/* test durability, specifically last sequence number function + runs mongod, kill -9's, recovers + then writes more data and verifies with DurParanoid that it matches +*/ + +var debugging = false; +var testname = "lsn"; +var step = 1; +var conn = null; + +var start = new Date(); +function howLongSecs() { + return (new Date() - start) / 1000; +} + +function log(str) { + if(str) + print("\n" + testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +function verify() { + log("verify"); + var d = conn.getDB("test"); + var mycount = d.foo.count(); + print("count:" + mycount); + assert(mycount>2, "count wrong"); +} + +// if you do inserts here, you will want to set _id. otherwise they won't match on different +// runs so we can't do a binary diff of the resulting files to check they are consistent. 
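// A condensed, illustrative sketch of the lifecycle shared by these dur tests:
// start an empty mongod with --dur, write, kill -9, restart with
// startMongodNoReset so the dbpath survives, then verify after journal recovery.
// The port and dbpath are placeholders, not values used by this test, and the
// function is defined only for reference; it is not invoked.
function durLifecycleSketch() {
    var c = startMongodEmpty("--port", 30010, "--dbpath", "/data/db/dur_sketch", "--dur", "--smallfiles");
    c.getDB("test").foo.insert({ _id: 1 });
    c.getDB("test").getLastError();      // wait for the write to be applied
    stopMongod(30010, /*signal*/9);      // hard kill; journal files left behind
    c = startMongodNoReset("--port", 30010, "--dbpath", "/data/db/dur_sketch", "--dur", "--smallfiles");
    assert.eq(1, c.getDB("test").foo.count(), "sketch: document should survive recovery");
    stopMongod(30010);
}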
+function work() { + log("work"); + x = 'x'; while(x.length < 1024) x+=x; + var d = conn.getDB("test"); + d.foo.drop(); + d.foo.insert({}); + + // go long enough we will have time to kill it later during recovery + var j = 2; + var MaxTime = 15; + if (Math.random() < 0.05) { + print("doing a longer pass"); + MaxTime = 90; + } + while (1) { + d.foo.insert({ _id: j, z: x }); + d.foo.update({ _id: j }, { $inc: { a: 1} }); + if (j % 25 == 0) + d.foo.remove({ _id: j }); + j++; + if( j % 3 == 0 ) + d.foo.update({ _id: j }, { $inc: { a: 1} }, true); + if (j % 10000 == 0) + print(j); + if (howLongSecs() > MaxTime) + break; + } + + verify(); + d.runCommand({ getLastError: 1, fsync: 1 }); +} + +if( debugging ) { + // mongod already running in debugger + print("DOING DEBUG MODE BEHAVIOR AS 'db' IS DEFINED -- RUN mongo --nodb FOR REGULAR TEST BEHAVIOR"); + conn = db.getMongo(); + work(); + sleep(30000); + quit(); +} + +// directories +var path2 = "/data/db/" + testname+"dur"; + +// run mongod with a short --syncdelay to make LSN writing sooner +log("run mongod --dur and a short --syncdelay"); +conn = startMongodEmpty("--syncdelay", 2, "--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", /*DurParanoid*/8, "--master", "--oplogSize", 64); +work(); + +log("wait a while for a sync and an lsn write"); +sleep(14); // wait for lsn write + +log("kill mongod -9"); +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard + +// check that there is an lsn file +{ + var files = listFiles(path2 + "/journal/"); + assert(files.some(function (f) { return f.name.indexOf("lsn") >= 0; }), + "lsn.js FAIL no lsn file found after kill, yet one is expected"); +} +/*assert.soon( + function () { + var files = listFiles(path2 + "/journal/"); + return files.some(function (f) { return f.name.indexOf("lsn") >= 0; }); + }, + "lsn.js FAIL no lsn file found after kill, yet one is expected" +);*/ + +// restart and recover +log("restart mongod, recover, verify"); +conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 24, "--master", "--oplogSize", 64); +verify(); + +// idea here is to verify (in a simplistic way) that we are in a good state to do further ops after recovery +log("add data after recovery"); +{ + var d = conn.getDB("test"); + d.xyz.insert({ x: 1 }); + d.xyz.insert({ x: 1 }); + d.xyz.insert({ x: 1 }); + d.xyz.update({}, { $set: { x: "aaaaaaaaaaaa"} }); + d.xyz.reIndex(); + d.xyz.drop(); + sleep(1); + d.xyz.insert({ x: 1 }); +} + +log("stop mongod 30002"); +stopMongod(30002); + +print(testname + " SUCCESS"); diff --git a/jstests/dur/manyRestart.js b/jstests/dur/manyRestart.js new file mode 100755 index 0000000..04e4318 --- /dev/null +++ b/jstests/dur/manyRestart.js @@ -0,0 +1,191 @@ +/* + test durability +*/ + +var debugging = false; +var testname = "manyRestarts"; +var step = 1; +var conn = null; + +function checkNoJournalFiles(path, pass) { + var files = listFiles(path); + if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) { + if (pass == null) { + // wait a bit longer for mongod to potentially finish if it is still running. 
+ sleep(10000); + return checkNoJournalFiles(path, 1); + } + print("\n\n\n"); + print("FAIL path:" + path); + print("unexpected files:"); + printjson(files); + assert(false, "FAIL a journal/lsn file is present which is unexpected"); + } +} + +function runDiff(a, b) { + function reSlash(s) { + var x = s; + if (_isWindows()) { + while (1) { + var y = x.replace('/', '\\'); + if (y == x) + break; + x = y; + } + } + return x; + } + a = reSlash(a); + b = reSlash(b); + print("diff " + a + " " + b); + return run("diff", a, b); +} + +function log(str) { + print(); + if(str) + print(testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +// if you do inserts here, you will want to set _id. otherwise they won't match on different +// runs so we can't do a binary diff of the resulting files to check they are consistent. +function work() { + log("work"); + var d = conn.getDB("test"); + d.foo.insert({ _id: 3, x: 22 }); + d.foo.insert({ _id: 4, x: 22 }); + d.a.insert({ _id: 3, x: 22, y: [1, 2, 3] }); + d.a.insert({ _id: 4, x: 22, y: [1, 2, 3] }); + d.a.update({ _id: 4 }, { $inc: { x: 1} }); + + // try building an index. however, be careful as object id's in system.indexes would vary, so we do it manually: + d.system.indexes.insert({ _id: 99, ns: "test.a", key: { x: 1 }, name: "x_1", v: 0 }); + +// d.a.update({ _id: 4 }, { $inc: { x: 1} }); +// d.a.reIndex(); + + // assure writes applied in case we kill -9 on return from this function + d.getLastError(); + log("endwork"); + return d; +} + +function addRows() { + var rand = Random.randInt(10000); + log("add rows " + rand); + var d = conn.getDB("test"); + for (var j = 0; j < rand; ++j) { + d.rows.insert({a:1, b: "blah"}); + } + return rand; +} + +function verify() { + log("verify"); + var d = conn.getDB("test"); + assert.eq(d.foo.count(), 2, "collection count is wrong"); + assert.eq(d.a.count(), 2, "collection count is wrong"); +} + +function verifyRows(nrows) { + log("verify rows " + nrows); + var d = conn.getDB("test"); + assert.eq(d.rows.count(), nrows, "collection count is wrong"); +} + +if( debugging ) { + // mongod already running in debugger + conn = db.getMongo(); + work(); + sleep(30000); + quit(); +} + +log(); + +// directories +var path1 = "/data/db/" + testname+"nodur"; +var path2 = "/data/db/" + testname+"dur"; + +// non-durable version +log("starting 30000"); +conn = startMongodEmpty("--port", 30000, "--dbpath", path1, "--nodur", "--smallfiles"); +work(); +stopMongod(30000); + +log("starting 30001"); +conn = startMongodEmpty("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); +work(); +// wait for group commit. +printjson(conn.getDB('admin').runCommand({getlasterror:1, fsync:1})); + +stopMongod(30001); +sleep(5000); + +for (var i = 0; i < 3; ++i) { + + // durable version + log("restarting 30001"); + conn = startMongodNoReset("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); + + // wait for group commit. 
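+    // the fsync:1 form of getlasterror is what this test uses to wait for a group
+    // commit, so the hard kill below happens after the preceding writes are durable
+    // rather than racing an in-flight commit.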
+ printjson(conn.getDB('admin').runCommand({getlasterror:1, fsync:1})); + + verify(); + + // kill the process hard + log("hard kill"); + stopMongod(30001, /*signal*/9); + + sleep(5000); +} + +// journal file should be present, and non-empty as we killed hard + +// restart and recover +log("restart"); +conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); +log("verify"); +verify(); +log("stop"); +stopMongod(30002); +sleep(5000); + +// at this point, after clean shutdown, there should be no journal files +log("check no journal files"); +checkNoJournalFiles(path2 + "/journal"); + +log("check data matches ns"); +var diff = runDiff(path1 + "/test.ns", path2 + "/test.ns"); +assert(diff == "", "error test.ns files differ"); + +log("check data matches .0"); +var diff = runDiff(path1 + "/test.0", path2 + "/test.0"); +assert(diff == "", "error test.0 files differ"); + +log("check data matches done"); + +var nrows = 0; +for (var i = 0; i < 5; ++i) { + + // durable version + log("restarting 30001"); + conn = startMongodNoReset("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8); + nrows += addRows(); + // wait for group commit. + printjson(conn.getDB('admin').runCommand({getlasterror:1, fsync:1})); + + verifyRows(nrows); + + // kill the process hard + log("hard kill"); + stopMongod(30001, /*signal*/9); + + sleep(5000); +} + +print(testname + " SUCCESS"); + diff --git a/jstests/dur/md5.js b/jstests/dur/md5.js new file mode 100644 index 0000000..107476e --- /dev/null +++ b/jstests/dur/md5.js @@ -0,0 +1,101 @@ +/** + * Test md5 validation of journal file. + * This test is dependent on the journal file format and may require an update if the format changes, + * see comments near fuzzFile() below. + */ + +var debugging = false; +var testname = "dur_md5"; +var step = 1; +var conn = null; + +function log(str) { + print(); + if(str) + print(testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +/** Changes here may require updating the byte index of the md5 hash, see File comments below. */ +function work() { + log("work"); + var d = conn.getDB("test"); + d.foo.insert({ _id: 3, x: 22 }); + d.foo.insert({ _id: 4, x: 22 }); + d.a.insert({ _id: 3, x: 22, y: [1, 2, 3] }); + d.a.insert({ _id: 4, x: 22, y: [1, 2, 3] }); + d.a.update({ _id: 4 }, { $inc: { x: 1} }); + + // try building an index. however, be careful as object id's in system.indexes would vary, so we do it manually: + d.system.indexes.insert({ _id: 99, ns: "test.a", key: { x: 1 }, name: "x_1", v: 0 }); + + // d.a.update({ _id: 4 }, { $inc: { x: 1} }); + // d.a.reIndex(); + + // assure writes applied in case we kill -9 on return from this function + d.getLastError(); + + log("endwork"); +} + +if( debugging ) { + // mongod already running in debugger + conn = db.getMongo(); + work(); + sleep(30000); + quit(); +} + +log(); + +var path = "/data/db/" + testname+"dur"; + +log(); +conn = startMongodEmpty("--port", 30001, "--dbpath", path, "--dur", "--smallfiles", "--durOptions", 8); +work(); + +// wait for group commit. +printjson(conn.getDB('admin').runCommand({getlasterror:1, fsync:1})); + +log("kill -9"); + +// kill the process hard +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard + +// Bit flip the first byte of the md5sum contained within the opcode footer. +// This ensures we get an md5 exception instead of some other type of exception. 
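+// j._0 is the first journal file written under --dur; the offset handed to fuzzFile()
+// below is derived from the on-disk layout (an 8192 byte journal file header followed
+// by a ~20 byte section header), so it must be revisited if the journal format changes.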
+var file = path + "/journal/j._0"; + +// if test fails, uncomment these "cp" lines to debug: +// run("cp", file, "/tmp/before"); + +// journal header is 8192 +// jsectheader is 20 +// so a little beyond that +fuzzFile(file, 8214+8); + +// run("cp", file, "/tmp/after"); + +log("run mongod again recovery should fail"); + +// 100 exit code corresponds to EXIT_UNCAUGHT, which is triggered when there is an exception during recovery. +// 14 is is sometimes triggered instead due to SERVER-2184 +exitCode = runMongoProgram( "mongod", "--port", 30002, "--dbpath", path, "--dur", "--smallfiles", "--durOptions", /*9*/13 ); + +if (exitCode != 100 && exitCode != 14) { + print("\n\n\nFAIL md5.js expected mongod to fail but didn't? mongod exitCode: " + exitCode + "\n\n\n"); + // sleep a little longer to get more output maybe + sleep(2000); + assert(false); +} + +// TODO Possibly we could check the mongod log to verify that the correct type of exception was thrown. But +// that would introduce a dependency on the mongod log format, which we may not want. + +print("SUCCESS md5.js"); + +// if we sleep a littler here we may get more out the mongod output logged +sleep(500); diff --git a/jstests/dur/oplog.js b/jstests/dur/oplog.js new file mode 100755 index 0000000..379c1b6 --- /dev/null +++ b/jstests/dur/oplog.js @@ -0,0 +1,159 @@ +/* oplog.js */ + +var debugging = false; +var testname = "oplog"; +var step = 1; +var conn = null; + +function checkNoJournalFiles(path, pass) { + var files = listFiles(path); + if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) { + if (pass == null) { + // wait a bit longer for mongod to potentially finish if it is still running. + sleep(10000); + return checkNoJournalFiles(path, 1); + } + print("\n\n\n"); + print("FAIL path:" + path); + print("unexpected files:"); + printjson(files); + assert(false, "FAIL a journal/lsn file is present which is unexpected"); + } +} + +function runDiff(a, b) { + function reSlash(s) { + var x = s; + if (_isWindows()) { + while (1) { + var y = x.replace('/', '\\'); + if (y == x) + break; + x = y; + } + } + return x; + } + a = reSlash(a); + b = reSlash(b); + print("diff " + a + " " + b); + return runProgram("diff", a, b); +} + +function log(str) { + print(); + if(str) + print(testname+" step " + step++ + " " + str); + else + print(testname+" step " + step++); +} + +function verify() { + log("verify"); + var d = conn.getDB("local"); + var mycount = d.oplog.$main.find({ "o.z": 3 }).count(); + print(mycount); + assert(mycount == 3, "oplog doesnt match"); +} + +// if you do inserts here, you will want to set _id. otherwise they won't match on different +// runs so we can't do a binary diff of the resulting files to check they are consistent. +function work() { + log("work"); + var d = conn.getDB("test"); + var q = conn.getDB("testq"); // use tewo db's to exercise JDbContext a bit. 
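+    // the writes below span two databases (test and testq) and include a larger string
+    // field so that, per the OpCode_ObjCopy note further down, bigger journal operations
+    // are covered as well as small ones before the hard kill.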
+ d.foo.insert({ _id: 3, x: 22 }); + d.foo.insert({ _id: 4, x: 22 }); + q.foo.insert({ _id: 4, x: 22 }); + d.a.insert({ _id: 3, x: 22, y: [1, 2, 3] }); + q.a.insert({ _id: 3, x: 22, y: [1, 2, 3] }); + d.a.insert({ _id: 4, x: 22, y: [1, 2, 3] }); + d.a.update({ _id: 4 }, { $inc: { x: 1} }); + // OpCode_ObjCopy fires on larger operations so make one that isn't tiny + var big = "axxxxxxxxxxxxxxb"; + big = big + big; + big = big + big; + big = big + big; + big = big + big; + big = big + big; + d.foo.insert({ _id: 5, q: "aaaaa", b: big, z: 3 }); + q.foo.insert({ _id: 5, q: "aaaaa", b: big, z: 3 }); + d.foo.insert({ _id: 6, q: "aaaaa", b: big, z: 3 }); + d.foo.update({ _id: 5 }, { $set: { z: 99} }); + + // assure writes applied in case we kill -9 on return from this function + d.getLastError(); + + log("endwork"); + + verify(); +} + +if( debugging ) { + // mongod already running in debugger + print("DOING DEBUG MODE BEHAVIOR AS 'db' IS DEFINED -- RUN mongo --nodb FOR REGULAR TEST BEHAVIOR"); + conn = db.getMongo(); + work(); + sleep(30000); + quit(); +} + +log(); + +// directories +var path1 = "/data/db/" + testname+"nodur"; +var path2 = "/data/db/" + testname+"dur"; + +// non-durable version +log(); +conn = startMongodEmpty("--port", 30000, "--dbpath", path1, "--nodur", "--smallfiles", "--master", "--oplogSize", 64); +work(); +stopMongod(30000); + +// durable version +log(); +conn = startMongodEmpty("--port", 30001, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", /*DurParanoid*/8, "--master", "--oplogSize", 64); +work(); + +// wait for group commit. +printjson(conn.getDB('admin').runCommand({getlasterror:1, fsync:1})); + +// kill the process hard +stopMongod(30001, /*signal*/9); + +// journal file should be present, and non-empty as we killed hard + +// restart and recover +log(); +conn = startMongodNoReset("--port", 30002, "--dbpath", path2, "--dur", "--smallfiles", "--durOptions", 8, "--master", "--oplogSize", 64); +verify(); + +log("stop"); +stopMongod(30002); + +// stopMongod seems to be asynchronous (hmmm) so we sleep here. 
+sleep(5000); + +// at this point, after clean shutdown, there should be no journal files +log("check no journal files"); +checkNoJournalFiles(path2 + "/journal"); + +log("check data matches ns"); +var diff = runDiff(path1 + "/test.ns", path2 + "/test.ns"); +if (diff != "") { + print("\n\n\nDIFFERS\n"); + print(diff); +} +assert(diff == "", "error test.ns files differ"); + +log("check data matches .0"); +diff = runDiff(path1 + "/test.0", path2 + "/test.0"); +if (diff != "") { + print("\n\n\nDIFFERS\n"); + print(diff); +} +assert(diff == "", "error test.0 files differ"); + +log("check data matches done"); + +print(testname + " SUCCESS"); diff --git a/jstests/error5.js b/jstests/error5.js index ed8d922..5884d20 100644 --- a/jstests/error5.js +++ b/jstests/error5.js @@ -2,7 +2,7 @@ t = db.error5 t.drop(); -assert.throws( function(){ t.save( 4 ); } , "A" ); +assert.throws( function(){ t.save( 4 ); printjson( t.findOne() ) } , null , "A" ); t.save( { a : 1 } ) assert.eq( 1 , t.count() , "B" ); diff --git a/jstests/eval_nolock.js b/jstests/eval_nolock.js new file mode 100644 index 0000000..2688ec5 --- /dev/null +++ b/jstests/eval_nolock.js @@ -0,0 +1,16 @@ + +t = db.eval_nolock +t.drop(); + +for ( i=0; i<10; i++ ) + t.insert( { _id : i } ); + +res = db.runCommand( { eval : + function(){ + db.eval_nolock.insert( { _id : 123 } ); + return db.eval_nolock.count(); + } + , nlock : true } ); + +assert.eq( 11 , res.retval , "A" ) + diff --git a/jstests/evalc.js b/jstests/evalc.js index 59c9467..8a9e889 100644 --- a/jstests/evalc.js +++ b/jstests/evalc.js @@ -7,20 +7,6 @@ for( i = 0; i < 10; ++i ) { // SERVER-1610 -function op() { - uri = db.runCommand( "whatsmyuri" ).you; - printjson( uri ); - p = db.currentOp().inprog; - for ( var i in p ) { - var o = p[ i ]; - if ( o.client == uri ) { - print( "found it" ); - return o.opid; - } - } - return -1; -} - s = startParallelShell( "print( 'starting forked:' + Date() ); for ( i=0; i<500000; i++ ){ db.currentOp(); } print( 'ending forked:' + Date() ); " ) print( "starting eval: " + Date() ) diff --git a/jstests/evald.js b/jstests/evald.js new file mode 100644 index 0000000..78cabb6 --- /dev/null +++ b/jstests/evald.js @@ -0,0 +1,68 @@ +t = db.jstests_evald; +t.drop(); + +function debug( x ) { +// printjson( x ); +} + +for( i = 0; i < 10; ++i ) { + t.save( {i:i} ); +} +db.getLastError(); + +function op( ev, where ) { + p = db.currentOp().inprog; + debug( p ); + for ( var i in p ) { + var o = p[ i ]; + if ( where ) { + if ( o.active && o.query && o.query.query && o.query.query.$where && o.ns == "test.jstests_evald" ) { + return o.opid; + } + } else { + if ( o.active && o.query && o.query.$eval && o.query.$eval == ev ) { + return o.opid; + } + } + } + return -1; +} + +function doIt( ev, wait, where ) { + + if ( where ) { + s = startParallelShell( ev ); + } else { + s = startParallelShell( "db.eval( '" + ev + "' )" ); + } + + o = null; + assert.soon( function() { o = op( ev, where ); return o != -1 } ); + + if ( wait ) { + sleep( 2000 ); + } + + debug( "going to kill" ); + + db.killOp( o ); + + debug( "sent kill" ); + + s(); + +} + +doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { ; } } } )", true, true ); +doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { ; } } } )", false, true ); +doIt( "while( true ) {;}", false ); +doIt( "while( true ) {;}", true ); + +// the for loops are currently required, as a spawned op masks the parent op - see SERVER-1931 +doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} 
db.jstests_evald.count( {i:10} ); }", true ); +doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count( {i:10} ); }", false ); +doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count(); }", true ); +doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count(); }", false ); + +doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} try { db.jstests_evald.count( {i:10} ); } catch ( e ) { } }", true ); +doIt( "while( 1 ) { try { while( 1 ) { ; } } catch ( e ) { } }", true ); diff --git a/jstests/evale.js b/jstests/evale.js new file mode 100644 index 0000000..af5a303 --- /dev/null +++ b/jstests/evale.js @@ -0,0 +1,5 @@ +t = db.jstests_evale; +t.drop(); + +db.eval( function() { return db.jstests_evale.count( { $where:function() { return true; } } ) } ); +db.eval( "db.jstests_evale.count( { $where:function() { return true; } } )" ); \ No newline at end of file diff --git a/jstests/evalf.js b/jstests/evalf.js new file mode 100644 index 0000000..12d0192 --- /dev/null +++ b/jstests/evalf.js @@ -0,0 +1,26 @@ +// test that killing a parent op interrupts the child op + +t = db.jstests_evalf; +t.drop(); + +if ( typeof _threadInject == "undefined" ) { // don't run in v8 mode - SERVER-1900 + +db.eval( function() { + opid = null; + while( opid == null ) { + ops = db.currentOp().inprog; + for( i in ops ) { + o = ops[ i ]; + if ( o.active && o.query && o.query.$eval ) { + opid = o.opid; + } + } + } + db.jstests_evalf.save( {opid:opid} ); + db.jstests_evalf.count( { $where:function() { + db.killOp( db.jstests_evalf.findOne().opid ); + while( 1 ) { ; } + } } ); + } ); + +} \ No newline at end of file diff --git a/jstests/exists.js b/jstests/exists.js index 28f69e8..3f1e904 100644 --- a/jstests/exists.js +++ b/jstests/exists.js @@ -25,7 +25,7 @@ function dotest( n ){ assert.eq( 3, t.count( {'a.b': {$exists:true}} ) , n ); assert.eq( 2, t.count( {'a.b.c': {$exists:true}} ) , n ); assert.eq( 1, t.count( {'a.b.c.d': {$exists:true}} ) , n ); - + assert.eq( 1, t.count( {a: {$exists:false}} ) , n ); assert.eq( 2, t.count( {'a.b': {$exists:false}} ) , n ); assert.eq( 3, t.count( {'a.b.c': {$exists:false}} ) , n ); @@ -38,6 +38,7 @@ t.ensureIndex( { "a.b" : 1 } ) t.ensureIndex( { "a.b.c" : 1 } ) t.ensureIndex( { "a.b.c.d" : 1 } ) dotest( "after index" ) +assert.eq( 1, t.find( {a: {$exists:false}} ).hint( {a:1} ).itcount() ); t.drop(); diff --git a/jstests/explain1.js b/jstests/explain1.js index 6d5ac55..2460c28 100644 --- a/jstests/explain1.js +++ b/jstests/explain1.js @@ -20,5 +20,5 @@ assert.eq( 20 , t.find( q ).limit(20).itcount() , "F" ); assert.eq( 49 , t.find(q).explain().n , "G" ); assert.eq( 20 , t.find(q).limit(20).explain().n , "H" ); -assert.eq( 49 , t.find(q).limit(-20).explain().n , "I" ); +assert.eq( 20 , t.find(q).limit(-20).explain().n , "I" ); diff --git a/jstests/explain2.js b/jstests/explain2.js index 4960e5a..6cb5160 100644 --- a/jstests/explain2.js +++ b/jstests/explain2.js @@ -16,12 +16,12 @@ function go( q , c , b , o ){ } q = { a : { $gt : 3 } } -go( q , 6 , 7 , 6 ); +go( q , 6 , 6 , 6 ); q.b = 5 -go( q , 1 , 1 , 1 ); +go( q , 1 , 6 , 1 ); delete q.b q.c = 5 -go( q , 1 , 7 , 6 ); +go( q , 1 , 6 , 6 ); diff --git a/jstests/explain3.js b/jstests/explain3.js new file mode 100644 index 0000000..69dcac5 --- /dev/null +++ b/jstests/explain3.js @@ -0,0 +1,24 @@ +/** SERVER-2451 Kill cursor while explain is yielding */ + +t = db.jstests_explain3; +t.drop(); + +t.ensureIndex( {i:1} ); +for( var i = 0; i < 10000; ++i ) { + t.save( 
{i:i,j:0} ); +} +db.getLastError(); + +s = startParallelShell( "sleep( 20 ); db.jstests_explain3.dropIndex( {i:1} );" ); + +try { + t.find( {i:{$gt:-1},j:1} ).hint( {i:1} ).explain() +} catch (e) { + print( "got exception" ); + printjson( e ); +} + +s(); + +// Sanity check to make sure mongod didn't seg fault. +assert.eq( 10000, t.count() ); \ No newline at end of file diff --git a/jstests/find_and_modify3.js b/jstests/find_and_modify3.js index 1d30204..4214dfb 100644 --- a/jstests/find_and_modify3.js +++ b/jstests/find_and_modify3.js @@ -8,13 +8,13 @@ t.insert({_id:2, other:2, comments:[{i:0, j:0}, {i:1, j:1}]}); orig0 = t.findOne({_id:0}) orig2 = t.findOne({_id:2}) -out = t.findAndModify({query: {_id:1, 'comments.i':0}, update: {$set: {'comments.$.j':2}}, 'new': true}); +out = t.findAndModify({query: {_id:1, 'comments.i':0}, update: {$set: {'comments.$.j':2}}, 'new': true, sort:{other:1}}); assert.eq(out.comments[0], {i:0, j:2}); assert.eq(out.comments[1], {i:1, j:1}); assert.eq(t.findOne({_id:0}), orig0); assert.eq(t.findOne({_id:2}), orig2); -out = t.findAndModify({query: {other:1, 'comments.i':1}, update: {$set: {'comments.$.j':3}}, 'new': true}); +out = t.findAndModify({query: {other:1, 'comments.i':1}, update: {$set: {'comments.$.j':3}}, 'new': true, sort:{other:1}}); assert.eq(out.comments[0], {i:0, j:2}); assert.eq(out.comments[1], {i:1, j:3}); assert.eq(t.findOne({_id:0}), orig0); diff --git a/jstests/geo_borders.js b/jstests/geo_borders.js new file mode 100644 index 0000000..85ffe35 --- /dev/null +++ b/jstests/geo_borders.js @@ -0,0 +1,189 @@ + +t = db.borders +t.drop() + +// FIXME: FAILS for all epsilon < 1 +epsilon = 1 +//epsilon = 0.99 + +// For these tests, *required* that step ends exactly on max +min = -1 +max = 1 +step = 1 +numItems = 0; + +for(var x = min; x <= max; x += step){ + for(var y = min; y <= max; y += step){ + t.insert({ loc: { x : x, y : y } }) + numItems++; + } +} + +overallMin = -1 +overallMax = 1 + +// Create a point index slightly smaller than the points we have +t.ensureIndex({ loc : "2d" }, { max : overallMax - epsilon / 2, min : overallMin + epsilon / 2}) +assert(db.getLastError(), "A1") + +// FIXME: FAILS for all epsilon < 1 +// Create a point index only slightly bigger than the points we have +t.ensureIndex({ loc : "2d" }, { max : overallMax + epsilon, min : overallMin - epsilon }) +assert.isnull(db.getLastError(), "A2") + + + + + + + + +//************ +// Box Tests +//************ + + +/* +// FIXME: Fails w/ non-nice error +// Make sure we can get all points in full bounds +assert(numItems == t.find({ loc : { $within : { $box : [[overallMin - epsilon, + overallMin - epsilon], + [overallMax + epsilon, + overallMax + epsilon]] } } }).count(), "B1"); +*/ + +// Make sure an error is thrown if the bounds are bigger than the box itself +// TODO: Do we really want an error in this case? Shouldn't we just clip the box? 
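+// the query below is expected to throw because this $box extends past the declared
+// min/max of the 2d index; the empty catch swallows the expected error.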
+try{ + t.findOne({ loc : { $within : { $box : [[overallMin - 2 * epsilon, + overallMin - 2 * epsilon], + [overallMax + 2 * epsilon, + overallMax + 2 * epsilon]] } } }); + assert(false, "B2"); +} +catch(e){} + +//Make sure we can get at least close to the bounds of the index +assert(numItems == t.find({ loc : { $within : { $box : [[overallMin - epsilon / 2, + overallMin - epsilon / 2], + [overallMax + epsilon / 2, + overallMax + epsilon / 2]] } } }).count(), "B3"); + + +//************** +//Circle tests +//************** + +center = (overallMax + overallMin) / 2 +center = [center, center] +radius = overallMax + +offCenter = [center[0] + radius, center[1] + radius] +onBounds = [offCenter[0] + epsilon, offCenter[1] + epsilon] +offBounds = [onBounds[0] + epsilon, onBounds[1] + epsilon] + + +//Make sure we can get all points when radius is exactly at full bounds +assert(0 < t.find({ loc : { $within : { $center : [center, radius + epsilon] } } }).count(), "C1"); + +//Make sure we can get points when radius is over full bounds +assert(0 < t.find({ loc : { $within : { $center : [center, radius + 2 * epsilon] } } }).count(), "C2"); + +//Make sure we can get points when radius is over full bounds, off-centered +assert(0 < t.find({ loc : { $within : { $center : [offCenter, radius + 2 * epsilon] } } }).count(), "C3"); + +//Make sure we get correct corner point when center is in bounds +// (x bounds wrap, so could get other corner) +cornerPt = t.findOne({ loc : { $within : { $center : [offCenter, step / 2] } } }); +assert(cornerPt.loc.y == overallMax, "C4") + +/* +// FIXME: FAILS, returns opposite corner +// Make sure we get correct corner point when center is on bounds +cornerPt = t.findOne({ loc : { $within : { $center : [onBounds, + Math.sqrt(2 * epsilon * epsilon) + (step / 2) ] } } }); +assert(cornerPt.loc.y == overallMax, "C5") +*/ + +// TODO: Handle gracefully? +// Make sure we can't get corner point when center is over bounds +try{ + t.findOne({ loc : { $within : { $center : [offBounds, + Math.sqrt(8 * epsilon * epsilon) + (step / 2) ] } } }); + assert(false, "C6") +} +catch(e){} + + + + + + + +//*********** +//Near tests +//*********** + +//Make sure we can get all nearby points to point in range +assert(t.find({ loc : { $near : offCenter } }).next().loc.y == overallMax, + "D1"); + +/* +// FIXME: FAILS, returns opposite list +// Make sure we can get all nearby points to point on boundary +assert(t.find({ loc : { $near : onBounds } }).next().loc.y == overallMax, + "D2"); +*/ + +//TODO: Could this work? +//Make sure we can't get all nearby points to point over boundary +try{ + t.findOne({ loc : { $near : offBounds } }) + assert(false, "D3") +} +catch(e){} + +/* +// FIXME: FAILS, returns only single point +//Make sure we can get all nearby points within one step (4 points in top corner) +assert(4 == t.find({ loc : { $near : offCenter, $maxDistance : step * 1.9 } }).count(), + "D4"); +*/ + + + +//************** +//Command Tests +//************** + + +//Make sure we can get all nearby points to point in range +assert(db.runCommand({ geoNear : "borders", near : offCenter }).results[0].obj.loc.y == overallMax, + "E1"); + + +/* +// FIXME: FAILS, returns opposite list +//Make sure we can get all nearby points to point on boundary +assert(db.runCommand({ geoNear : "borders", near : onBounds }).results[0].obj.loc.y == overallMax, + "E2"); +*/ + +//TODO: Could this work? 
+//Make sure we can't get all nearby points to point over boundary +try{ + db.runCommand({ geoNear : "borders", near : offBounds }).results.length + assert(false, "E3") +} +catch(e){} + + +/* +// FIXME: Fails, returns one point +//Make sure we can get all nearby points within one step (4 points in top corner) +assert(4 == db.runCommand({ geoNear : "borders", near : offCenter, maxDistance : step * 1.5 }).results.length, + "E4"); +*/ + + + diff --git a/jstests/geo_center_sphere1.js b/jstests/geo_center_sphere1.js new file mode 100644 index 0000000..dd7c98a --- /dev/null +++ b/jstests/geo_center_sphere1.js @@ -0,0 +1,93 @@ + +t = db.geo_center_sphere1; +t.drop(); + +skip = 3 // lower for more rigor, higher for more speed (tested with .5, .678, 1, 2, 3, and 4) + +searches = [ + // x , y rad + [ [ 5 , 0 ] , 0.05 ] , // ~200 miles + [ [ 135 , 0 ] , 0.05 ] , + + [ [ 5 , 70 ] , 0.05 ] , + [ [ 135 , 70 ] , 0.05 ] , + [ [ 5 , 85 ] , 0.05 ] , + + [ [ 20 , 0 ] , 0.25 ] , // ~1000 miles + [ [ 20 , -45 ] , 0.25 ] , + [ [ -20 , 60 ] , 0.25 ] , + [ [ -20 , -70 ] , 0.25 ] , +]; +correct = searches.map( function(z){ return []; } ); + +num = 0; + +for ( x=-179; x<=179; x += skip ){ + for ( y=-89; y<=89; y += skip ){ + o = { _id : num++ , loc : [ x , y ] } + t.save( o ) + for ( i=0; i 0 ) { + assert.eq.automsg( "2", "t.find( { a:5, b:{$gte:5.5,$lte:6}, c:5 } ).sort( sort ).explain().nscanned" ); + assert.eq.automsg( "2", "t.find( { a:5, b:{$gte:5,$lte:5.5}, c:5 } ).sort( sort ).explain().nscanned" ); + } else { + assert.eq.automsg( "2", "t.find( { a:5, b:{$gte:5.5,$lte:6}, c:5 } ).sort( sort ).explain().nscanned" ); + assert.eq.automsg( "2", "t.find( { a:5, b:{$gte:5,$lte:5.5}, c:5 } ).sort( sort ).explain().nscanned" ); + } +assert.eq.automsg( "7", "t.find( { a:5, b:{$gte:5,$lte:7}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "4", "t.find( { a:{$gte:5,$lte:6}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); + if ( s.a > 0 ) { + assert.eq.automsg( "2", "t.find( { a:{$gte:5.5,$lte:6}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); + assert.eq.automsg( "2", "t.find( { a:{$gte:5,$lte:5.5}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); + assert.eq.automsg( "3", "t.find( { a:{$gte:5.5,$lte:6}, b:5, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); + } else { + assert.eq.automsg( "2", "t.find( { a:{$gte:5.5,$lte:6}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); + assert.eq.automsg( "2", "t.find( { a:{$gte:5,$lte:5.5}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); + assert.eq.automsg( "3", "t.find( { a:{$gte:5.5,$lte:6}, b:5, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); + } +assert.eq.automsg( "7", "t.find( { a:{$gte:5,$lte:7}, b:5, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "6", "t.find( { a:{$gte:5,$lte:6}, b:5, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "6", "t.find( { a:5, b:{$gte:5,$lte:6}, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "10", "t.find( { a:{$gte:5,$lte:6}, b:{$gte:5,$lte:6}, c:5 } ).sort( sort ).explain().nscanned" ); +assert.eq.automsg( "14", "t.find( { a:{$gte:5,$lte:6}, b:{$gte:5,$lte:6}, c:{$gte:5,$lte:6} } ).sort( sort ).explain().nscanned" ); } for ( var a = -1; a <= 1; a += 2 ) { diff --git a/jstests/index_check7.js b/jstests/index_check7.js index 68102d6..1d0aaeb 100644 --- a/jstests/index_check7.js +++ b/jstests/index_check7.js @@ -11,5 +11,5 @@ assert.eq( 1 , t.find( { x : 27 } ).explain().nscanned , "A" ) t.ensureIndex( { x : -1 } ) assert.eq( 1 , 
t.find( { x : 27 } ).explain().nscanned , "B" ) -assert.eq( 41 , t.find( { x : { $gt : 59 } } ).explain().nscanned , "C" ); +assert.eq( 40 , t.find( { x : { $gt : 59 } } ).explain().nscanned , "C" ); diff --git a/jstests/index_many2.js b/jstests/index_many2.js index 3fca5f5..f113b8b 100644 --- a/jstests/index_many2.js +++ b/jstests/index_many2.js @@ -27,3 +27,5 @@ assert.eq( num - 1 , t.getIndexKeys().length , "B0" ) t.ensureIndex( { z : 1 } ) assert.eq( num , t.getIndexKeys().length , "B1" ) +t.dropIndex( "*" ); +assert.eq( 1 , t.getIndexKeys().length , "C1" ) diff --git a/jstests/index_sparse1.js b/jstests/index_sparse1.js new file mode 100644 index 0000000..f2805b3 --- /dev/null +++ b/jstests/index_sparse1.js @@ -0,0 +1,46 @@ + +t = db.index_sparse1; +t.drop(); + +t.insert( { _id : 1 , x : 1 } ) +t.insert( { _id : 2 , x : 2 } ) +t.insert( { _id : 3 , x : 2 } ) +t.insert( { _id : 4 } ) +t.insert( { _id : 5 } ) + +assert.eq( 5 , t.count() , "A1" ) +assert.eq( 5 , t.find().sort( { x : 1 } ).itcount() , "A2" ) + +t.ensureIndex( { x : 1 } ) +assert.eq( 2 , t.getIndexes().length , "B1" ) +assert.eq( 5 , t.find().sort( { x : 1 } ).itcount() , "B2" ) +t.dropIndex( { x : 1 } ) +assert.eq( 1 , t.getIndexes().length , "B3" ) + +t.ensureIndex( { x : 1 } , { sparse : 1 } ) +assert.eq( 2 , t.getIndexes().length , "C1" ) +assert.eq( 3 , t.find().sort( { x : 1 } ).itcount() , "C2" ) +t.dropIndex( { x : 1 } ) +assert.eq( 1 , t.getIndexes().length , "C3" ) + +// -- sparse & unique + +t.remove( { _id : 2 } ) + +// test that we can't create a unique index without sparse +t.ensureIndex( { x : 1 } , { unique : 1 } ) +assert( db.getLastError() , "D1" ) +assert.eq( 1 , t.getIndexes().length , "D2" ) + + +t.ensureIndex( { x : 1 } , { unique : 1 , sparse : 1 } ) +assert.eq( 2 , t.getIndexes().length , "E1" ) +t.dropIndex( { x : 1 } ) +assert.eq( 1 , t.getIndexes().length , "E3" ) + + +t.insert( { _id : 2 , x : 2 } ) +t.ensureIndex( { x : 1 } , { unique : 1 , sparse : 1 } ) +assert.eq( 1 , t.getIndexes().length , "F1" ) + + diff --git a/jstests/index_sparse2.js b/jstests/index_sparse2.js new file mode 100644 index 0000000..2b16c9d --- /dev/null +++ b/jstests/index_sparse2.js @@ -0,0 +1,21 @@ +t = db.index_sparse2; +t.drop(); + +t.insert( { _id : 1 , x : 1 , y : 1 } ) +t.insert( { _id : 2 , x : 2 } ) +t.insert( { _id : 3 } ) + +t.ensureIndex( { x : 1 , y : 1 } ) +assert.eq( 2 , t.getIndexes().length , "A1" ) +assert.eq( 3 , t.find().sort( { x : 1 , y : 1 } ).itcount() , "A2" ) +t.dropIndex( { x : 1 , y : 1 } ) +assert.eq( 1 , t.getIndexes().length , "A3" ) + +t.ensureIndex( { x : 1 , y : 1 } , { sparse : 1 } ) +assert.eq( 2 , t.getIndexes().length , "B1" ) +assert.eq( 2 , t.find().sort( { x : 1 , y : 1 } ).itcount() , "B2" ) +t.dropIndex( { x : 1 , y : 1 } ) +assert.eq( 1 , t.getIndexes().length , "B3" ) + + + diff --git a/jstests/indexh.js b/jstests/indexh.js index c6aad18..ac2a93e 100644 --- a/jstests/indexh.js +++ b/jstests/indexh.js @@ -6,11 +6,17 @@ function debug( t ) { print( t ); } +function extraDebug() { +// printjson( db.stats() ); +// db.printCollectionStats(); +} + // index extent freeing t.drop(); t.save( {} ); var s1 = db.stats().dataSize; debug( "s1: " + s1 ); +extraDebug(); t.ensureIndex( {a:1} ); var s2 = db.stats().dataSize; debug( "s2: " + s2 ); @@ -18,6 +24,7 @@ assert.automsg( "s1 < s2" ); t.dropIndex( {a:1} ); var s3 = db.stats().dataSize; debug( "s3: " + s3 ); +extraDebug(); assert.eq.automsg( "s1", "s3" ); // index node freeing diff --git a/jstests/indexi.js b/jstests/indexi.js new file 
mode 100644 index 0000000..b54ffce --- /dev/null +++ b/jstests/indexi.js @@ -0,0 +1,16 @@ +t = db.jstests_indexi; + +t.drop(); + +for( var a = 0; a < 10; ++a ) { + for( var b = 0; b < 10; ++b ) { + for( var c = 0; c < 10; ++c ) { + t.save( {a:a,b:b,c:c} ); + } + } +} + +t.ensureIndex( {a:1,b:1,c:1} ); +t.ensureIndex( {a:1,c:1} ); + +assert.automsg( "!t.find( {a:{$gt:1,$lt:10},c:{$gt:1,$lt:10}} ).explain().indexBounds.b" ); \ No newline at end of file diff --git a/jstests/indexj.js b/jstests/indexj.js new file mode 100644 index 0000000..0d1afc2 --- /dev/null +++ b/jstests/indexj.js @@ -0,0 +1,44 @@ +// SERVER-726 + +t = db.jstests_indexj; +t.drop(); + +t.ensureIndex( {a:1} ); +t.save( {a:5} ); +assert.eq( 0, t.find( { a: { $gt:4, $lt:5 } } ).explain().nscanned, "A" ); + +t.drop(); +t.ensureIndex( {a:1} ); +t.save( {a:4} ); +assert.eq( 0, t.find( { a: { $gt:4, $lt:5 } } ).explain().nscanned, "B" ); + +t.save( {a:5} ); +assert.eq( 0, t.find( { a: { $gt:4, $lt:5 } } ).explain().nscanned, "D" ); + +t.save( {a:4} ); +assert.eq( 0, t.find( { a: { $gt:4, $lt:5 } } ).explain().nscanned, "C" ); + +t.save( {a:5} ); +assert.eq( 0, t.find( { a: { $gt:4, $lt:5 } } ).explain().nscanned, "D" ); + +t.drop(); +t.ensureIndex( {a:1,b:1} ); +t.save( { a:1,b:1 } ); +t.save( { a:1,b:2 } ); +t.save( { a:2,b:1 } ); +t.save( { a:2,b:2 } ); + +assert.eq( 2, t.find( { a:{$in:[1,2]}, b:{$gt:1,$lt:2} } ).explain().nscanned ); +assert.eq( 2, t.find( { a:{$in:[1,2]}, b:{$gt:1,$lt:2} } ).sort( {a:-1,b:-1} ).explain().nscanned ); + +t.save( {a:1,b:1} ); +t.save( {a:1,b:1} ); +assert.eq( 2, t.find( { a:{$in:[1,2]}, b:{$gt:1,$lt:2} } ).explain().nscanned ); +assert.eq( 2, t.find( { a:{$in:[1,2]}, b:{$gt:1,$lt:2} } ).explain().nscanned ); +assert.eq( 2, t.find( { a:{$in:[1,2]}, b:{$gt:1,$lt:2} } ).sort( {a:-1,b:-1} ).explain().nscanned ); + +assert.eq( 1, t.find( { a:{$in:[1,1.9]}, b:{$gt:1,$lt:2} } ).explain().nscanned ); +assert.eq( 1, t.find( { a:{$in:[1.1,2]}, b:{$gt:1,$lt:2} } ).sort( {a:-1,b:-1} ).explain().nscanned ); + +t.save( { a:1,b:1.5} ); +assert.eq( 3, t.find( { a:{$in:[1,2]}, b:{$gt:1,$lt:2} } ).explain().nscanned, "F" ); diff --git a/jstests/insert2.js b/jstests/insert2.js new file mode 100644 index 0000000..442e7dc --- /dev/null +++ b/jstests/insert2.js @@ -0,0 +1,8 @@ + +t = db.insert2 +t.drop() + +assert.isnull( t.findOne() , "A" ) +t.insert( { z : 1 , $inc : { x : 1 } } , true ); +assert.isnull( t.findOne() , "B" ) + diff --git a/jstests/jni2.js b/jstests/jni2.js index 221780d..53ad58c 100644 --- a/jstests/jni2.js +++ b/jstests/jni2.js @@ -14,8 +14,8 @@ assert.throws( function(){ db.jni2t.save( { y : 1 } ); return 1; } - } ).length(); -} , "can't save from $where" ); + } ).forEach( printjson ); +} , null , "can't save from $where" ); assert.eq( 0 , db.jni2t.find().length() , "B" ) diff --git a/jstests/killop.js b/jstests/killop.js new file mode 100644 index 0000000..b5e50d9 --- /dev/null +++ b/jstests/killop.js @@ -0,0 +1,43 @@ +t = db.jstests_killop +t.drop(); + +if ( typeof _threadInject == "undefined" ) { // don't run in v8 mode - SERVER-1900 + +function debug( x ) { +// printjson( x ); +} + +t.save( {} ); +db.getLastError(); + +function ops() { + p = db.currentOp().inprog; + debug( p ); + ids = []; + for ( var i in p ) { + var o = p[ i ]; + if ( o.active && o.query && o.query.query && o.query.query.$where && o.ns == "test.jstests_killop" ) { + ids.push( o.opid ); + } + } + return ids; +} + +s1 = startParallelShell( "db.jstests_killop.count( { $where: function() { while( 1 ) { ; } } } )" ); +s2 = 
startParallelShell( "db.jstests_killop.count( { $where: function() { while( 1 ) { ; } } } )" ); + +o = []; +assert.soon( function() { o = ops(); return o.length == 2; } ); +debug( o ); +db.killOp( o[ 0 ] ); +db.killOp( o[ 1 ] ); + +start = new Date(); + +s1(); +s2(); + +// don't want to pass if timeout killed the js function +assert( ( new Date() ) - start < 30000 ); + +} \ No newline at end of file diff --git a/jstests/libs/concurrent.js b/jstests/libs/concurrent.js new file mode 100644 index 0000000..9198818 --- /dev/null +++ b/jstests/libs/concurrent.js @@ -0,0 +1,30 @@ +/* NOTE: Requires mongo shell to be built with V8 javascript engine, +which implements concurrent threads via fork() */ + +// Fork and start +function fork_(thunk) { + thread = fork(thunk) + thread.start() + return thread +} + +// In functional form, useful for high-order functions like map in fun.js +function join_(thread) {thread.join()} + +// Fork a loop on each one-arg block and wait for all of them to terminate. Foreground blocks are executed n times, background blocks are executed repeatedly until all forground loops finish. If any fail, stop all loops and reraise exception in main thread +function parallel(n, foregroundBlock1s, backgroundBlock1s) { + var err = null + var stop = false + function loop(m) {return function(block1) {return function() { + for (var i = 0; i < m; i++) {if (stop) break; block1(i)} }}} + function watch(block) {return function() { + try {block()} catch(e) {err = e; stop = true}}} + foreThunks = map(watch, map(loop(n), foregroundBlock1s)) + backThunks = map(watch, map(loop(Infinity), backgroundBlock1s)) + foreThreads = map(fork_, foreThunks) + backThreads = map(fork_, backThunks) + map(join_, foreThreads) + stop = true + map(join_, backThreads) + if (err != null) throw err +} diff --git a/jstests/libs/fun.js b/jstests/libs/fun.js new file mode 100644 index 0000000..276f32a --- /dev/null +++ b/jstests/libs/fun.js @@ -0,0 +1,32 @@ +// General high-order functions + +function forEach (action, array) { + for (var i = 0; i < array.length; i++) + action (array[i]); +} + +function foldl (combine, base, array) { + for (var i = 0; i < array.length; i++) + base = combine (base, array[i]); + return base +} + +function foldr (combine, base, array) { + for (var i = array.length - 1; i >= 0; i--) + base = combine (array[i], base); + return base +} + +function map (func, array) { + var result = []; + for (var i = 0; i < array.length; i++) + result.push (func (array[i])); + return result +} + +function filter (pred, array) { + var result = [] + for (var i = 0; i < array.length; i++) + if (pred (array[i])) result.push (array[i]); + return result +} diff --git a/jstests/libs/geo_near_random.js b/jstests/libs/geo_near_random.js new file mode 100644 index 0000000..8624ef2 --- /dev/null +++ b/jstests/libs/geo_near_random.js @@ -0,0 +1,78 @@ +GeoNearRandomTest = function(name) { + this.name = name; + this.t = db[name]; + this.nPts = 0; + + // reset state + this.t.drop(); + Random.srand(1234); + + print("starting test: " + name); +} + + +GeoNearRandomTest.prototype.mkPt = function mkPt(scale){ + scale = scale || 1; // scale is good for staying away from edges + return [((Random.rand() * 359.8) - 179.9) * scale, ((Random.rand() * 180) - 90) * scale]; +} + +GeoNearRandomTest.prototype.insertPts = function(nPts) { + assert.eq(this.nPts, 0, "insertPoints already called"); + this.nPts = nPts; + + for (var i=0; i FreshPorts Server +function Server (name) { + this.dbpath = '/data/db/' + name + nextPort + this.port = 
nextPort++ + this.noprealloc = '' + this.smallfiles = '' + this.rest = '' + this.oplogSize = 8 +} + +Server.prototype.addr = '127.0.0.1' + +// Server -> String +Server.prototype.host = function() { + return this.addr + ':' + this.port +} + +// Start a new server with this spec and return connection to it +// Server -> IO Connection +Server.prototype.begin = function() { + return startMongodEmpty(this) +} + +// Stop server and remove db directory +// Server -> IO () +Server.prototype.end = function() { + print('Stopping mongod on port ' + this.port) + stopMongod (this.port) + resetDbpath (this.dbpath) +} + +// Cut server from network so it is unreachable (but still alive) +// Requires sudo access and ipfw program (Mac OS X and BSD Unix). TODO: use iptables on Linux. +function cutServer (conn) { + var addrport = parseHost (conn.host) + cutNetwork (addrport.port) +} + +// Ensure server is connected to network (undo cutServer) +// Requires sudo access and ipfw program (Mac OS X and BSD Unix). TODO: use iptables on Linux. +function uncutServer (conn) { + var iport = parseHost (conn.host) + restoreNetwork (iport.port) +} + +// Kill server process at other end of this connection +function killServer (conn, _signal) { + var signal = _signal || 15 + var iport = parseHost (conn.host) + stopMongod (iport.port, signal) +} + +/*** ReplicaSet is the spec of a replica set, ie. options given to ReplicaSetTest. + To start a replica set call 'begin' ***/ +// new ReplicaSet :: String -> Int -> FreshPorts ReplicaSet +function ReplicaSet (name, numServers) { + this.name = name + this.host = '127.0.0.1' + this.nodes = numServers + this.startPort = nextPort + this.oplogSize = 40 + nextPort += numServers +} + +// Start a replica set with this spec and return ReplSetTest, which hold connections to the servers including the master server. Call ReplicaSetTest.stopSet() to end all servers +// ReplicaSet -> IO ReplicaSetTest +ReplicaSet.prototype.begin = function() { + var rs = new ReplSetTest(this) + rs.startSet() + rs.initiate() + rs.awaitReplication() + return rs +} + +// Create a new server and add it to replica set +// ReplicaSetTest -> IO Connection +ReplSetTest.prototype.addServer = function() { + var conn = this.add() + nextPort++ + this.reInitiate() + this.awaitReplication() + assert.soon(function() { + var doc = conn.getDB('admin').isMaster() + return doc['ismaster'] || doc['secondary'] + }) + return conn +} + +/*** ConfigSet is a set of specs (Servers) for sharding config servers. + Supply either the servers or the number of servers desired. + To start the config servers call 'begin' ***/ +// new ConfigSet :: [Server] or Int -> FreshPorts ConfigSet +function ConfigSet (configSvrsOrNumSvrs) { + if (typeof configSvrsOrNumSvrs == 'number') { + this.configSvrs = [] + for (var i = 0; i < configSvrsOrNumSvrs; i++) + this.configSvrs.push (new Server ('config')) + } else + this.configSvrs = configSvrs +} + +// Start config servers, return list of connections to them +// ConfigSet -> IO [Connection] +ConfigSet.prototype.begin = function() { + return map (function(s) {return s.begin()}, this.configSvrs) +} + +// Stop config servers +// ConfigSet -> IO () +ConfigSet.prototype.end = function() { + return map (function(s) {return s.end()}, this.configSvrs) +} + +/*** Router is the spec for a mongos, ie, its command line options. 
+ To start a router (mongos) call 'begin' ***/ +// new Router :: ConfigSet -> FreshPorts Router +function Router (configSet) { + this.port = nextPort++ + this.v = 0 + this.configdb = map (function(s) {return s.host()}, configSet.configSvrs) .join(',') + this.chunkSize = 1 +} + +// Start router (mongos) with this spec and return connection to it +// Router -> IO Connection +Router.prototype.begin = function() { + return startMongos (this) +} + +// Stop router +// Router -> IO () +Router.prototype.end = function() { + return stopMongoProgram (this.port) +} + +// Add shard to config via router (mongos) connection. Shard is either a replSet name (replSet.getURL()) or single server (server.host) +// Connection -> String -> IO () +function addShard (routerConn, repSetOrHostName) { + var ack = routerConn.getDB('admin').runCommand ({addshard: repSetOrHostName}) + assert (ack['ok'], tojson(ack)) +} + +// Connection -> String -> IO () +function enableSharding (routerConn, dbName) { + var ack = routerConn.getDB('admin').runCommand ({enablesharding: dbName}) + assert (ack['ok'], tojson(ack)) +} + +// Connection -> String -> String -> String -> IO () +function shardCollection (routerConn, dbName, collName, shardKey) { + var ack = routerConn.getDB('admin').runCommand ({shardcollection: dbName + '.' + collName, key: shardKey}) + assert (ack['ok'], tojson(ack)) +} + +// Move db from its current primary shard to given shard. Shard is either a replSet name (replSet.getURL()) or single server (server.host) +// Connection -> String -> String -> IO () +function moveDB (routerConn, dbname, repSetOrHostName) { + var ack = routerConn.getDB('admin').runCommand ({moveprimary: dbname, to: repSetOrHostName}) + printjson(ack) + assert (ack['ok'], tojson(ack)) +} diff --git a/jstests/libs/network.js b/jstests/libs/network.js new file mode 100644 index 0000000..e5b33f3 --- /dev/null +++ b/jstests/libs/network.js @@ -0,0 +1,37 @@ + +// Parse "127.0.0.1:300" into {addr: "127.0.0.1", port: 300}, +// and "127.0.0.1" into {addr: "127.0.0.1", port: undefined} +function parseHost (hostString) { + var items = hostString.match(/(\d+.\d+.\d+.\d+)(:(\d+))?/) + return {addr: items[1], port: parseInt(items[3])} +} + + +/* Network traffic shaping (packet dropping) to simulate network problems + Currently works on BSD Unix and Mac OS X only (using ipfw). + Requires sudo access. + TODO: make it work on Linux too (using iptables). 
*/ + +var nextRuleNum = 100 // this grows indefinitely but can't exceed 65534, so can't call routines below indefinitely +var portRuleNum = {} + +// Cut network connection to local port by dropping packets using iptables +function cutNetwork (port) { + portRuleNum[port] = nextRuleNum + runProgram ('sudo', 'ipfw', 'add ' + nextRuleNum++ + ' deny tcp from any to any ' + port) + runProgram ('sudo', 'ipfw', 'add ' + nextRuleNum++ + ' deny tcp from any ' + port + ' to any') + //TODO: confirm it worked (since sudo may not work) + runProgram ('sudo', 'ipfw', 'show') +} + +// Restore network connection to local port by not dropping packets using iptables +function restoreNetwork (port) { + var ruleNum = portRuleNum[port] + if (ruleNum) { + runProgram ('sudo', 'ipfw', 'delete ' + ruleNum++) + runProgram ('sudo', 'ipfw', 'delete ' + ruleNum) + delete portRuleNum[port] + } + //TODO: confirm it worked (since sudo may not work) + runProgram ('sudo', 'ipfw', 'show') +} diff --git a/jstests/misc/biginsert.js b/jstests/misc/biginsert.js new file mode 100755 index 0000000..ebbdc18 --- /dev/null +++ b/jstests/misc/biginsert.js @@ -0,0 +1,18 @@ +o = "xxxxxxxxxxxxxxxxxxx"; +o = o + o; +o + o; +o = o + o; +o = o + o; +o = o + o; + +var B = 40000; +var last = new Date(); +for (i = 0; i < 30000000; i++) { + db.foo.insert({ o: o }); + if (i % B == 0) { + var n = new Date(); + print(i); + print("per sec: " + B*1000 / (n - last)); + last = n; + } +} diff --git a/jstests/mr1.js b/jstests/mr1.js index aacd69b..dc81534 100644 --- a/jstests/mr1.js +++ b/jstests/mr1.js @@ -49,7 +49,7 @@ r2 = function( key , values ){ return total; }; -res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r } ); +res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , out : "mr1_out" } ); d( res ); if ( ks == "_id" ) assert( res.ok , "not ok" ); assert.eq( 4 , res.counts.input , "A" ); @@ -66,7 +66,7 @@ assert.eq( 3 , z.b , "E" ); assert.eq( 3 , z.c , "F" ); x.drop(); -res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , query : { x : { "$gt" : 2 } } } ); +res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , query : { x : { "$gt" : 2 } } , out : "mr1_out" } ); d( res ); assert.eq( 2 , res.counts.input , "B" ); x = db[res.result]; @@ -77,7 +77,7 @@ assert.eq( 1 , z.b , "C2" ); assert.eq( 2 , z.c , "C3" ); x.drop(); -res = db.runCommand( { mapreduce : "mr1" , map : m2 , reduce : r2 , query : { x : { "$gt" : 2 } } } ); +res = db.runCommand( { mapreduce : "mr1" , map : m2 , reduce : r2 , query : { x : { "$gt" : 2 } } , out : "mr1_out" } ); d( res ); assert.eq( 2 , res.counts.input , "B" ); x = db[res.result]; @@ -104,7 +104,7 @@ for ( i=5; i<1000; i++ ){ t.save( { x : i , tags : [ "b" , "d" ] } ); } -res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r } ); +res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , out : "mr1_out" } ); d( res ); assert.eq( 999 , res.counts.input , "Z1" ); x = db[res.result]; @@ -125,12 +125,12 @@ assert.eq( 995 , getk( "d" ).value.count , "ZD" ); x.drop(); if ( true ){ - printjson( db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , verbose : true } ) ); + printjson( db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , verbose : true , out : "mr1_out" } ) ); } print( "t1: " + Date.timeFunc( function(){ - var out = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r } ); + var out = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , out : "mr1_out" } ); if ( ks == "_id" ) assert( out.ok , "XXX : " + tojson( out ) ); 
db[out.result].drop(); } , 10 ) + " (~500 on 2.8ghz) - itcount: " + Date.timeFunc( function(){ db.mr1.find().itcount(); } , 10 ) ); @@ -138,7 +138,7 @@ print( "t1: " + Date.timeFunc( // test doesn't exist -res = db.runCommand( { mapreduce : "lasjdlasjdlasjdjasldjalsdj12e" , map : m , reduce : r } ); +res = db.runCommand( { mapreduce : "lasjdlasjdlasjdjasldjalsdj12e" , map : m , reduce : r , out : "mr1_out" } ); assert( ! res.ok , "should be not ok" ); if ( true ){ @@ -166,11 +166,15 @@ if ( true ){ } x.drop(); - res = db.runCommand( { mapreduce : "mr1" , out : "mr1_foo" , map : m2 , reduce : r2 } ); + res = db.runCommand( { mapreduce : "mr1" , out : "mr1_foo" , map : m2 , reduce : r2 , out : "mr1_out" } ); d(res); print( "t3: " + res.timeMillis + " (~3500 on 2.8ghz)" ); + + res = db.runCommand( { mapreduce : "mr1" , map : m2 , reduce : r2 , out : { inline : true } } ); + print( "t4: " + res.timeMillis ); + } -res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r } ); +res = db.runCommand( { mapreduce : "mr1" , map : m , reduce : r , out : "mr1_out" } ); assert( res.ok , "should be ok" ); diff --git a/jstests/mr2.js b/jstests/mr2.js index 0a8e9d6..709c305 100644 --- a/jstests/mr2.js +++ b/jstests/mr2.js @@ -29,7 +29,12 @@ function r( who , values ){ function reformat( r ){ var x = {}; - r.find().forEach( + var cursor; + if ( r.results ) + cursor = r.results; + else + cursor = r.find(); + cursor.forEach( function(z){ x[z._id] = z.value; } @@ -41,10 +46,22 @@ function f( who , res ){ res.avg = res.totalSize / res.num; return res; } -res = t.mapReduce( m , r , { finalize : f } ); + +res = t.mapReduce( m , r , { finalize : f , out : "mr2_out" } ); +printjson( res ) x = reformat( res ); -assert.eq( 9 , x.a.avg , "A" ); -assert.eq( 16 , x.b.avg , "B" ); -assert.eq( 18 , x.c.avg , "C" ); +assert.eq( 9 , x.a.avg , "A1" ); +assert.eq( 16 , x.b.avg , "A2" ); +assert.eq( 18 , x.c.avg , "A3" ); res.drop(); +res = t.mapReduce( m , r , { finalize : f , out : { inline : 1 } } ); +printjson( res ) +x = reformat( res ); +assert.eq( 9 , x.a.avg , "B1" ); +assert.eq( 16 , x.b.avg , "B2" ); +assert.eq( 18 , x.c.avg , "B3" ); +res.drop(); + +assert( ! 
( "result" in res ) , "B4" ) + diff --git a/jstests/mr3.js b/jstests/mr3.js index e7d1f2c..3b0a918 100644 --- a/jstests/mr3.js +++ b/jstests/mr3.js @@ -25,7 +25,7 @@ r = function( key , values ){ return { count : total }; }; -res = t.mapReduce( m , r ); +res = t.mapReduce( m , r , { out : "mr3_out" } ); z = res.convertToSingleObject() assert.eq( 3 , Object.keySet( z ).length , "A1" ); @@ -35,7 +35,7 @@ assert.eq( 3 , z.c.count , "A4" ); res.drop(); -res = t.mapReduce( m , r , { mapparams : [ 2 , 2 ] } ); +res = t.mapReduce( m , r , { out : "mr3_out" , mapparams : [ 2 , 2 ] } ); z = res.convertToSingleObject() assert.eq( 3 , Object.keySet( z ).length , "B1" ); @@ -52,7 +52,7 @@ realm = m; m = function(){ emit( this._id , 1 ); } -res = t.mapReduce( m , r ); +res = t.mapReduce( m , r , { out : "mr3_out" } ); res.drop(); m = function(){ @@ -60,7 +60,7 @@ m = function(){ } before = db.getCollectionNames().length; -assert.throws( function(){ t.mapReduce( m , r ); } ); +assert.throws( function(){ t.mapReduce( m , r , { out : "mr3_out" } ); } ); assert.eq( before , db.getCollectionNames().length , "after throw crap" ); @@ -69,5 +69,5 @@ r = function( k , v ){ return v.x.x.x; } before = db.getCollectionNames().length; -assert.throws( function(){ t.mapReduce( m , r ); } ); +assert.throws( function(){ t.mapReduce( m , r , "mr3_out" ) } ) assert.eq( before , db.getCollectionNames().length , "after throw crap" ); diff --git a/jstests/mr4.js b/jstests/mr4.js index b14cdfe..78c8bce 100644 --- a/jstests/mr4.js +++ b/jstests/mr4.js @@ -23,7 +23,7 @@ r = function( key , values ){ return { count : total }; }; -res = t.mapReduce( m , r , { scope : { xx : 1 } } ); +res = t.mapReduce( m , r , { out : "mr4_out" , scope : { xx : 1 } } ); z = res.convertToSingleObject() assert.eq( 3 , Object.keySet( z ).length , "A1" ); @@ -34,7 +34,7 @@ assert.eq( 3 , z.c.count , "A4" ); res.drop(); -res = t.mapReduce( m , r , { scope : { xx : 2 } } ); +res = t.mapReduce( m , r , { scope : { xx : 2 } , out : "mr4_out" } ); z = res.convertToSingleObject() assert.eq( 3 , Object.keySet( z ).length , "A1" ); diff --git a/jstests/mr5.js b/jstests/mr5.js index bbac3fe..50a63d1 100644 --- a/jstests/mr5.js +++ b/jstests/mr5.js @@ -25,7 +25,7 @@ r = function( k , v ){ return { stats : stats , total : total } } -res = t.mapReduce( m , r , { scope : { xx : 1 } } ); +res = t.mapReduce( m , r , { out : "mr5_out" , scope : { xx : 1 } } ); //res.find().forEach( printjson ) z = res.convertToSingleObject() @@ -44,7 +44,7 @@ m = function(){ -res = t.mapReduce( m , r , { scope : { xx : 1 } } ); +res = t.mapReduce( m , r , { out : "mr5_out" , scope : { xx : 1 } } ); //res.find().forEach( printjson ) z = res.convertToSingleObject() diff --git a/jstests/mr_bigobject.js b/jstests/mr_bigobject.js index 8224209..4466b8d 100644 --- a/jstests/mr_bigobject.js +++ b/jstests/mr_bigobject.js @@ -3,11 +3,11 @@ t = db.mr_bigobject t.drop() s = ""; -while ( s.length < ( 1024 * 1024 ) ){ +while ( s.length < ( 6 * 1024 * 1024 ) ){ s += "asdasdasd"; } -for ( i=0; i<10; i++ ) +for ( i=0; i<5; i++ ) t.insert( { _id : i , s : s } ) m = function(){ @@ -18,13 +18,14 @@ r = function( k , v ){ return 1; } -assert.throws( function(){ t.mapReduce( m , r ); } , "emit should fail" ) +assert.throws( function(){ r = t.mapReduce( m , r , "mr_bigobject_out" ); } , null , "emit should fail" ) + m = function(){ emit( 1 , this.s ); } -assert.eq( { 1 : 1 } , t.mapReduce( m , r ).convertToSingleObject() , "A1" ) +assert.eq( { 1 : 1 } , t.mapReduce( m , r , "mr_bigobject_out" 
).convertToSingleObject() , "A1" ) r = function( k , v ){ total = 0; @@ -38,4 +39,6 @@ r = function( k , v ){ return total; } -assert.eq( { 1 : 10 * s.length } , t.mapReduce( m , r ).convertToSingleObject() , "A1" ) +assert.eq( { 1 : t.count() * s.length } , t.mapReduce( m , r , "mr_bigobject_out" ).convertToSingleObject() , "A1" ) + +t.drop() diff --git a/jstests/mr_comments.js b/jstests/mr_comments.js new file mode 100644 index 0000000..f6a0699 --- /dev/null +++ b/jstests/mr_comments.js @@ -0,0 +1,28 @@ + +t = db.mr_comments +t.drop() + +t.insert( { foo : 1 } ) +t.insert( { foo : 1 } ) +t.insert( { foo : 2 } ) + +res = db.runCommand( + { mapreduce : "mr_comments", + map : "// This will fail\n\n // Emit some stuff\n emit(this.foo, 1)\n", + reduce : function(key, values){ + return Array.sum(values); + }, + out: "mr_comments_out" + }); +assert.eq( 3 , res.counts.emit ) + +res = db.runCommand( + { mapreduce : "mr_comments", + map : "// This will fail\nfunction(){\n // Emit some stuff\n emit(this.foo, 1)\n}\n", + reduce : function(key, values){ + return Array.sum(values); + }, + out: "mr_comments_out" + }); + +assert.eq( 3 , res.counts.emit ) diff --git a/jstests/mr_errorhandling.js b/jstests/mr_errorhandling.js index 57724f1..c4e1137 100644 --- a/jstests/mr_errorhandling.js +++ b/jstests/mr_errorhandling.js @@ -24,7 +24,7 @@ r = function( k , v ){ return total; } -res = t.mapReduce( m_good , r ); +res = t.mapReduce( m_good , r , "mr_errorhandling_out" ); assert.eq( { 1 : 1 , 2 : 2 , 3 : 2 , 4 : 1 } , res.convertToSingleObject() , "A" ); res.drop() @@ -32,7 +32,7 @@ res = null; theerror = null; try { - res = t.mapReduce( m_bad , r ); + res = t.mapReduce( m_bad , r , "mr_errorhandling_out" ); } catch ( e ){ theerror = e.toString(); @@ -42,6 +42,8 @@ assert( theerror , "B2" ); assert( theerror.indexOf( "emit" ) >= 0 , "B3" ); // test things are still in an ok state -res = t.mapReduce( m_good , r ); +res = t.mapReduce( m_good , r , "mr_errorhandling_out" ); assert.eq( { 1 : 1 , 2 : 2 , 3 : 2 , 4 : 1 } , res.convertToSingleObject() , "A" ); res.drop() + +assert.throws( function(){ t.mapReduce( m_good , r , { out : "xxx" , query : "foo" } ); } ) diff --git a/jstests/mr_index.js b/jstests/mr_index.js new file mode 100644 index 0000000..521d44d --- /dev/null +++ b/jstests/mr_index.js @@ -0,0 +1,43 @@ + +t = db.mr_index +t.drop() + +outName = "mr_index_out" +out = db[outName] +out.drop() + +t.insert( { tags : [ 1 ] } ) +t.insert( { tags : [ 1 , 2 ] } ) +t.insert( { tags : [ 1 , 2 , 3 ] } ) +t.insert( { tags : [ 3 ] } ) +t.insert( { tags : [ 2 , 3 ] } ) +t.insert( { tags : [ 2 , 3 ] } ) +t.insert( { tags : [ 1 , 2 ] } ) + +m = function(){ + for ( i=0; i .9999 ) + print( t.count() ) + } +} + +function del2( dbname ){ + var m = new Mongo( HOST ) + var db = m.getDB( "foo" + dbname ); + var t = db.del + + while ( ! DONE ){ + var r = Math.random(); + var n = Math.floor( Math.random() * N ); + var s = Math.random() > .5 ? 
1 : -1; + + if ( r < .5 ){ + t.findOne( { x : n } ) + } + else if ( r < .75 ){ + t.find( { x : { $lt : n } } ).sort( { x : s } ).itcount(); + } + else { + t.find( { x : { $gt : n } } ).sort( { x : s } ).itcount(); + } + } +} + +all = [] + +all.push( fork( del1 , "a" ) ) +all.push( fork( del2 , "a" ) ) +all.push( fork( del1 , "b" ) ) +all.push( fork( del2 , "b" ) ) + +for ( i=0; i= 6; } ) +t.update( {} , { $pull : { a : { $lt : 6 } } } ) + +assert.eq( o.a , t.findOne().a , "A2" ) + diff --git a/jstests/push2.js b/jstests/push2.js index 943ec11..b976169 100644 --- a/jstests/push2.js +++ b/jstests/push2.js @@ -18,3 +18,5 @@ for ( x=0; x<200; x++ ){ } assert( gotError , "should have gotten error" ); + +t.drop(); diff --git a/jstests/queryoptimizer2.js b/jstests/queryoptimizer2.js new file mode 100644 index 0000000..af21e95 --- /dev/null +++ b/jstests/queryoptimizer2.js @@ -0,0 +1,62 @@ + +t = db.queryoptimizer2; + +function doTest( f1, f2 ) { + +t.drop() + +for( i = 0; i < 30; ++i ) { + t.save( { a:2 } ); +} + +for( i = 0; i < 30; ++i ) { + t.save( { b:2 } ); +} + +for( i = 0; i < 60; ++i ) { + t.save( { c:2 } ); +} + +t.ensureIndex( { a:1 } ); +t.ensureIndex( { b:1 } ); + +e = t.find( { b:2 } ).batchSize( 100 ).explain( true ); +assert.eq( null, e.oldPlan ); + +t.ensureIndex( { c:1 } ); // will clear query cache + +f1(); + +assert( t.find( { a:2 } ).batchSize( 100 ).explain( true ).oldPlan ); +assert( t.find( { b:2 } ).batchSize( 100 ).explain( true ).oldPlan ); + +e = t.find( { c:2 } ).batchSize( 100 ).explain( true ); +// no pattern should be recorded as a result of the $or query +assert.eq( null, e.oldPlan ); + +t.dropIndex( { b:1 } ); // clear query cache +for( i = 0; i < 15; ++i ) { + t.save( { a:2 } ); +} + +f2(); +// pattern should be recorded, since > half of results returned from this index +assert( t.find( { c:2 } ).batchSize( 100 ).explain( true ).oldPlan ); + +} + +doTest( function() { + t.find( { $or: [ { a:2 }, { b:2 }, { c:2 } ] } ).batchSize( 100 ).toArray(); + }, + function() { + t.find( { $or: [ { a:2 }, { c:2 } ] } ).batchSize( 100 ).toArray(); + } + ); + +doTest( function() { + t.find( { $or: [ { a:2 }, { b:2 }, { c:2 } ] } ).limit( 100 ).count( true ); + }, + function() { + t.find( { $or: [ { a:2 }, { c:2 } ] } ).limit( 100 ).count( true ); + } + ); diff --git a/jstests/regex3.js b/jstests/regex3.js index ee8d9cf..7d703aa 100644 --- a/jstests/regex3.js +++ b/jstests/regex3.js @@ -23,7 +23,7 @@ t.save( { name : "c" } ); assert.eq( 3 , t.find( { name : /^aa*/ } ).count() , "B ni" ); t.ensureIndex( { name : 1 } ); assert.eq( 3 , t.find( { name : /^aa*/ } ).count() , "B i 1" ); -assert.eq( 3 , t.find( { name : /^aa*/ } ).explain().nscanned , "B i 1 e" ); +assert.eq( 4 , t.find( { name : /^aa*/ } ).explain().nscanned , "B i 1 e" ); assert.eq( 2 , t.find( { name : /^a[ab]/ } ).count() , "B i 2" ); assert.eq( 2 , t.find( { name : /^a[bc]/ } ).count() , "B i 3" ); diff --git a/jstests/regex6.js b/jstests/regex6.js index 12ed85b..8243313 100644 --- a/jstests/regex6.js +++ b/jstests/regex6.js @@ -10,10 +10,10 @@ t.save( { name : "aaron" } ); t.ensureIndex( { name : 1 } ); assert.eq( 0 , t.find( { name : /^\// } ).count() , "index count" ); -assert.eq( 0 , t.find( { name : /^\// } ).explain().nscanned , "index explain 1" ); +assert.eq( 1 , t.find( { name : /^\// } ).explain().nscanned , "index explain 1" ); assert.eq( 0 , t.find( { name : /^é/ } ).explain().nscanned , "index explain 2" ); assert.eq( 0 , t.find( { name : /^\é/ } ).explain().nscanned , "index explain 3" ); -assert.eq( 
0 , t.find( { name : /^\./ } ).explain().nscanned , "index explain 4" ); +assert.eq( 1 , t.find( { name : /^\./ } ).explain().nscanned , "index explain 4" ); assert.eq( 4 , t.find( { name : /^./ } ).explain().nscanned , "index explain 5" ); assert.eq( 4 , t.find( { name : /^\Qblah\E/ } ).explain().nscanned , "index explain 6" ); diff --git a/jstests/regex9.js b/jstests/regex9.js index 559efd9..896855c 100644 --- a/jstests/regex9.js +++ b/jstests/regex9.js @@ -1,5 +1,5 @@ -t = db.regex3; +t = db.regex9; t.drop(); t.insert( { _id : 1 , a : [ "a" , "b" , "c" ] } ) diff --git a/jstests/remove_undefined.js b/jstests/remove_undefined.js new file mode 100644 index 0000000..d5344a3 --- /dev/null +++ b/jstests/remove_undefined.js @@ -0,0 +1,28 @@ + +t = db.drop_undefined.js + +t.insert( { _id : 1 } ) +t.insert( { _id : 2 } ) +t.insert( { _id : null } ) + +z = { foo : 1 , x : null } + +t.remove( { x : z.bar } ) +assert.eq( 3 , t.count() , "A1" ) + +t.remove( { x : undefined } ) +assert.eq( 3 , t.count() , "A2" ) + +assert.throws( function(){ t.remove( { _id : z.bar } ) } , null , "B1" ) +assert.throws( function(){ t.remove( { _id : undefined } ) } , null , "B2" ) + + +t.remove( { _id : z.x } ) +assert.eq( 2 , t.count() , "C1" ) + +t.insert( { _id : null } ) +assert.eq( 3 , t.count() , "C2" ) + +assert.throws( function(){ t.remove( { _id : undefined } ) } , null, "C3" ) +assert.eq( 3 , t.count() , "C4" ) + diff --git a/jstests/rename4.js b/jstests/rename4.js new file mode 100644 index 0000000..29be374 --- /dev/null +++ b/jstests/rename4.js @@ -0,0 +1,121 @@ +t = db.jstests_rename4; +t.drop(); + +function c( f ) { + assert( !db.getLastError(), "error" ); + eval( f ); + assert( db.getLastError(), "no error" ); + db.resetError(); +} + +c( "t.update( {}, {$rename:{'a':'a'}} )" ); +c( "t.update( {}, {$rename:{'':'a'}} )" ); +c( "t.update( {}, {$rename:{'a':''}} )" ); +c( "t.update( {}, {$rename:{'_id':'a'}} )" ); +c( "t.update( {}, {$rename:{'a':'_id'}} )" ); +c( "t.update( {}, {$rename:{'_id.a':'b'}} )" ); +c( "t.update( {}, {$rename:{'b':'_id.a'}} )" ); +c( "t.update( {}, {$rename:{'_id.a':'_id.b'}} )" ); +c( "t.update( {}, {$rename:{'_id.b':'_id.a'}} )" ); +c( "t.update( {}, {$rename:{'.a':'b'}} )" ); +c( "t.update( {}, {$rename:{'a':'.b'}} )" ); +c( "t.update( {}, {$rename:{'a.':'b'}} )" ); +c( "t.update( {}, {$rename:{'a':'b.'}} )" ); +c( "t.update( {}, {$rename:{'a.b':'a'}} )" ); +c( "t.update( {}, {$rename:{'a.$':'b'}} )" ); +c( "t.update( {}, {$rename:{'a':'b.$'}} )" ); +c( "t.update( {}, {$set:{b:1},$rename:{'a':'b'}} )" ); +c( "t.update( {}, {$rename:{'a':'b'},$set:{b:1}} )" ); +c( "t.update( {}, {$rename:{'a':'b'},$set:{a:1}} )" ); +c( "t.update( {}, {$set:{'b.c':1},$rename:{'a':'b'}} )" ); +c( "t.update( {}, {$set:{b:1},$rename:{'a':'b.c'}} )" ); +c( "t.update( {}, {$rename:{'a':'b'},$set:{'b.c':1}} )" ); +c( "t.update( {}, {$rename:{'a':'b.c'},$set:{b:1}} )" ); + +t.save( {a:[1],b:{c:[1]},d:[{e:1}],f:1} ); +c( "t.update( {}, {$rename:{'a.0':'f'}} )" ); +c( "t.update( {}, {$rename:{'a.0':'g'}} )" ); +c( "t.update( {}, {$rename:{'f':'a.0'}} )" ); +c( "t.update( {}, {$rename:{'b.c.0':'f'}} )" ); +c( "t.update( {}, {$rename:{'f':'b.c.0'}} )" ); +c( "t.update( {}, {$rename:{'d.e':'d.f'}} )" ); +c( "t.update( {}, {$rename:{'d.e':'f'}} )" ); +c( "t.update( {}, {$rename:{'d.f':'d.e'}} )" ); +c( "t.update( {}, {$rename:{'f':'d.e'}} )" ); +c( "t.update( {}, {$rename:{'d.0.e':'d.f'}} )" ); +c( "t.update( {}, {$rename:{'d.0.e':'f'}} )" ); +c( "t.update( {}, {$rename:{'d.f':'d.0.e'}} )" ); +c( "t.update( 
{}, {$rename:{'f':'d.0.e'}} )" ); +c( "t.update( {}, {$rename:{'f.g':'a'}} )" ); +c( "t.update( {}, {$rename:{'a':'f.g'}} )" ); + +function v( start, mod, expected ) { + t.remove(); + t.save( start ); + t.update( {}, mod ); + assert( !db.getLastError() ); + var got = t.findOne(); + delete got._id; + assert.eq( expected, got ); +} + +v( {a:1}, {$rename:{a:'b'}}, {b:1} ); +v( {a:1}, {$rename:{a:'bb'}}, {bb:1} ); +v( {b:1}, {$rename:{b:'a'}}, {a:1} ); +v( {bb:1}, {$rename:{bb:'a'}}, {a:1} ); +v( {a:{y:1}}, {$rename:{'a.y':'a.z'}}, {a:{z:1}} ); +v( {a:{yy:1}}, {$rename:{'a.yy':'a.z'}}, {a:{z:1}} ); +v( {a:{z:1}}, {$rename:{'a.z':'a.y'}}, {a:{y:1}} ); +v( {a:{zz:1}}, {$rename:{'a.zz':'a.y'}}, {a:{y:1}} ); +v( {a:{c:1}}, {$rename:{a:'b'}}, {b:{c:1}} ); +v( {aa:{c:1}}, {$rename:{aa:'b'}}, {b:{c:1}} ); +v( {a:1,b:2}, {$rename:{a:'b'}}, {b:1} ); +v( {aa:1,b:2}, {$rename:{aa:'b'}}, {b:1} ); +v( {a:1,bb:2}, {$rename:{a:'bb'}}, {bb:1} ); +v( {a:1}, {$rename:{a:'b.c'}}, {b:{c:1}} ); +v( {aa:1}, {$rename:{aa:'b.c'}}, {b:{c:1}} ); +v( {a:1,b:{}}, {$rename:{a:'b.c'}}, {b:{c:1}} ); +v( {aa:1,b:{}}, {$rename:{aa:'b.c'}}, {b:{c:1}} ); +v( {a:1}, {$rename:{b:'c'}}, {a:1} ); +v( {aa:1}, {$rename:{b:'c'}}, {aa:1} ); +v( {}, {$rename:{b:'c'}}, {} ); +v( {a:{b:1,c:2}}, {$rename:{'a.b':'d'}}, {a:{c:2},d:1} ); +v( {a:{bb:1,c:2}}, {$rename:{'a.bb':'d'}}, {a:{c:2},d:1} ); +v( {a:{b:1}}, {$rename:{'a.b':'d'}}, {a:{},d:1} ); +v( {a:[5]}, {$rename:{a:'b'}}, {b:[5]} ); +v( {aa:[5]}, {$rename:{aa:'b'}}, {b:[5]} ); +v( {'0':1}, {$rename:{'0':'5'}}, {'5':1} ); +v( {a:1,b:2}, {$rename:{a:'c'},$set:{b:5}}, {b:5,c:1} ); +v( {aa:1,b:2}, {$rename:{aa:'c'},$set:{b:5}}, {b:5,c:1} ); +v( {a:1,b:2}, {$rename:{z:'c'},$set:{b:5}}, {a:1,b:5} ); +v( {aa:1,b:2}, {$rename:{z:'c'},$set:{b:5}}, {aa:1,b:5} ); + +// (formerly) rewriting single field +v( {a:{z:1,b:1}}, {$rename:{'a.b':'a.c'}}, {a:{c:1,z:1}} ); +v( {a:{z:1,tomato:1}}, {$rename:{'a.tomato':'a.potato'}}, {a:{potato:1,z:1}} ); +v( {a:{z:1,b:1,c:1}}, {$rename:{'a.b':'a.c'}}, {a:{c:1,z:1}} ); +v( {a:{z:1,tomato:1,potato:1}}, {$rename:{'a.tomato':'a.potato'}}, {a:{potato:1,z:1}} ); +v( {a:{z:1,b:1}}, {$rename:{'a.b':'a.cc'}}, {a:{cc:1,z:1}} ); +v( {a:{z:1,b:1,c:1}}, {$rename:{'a.b':'aa.c'}}, {a:{c:1,z:1},aa:{c:1}} ); + +// invalid target, but missing source +v( {a:1,c:4}, {$rename:{b:'c.d'}}, {a:1,c:4} ); + +// check index +t.drop(); +t.ensureIndex( {a:1} ); + +function l( start, mod, query, expected ) { + t.remove(); + t.save( start ); + t.update( {}, mod ); + assert( !db.getLastError() ); + var got = t.find( query ).hint( {a:1} ).next(); + delete got._id; + assert.eq( expected, got ); +} + +l( {a:1}, {$rename:{a:'b'}}, {a:null}, {b:1} ); +l( {a:1}, {$rename:{a:'bb'}}, {a:null}, {bb:1} ); +l( {b:1}, {$rename:{b:'a'}}, {a:1}, {a:1} ); +l( {bb:1}, {$rename:{bb:'a'}}, {a:1}, {a:1} ); diff --git a/jstests/repl/basic1.js b/jstests/repl/basic1.js index 701d71e..15fc983 100644 --- a/jstests/repl/basic1.js +++ b/jstests/repl/basic1.js @@ -60,7 +60,7 @@ r = function( key , v ){ correct = { a : 2 , b : 1 }; function checkMR( t ){ - var res = t.mapReduce( m , r ); + var res = t.mapReduce( m , r , "basic1_out" ); assert.eq( correct , res.convertToSingleObject() , "checkMR: " + tojson( t ) ); } @@ -68,7 +68,7 @@ function checkNumCollections( msg , diff ){ if ( ! 
diff ) diff = 0; var m = am.getCollectionNames(); var s = as.getCollectionNames(); - assert.eq( m.length + diff , s.length , "lengths bad \n" + tojson( m ) + "\n" + tojson( s ) ); + assert.eq( m.length + diff , s.length , msg + " lengths bad \n" + tojson( m ) + "\n" + tojson( s ) ); } checkNumCollections( "MR1" ); diff --git a/jstests/repl/block2.js b/jstests/repl/block2.js index 0e34758..f38a4e3 100644 --- a/jstests/repl/block2.js +++ b/jstests/repl/block2.js @@ -18,25 +18,26 @@ function check( msg ){ assert.eq( tm.count() , ts.count() , "check: " + msg ); } +function worked( w , wtimeout ){ + return dbm.getLastError( w , wtimeout ) == null; +} + check( "A" ); tm.save( { x : 1 } ); -dbm.getLastError( 2 ); -check( "B" ); +assert( worked( 2 ) , "B" ); tm.save( { x : 2 } ); -dbm.getLastError( 2 , 500 ); -check( "C" ); +assert( worked( 2 , 500 ) , "C" ) rt.stop( false ); tm.save( { x : 3 } ) assert.eq( 3 , tm.count() , "D1" ); -assert.throws( function(){ dbm.getLastError( 2 , 500 ); } , "D2" ) +assert( ! worked( 2 , 500 ) , "D2" ) s = rt.start( false ) setup(); -dbm.getLastError( 2 , 30000 ) -check( "D3" ) +assert( worked( 2 , 30000 ) , "E" ) rt.stop(); diff --git a/jstests/repl/mastermaster1.js b/jstests/repl/mastermaster1.js index 9f9334b..4932d5a 100644 --- a/jstests/repl/mastermaster1.js +++ b/jstests/repl/mastermaster1.js @@ -6,6 +6,8 @@ ports = allocatePorts( 2 ) left = startMongodTest( ports[0] , "mastermaster1left" , false , { master : "" , slave : "" , source : "127.0.0.1:" + ports[1] } ) right = startMongodTest( ports[1] , "mastermaster1left" , false , { master : "" , slave : "" , source : "127.0.0.1:" + ports[0] } ) +print( "check 1" ) + x = left.getDB( "admin" ).runCommand( "ismaster" ) assert( x.ismaster , "left: " + tojson( x ) ) @@ -15,6 +17,8 @@ assert( x.ismaster , "right: " + tojson( x ) ) ldb = left.getDB( "test" ) rdb = right.getDB( "test" ) +print( "check 2" ) + ldb.foo.insert( { _id : 1 , x : "eliot" } ) var result = ldb.runCommand( { getlasterror : 1 , w : 2 , wtimeout : 20000 } ); printjson(result); @@ -27,12 +31,12 @@ print( "check 3" ) assert.eq( 2 , ldb.foo.count() , "B1" ) assert.eq( 2 , rdb.foo.count() , "B2" ) - +print( "going to stop everything" ) for ( var i=0; i 0" ); + rt.stop(); } diff --git a/jstests/repl/repl11.js b/jstests/repl/repl11.js index c5c63b3..aef9872 100644 --- a/jstests/repl/repl11.js +++ b/jstests/repl/repl11.js @@ -35,6 +35,10 @@ doTest = function( signal ) { sa = s.getDB( baseName ).a; assert.soon( function() { return 1 == sa.count(); } ); + s.getDB( "local" ).auth( "repl", "foo" ); + assert.commandWorked( s.getDB( "admin" )._adminCommand( {serverStatus:1,repl:1} ) ); + assert.commandWorked( s.getDB( "admin" )._adminCommand( {serverStatus:1,repl:2} ) ); + rt.stop( false, signal ); ma.save( {} ); diff --git a/jstests/repl/repl2.js b/jstests/repl/repl2.js index c9fe6b9..42b0caf 100644 --- a/jstests/repl/repl2.js +++ b/jstests/repl/repl2.js @@ -31,6 +31,8 @@ doTest = function( signal ) { assert.soon( function() { return 1 == s.getDB( "admin" ).runCommand( { "resync" : 1 } ).ok; } ); soonCount( 1001 ); + assert.automsg( "m.getDB( 'local' ).getCollection( 'oplog.$main' ).stats().size > 0" ); + as = s.getDB("foo").a assert.eq( 1, as.find( { i: 0 } ).count() ); assert.eq( 1, as.find( { i: 999 } ).count() ); diff --git a/jstests/repl/snapshot3.js b/jstests/repl/snapshot3.js index d8d268d..02955e5 100644 --- a/jstests/repl/snapshot3.js +++ b/jstests/repl/snapshot3.js @@ -47,7 +47,7 @@ assert.eq( 500, rp.slave().getDB( baseName )[ baseName ].count() 
); rp.master().getDB( baseName )[ baseName ].save( {i:500} ); assert.soon( function() { return 501 == rp.slave().getDB( baseName )[ baseName ].count(); } ); -assert( !rawMongoProgramOutput().match( /resync/ ) ); -assert( !rawMongoProgramOutput().match( /SyncException/ ) ); +assert( !rawMongoProgramOutput().match( new RegExp( "resync.*" + baseName + ".*\n" ) ) , "last1" ); +assert( !rawMongoProgramOutput().match( /SyncException/ ) , "last2" ); print("snapshot3.js finishes"); diff --git a/jstests/replsets/auth1.js b/jstests/replsets/auth1.js new file mode 100644 index 0000000..4945869 --- /dev/null +++ b/jstests/replsets/auth1.js @@ -0,0 +1,184 @@ +// check replica set authentication + +load("jstests/replsets/rslib.js"); + +var name = "rs_auth1"; +var port = allocatePorts(4); +var path = "jstests/replsets/"; + + +print("reset permissions"); +run("chmod", "644", path+"key1"); +run("chmod", "644", path+"key2"); + + +print("try starting mongod"); +var m = runMongoProgram( "mongod", "--keyFile", path+"key1", "--port", port[0], "--dbpath", "/data/db/" + name); + + +print("should fail with wrong permissions"); +assert.eq(m, 2, "mongod should exit w/ 2: permissions too open"); +stopMongod(port[0]); + + +print("change permissions on #1 & #2"); +run("chmod", "600", path+"key1"); +run("chmod", "600", path+"key2"); + + +print("add a user to server0: foo"); +m = startMongodTest( port[0], name+"-0", 0 ); +m.getDB("admin").addUser("foo", "bar"); +m.getDB("test").addUser("bar", "baz"); +print("make sure user is written before shutting down"); +m.getDB("test").getLastError(); +stopMongod(port[0]); + + +print("start up rs"); +var rs = new ReplSetTest({"name" : name, "nodes" : 3, "startPort" : port[0]}); +m = rs.restart(0, {"keyFile" : path+"key1"}); +var s = rs.start(1, {"keyFile" : path+"key1"}); +var s2 = rs.start(2, {"keyFile" : path+"key1"}); + +var result = m.getDB("admin").auth("foo", "bar"); +assert.eq(result, 1, "login failed"); +result = m.getDB("admin").runCommand({replSetInitiate : rs.getReplSetConfig()}); +assert.eq(result.ok, 1, "couldn't initiate: "+tojson(result)); + +var master = rs.getMaster().getDB("test"); +wait(function() { + var status = master.adminCommand({replSetGetStatus:1}); + return status.members && status.members[1].state == 2 && status.members[2].state == 2; + }); + +master.foo.insert({x:1}); +master.runCommand({getlasterror:1, w:3, wtimeout:60000}); + + +print("try some legal and illegal reads"); +var r = master.foo.findOne(); +assert.eq(r.x, 1); + +s.setSlaveOk(); +slave = s.getDB("test"); + +function doQueryOn(p) { + var err = {}; + try { + r = p.foo.findOne(); + } + catch(e) { + if (typeof(JSON) != "undefined") { + err = JSON.parse(e.substring(6)); + } + else if (e.indexOf("10057") > 0) { + err.code = 10057; + } + } + assert.eq(err.code, 10057); +}; + +doQueryOn(slave); +master.adminCommand({logout:1}); +doQueryOn(master); + + +result = slave.auth("bar", "baz"); +assert.eq(result, 1); + +r = slave.foo.findOne(); +assert.eq(r.x, 1); + + +print("add some data"); +master.auth("bar", "baz"); +for (var i=0; i<1000; i++) { + master.foo.insert({x:i, foo : "bar"}); +} +master.runCommand({getlasterror:1, w:3, wtimeout:60000}); + + +print("fail over"); +rs.stop(0); + +wait(function() { + function getMaster(s) { + var result = s.getDB("admin").runCommand({isMaster: 1}); + printjson(result); + if (result.ismaster) { + master = s.getDB("test"); + return true; + } + return false; + } + + if (getMaster(s) || getMaster(s2)) { + return true; + } + return false; + }); + + +print("add some 
more data 1"); +master.auth("bar", "baz"); +for (var i=0; i<1000; i++) { + master.foo.insert({x:i, foo : "bar"}); +} +master.runCommand({getlasterror:1, w:3, wtimeout:60000}); + + +print("resync"); +rs.restart(0); + + +print("add some more data 2"); +for (var i=0; i<1000; i++) { + master.foo.insert({x:i, foo : "bar"}); +} +master.runCommand({getlasterror:1, w:3, wtimeout:60000}); + + +print("add member with wrong key"); +var conn = new MongodRunner(port[3], "/data/db/"+name+"-3", null, null, ["--replSet","rs_auth1","--rest","--oplogSize","2", "--keyFile", path+"key2"], {no_bind : true}); +conn.start(); + + +master.getSisterDB("admin").auth("foo", "bar"); +var config = master.getSisterDB("local").system.replset.findOne(); +config.members.push({_id : 3, host : getHostName()+":"+port[3]}); +config.version++; +try { + master.adminCommand({replSetReconfig:config}); +} +catch (e) { + print("error: "+e); +} +reconnect(master); +master.getSisterDB("admin").auth("foo", "bar"); + + +print("shouldn't ever sync"); +for (var i = 0; i<30; i++) { + print("iteration: " +i); + var results = master.adminCommand({replSetGetStatus:1}); + printjson(results); + assert(results.members[3].state != 2); + sleep(1000); +} + + +print("stop member"); +stopMongod(port[3]); + + +print("start back up with correct key"); +conn = new MongodRunner(port[3], "/data/db/"+name+"-3", null, null, ["--replSet","rs_auth1","--rest","--oplogSize","2", "--keyFile", path+"key1"], {no_bind : true}); +conn.start(); + +wait(function() { + var results = master.adminCommand({replSetGetStatus:1}); + printjson(results); + return results.members[3].state == 2; + }); + diff --git a/jstests/replsets/buildindexes.js b/jstests/replsets/buildindexes.js new file mode 100644 index 0000000..76de797 --- /dev/null +++ b/jstests/replsets/buildindexes.js @@ -0,0 +1,86 @@ +doTest = function( signal ) { + + var name = "buildIndexes"; + var host = getHostName(); + + var replTest = new ReplSetTest( {name: name, nodes: 3} ); + + var nodes = replTest.startSet(); + + var config = replTest.getReplSetConfig(); + config.members[2].priority = 0; + config.members[2].buildIndexes = false; + + replTest.initiate(config); + + var master = replTest.getMaster().getDB(name); + var slaveConns = replTest.liveNodes.slaves; + var slave = []; + for (var i in slaveConns) { + slaveConns[i].setSlaveOk(); + slave.push(slaveConns[i].getDB(name)); + } + replTest.awaitReplication(); + + print("creating an index on x"); + master.x.ensureIndex({y : 1}); + printjson(master.x.stats()); + + for (var i=0; i<100; i++) { + master.x.insert({x:1,y:"abc",c:1}); + } + + replTest.awaitReplication(); + + printjson(slave[0].runCommand({count: "x"})); + var ns = master.x+""; + print("namespace: "+ns); + + // can't query system.indexes from slave, so we'll look at coll.stats() + printjson(slave[0].adminCommand({replSetGetStatus:1})); + printjson(slave[0].getSisterDB("local").system.replset.findOne()); + printjson(master.stats()); + printjson(slave[0].stats()); + printjson(slave[1].stats()); + printjson(master.x.stats()); + printjson(slave[0].x.stats()); + printjson(slave[1].x.stats()); + print("sleeping"); + sleep(20000); + var indexes = slave[0].stats().indexes; + assert.eq(indexes, 2, 'number of indexes'); + + indexes = slave[1].stats().indexes; + assert.eq(indexes, 1); + + + indexes = slave[0].x.stats().indexSizes; + printjson(indexes); + + var count = 0; + for (var i in indexes) { + count++; + if (i == "_id_") { + continue; + } + print(i); + print(i.match(/y_/)); + assert(i.match(/y_/)); + } + + 
assert.eq(count, 2); + + indexes = slave[1].x.stats().indexSizes; + printjson(indexes); + + count = 0; + for (var i in indexes) { + count++; + } + + assert.eq(count, 1); + + replTest.stopSet(15); +} + +doTest(15); diff --git a/jstests/replsets/cloneDb.js b/jstests/replsets/cloneDb.js new file mode 100644 index 0000000..6d2d0f3 --- /dev/null +++ b/jstests/replsets/cloneDb.js @@ -0,0 +1,52 @@ +// Test for cloning a db from a replica set [SERVER-1643] -Tony + +load('jstests/libs/grid.js') + +doTest = function( signal ) { + + var N = 2000 + + // ~1KB string + var Text = '' + for (var i = 0; i < 40; i++) + Text += 'abcdefghijklmnopqrstuvwxyz' + + // Create replica set + var repset = new ReplicaSet ('testSet', 3) .begin() + var master = repset.getMaster() + var db1 = master.getDB('test') + + // Insert data + for (var i = 0; i < N; i++) { + db1['foo'].insert({x: i, text: Text}) + db1.getLastError(2) // wait to be copied to at least one secondary + } + + // Create single server + var solo = new Server ('singleTarget') + var soloConn = solo.begin() + var db2 = soloConn.getDB('test') + + // Clone db from replica set to single server + db2.cloneDatabase (repset.getURL()) + + // Confirm clone worked + assert.eq (Text, db2['foo'] .findOne({x: N-1}) ['text'], 'cloneDatabase failed (test1)') + + // Now test the reverse direction + db1 = master.getDB('test2') + db2 = soloConn.getDB('test2') + for (var i = 0; i < N; i++) { + db2['foo'].insert({x: i, text: Text}) + db2.getLastError() + } + db1.cloneDatabase (solo.host()) + assert.eq (Text, db2['foo'] .findOne({x: N-1}) ['text'], 'cloneDatabase failed (test2)') + + // Shut down replica set and single server + solo.end() + repset.stopSet( signal ) +} + +doTest( 15 ); +print("replsets/cloneDb.js SUCCESS"); diff --git a/jstests/replsets/config1.js b/jstests/replsets/config1.js new file mode 100644 index 0000000..748ce8f --- /dev/null +++ b/jstests/replsets/config1.js @@ -0,0 +1,21 @@ +doTest = function( signal ) { + var name = 'config1'; + + var replTest = new ReplSetTest( {name: name, nodes: 3} ); + var nodes = replTest.startSet(); + + var config = replTest.getReplSetConfig(); + config.settings = {"heartbeatSleep" : .5, heartbeatTimeout : .8}; + + replTest.initiate(config); + + // Call getMaster to return a reference to the node that's been + // elected master. + var master = replTest.getMaster(); + + config = master.getDB("local").system.replset.findOne(); + assert.eq(config.settings.heartbeatSleep, .5); + assert.eq(config.settings.heartbeatTimeout, .8); +}; + +doTest(15); diff --git a/jstests/replsets/fastsync.js b/jstests/replsets/fastsync.js new file mode 100644 index 0000000..d7c3905 --- /dev/null +++ b/jstests/replsets/fastsync.js @@ -0,0 +1,117 @@ +/* + * 1. insert 100000 objects + * 2. export to two dbpaths + * 3. add one node w/fastsync + * 4. check that we never get "errmsg" : "initial sync cloning db: whatever" + * 5. 
check writes are replicated + */ + +var w = 0; +var wait = function(f) { + w++; + var n = 0; + while (!f()) { + if( n % 4 == 0 ) + print("toostale.js waiting " + w); + if (++n == 4) { + print("" + f); + } + assert(n < 200, 'tried 200 times, giving up'); + sleep(1000); + } +} + +var reconnect = function(a) { + wait(function() { + try { + a.getDB("foo").bar.stats(); + return true; + } catch(e) { + print(e); + return false; + } + }); +}; + +ports = allocatePorts( 3 ); + +var basename = "jstests_fastsync"; +var basePath = "/data/db/" + basename; +var hostname = getHostName(); + +var pargs = new MongodRunner( ports[ 0 ], basePath + "-p", false, false, + ["--replSet", basename, "--oplogSize", 2], + {no_bind : true} ); +p = pargs.start(); + +var admin = p.getDB("admin"); +var foo = p.getDB("foo"); +var local = p.getDB("local"); + +var config = {_id : basename, members : [{_id : 0, host : hostname+":"+ports[0]}]}; +printjson(config); +var result = admin.runCommand({replSetInitiate : config}); +print("result:"); +printjson(result); + +var count = 0; +while (count < 10 && result.ok != 1) { + count++; + sleep(2000); + result = admin.runCommand({replSetInitiate : config}); +} + +assert(result.ok, tojson(result)); +assert.soon(function() { return admin.runCommand({isMaster:1}).ismaster; }); + +print("1"); +for (var i=0; i<100000; i++) { + foo.bar.insert({date : new Date(), x : i, str : "all the talk on the market"}); +} +print("total in foo: "+foo.bar.count()); + + +print("2"); +admin.runCommand( {fsync:1,lock:1} ); +copyDbpath( basePath + "-p", basePath + "-s" ); +admin.$cmd.sys.unlock.findOne(); + + +print("3"); +var sargs = new MongodRunner( ports[ 1 ], basePath + "-s", false, false, + ["--replSet", basename, "--fastsync", + "--oplogSize", 2], {no_bind : true} ); +var reuseData = true; +sargs.start(reuseData); + +config = local.system.replset.findOne(); +config.version++; +config.members.push({_id:1, host:hostname+":"+ports[1]}); + +result = admin.runCommand({replSetReconfig : config}); +assert(result.ok, "reconfig worked"); +reconnect(p); + +print("4"); +var status = admin.runCommand({replSetGetStatus : 1}); +var count = 0; +while (status.members[1].state != 2 && count < 200) { + print("not a secondary yet"); + if (count % 10 == 0) { + printjson(status); + } + assert(!status.members[1].errmsg || !status.members[1].errmsg.match("^initial sync cloning db")); + + sleep(1000); + + // disconnection could happen here + try { + status = admin.runCommand({replSetGetStatus : 1}); + } + catch (e) { + print(e); + } + count++; +} + +assert.eq(status.members[1].state, 2); diff --git a/jstests/replsets/getlasterror_w2.js b/jstests/replsets/getlasterror_w2.js new file mode 100644 index 0000000..795e667 --- /dev/null +++ b/jstests/replsets/getlasterror_w2.js @@ -0,0 +1,36 @@ +// BUG: [SERVER-1768] replica set getlasterror {w: 2} after 2000 +// inserts hangs while secondary servers log "replSet error RS102 too stale to catch up" every once in a while + +function newReplicaSet (name, numServers) { + var rs = new ReplSetTest({name: name, nodes: numServers}) + rs.startSet() + rs.initiate() + rs.awaitReplication() + return rs +} + +function go() { +var N = 2000 + +// ~1KB string +var Text = '' +for (var i = 0; i < 40; i++) + Text += 'abcdefghijklmnopqrstuvwxyz' + +// Create replica set of 3 servers +var repset = newReplicaSet('repset', 3) +var conn = repset.getMaster() +var db = conn.getDB('test') + +// Add data to it +for (var i = 0; i < N; i++) + db['foo'].insert({x: i, text: Text}) + +// wait to be copied to at 
least one secondary (BUG hangs here) +db.getLastError(2) + +print('getlasterror_w2.js SUCCESS') +} + +// turn off until fixed +//go(); diff --git a/jstests/replsets/groupAndMapReduce.js b/jstests/replsets/groupAndMapReduce.js new file mode 100644 index 0000000..539fe44 --- /dev/null +++ b/jstests/replsets/groupAndMapReduce.js @@ -0,0 +1,105 @@ +doTest = function( signal ) { + + // Test basic replica set functionality. + // -- Replication + // -- Failover + + // Replica set testing API + // Create a new replica set test. Specify set name and the number of nodes you want. + var replTest = new ReplSetTest( {name: 'testSet', nodes: 3} ); + + // call startSet() to start each mongod in the replica set + // this returns a list of nodes + var nodes = replTest.startSet(); + + // Call initiate() to send the replSetInitiate command + // This will wait for initiation + replTest.initiate(); + + // Call getMaster to return a reference to the node that's been + // elected master. + var master = replTest.getMaster(); + + // save some records + var len = 100 + for (var i = 0; i < len; ++i) { + master.getDB("foo").foo.save({a: i}); + } + + // This method will check the oplogs of the master + // and slaves in the set and wait until the change has replicated. + replTest.awaitReplication(); + print("Sleeping 10s for slaves to go to secondary state"); + sleep(10000); + + slaves = replTest.liveNodes.slaves; + assert( slaves.length == 2, "Expected 2 slaves but length was " + slaves.length ); + slaves.forEach(function(slave) { + // try to read from slave + slave.slaveOk = true; + var count = slave.getDB("foo").foo.count(); + printjson( count ); + assert.eq( len , count , "slave count wrong: " + slave ); + + print("Doing a findOne to verify we can get a row"); + var one = slave.getDB("foo").foo.findOne(); + printjson(one); + +// stats = slave.getDB("foo").adminCommand({replSetGetStatus:1}); +// printjson(stats); + + print("Calling group() with slaveOk=true, must succeed"); + slave.slaveOk = true; + count = slave.getDB("foo").foo.group({initial: {n:0}, reduce: function(obj,out){out.n++;}}); + printjson( count ); + assert.eq( len , count[0].n , "slave group count wrong: " + slave ); + + print("Calling group() with slaveOk=false, must fail"); + slave.slaveOk = false; + try { + count = slave.getDB("foo").foo.group({initial: {n:0}, reduce: function(obj,out){out.n++;}}); + assert(false, "group() succeeded with slaveOk=false"); + } catch (e) { + print("Received exception: " + e); + } + + print("Calling inline mr() with slaveOk=true, must succeed"); + slave.slaveOk = true; + map = function() { emit(this.a, 1); }; + reduce = function(key, vals) { var sum = 0; for (var i = 0; i < vals.length; ++i) { sum += vals[i]; } return sum; }; + slave.getDB("foo").foo.mapReduce(map, reduce, {out: { "inline" : 1}}); + + print("Calling mr() to collection with slaveOk=true, must fail"); + try { + slave.getDB("foo").foo.mapReduce(map, reduce, "output"); + assert(false, "mapReduce() to collection succeeded on slave"); + } catch (e) { + print("Received exception: " + e); + } + + print("Calling inline mr() with slaveOk=false, must fail"); + slave.slaveOk = false; + try { + slave.getDB("foo").foo.mapReduce(map, reduce, {out: { "inline" : 1}}); + assert(false, "mapReduce() succeeded on slave with slaveOk=false"); + } catch (e) { + print("Received exception: " + e); + } + print("Calling mr() to collection with slaveOk=false, must fail"); + try { + slave.getDB("foo").foo.mapReduce(map, reduce, "output"); + assert(false, "mapReduce() to collection 
succeeded on slave with slaveOk=false"); + } catch (e) { + print("Received exception: " + e); + } + + }); + + + + // Shut down the set and finish the test. + replTest.stopSet( signal ); +} + +doTest( 15 ); +print("SUCCESS"); diff --git a/jstests/replsets/initial_sync1.js b/jstests/replsets/initial_sync1.js new file mode 100644 index 0000000..ee30b4e --- /dev/null +++ b/jstests/replsets/initial_sync1.js @@ -0,0 +1,129 @@ +/** + * Test killing the secondary during initially sync + * + * 1. Bring up set + * 2. Insert some data + * 4. Make sure synced + * 5. Freeze #2 + * 6. Bring up #3 + * 7. Kill #2 in the middle of syncing + * 8. Eventually it should become a secondary + * 9. Bring #2 back up + * 10. Insert some stuff + * 11. Everyone happy eventually + */ + +load("jstests/replsets/rslib.js"); +var basename = "jstests_initsync1"; + + +print("1. Bring up set"); +var replTest = new ReplSetTest( {name: basename, nodes: 2} ); +var conns = replTest.startSet(); +replTest.initiate(); + +var master = replTest.getMaster(); +var foo = master.getDB("foo"); +var admin = master.getDB("admin"); + +var slave1 = replTest.liveNodes.slaves[0]; +var admin_s1 = slave1.getDB("admin"); +var local_s1 = slave1.getDB("local"); + +print("2. Insert some data"); +for (var i=0; i<10000; i++) { + foo.bar.insert({date : new Date(), x : i, str : "all the talk on the market"}); +} +print("total in foo: "+foo.bar.count()); + + +print("4. Make sure synced"); +replTest.awaitReplication(); + + +print("5. Freeze #2"); +admin_s1.runCommand({replSetFreeze:999999}); + + +print("6. Bring up #3"); +var ports = allocatePorts( 3 ); +var basePath = "/data/db/" + basename; +var hostname = getHostName(); + +var sargs = new MongodRunner( ports[ 2 ], basePath, false, false, + ["--replSet", basename, "--oplogSize", 2], + {no_bind : true} ); +var slave2 = sargs.start(); +var local_s2 = slave2.getDB("local"); +var admin_s2 = slave2.getDB("admin"); + +var config = replTest.getReplSetConfig(); +config.version = 2; +config.members.push({_id:2, host:hostname+":"+ports[2]}); + +try { + admin.runCommand({replSetReconfig:config}); +} +catch(e) { + print(e); +} +reconnect(slave1); +reconnect(slave2); + +wait(function() { + var config2 = local_s1.system.replset.findOne(); + var config3 = local_s2.system.replset.findOne(); + + printjson(config2); + printjson(config3); + + return config2.version == config.version && + (config3 && config3.version == config.version); + }); + +wait(function() { + var status = admin_s2.runCommand({replSetGetStatus:1}); + printjson(status); + return status.members && + (status.members[2].state == 3 || status.members[2].state == 2); + }); + + +print("7. Kill #2 in the middle of syncing"); +replTest.stop(1); + + +print("8. Eventually it should become a secondary"); +print("if initial sync has started, this will cause it to fail and sleep for 5 minutes"); +sleep(5*60*1000); +wait(function() { + var status = admin_s2.runCommand({replSetGetStatus:1}); + occasionally(function() { printjson(status); }); + return status.members[2].state == 2; + }); + + +print("9. Bring #2 back up"); +replTest.start(1, {}, true); +reconnect(slave1); +wait(function() { + var status = admin_s1.runCommand({replSetGetStatus:1}); + printjson(status); + return status.ok == 1 && status.members && + status.members[1].state == 2 || status.members[1].state == 1; + }); + + +/** + * TODO: this fails on buildbot + * see SERVER-2550 +print("10. 
Insert some stuff"); +master = replTest.getMaster(); +for (var i=0; i<10000; i++) { + foo.bar.insert({date : new Date(), x : i, str : "all the talk on the market"}); +} + + +print("11. Everyone happy eventually"); +replTest.awaitReplication(); +*/ diff --git a/jstests/replsets/initial_sync2.js b/jstests/replsets/initial_sync2.js new file mode 100644 index 0000000..3ad3972 --- /dev/null +++ b/jstests/replsets/initial_sync2.js @@ -0,0 +1,179 @@ +/** + * Test killing the primary during initial sync + * and don't allow the other secondary to become primary + * + * 1. Bring up set + * 2. Insert some data + * 4. Make sure synced + * 5. Freeze #2 + * 6. Bring up #3 + * 7. Kill #1 in the middle of syncing + * 8. Check that #3 makes it into secondary state + * 9. Bring #1 back up + * 10. Initial sync should succeed + * 11. Insert some stuff + * 12. Everyone happy eventually + */ + +load("jstests/replsets/rslib.js"); +var basename = "jstests_initsync2"; + +var doTest = function() { + +print("1. Bring up set"); +var replTest = new ReplSetTest( {name: basename, nodes: 2} ); +var conns = replTest.startSet(); +replTest.initiate(); + +var master = replTest.getMaster(); +var origMaster = master; +var foo = master.getDB("foo"); +var admin = master.getDB("admin"); + +var slave1 = replTest.liveNodes.slaves[0]; +var admin_s1 = slave1.getDB("admin"); +var local_s1 = slave1.getDB("local"); + +print("2. Insert some data"); +for (var i=0; i<10000; i++) { + foo.bar.insert({date : new Date(), x : i, str : "all the talk on the market"}); +} +print("total in foo: "+foo.bar.count()); + + +print("4. Make sure synced"); +replTest.awaitReplication(); + + +print("5. Freeze #2"); +admin_s1.runCommand({replSetFreeze:999999}); + + +print("6. Bring up #3"); +var ports = allocatePorts( 3 ); +var basePath = "/data/db/" + basename; +var hostname = getHostName(); + +var sargs = new MongodRunner( ports[ 2 ], basePath, false, false, + ["--replSet", basename, "--oplogSize", 2], + {no_bind : true} ); +var slave2 = sargs.start(); +var local_s2 = slave2.getDB("local"); +var admin_s2 = slave2.getDB("admin"); + +var config = replTest.getReplSetConfig(); +config.version = 2; +config.members.push({_id:2, host:hostname+":"+ports[2]}); + +try { + admin.runCommand({replSetReconfig:config}); +} +catch(e) { + print(e); +} +reconnect(slave1); +reconnect(slave2); + +wait(function() { + var config2 = local_s1.system.replset.findOne(); + var config3 = local_s2.system.replset.findOne(); + + printjson(config2); + printjson(config3); + + return config2.version == config.version && + (config3 && config3.version == config.version); + }); +admin_s2.runCommand({replSetFreeze:999999}); + + +wait(function() { + var status = admin_s2.runCommand({replSetGetStatus:1}); + printjson(status); + return status.members && + (status.members[2].state == 3 || status.members[2].state == 2); + }); + + +print("7. Kill #1 in the middle of syncing"); +replTest.stop(0); + + +print("8. Check that #3 makes it into secondary state"); +wait(function() { + var status = admin_s2.runCommand({replSetGetStatus:1}); + occasionally(function() { printjson(status);}, 10); + if (status.members[2].state == 2 || status.members[2].state == 1) { + return true; + } + return false; + }); + + +print("9. Bring #1 back up"); +replTest.start(0, {}, true); +reconnect(master); +wait(function() { + var status = admin.runCommand({replSetGetStatus:1}); + printjson(status); + return status.members && + (status.members[0].state == 1 || status.members[0].state == 2); + }); + + +print("10. 
Initial sync should succeed"); +wait(function() { + var status = admin_s2.runCommand({replSetGetStatus:1}); + printjson(status); + return status.members && + status.members[2].state == 2 || status.members[2].state == 1; + }); + + +print("11. Insert some stuff"); +// ReplSetTest doesn't find master correctly unless all nodes are defined by +// ReplSetTest +for (var i = 0; i<30; i++) { + var result = admin.runCommand({isMaster : 1}); + if (result.ismaster) { + break; + } + else if (result.primary) { + master = connect(result.primary+"/admin").getMongo(); + break; + } + sleep(1000); +} + +for (var i=0; i<10000; i++) { + foo.bar.insert({date : new Date(), x : i, str : "all the talk on the market"}); +} + + +print("12. Everyone happy eventually"); +// if 3 is master... +if (master+"" != origMaster+"") { + print("3 is master"); + slave2 = origMaster; +} + +wait(function() { + var op1 = getLatestOp(master); + var op2 = getLatestOp(slave1); + var op3 = getLatestOp(slave2); + + occasionally(function() { + print("latest ops:"); + printjson(op1); + printjson(op2); + printjson(op3); + }); + + return friendlyEqual(getLatestOp(master), getLatestOp(slave1)) && + friendlyEqual(getLatestOp(master), getLatestOp(slave2)); + }); + +replTest.stopSet(); +}; + +doTest(); diff --git a/jstests/replsets/initial_sync3.js b/jstests/replsets/initial_sync3.js new file mode 100644 index 0000000..471aa16 --- /dev/null +++ b/jstests/replsets/initial_sync3.js @@ -0,0 +1,87 @@ +/* test initial sync options + * + * {state : 1} + * {state : 2} + * {name : host+":"+port} + * {_id : 2} + * {optime : now} + * {optime : 1970} + */ + +load("jstests/replsets/rslib.js"); +var name = "initialsync3"; +var host = getHostName(); +var port = allocatePorts(7); + +print("Start set with three nodes"); +var replTest = new ReplSetTest( {name: name, nodes: 7} ); +var nodes = replTest.startSet(); +replTest.initiate({ + _id : name, + members : [ + {_id:0, host : host+":"+port[0]}, + {_id:1, host : host+":"+port[1], initialSync : {state : 1}}, + {_id:2, host : host+":"+port[2], initialSync : {state : 2}}, + {_id:3, host : host+":"+port[3], initialSync : {name : host+":"+port[2]}}, + {_id:4, host : host+":"+port[4], initialSync : {_id : 2}}, + {_id:5, host : host+":"+port[5], initialSync : {optime : new Date()}}, + {_id:6, host : host+":"+port[6], initialSync : {optime : new Date(0)}} + ]}); + +var master = replTest.getMaster(); + +print("Initial sync"); +master.getDB("foo").bar.baz.insert({x:1}); + +print("Make sure everyone's secondary"); +wait(function() { + var status = master.getDB("admin").runCommand({replSetGetStatus:1}); + occasionally(function() { + printjson(status); + }); + + if (!status.members) { + return false; + } + + for (i=0; i<7; i++) { + if (status.members[i].state != 1 && status.members[i].state != 2) { + return false; + } + } + return true; + + }); + +replTest.awaitReplication(); + +replTest.stopSet(); + +print("reconfig"); + +var rs2 = new ReplSetTest( {name: 'reconfig-isync3', nodes: 3} ); +rs2.startSet(); +rs2.initiate(); + +master = rs2.getMaster(); +var config = master.getDB("local").system.replset.findOne(); +config.version++; +config.members[0].initialSync = {state : 2}; +config.members[1].initialSync = {state : 1}; +try { + master.getDB("admin").runCommand({replSetReconfig : config}); +} +catch(e) { + print("trying to reconfigure: "+e); +} + +master = rs2.getMaster(); +config = master.getDB("local").system.replset.findOne(); + +assert(typeof(config.members[0].initialSync) == "object"); 
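// Editor's note, not part of the upstream 1.8.0 patch: elsewhere in these tests member
// state 1 is primary and state 2 is secondary, so an initialSync spec of {state : N}
// pins the sync source by member state, while the {name : host}, {_id : n} and
// {optime : date} forms used in the initiate() call above pin it by host, member id,
// or optime.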
+assert.eq(config.members[0].initialSync.state, 2); +assert.eq(config.members[1].initialSync.state, 1); + +rs2.stopSet(); + +print("initialSync3 success!"); diff --git a/jstests/replsets/ismaster1.js b/jstests/replsets/ismaster1.js new file mode 100644 index 0000000..22865e5 --- /dev/null +++ b/jstests/replsets/ismaster1.js @@ -0,0 +1,36 @@ +/** + * 1. Check passive field in isMaster + */ + +load("jstests/replsets/rslib.js"); + +var name = "ismaster"; +var host = getHostName(); + +var replTest = new ReplSetTest( {name: name, nodes: 3} ); + +var nodes = replTest.startSet(); + +var config = replTest.getReplSetConfig(); +config.members[1].priority = 0; +config.members[2].priority = 0; + +replTest.initiate(config); + +var master = replTest.getMaster(); +wait(function() { + var result = master.getDB("admin").runCommand({replSetGetStatus:1}); + return result.members && result.members[0].state == 1 && + result.members[1].state == 2 && result.members[2].state == 2; + }); + +var result = master.getDB("admin").runCommand({isMaster:1}); +assert(!('passive' in result), tojson(result)); + +result = replTest.liveNodes.slaves[0].getDB("admin").runCommand({isMaster:1}); +assert('passive' in result, tojson(result)); + +result = replTest.liveNodes.slaves[1].getDB("admin").runCommand({isMaster:1}); +assert('passive' in result, tojson(result)); + +replTest.stopSet(); diff --git a/jstests/replsets/key1 b/jstests/replsets/key1 new file mode 100644 index 0000000..b5c19e4 --- /dev/null +++ b/jstests/replsets/key1 @@ -0,0 +1 @@ +foop de doop diff --git a/jstests/replsets/key2 b/jstests/replsets/key2 new file mode 100644 index 0000000..cbde821 --- /dev/null +++ b/jstests/replsets/key2 @@ -0,0 +1 @@ +other key diff --git a/jstests/replsets/remove1.js b/jstests/replsets/remove1.js new file mode 100644 index 0000000..ebd17d6 --- /dev/null +++ b/jstests/replsets/remove1.js @@ -0,0 +1,132 @@ +/* test removing a node from a replica set + * + * Start set with three nodes + * Initial sync + * Remove slave1 + * Remove slave2 + * Bring slave1 back up + * Bring slave2 back up + * Add them back as slave + * Make sure everyone's secondary + */ + +load("jstests/replsets/rslib.js"); +var name = "removeNodes"; +var host = getHostName(); + + +print("Start set with three nodes"); +var replTest = new ReplSetTest( {name: name, nodes: 3} ); +var nodes = replTest.startSet(); +replTest.initiate(); +var master = replTest.getMaster(); + + +print("Initial sync"); +master.getDB("foo").bar.baz.insert({x:1}); + +replTest.awaitReplication(); + + +print("Remove slave2"); +var config = replTest.getReplSetConfig(); + +config.members.pop(); +config.version = 2; +try { + master.getDB("admin").runCommand({replSetReconfig:config}); +} +catch(e) { + print(e); +} +reconnect(master); + + +print("Remove slave1"); +config.members.pop(); +config.version = 3; +try { + master.getDB("admin").runCommand({replSetReconfig:config}); +} +catch(e) { + print(e); +} +reconnect(master); + +print("sleeping 1"); +sleep(10000); +// these are already down, but this clears their ports from memory so that they +// can be restarted later +stopMongod(replTest.getPort(1)); +stopMongod(replTest.getPort(2)); + + +print("Bring slave1 back up"); +var paths = [ replTest.getPath(1), replTest.getPath(2) ]; +var ports = allocatePorts(2, replTest.getPort(2)+1); +var args = ["mongod", "--port", ports[0], "--dbpath", paths[0], "--noprealloc", "--smallfiles", "--rest"]; +var conn = startMongoProgram.apply( null, args ); +conn.getDB("local").system.replset.remove(); 
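// Editor's note, not part of the upstream 1.8.0 patch: wiping local.system.replset
// makes the restarted node forget the config it was removed under, so it can be
// re-added below with the bumped (version 4) config. A hypothetical sanity check:
assert.eq(0, conn.getDB("local").system.replset.count(), "old replset config should be gone");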
+printjson(conn.getDB("local").runCommand({getlasterror:1})); +print(conn); +print("sleeping 2"); +sleep(10000); +stopMongod(ports[0]); + +replTest.restart(1); + + +print("Bring slave2 back up"); +args[2] = ports[1]; +args[4] = paths[1]; +conn = startMongoProgram.apply( null, args ); +conn.getDB("local").system.replset.remove(); +print("path: "+paths[1]); +print("sleeping 3"); +sleep(10000); +stopMongod(ports[1]); + +replTest.restart(2); +sleep(10000); + + +print("Add them back as slaves"); +config.members.push({_id:1, host : host+":"+replTest.getPort(1)}); +config.members.push({_id:2, host : host+":"+replTest.getPort(2)}); +config.version = 4; +wait(function() { + try { + master.getDB("admin").runCommand({replSetReconfig:config}); + } + catch(e) { + print(e); + } + reconnect(master); + + master.setSlaveOk(); + var newConfig = master.getDB("local").system.replset.findOne(); + return newConfig.version == 4; + }); + + +print("Make sure everyone's secondary"); +wait(function() { + var status = master.getDB("admin").runCommand({replSetGetStatus:1}); + occasionally(function() { + printjson(status); + }); + + if (!status.members || status.members.length != 3) { + return false; + } + + for (var i = 0; i<3; i++) { + if (status.members[i].state != 1 && status.members[i].state != 2) { + return false; + } + } + return true; + }); + +replTest.stopSet(); + diff --git a/jstests/replsets/replset2.js b/jstests/replsets/replset2.js index f18b467..4849620 100644 --- a/jstests/replsets/replset2.js +++ b/jstests/replsets/replset2.js @@ -1,126 +1,126 @@ -print("\n\nreplset2.js BEGIN"); - -doTest = function (signal) { - - // FAILING TEST - // See below: - - // Test replication with getLastError - - // Replica set testing API - // Create a new replica set test. Specify set name and the number of nodes you want. - var replTest = new ReplSetTest({ name: 'testSet', nodes: 3, oplogSize: 5 }); - - // call startSet() to start each mongod in the replica set - // this returns a list of nodes - var nodes = replTest.startSet(); - - // Call initiate() to send the replSetInitiate command - // This will wait for initiation - replTest.initiate(); - - var testDB = "repl-test"; - - // Call getMaster to return a reference to the node that's been - // elected master. 
- var master = replTest.getMaster(); - - // Wait for replication to a single node - master.getDB(testDB).bar.insert({ n: 1 }); - - // Wait for initial sync - replTest.awaitReplication(); - - var slaves = replTest.liveNodes.slaves; - slaves.forEach(function (slave) { slave.setSlaveOk(); }); - - var failed = false; - var callGetLastError = function (w, timeout, db) { - try { - var result = master.getDB(db).getLastErrorObj(w, timeout); - print("replset2.js getLastError result: " + tojson(result)); - if (result['ok'] != 1) { - print("replset2.js FAILURE getlasterror not ok"); - failed = true; - } - } - catch (e) { - print("\nreplset2.js exception in getLastError: " + e + '\n'); - throw e; - } - } - - // Test getlasterror with multiple inserts - // TEST FAILS HEREg - print("\n\nreplset2.js **** Try inserting a multiple records -- first insert ****") - - printjson(master.getDB("admin").runCommand("replSetGetStatus")); - - master.getDB(testDB).foo.insert({ n: 1 }); - master.getDB(testDB).foo.insert({ n: 2 }); - master.getDB(testDB).foo.insert({ n: 3 }); - - print("\nreplset2.js **** TEMP 1 ****") - - printjson(master.getDB("admin").runCommand("replSetGetStatus")); - - callGetLastError(3, 25000, testDB); - - print("replset2.js **** TEMP 1a ****") - - m1 = master.getDB(testDB).foo.findOne({ n: 1 }); - printjson(m1); - assert(m1['n'] == 1, "replset2.js Failed to save to master on multiple inserts"); - - print("replset2.js **** TEMP 1b ****") - - var s0 = slaves[0].getDB(testDB).foo.findOne({ n: 1 }); - assert(s0['n'] == 1, "replset2.js Failed to replicate to slave 0 on multiple inserts"); - - var s1 = slaves[1].getDB(testDB).foo.findOne({ n: 1 }); - assert(s1['n'] == 1, "replset2.js Failed to replicate to slave 1 on multiple inserts"); - - // Test getlasterror with a simple insert - print("replset2.js **** Try inserting a single record ****") - master.getDB(testDB).dropDatabase(); - master.getDB(testDB).foo.insert({ n: 1 }); - callGetLastError(3, 10000, testDB); - - m1 = master.getDB(testDB).foo.findOne({ n: 1 }); - printjson(m1); - assert(m1['n'] == 1, "replset2.js Failed to save to master"); - - s0 = slaves[0].getDB(testDB).foo.findOne({ n: 1 }); - assert(s0['n'] == 1, "replset2.js Failed to replicate to slave 0"); - - s1 = slaves[1].getDB(testDB).foo.findOne({ n: 1 }); - assert(s1['n'] == 1, "replset2.js Failed to replicate to slave 1"); - - // Test getlasterror with large insert - print("replset2.js **** Try inserting many records ****") +print("\n\nreplset2.js BEGIN"); + +doTest = function (signal) { + + // FAILING TEST + // See below: + + // Test replication with getLastError + + // Replica set testing API + // Create a new replica set test. Specify set name and the number of nodes you want. + var replTest = new ReplSetTest({ name: 'testSet', nodes: 3, oplogSize: 5 }); + + // call startSet() to start each mongod in the replica set + // this returns a list of nodes + var nodes = replTest.startSet(); + + // Call initiate() to send the replSetInitiate command + // This will wait for initiation + replTest.initiate(); + + var testDB = "repl-test"; + + // Call getMaster to return a reference to the node that's been + // elected master. 
+ var master = replTest.getMaster(); + + // Wait for replication to a single node + master.getDB(testDB).bar.insert({ n: 1 }); + + // Wait for initial sync + replTest.awaitReplication(); + + var slaves = replTest.liveNodes.slaves; + slaves.forEach(function (slave) { slave.setSlaveOk(); }); + + var failed = false; + var callGetLastError = function (w, timeout, db) { + try { + var result = master.getDB(db).getLastErrorObj(w, timeout); + print("replset2.js getLastError result: " + tojson(result)); + if (result['ok'] != 1) { + print("replset2.js FAILURE getlasterror not ok"); + failed = true; + } + } + catch (e) { + print("\nreplset2.js exception in getLastError: " + e + '\n'); + throw e; + } + } + + // Test getlasterror with multiple inserts + // TEST FAILS HEREg + print("\n\nreplset2.js **** Try inserting a multiple records -- first insert ****") + + printjson(master.getDB("admin").runCommand("replSetGetStatus")); + + master.getDB(testDB).foo.insert({ n: 1 }); + master.getDB(testDB).foo.insert({ n: 2 }); + master.getDB(testDB).foo.insert({ n: 3 }); + + print("\nreplset2.js **** TEMP 1 ****") + + printjson(master.getDB("admin").runCommand("replSetGetStatus")); + + callGetLastError(3, 25000, testDB); + + print("replset2.js **** TEMP 1a ****") + + m1 = master.getDB(testDB).foo.findOne({ n: 1 }); + printjson(m1); + assert(m1['n'] == 1, "replset2.js Failed to save to master on multiple inserts"); + + print("replset2.js **** TEMP 1b ****") + + var s0 = slaves[0].getDB(testDB).foo.findOne({ n: 1 }); + assert(s0['n'] == 1, "replset2.js Failed to replicate to slave 0 on multiple inserts"); + + var s1 = slaves[1].getDB(testDB).foo.findOne({ n: 1 }); + assert(s1['n'] == 1, "replset2.js Failed to replicate to slave 1 on multiple inserts"); + + // Test getlasterror with a simple insert + print("replset2.js **** Try inserting a single record ****") + master.getDB(testDB).dropDatabase(); + master.getDB(testDB).foo.insert({ n: 1 }); + callGetLastError(3, 10000, testDB); + + m1 = master.getDB(testDB).foo.findOne({ n: 1 }); + printjson(m1); + assert(m1['n'] == 1, "replset2.js Failed to save to master"); + + s0 = slaves[0].getDB(testDB).foo.findOne({ n: 1 }); + assert(s0['n'] == 1, "replset2.js Failed to replicate to slave 0"); + + s1 = slaves[1].getDB(testDB).foo.findOne({ n: 1 }); + assert(s1['n'] == 1, "replset2.js Failed to replicate to slave 1"); + + // Test getlasterror with large insert + print("replset2.js **** Try inserting many records ****") try { - bigData = new Array(2000).toString() - for (var n = 0; n < 1000; n++) { - master.getDB(testDB).baz.insert({ n: n, data: bigData }); - } - callGetLastError(3, 60000, testDB); - - print("replset2.js **** V1 ") - - var verifyReplication = function (nodeName, collection) { - data = collection.findOne({ n: 1 }); - assert(data['n'] == 1, "replset2.js Failed to save to " + nodeName); - data = collection.findOne({ n: 999 }); - assert(data['n'] == 999, "replset2.js Failed to save to " + nodeName); - } - - print("replset2.js **** V2 ") - - verifyReplication("master", master.getDB(testDB).baz); - verifyReplication("slave 0", slaves[0].getDB(testDB).baz); - verifyReplication("slave 1", slaves[1].getDB(testDB).baz); - - assert(failed == false, "replset2.js Replication with getLastError failed. 
See errors."); + bigData = new Array(2000).toString() + for (var n = 0; n < 1000; n++) { + master.getDB(testDB).baz.insert({ n: n, data: bigData }); + } + callGetLastError(3, 60000, testDB); + + print("replset2.js **** V1 ") + + var verifyReplication = function (nodeName, collection) { + data = collection.findOne({ n: 1 }); + assert(data['n'] == 1, "replset2.js Failed to save to " + nodeName); + data = collection.findOne({ n: 999 }); + assert(data['n'] == 999, "replset2.js Failed to save to " + nodeName); + } + + print("replset2.js **** V2 ") + + verifyReplication("master", master.getDB(testDB).baz); + verifyReplication("slave 0", slaves[0].getDB(testDB).baz); + verifyReplication("slave 1", slaves[1].getDB(testDB).baz); + + assert(failed == false, "replset2.js Replication with getLastError failed. See errors."); } catch(e) { print("ERROR: " + e); @@ -132,10 +132,10 @@ doTest = function (signal) { printjson(slaves[1].getDB("local").oplog.rs.find().sort({"$natural": -1}).limit(1).next()); } - - replTest.stopSet(signal); + + replTest.stopSet(signal); } -doTest( 15 ); - +doTest( 15 ); + print("\nreplset2.js SUCCESS\n"); diff --git a/jstests/replsets/replset3.js b/jstests/replsets/replset3.js index 8126b9d..faa0627 100644 --- a/jstests/replsets/replset3.js +++ b/jstests/replsets/replset3.js @@ -1,56 +1,80 @@ - -doTest = function( signal ) { - - // Test replica set step down - - // Replica set testing API - // Create a new replica set test. Specify set name and the number of nodes you want. - var replTest = new ReplSetTest( {name: 'testSet', nodes: 3} ); - - // call startSet() to start each mongod in the replica set - // this returns a list of nodes - var nodes = replTest.startSet(); - - // Call initiate() to send the replSetInitiate command - // This will wait for initiation - replTest.initiate(); - - // Get master node - var master = replTest.getMaster(); - - // Write some data to master - // NOTE: this test fails unless we write some data. - master.getDB("foo").foo.save({a: 1}); - master.getDB("foo").runCommand({getlasterror: 1, w:3, wtimeout: 20000}); - - // Step down master - master.getDB("admin").runCommand({replSetStepDown: true}); - - try { - var new_master = replTest.getMaster(); - } - catch( err ) { - throw( "Could not elect new master before timeout." ); - } - - assert( master != new_master, "Old master shouldn't be equal to new master." ); - - // Make sure that slaves are still up - var result = new_master.getDB("admin").runCommand({replSetGetStatus: 1}); - assert( result['ok'] == 1, "Could not verify that slaves were still up:" + result ); - - slaves = replTest.liveNodes.slaves; - assert.soon(function() { - res = slaves[0].getDB("admin").runCommand({replSetGetStatus: 1}) - return res.myState == 2; - }, "Slave 0 state not ready."); - - assert.soon(function() { - res = slaves[1].getDB("admin").runCommand({replSetGetStatus: 1}) - return res.myState == 2; - }, "Slave 1 state not ready."); - - replTest.stopSet( 15 ); + +doTest = function (signal) { + + // Test replica set step down + + // Replica set testing API + // Create a new replica set test. Specify set name and the number of nodes you want. 
+ var replTest = new ReplSetTest({ name: 'testSet', nodes: 3 }); + + // call startSet() to start each mongod in the replica set + // this returns a list of nodes + var nodes = replTest.startSet(); + + // Call initiate() to send the replSetInitiate command + // This will wait for initiation + replTest.initiate(); + + // Get master node + var master = replTest.getMaster(); + + // Write some data to master + // NOTE: this test fails unless we write some data. + master.getDB("foo").foo.save({ a: 1 }); + master.getDB("foo").runCommand({ getlasterror: 1, w: 3, wtimeout: 20000 }); + + var phase = 1; + + print(phase++); + + // Step down master. Note: this may close our connection! + try { + master.getDB("admin").runCommand({ replSetStepDown: true }); + } catch (err) { + print("caught: " + err + " on stepdown"); + } + + print(phase++); + + try { + var new_master = replTest.getMaster(); + } + catch (err) { + throw ("Could not elect new master before timeout."); + } + + print(phase++); + + assert(master != new_master, "Old master shouldn't be equal to new master."); + + print(phase++); + + // Make sure that slaves are still up + var result = new_master.getDB("admin").runCommand({ replSetGetStatus: 1 }); + assert(result['ok'] == 1, "Could not verify that slaves were still up:" + result); + + print(phase++); + + slaves = replTest.liveNodes.slaves; + assert.soon(function () { + try { + res = slaves[0].getDB("admin").runCommand({ replSetGetStatus: 1 }) + } catch (err) { } + return res.myState == 2; + }, "Slave 0 state not ready."); + + print(phase++); + + assert.soon(function () { + try { + res = slaves[1].getDB("admin").runCommand({ replSetGetStatus: 1 }) + } catch (err) { } + return res.myState == 2; + }, "Slave 1 state not ready."); + + print("replset3.js SUCCESS"); + + replTest.stopSet(15); } doTest( 15 ); diff --git a/jstests/replsets/replset5.js b/jstests/replsets/replset5.js index fe1761e..13ee5c9 100644 --- a/jstests/replsets/replset5.js +++ b/jstests/replsets/replset5.js @@ -23,15 +23,15 @@ doTest = function (signal) { master.getDB("barDB").bar.save({ a: 1 }); replTest.awaitReplication(); - // These writes should be replicated immediately - master.getDB(testDB).foo.insert({ n: 1 }); - master.getDB(testDB).foo.insert({ n: 2 }); - master.getDB(testDB).foo.insert({ n: 3 }); - - // *** NOTE ***: The default doesn't seem to be propogating. - // When I run getlasterror with no defaults, the slaves don't have the data: - // These getlasterror commands can be run individually to verify this. 
- //master.getDB("admin").runCommand({ getlasterror: 1, w: 3, wtimeout: 20000 }); + // These writes should be replicated immediately + var docNum = 5000; + for(var n=0; n 0); + + +print("5"); +config.members[2].arbiterOnly = true; +reconfig(); + + +print("6"); +statusSoon(7); +assert.eq(replTest.liveNodes.slaves[1].getDB("local").oplog.rs.count(), 0); + + +print("7"); +delete config.members[2].arbiterOnly; +reconfig(); + + +print("8"); +statusSoon(2); +assert(replTest.liveNodes.slaves[1].getDB("local").oplog.rs.count() > 0); + + +print("9"); +for (var i = 0; i < 10000; i++) { + master.getDB("foo").bar.insert({increment : i, c : 0, foo : "kasdlfjaklsdfalksdfakldfmalksdfmaklmfalkfmkafmdsaklfma", date : new Date(), d : Date()}); +} + + +print("10"); +config.members[2].arbiterOnly = true; +reconfig(); + + +print("11"); +statusSoon(7); +assert.eq(replTest.liveNodes.slaves[1].getDB("local").oplog.rs.count(), 0); +*/ + +replTest.stopSet( 15 ); + diff --git a/jstests/replsets/replsetfreeze.js b/jstests/replsets/replsetfreeze.js new file mode 100644 index 0000000..3721ba5 --- /dev/null +++ b/jstests/replsets/replsetfreeze.js @@ -0,0 +1,105 @@ +/* + * 1: initialize set + * 2: step down m1 + * 3: freeze set for 30 seconds + * 4: check no one is master for 30 seconds + * 5: check for new master + * 6: step down new master + * 7: freeze for 30 seconds + * 8: unfreeze + * 9: check we get a new master within 30 seconds + */ + + +var w = 0; +var wait = function(f) { + w++; + var n = 0; + while (!f()) { + if( n % 4 == 0 ) + print("toostale.js waiting " + w); + if (++n == 4) { + print("" + f); + } + assert(n < 200, 'tried 200 times, giving up'); + sleep(1000); + } +} + +var reconnect = function(a) { + wait(function() { + try { + a.getDB("foo").bar.stats(); + return true; + } catch(e) { + print(e); + return false; + } + }); +}; + + +print("1: initialize set"); +var replTest = new ReplSetTest( {name: 'unicomplex', nodes: 3} ); +var nodes = replTest.nodeList(); +var conns = replTest.startSet(); +var config = {"_id" : "unicomplex", "members" : [ + {"_id" : 0, "host" : nodes[0] }, + {"_id" : 1, "host" : nodes[1] }, + {"_id" : 2, "host" : nodes[2], "arbiterOnly" : true}]}; +var r = replTest.initiate(config); +var master = replTest.getMaster(); + + +print("2: step down m1"); +try { + master.getDB("admin").runCommand({replSetStepDown : 1}); +} +catch(e) { + print(e); +} +reconnect(master); + +print("3: freeze set for 30 seconds"); +master.getDB("admin").runCommand({replSetFreeze : 30}); + + +print("4: check no one is master for 30 seconds"); +var start = (new Date()).getTime(); +while ((new Date()).getTime() - start < 30000) { + var result = master.getDB("admin").runCommand({isMaster:1}); + assert.eq(result.ismaster, false); + assert.eq(result.primary, undefined); + sleep(1000); +} + + +print("5: check for new master"); +master = replTest.getMaster(); + + +print("6: step down new master"); +try { + master.getDB("admin").runCommand({replSetStepDown : 1}); +} +catch(e) { + print(e); +} +reconnect(master); + + +print("7: freeze for 30 seconds"); +master.getDB("admin").runCommand({replSetFreeze : 30}); +sleep(1000); + + +print("8: unfreeze"); +master.getDB("admin").runCommand({replSetFreeze : 0}); + + +print("9: check we get a new master within 30 seconds"); +master = replTest.getMaster(); + + +replTest.stopSet( 15 ); + diff --git a/jstests/replsets/rollback.js b/jstests/replsets/rollback.js index 8840371..6370e41 100644 --- a/jstests/replsets/rollback.js +++ b/jstests/replsets/rollback.js @@ -1,155 +1,186 @@ -// test 
rollback in replica sets - -// try running as : -// -// mongo --nodb rollback.js | tee out | grep -v ^m31 -// - -var debugging = 0; - -function pause(s) { - print(s); - while (debugging) { - sleep(3000); - print(s); - } -} - -function deb(obj) { - if( debugging ) { - print("\n\n\n" + obj + "\n\n"); - } -} - -w = 0; - -function wait(f) { - w++; - var n = 0; - while (!f()) { - if( n % 4 == 0 ) - print("rollback.js waiting " + w); - if (++n == 4) { - print("" + f); - } - sleep(1000); - } -} - -doTest = function (signal) { - - var replTest = new ReplSetTest({ name: 'unicomplex', nodes: 3 }); - var nodes = replTest.nodeList(); - //print(tojson(nodes)); - - var conns = replTest.startSet(); - var r = replTest.initiate({ "_id": "unicomplex", - "members": [ - { "_id": 0, "host": nodes[0] }, - { "_id": 1, "host": nodes[1] }, - { "_id": 2, "host": nodes[2], arbiterOnly: true}] - }); - - // Make sure we have a master - var master = replTest.getMaster(); - a_conn = conns[0]; - A = a_conn.getDB("admin"); - b_conn = conns[1]; - a_conn.setSlaveOk(); - b_conn.setSlaveOk(); - B = b_conn.getDB("admin"); - assert(master == conns[0], "conns[0] assumed to be master"); - assert(a_conn == master); - - //deb(master); - - // Make sure we have an arbiter - assert.soon(function () { - res = conns[2].getDB("admin").runCommand({ replSetGetStatus: 1 }); - return res.myState == 7; - }, "Arbiter failed to initialize."); - - // Wait for initial replication - var a = a_conn.getDB("foo"); - var b = b_conn.getDB("foo"); - - /* force the oplog to roll */ - if (new Date() % 2 == 0) { - print("ROLLING OPLOG AS PART OF TEST (we only do this sometimes)"); - var pass = 1; - var first = a.getSisterDB("local").oplog.rs.find().sort({ $natural: 1 }).limit(1)[0]; - a.roll.insert({ x: 1 }); - while (1) { - for (var i = 0; i < 10000; i++) - a.roll.update({}, { $inc: { x: 1} }); - var op = a.getSisterDB("local").oplog.rs.find().sort({ $natural: 1 }).limit(1)[0]; - if (tojson(op.h) != tojson(first.h)) { - printjson(op); - printjson(first); - break; - } - pass++; - a.getLastError(2); // unlikely secondary isn't keeping up, but let's avoid possible intermittent issues with that. - } - print("PASSES FOR OPLOG ROLL: " + pass); - } - else { - print("NO ROLL"); - } - - a.bar.insert({ q: 1, a: "foo" }); - a.bar.insert({ q: 2, a: "foo", x: 1 }); - a.bar.insert({ q: 3, bb: 9, a: "foo" }); - - assert(a.bar.count() == 3, "t.count"); - - // wait for secondary to get this data - wait(function () { return b.bar.count() == 3; }); - - A.runCommand({ replSetTest: 1, blind: true }); - wait(function () { return B.isMaster().ismaster; }); - - b.bar.insert({ q: 4 }); - b.bar.insert({ q: 5 }); - b.bar.insert({ q: 6 }); - assert(b.bar.count() == 6, "u.count"); - - // a should not have the new data as it was in blind state. - B.runCommand({ replSetTest: 1, blind: true }); - A.runCommand({ replSetTest: 1, blind: false }); - wait(function () { return !B.isMaster().ismaster; }); - wait(function () { return A.isMaster().ismaster; }); - - assert(a.bar.count() == 3, "t is 3"); - a.bar.insert({ q: 7 }); - a.bar.insert({ q: 8 }); - { - assert(a.bar.count() == 5); - var x = a.bar.find().toArray(); - assert(x[0].q == 1, '1'); - assert(x[1].q == 2, '2'); - assert(x[2].q == 3, '3'); - assert(x[3].q == 7, '7'); - assert(x[4].q == 8, '8'); - } - - // A is 1 2 3 7 8 - // B is 1 2 3 4 5 6 - - // bring B back online - B.runCommand({ replSetTest: 1, blind: false }); - - wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); - - // everyone is up here... 
- assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); - assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); - - friendlyEqual(a.bar.find().sort({ _id: 1 }).toArray(), b.bar.find().sort({ _id: 1 }).toArray(), "server data sets do not match"); - - pause("rollback.js SUCCESS"); - replTest.stopSet(signal); +// test rollback in replica sets + +// try running as : +// +// mongo --nodb rollback.js | tee out | grep -v ^m31 +// + +var debugging = 0; + +function pause(s) { + print(s); + while (debugging) { + sleep(3000); + print(s); + } +} + +function deb(obj) { + if( debugging ) { + print("\n\n\n" + obj + "\n\n"); + } +} + +w = 0; + +function wait(f) { + w++; + var n = 0; + while (!f()) { + if( n % 4 == 0 ) + print("rollback.js waiting " + w); + if (++n == 4) { + print("" + f); + } + assert(n < 200, 'tried 200 times, giving up'); + sleep(1000); + } } +doTest = function (signal) { + + var replTest = new ReplSetTest({ name: 'unicomplex', nodes: 3 }); + var nodes = replTest.nodeList(); + //print(tojson(nodes)); + + var conns = replTest.startSet(); + var r = replTest.initiate({ "_id": "unicomplex", + "members": [ + { "_id": 0, "host": nodes[0] }, + { "_id": 1, "host": nodes[1] }, + { "_id": 2, "host": nodes[2], arbiterOnly: true}] + }); + + // Make sure we have a master + var master = replTest.getMaster(); + a_conn = conns[0]; + A = a_conn.getDB("admin"); + b_conn = conns[1]; + a_conn.setSlaveOk(); + b_conn.setSlaveOk(); + B = b_conn.getDB("admin"); + assert(master == conns[0], "conns[0] assumed to be master"); + assert(a_conn == master); + + //deb(master); + + // Make sure we have an arbiter + assert.soon(function () { + res = conns[2].getDB("admin").runCommand({ replSetGetStatus: 1 }); + return res.myState == 7; + }, "Arbiter failed to initialize."); + + // Wait for initial replication + var a = a_conn.getDB("foo"); + var b = b_conn.getDB("foo"); + + /* force the oplog to roll */ + if (new Date() % 2 == 0) { + print("ROLLING OPLOG AS PART OF TEST (we only do this sometimes)"); + var pass = 1; + var first = a.getSisterDB("local").oplog.rs.find().sort({ $natural: 1 }).limit(1)[0]; + a.roll.insert({ x: 1 }); + while (1) { + for (var i = 0; i < 10000; i++) + a.roll.update({}, { $inc: { x: 1} }); + var op = a.getSisterDB("local").oplog.rs.find().sort({ $natural: 1 }).limit(1)[0]; + if (tojson(op.h) != tojson(first.h)) { + printjson(op); + printjson(first); + break; + } + pass++; + a.getLastError(2); // unlikely secondary isn't keeping up, but let's avoid possible intermittent issues with that. + } + print("PASSES FOR OPLOG ROLL: " + pass); + } + else { + print("NO ROLL"); + } + + a.bar.insert({ q: 1, a: "foo" }); + a.bar.insert({ q: 2, a: "foo", x: 1 }); + a.bar.insert({ q: 3, bb: 9, a: "foo" }); + + assert(a.bar.count() == 3, "t.count"); + + // wait for secondary to get this data + wait(function () { return b.bar.count() == 3; }); + + A.runCommand({ replSetTest: 1, blind: true }); + reconnect(a,b); + wait(function () { return B.isMaster().ismaster; }); + + b.bar.insert({ q: 4 }); + b.bar.insert({ q: 5 }); + b.bar.insert({ q: 6 }); + assert(b.bar.count() == 6, "u.count"); + + // a should not have the new data as it was in blind state. 
+ B.runCommand({ replSetTest: 1, blind: true }); + print("*************** wait for server to reconnect ****************"); + reconnect(a,b); + A.runCommand({ replSetTest: 1, blind: false }); + reconnect(a,b); + + print("*************** B ****************"); + wait(function () { try { return !B.isMaster().ismaster; } catch(e) { return false; } }); + print("*************** A ****************"); + reconnect(a,b); + wait(function () { + try { + return A.isMaster().ismaster; + } catch(e) { + return false; + } + }); + + assert(a.bar.count() == 3, "t is 3"); + a.bar.insert({ q: 7 }); + a.bar.insert({ q: 8 }); + { + assert(a.bar.count() == 5); + var x = a.bar.find().toArray(); + assert(x[0].q == 1, '1'); + assert(x[1].q == 2, '2'); + assert(x[2].q == 3, '3'); + assert(x[3].q == 7, '7'); + assert(x[4].q == 8, '8'); + } + + // A is 1 2 3 7 8 + // B is 1 2 3 4 5 6 + + // bring B back online + B.runCommand({ replSetTest: 1, blind: false }); + reconnect(a,b); + + wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); + + // everyone is up here... + assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); + assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); + replTest.awaitReplication(); + + friendlyEqual(a.bar.find().sort({ _id: 1 }).toArray(), b.bar.find().sort({ _id: 1 }).toArray(), "server data sets do not match"); + + pause("rollback.js SUCCESS"); + replTest.stopSet(signal); +}; + + +var reconnect = function(a,b) { + wait(function() { + try { + a.bar.stats(); + b.bar.stats(); + return true; + } catch(e) { + print(e); + return false; + } + }); +}; + print("rollback.js"); doTest( 15 ); diff --git a/jstests/replsets/rollback2.js b/jstests/replsets/rollback2.js index 483d221..46fb548 100644 --- a/jstests/replsets/rollback2.js +++ b/jstests/replsets/rollback2.js @@ -1,201 +1,232 @@ -// test rollback in replica sets - -// try running as : -// -// mongo --nodb rollback.js | tee out | grep -v ^m31 -// - -var debugging = 0; - -function pause(s) { - print(s); - while (debugging) { - sleep(3000); - print(s); - } -} - -function deb(obj) { - if( debugging ) { - print("\n\n\n" + obj + "\n\n"); - } -} - -w = 0; - -function wait(f) { - w++; - var n = 0; - while (!f()) { - if (n % 4 == 0) - print("rollback2.js waiting " + w); - if (++n == 4) { - print("" + f); - } - sleep(1000); - } -} - -function dbs_match(a, b) { - print("dbs_match"); - - var ac = a.system.namespaces.find().sort({name:1}).toArray(); - var bc = b.system.namespaces.find().sort({name:1}).toArray(); - if (!friendlyEqual(ac, bc)) { - print("dbs_match: namespaces don't match"); - print("\n\n"); - printjson(ac); - print("\n\n"); - printjson(bc); - print("\n\n"); - return false; - } - - var c = a.getCollectionNames(); - for( var i in c ) { - print("checking " + c[i]); - if( !friendlyEqual( a[c[i]].find().sort({_id:1}).toArray(), b[c[i]].find().sort({_id:1}).toArray() ) ) { - print("dbs_match: collections don't match " + c[i]); - return false; - } - } - return true; -} - -/* these writes will be initial data and replicate everywhere. 
*/ -function doInitialWrites(db) { - t = db.bar; - t.insert({ q:0}); - t.insert({ q: 1, a: "foo" }); - t.insert({ q: 2, a: "foo", x: 1 }); - t.insert({ q: 3, bb: 9, a: "foo" }); - t.insert({ q: 40, a: 1 }); - t.insert({ q: 40, a: 2 }); - t.insert({ q: 70, txt: 'willremove' }); - - db.createCollection("kap", { capped: true, size: 5000 }); - db.kap.insert({ foo: 1 }) - - // going back to empty on capped is a special case and must be tested - db.createCollection("kap2", { capped: true, size: 5501 }); -} - -/* these writes on one primary only and will be rolled back. */ -function doItemsToRollBack(db) { - t = db.bar; - t.insert({ q: 4 }); - t.update({ q: 3 }, { q: 3, rb: true }); - - t.remove({ q: 40 }); // multi remove test - - t.update({ q: 2 }, { q: 39, rb: true }); - - // rolling back a delete will involve reinserting the item(s) - t.remove({ q: 1 }); - - t.update({ q: 0 }, { $inc: { y: 1} }); - - db.kap.insert({ foo: 2 }) - db.kap2.insert({ foo: 2 }) - - // create a collection (need to roll back the whole thing) - db.newcoll.insert({ a: true }); - - // create a new empty collection (need to roll back the whole thing) - db.createCollection("abc"); -} - -function doWritesToKeep2(db) { - t = db.bar; - t.insert({ txt: 'foo' }); - t.remove({ q: 70 }); - t.update({ q: 0 }, { $inc: { y: 33} }); -} - -function verify(db) { - print("verify"); - t = db.bar; - assert(t.find({ q: 1 }).count() == 1); - assert(t.find({ txt: 'foo' }).count() == 1); - assert(t.find({ q: 4 }).count() == 0); -} - -doTest = function (signal) { - - var replTest = new ReplSetTest({ name: 'unicomplex', nodes: 3 }); - var nodes = replTest.nodeList(); - //print(tojson(nodes)); - - var conns = replTest.startSet(); - var r = replTest.initiate({ "_id": "unicomplex", - "members": [ - { "_id": 0, "host": nodes[0] }, - { "_id": 1, "host": nodes[1] }, - { "_id": 2, "host": nodes[2], arbiterOnly: true}] - }); - - // Make sure we have a master - var master = replTest.getMaster(); - a_conn = conns[0]; - A = a_conn.getDB("admin"); - b_conn = conns[1]; - a_conn.setSlaveOk(); - b_conn.setSlaveOk(); - B = b_conn.getDB("admin"); - assert(master == conns[0], "conns[0] assumed to be master"); - assert(a_conn == master); - - //deb(master); - - // Make sure we have an arbiter - assert.soon(function () { - res = conns[2].getDB("admin").runCommand({ replSetGetStatus: 1 }); - return res.myState == 7; - }, "Arbiter failed to initialize."); - - // Wait for initial replication - var a = a_conn.getDB("foo"); - var b = b_conn.getDB("foo"); - doInitialWrites(a); - - // wait for secondary to get this data - wait(function () { return b.bar.count() == a.bar.count(); }); - - A.runCommand({ replSetTest: 1, blind: true }); - wait(function () { return B.isMaster().ismaster; }); - - doItemsToRollBack(b); - - // a should not have the new data as it was in blind state. - B.runCommand({ replSetTest: 1, blind: true }); - A.runCommand({ replSetTest: 1, blind: false }); - wait(function () { return !B.isMaster().ismaster; }); - wait(function () { return A.isMaster().ismaster; }); - - assert(a.bar.count() >= 1, "count check"); - doWritesToKeep2(a); - - // A is 1 2 3 7 8 - // B is 1 2 3 4 5 6 - - // bring B back online - // as A is primary, B will roll back and then catch up - B.runCommand({ replSetTest: 1, blind: false }); - - wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); - - // everyone is up here... 
- assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); - assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); - - verify(a); - - assert( dbs_match(a,b), "server data sets do not match after rollback, something is wrong"); - - pause("rollback2.js SUCCESS"); - replTest.stopSet(signal); +// a test of rollback in replica sets +// +// try running as : +// +// mongo --nodb rollback2.js | tee out | grep -v ^m31 +// + +var debugging = 0; + +function pause(s) { + print(s); + while (debugging) { + sleep(3000); + print(s); + } +} + +function deb(obj) { + if( debugging ) { + print("\n\n\n" + obj + "\n\n"); + } +} + +w = 0; + +function wait(f) { + w++; + var n = 0; + while (!f()) { + if (n % 4 == 0) + print("rollback2.js waiting " + w); + if (++n == 4) { + print("" + f); + } + assert(n < 200, 'tried 200 times, giving up'); + sleep(1000); + } +} + +function dbs_match(a, b) { + print("dbs_match"); + + var ac = a.system.namespaces.find().sort({name:1}).toArray(); + var bc = b.system.namespaces.find().sort({name:1}).toArray(); + if (!friendlyEqual(ac, bc)) { + print("dbs_match: namespaces don't match"); + print("\n\n"); + printjson(ac); + print("\n\n"); + printjson(bc); + print("\n\n"); + return false; + } + + var c = a.getCollectionNames(); + for( var i in c ) { + print("checking " + c[i]); + if( !friendlyEqual( a[c[i]].find().sort({_id:1}).toArray(), b[c[i]].find().sort({_id:1}).toArray() ) ) { + print("dbs_match: collections don't match " + c[i]); + return false; + } + } + return true; +} + +/* these writes will be initial data and replicate everywhere. */ +function doInitialWrites(db) { + t = db.bar; + t.insert({ q:0}); + t.insert({ q: 1, a: "foo" }); + t.insert({ q: 2, a: "foo", x: 1 }); + t.insert({ q: 3, bb: 9, a: "foo" }); + t.insert({ q: 40, a: 1 }); + t.insert({ q: 40, a: 2 }); + t.insert({ q: 70, txt: 'willremove' }); + + db.createCollection("kap", { capped: true, size: 5000 }); + db.kap.insert({ foo: 1 }) + + // going back to empty on capped is a special case and must be tested + db.createCollection("kap2", { capped: true, size: 5501 }); +} + +/* these writes on one primary only and will be rolled back. 
*/ +function doItemsToRollBack(db) { + t = db.bar; + t.insert({ q: 4 }); + t.update({ q: 3 }, { q: 3, rb: true }); + + t.remove({ q: 40 }); // multi remove test + + t.update({ q: 2 }, { q: 39, rb: true }); + + // rolling back a delete will involve reinserting the item(s) + t.remove({ q: 1 }); + + t.update({ q: 0 }, { $inc: { y: 1} }); + + db.kap.insert({ foo: 2 }) + db.kap2.insert({ foo: 2 }) + + // create a collection (need to roll back the whole thing) + db.newcoll.insert({ a: true }); + + // create a new empty collection (need to roll back the whole thing) + db.createCollection("abc"); } +function doWritesToKeep2(db) { + t = db.bar; + t.insert({ txt: 'foo' }); + t.remove({ q: 70 }); + t.update({ q: 0 }, { $inc: { y: 33} }); +} + +function verify(db) { + print("verify"); + t = db.bar; + assert(t.find({ q: 1 }).count() == 1); + assert(t.find({ txt: 'foo' }).count() == 1); + assert(t.find({ q: 4 }).count() == 0); +} + +doTest = function (signal) { + + var replTest = new ReplSetTest({ name: 'unicomplex', nodes: 3 }); + var nodes = replTest.nodeList(); + //print(tojson(nodes)); + + var conns = replTest.startSet(); + var r = replTest.initiate({ "_id": "unicomplex", + "members": [ + { "_id": 0, "host": nodes[0] }, + { "_id": 1, "host": nodes[1] }, + { "_id": 2, "host": nodes[2], arbiterOnly: true}] + }); + + // Make sure we have a master + var master = replTest.getMaster(); + a_conn = conns[0]; + A = a_conn.getDB("admin"); + b_conn = conns[1]; + a_conn.setSlaveOk(); + b_conn.setSlaveOk(); + B = b_conn.getDB("admin"); + assert(master == conns[0], "conns[0] assumed to be master"); + assert(a_conn == master); + + //deb(master); + + // Make sure we have an arbiter + assert.soon(function () { + res = conns[2].getDB("admin").runCommand({ replSetGetStatus: 1 }); + return res.myState == 7; + }, "Arbiter failed to initialize."); + + // Wait for initial replication + var a = a_conn.getDB("foo"); + var b = b_conn.getDB("foo"); + wait(function () { + var status = A.runCommand({replSetGetStatus : 1}); + return status.members[1].state == 2; + }); + + doInitialWrites(a); + + // wait for secondary to get this data + wait(function () { return b.bar.count() == a.bar.count(); }); + wait(function () { + var status = A.runCommand({replSetGetStatus : 1}); + return status.members[1].state == 2; + }); + + + A.runCommand({ replSetTest: 1, blind: true }); + reconnect(a, b); + + wait(function () { return B.isMaster().ismaster; }); + + doItemsToRollBack(b); + + // a should not have the new data as it was in blind state. + B.runCommand({ replSetTest: 1, blind: true }); + reconnect(a, b); + A.runCommand({ replSetTest: 1, blind: false }); + reconnect(a,b); + + wait(function () { try { return !B.isMaster().ismaster; } catch(e) { return false; } }); + wait(function () { try { return A.isMaster().ismaster; } catch(e) { return false; } }); + + assert(a.bar.count() >= 1, "count check"); + doWritesToKeep2(a); + + // A is 1 2 3 7 8 + // B is 1 2 3 4 5 6 + + // bring B back online + // as A is primary, B will roll back and then catch up + B.runCommand({ replSetTest: 1, blind: false }); + reconnect(a,b); + + wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); + + // everyone is up here... 
+ assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); + assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); + replTest.awaitReplication(); + + verify(a); + + assert( dbs_match(a,b), "server data sets do not match after rollback, something is wrong"); + + pause("rollback2.js SUCCESS"); + replTest.stopSet(signal); +}; + +var reconnect = function(a,b) { + wait(function() { + try { + a.bar.stats(); + b.bar.stats(); + return true; + } catch(e) { + print(e); + return false; + } + }); +}; + print("rollback2.js"); doTest( 15 ); diff --git a/jstests/replsets/rollback3.js b/jstests/replsets/rollback3.js index 5c2f2f1..fa923d8 100755 --- a/jstests/replsets/rollback3.js +++ b/jstests/replsets/rollback3.js @@ -30,10 +30,10 @@ function wait(f) { if (n % 4 == 0) print("rollback3.js waiting " + w); if (++n == 4) { - print("" + f); - } - if (n == 200) { - print("rollback3.js failing waited too long"); + print("" + f); + } + if (n == 200) { + print("rollback3.js failing waited too long"); throw "wait error"; } sleep(1000); @@ -188,15 +188,20 @@ doTest = function (signal) { wait(function () { return b.bar.count() == a.bar.count(); }); A.runCommand({ replSetTest: 1, blind: true }); - wait(function () { return B.isMaster().ismaster; }); + reconnect(a,b); + wait(function () { try { return B.isMaster().ismaster; } catch(e) { return false; } }); doItemsToRollBack(b); // a should not have the new data as it was in blind state. B.runCommand({ replSetTest: 1, blind: true }); + reconnect(a,b); + A.runCommand({ replSetTest: 1, blind: false }); - wait(function () { return !B.isMaster().ismaster; }); - wait(function () { return A.isMaster().ismaster; }); + reconnect(a,b); + + wait(function () { try { return !B.isMaster().ismaster; } catch(e) { return false; } }); + wait(function () { try { return A.isMaster().ismaster; } catch(e) { return false; } }); assert(a.bar.count() >= 1, "count check"); doWritesToKeep2(a); @@ -207,18 +212,34 @@ doTest = function (signal) { // bring B back online // as A is primary, B will roll back and then catch up B.runCommand({ replSetTest: 1, blind: false }); + reconnect(a,b); wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; }); // everyone is up here... assert(A.isMaster().ismaster || A.isMaster().secondary, "A up"); assert(B.isMaster().ismaster || B.isMaster().secondary, "B up"); - + replTest.awaitReplication(); + assert( dbs_match(a,b), "server data sets do not match after rollback, something is wrong"); pause("rollback3.js SUCCESS"); replTest.stopSet(signal); -} +}; + + +var reconnect = function(a,b) { + wait(function() { + try { + a.bar.stats(); + b.bar.stats(); + return true; + } catch(e) { + print(e); + return false; + } + }); +}; print("rollback3.js"); doTest( 15 ); diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js new file mode 100644 index 0000000..c072829 --- /dev/null +++ b/jstests/replsets/rslib.js @@ -0,0 +1,63 @@ + +var count = 0; +var w = 0; + +var wait = function(f) { + w++; + var n = 0; + while (!f()) { + if( n % 4 == 0 ) + print("waiting " + w); + if (++n == 4) { + print("" + f); + } + assert(n < 200, 'tried 200 times, giving up'); + sleep(1000); + } +}; + +/** + * Use this to do something once every 4 iterations. + * + *
+ * for (i=0; i<1000; i++) {
+ *   occasionally(function() { print("4 more iterations"); });
+ * }
+ * </pre>
+ */ +var occasionally = function(f, n) { + var interval = n || 4; + if (count % interval == 0) { + f(); + } + count++; +}; + +var reconnect = function(a) { + wait(function() { + try { + // make this work with either dbs or connections + if (typeof(a.getDB) == "function") { + a.getDB("foo").bar.stats(); + } + else { + a.bar.stats(); + } + return true; + } catch(e) { + print(e); + return false; + } + }); +}; + + +var getLatestOp = function(server) { + server.getDB("admin").getMongo().setSlaveOk(); + var log = server.getDB("local")['oplog.rs']; + var cursor = log.find({}).sort({'$natural': -1}).limit(1); + if (cursor.hasNext()) { + return cursor.next(); + } + return null; +}; diff --git a/jstests/replsets/slaveDelay2.js b/jstests/replsets/slaveDelay2.js new file mode 100644 index 0000000..2d9dd1f --- /dev/null +++ b/jstests/replsets/slaveDelay2.js @@ -0,0 +1,104 @@ + +var name = "slaveDelay2"; +var host = getHostName(); + +var waitForAllMembers = function(master) { + var ready = false; + + outer: + while (true) { + var state = master.getSisterDB("admin").runCommand({replSetGetStatus:1}); + + for (var m in state.members) { + if (state.members[m].state != 2 && state.members[m].state != 1) { + sleep(10000); + continue outer; + } + } + + printjson(state); + print("okay, everyone is primary or secondary"); + return; + } +}; + + +var initialize = function() { + var replTest = new ReplSetTest( {name: name, nodes: 1} ); + + var nodes = replTest.startSet(); + + replTest.initiate(); + + var master = replTest.getMaster().getDB(name); + + waitForAllMembers(master); + + return replTest; +}; + +var populate = function(master) { + // insert records + for (var i =0; i<1000; i++) { + master.foo.insert({_id:1}); + } + + master.runCommand({getlasterror:1}); +} + +doTest = function( signal ) { + var replTest = initialize(); + var master = replTest.getMaster().getDB(name); + populate(master); + var admin = master.getSisterDB("admin"); + + /** + * start a slave with a long delay (1 hour) and do some writes while it is + * initializing. Make sure it syncs all of these writes before going into + * syncDelay. 
+ */ + var conn = startMongodTest(31008, name + "-sd", 0, { useHostname: true, replSet: name }); + conn.setSlaveOk(); + + config = master.getSisterDB("local").system.replset.findOne(); + config.version++; + config.members.push({_id : 1, host : host+":31008",priority:0, slaveDelay:3600}); + var ok = admin.runCommand({replSetReconfig : config}); + assert.eq(ok.ok,1); + + // do inserts during initial sync + count = 0; + while (count < 10) { + for (var i = 100*count; i<100*(count+1); i++) { + master.foo.insert({x:i}); + } + + //check if initial sync is done + var state = master.getSisterDB("admin").runCommand({replSetGetStatus:1}); + printjson(state); + if (state.members[1].state == 2) { + break; + } + + count++; + } + + // throw out last 100 inserts, but make sure the others were applied + if (count == 0) { + print("NOTHING TO CHECK"); + replTest.stopSet(); + return; + } + + // wait a bit for the syncs to be applied + waitForAllMembers(master); + + for (var i=0; i<(100*count); i++) { + var obj = conn.getDB(name).foo.findOne({x : i}); + assert(obj); + } + + replTest.stopSet(); +} + +doTest(15); diff --git a/jstests/replsets/slavedelay1.js b/jstests/replsets/slavedelay1.js new file mode 100644 index 0000000..e549822 --- /dev/null +++ b/jstests/replsets/slavedelay1.js @@ -0,0 +1,127 @@ + +var waitForAllMembers = function(master) { + var ready = false; + + outer: + while (true) { + var state = master.getSisterDB("admin").runCommand({replSetGetStatus:1}); + printjson(state); + + for (var m in state.members) { + if (state.members[m].state != 2 && state.members[m].state != 1) { + sleep(10000); + continue outer; + } + } + return; + } +}; + + +doTest = function( signal ) { + + var name = "slaveDelay"; + var host = getHostName(); + + var replTest = new ReplSetTest( {name: name, nodes: 3} ); + + var nodes = replTest.startSet(); + + /* set slaveDelay to 30 seconds */ + var config = replTest.getReplSetConfig(); + config.members[2].priority = 0; + config.members[2].slaveDelay = 30; + + replTest.initiate(config); + + var master = replTest.getMaster().getDB(name); + var slaveConns = replTest.liveNodes.slaves; + var slave = []; + for (var i in slaveConns) { + var d = slaveConns[i].getDB(name); + d.getMongo().setSlaveOk(); + slave.push(d); + } + + waitForAllMembers(master); + + // insert a record + master.foo.insert({x:1}); + master.runCommand({getlasterror:1, w:2}); + + var doc = master.foo.findOne(); + assert.eq(doc.x, 1); + + // make sure slave has it + var doc = slave[0].foo.findOne(); + assert.eq(doc.x, 1); + + // make sure delayed slave doesn't have it + assert.eq(slave[1].foo.findOne(), null); + + // wait 35 seconds + sleep(35000); + + // now delayed slave should have it + assert.eq(slave[1].foo.findOne().x, 1); + + + /************* Part 2 *******************/ + + // how about non-initial sync? + + for (var i=0; i<100; i++) { + master.foo.insert({_id : i, "foo" : "bar"}); + } + master.runCommand({getlasterror:1,w:2}); + + assert.eq(master.foo.findOne({_id : 99}).foo, "bar"); + assert.eq(slave[0].foo.findOne({_id : 99}).foo, "bar"); + assert.eq(slave[1].foo.findOne({_id : 99}), null); + + sleep(35000); + + assert.eq(slave[1].foo.findOne({_id : 99}).foo, "bar"); + + /************* Part 3 *******************/ + + // how about if we add a new server? will it sync correctly? 
+ + var conn = startMongodTest( 31007 , name+"-part3" , 0 , {useHostname : true, replSet : name} ); + + config = master.getSisterDB("local").system.replset.findOne(); + printjson(config); + config.version++; + config.members.push({_id : 3, host : host+":31007",priority:0, slaveDelay:10}); + + var admin = master.getSisterDB("admin"); + try { + var ok = admin.runCommand({replSetReconfig : config}); + assert.eq(ok.ok,1); + } + catch(e) { + print(e); + } + + master = replTest.getMaster().getDB(name); + + waitForAllMembers(master); + + sleep(15000); + + // it should be all caught up now + + master.foo.insert({_id : 123, "x" : "foo"}); + master.runCommand({getlasterror:1,w:2}); + + conn.setSlaveOk(); + assert.eq(conn.getDB(name).foo.findOne({_id:123}), null); + + sleep(15000); + + assert.eq(conn.getDB(name).foo.findOne({_id:123}).x, "foo"); + + replTest.stopSet(); +} + +doTest(15); diff --git a/jstests/replsets/sync1.js b/jstests/replsets/sync1.js index e60d128..af16044 100644 --- a/jstests/replsets/sync1.js +++ b/jstests/replsets/sync1.js @@ -1,5 +1,7 @@ // test rollback of replica sets +load("jstests/replsets/rslib.js"); + var debugging=0; w = 0; @@ -50,7 +52,7 @@ doTest = function (signal) { dbs[0].bar.ensureIndex({ w: 1 }); var ok = false; - var inserts = 100000; + var inserts = 10000; print("\nsync1.js ********************************************************************** part 5"); @@ -62,7 +64,7 @@ doTest = function (signal) { do { sleep(1000); status = dbs[0].getSisterDB("admin").runCommand({ replSetGetStatus: 1 }); - } while (status.members[1].state != 2 && status.members[2].state != 2); + } while (status.members[1].state != 2 || status.members[2].state != 2); print("\nsync1.js ********************************************************************** part 6"); dbs[0].getSisterDB("admin").runCommand({ replSetTest: 1, blind: true }); @@ -125,12 +127,14 @@ doTest = function (signal) { try { printjson(dbs[1].isMaster()); printjson(dbs[1].bar.count()); + printjson(dbs[1].adminCommand({replSetGetStatus : 1})); } catch (e) { print(e); } print("dbs[2]:"); try { printjson(dbs[2].isMaster()); printjson(dbs[2].bar.count()); + printjson(dbs[2].adminCommand({replSetGetStatus : 1})); } catch (e) { print(e); } assert(false, "sync1.js too many exceptions, failing"); @@ -161,10 +165,22 @@ doTest = function (signal) { print("\nsync1.js ********************************************************************** part 10"); // now, let's see if rollback works - var result = dbs[0].getSisterDB("admin").runCommand({ replSetTest: 1, blind: false }); + wait(function() { + try { + dbs[0].adminCommand({ replSetTest: 1, blind: false }); + } + catch(e) { + print(e); + } + reconnect(dbs[0]); + reconnect(dbs[1]); + + var status = dbs[1].adminCommand({replSetGetStatus:1}); + return status.members[0].health == 1; + }); + + dbs[0].getMongo().setSlaveOk(); - - printjson(result); sleep(5000); // now this should resync @@ -192,6 +208,10 @@ doTest = function (signal) { count++; if (count == 100) { + printjson(dbs[0].isMaster()); + printjson(dbs[0].adminCommand({replSetGetStatus:1})); + printjson(dbs[1].isMaster()); + printjson(dbs[1].adminCommand({replSetGetStatus:1})); pause("FAIL part 11"); assert(false, "replsets/\nsync1.js fails timing out"); replTest.stopSet(signal); diff --git a/jstests/replsets/sync_passive.js b/jstests/replsets/sync_passive.js new file mode 100644 index 0000000..d3e8ef4 --- /dev/null +++ b/jstests/replsets/sync_passive.js @@ -0,0 +1,89 @@ +/** + * Test syncing from non-primaries. + * + * Start a set. 
+ * Inital sync. + * Kill member 1. + * Add some data. + * Kill member 0. + * Restart member 1. + * Check that it syncs. + * Add some data. + * Kill member 1. + * Restart member 0. + * Check that it syncs. + */ + +load("jstests/replsets/rslib.js"); + +var name = "sync_passive"; +var host = getHostName(); + +var replTest = new ReplSetTest( {name: name, nodes: 3} ); + +var nodes = replTest.startSet(); + +/* set slaveDelay to 30 seconds */ +var config = replTest.getReplSetConfig(); +config.members[2].priority = 0; + +replTest.initiate(config); + +var master = replTest.getMaster().getDB("test"); +var server0 = master; +var server1 = replTest.liveNodes.slaves[0]; + +print("Initial sync"); +for (var i=0;i<100;i++) { + master.foo.insert({x:i}); +} +replTest.awaitReplication(); + + +print("stop #1"); +replTest.stop(1); + + +print("add some data"); +for (var i=0;i<1000;i++) { + master.bar.insert({x:i}); +} +replTest.awaitReplication(); + + +print("stop #0"); +replTest.stop(0); + + +print("restart #1"); +replTest.restart(1); + + +print("check sync"); +replTest.awaitReplication(); + + +print("add data"); +reconnect(server1); +master = replTest.getMaster().getDB("test"); +for (var i=0;i<1000;i++) { + master.bar.insert({x:i}); +} +replTest.awaitReplication(); + + +print("kill #1"); +replTest.stop(1); + + +print("restart #0"); +replTest.restart(0); +reconnect(server0); + + +print("wait for sync"); +replTest.awaitReplication(); + + +print("bring #1 back up, make sure everything's okay"); +replTest.restart(1); diff --git a/jstests/replsets/sync_passive2.js b/jstests/replsets/sync_passive2.js new file mode 100644 index 0000000..230d71c --- /dev/null +++ b/jstests/replsets/sync_passive2.js @@ -0,0 +1,120 @@ +/** + * Test syncing from non-primaries. + */ + +load("jstests/replsets/rslib.js"); + +var name = "sync_passive2"; +var host = getHostName(); + +var replTest = new ReplSetTest( {name: name, nodes: 5} ); +var nodes = replTest.startSet(); + +// 0: master +// 1: arbiter +// 2: slave a +// 3: slave b +// 4: slave c +var config = replTest.getReplSetConfig(); +config.members[1].arbiterOnly = true; +for (i=2; i counts[0] , "counts 1 : " + tojson( counts ) ) diff --git a/jstests/sharding/bigMapReduce.js b/jstests/sharding/bigMapReduce.js index 1cc12f4..3cc1d66 100644 --- a/jstests/sharding/bigMapReduce.js +++ b/jstests/sharding/bigMapReduce.js @@ -7,11 +7,69 @@ db = s.getDB( "test" ); var str="" for (i=0;i<4*1024;i++) { str=str+"a"; } for (j=0; j<50; j++) for (i=0; i<512; i++){ db.foo.save({y:str})} +db.getLastError(); + +s.printChunks(); +s.printChangeLog(); function map() { emit('count', 1); } function reduce(key, values) { return Array.sum(values) } -out = db.foo.mapReduce(map, reduce) -printjson(out) // SERVER-1400 +gotAGoodOne = false; + +for ( iter=0; iter<5; iter++ ){ + try { + out = db.foo.mapReduce(map, reduce,"big_out") + gotAGoodOne = true + } + catch ( e ){ + if ( __mrerror__ && __mrerror__.cause && __mrerror__.cause.assertionCode == 13388 ){ + // TODO: SERVER-2396 + sleep( 1000 ); + continue; + } + printjson( __mrerror__ ); + throw e; + } +} +assert( gotAGoodOne , "no good for basic" ) + +gotAGoodOne = false; +// test output to a different DB +// do it multiple times so that primary shard changes +for (iter = 0; iter < 5; iter++) { + outCollStr = "mr_replace_col_" + iter; + outDbStr = "mr_db_" + iter; + + print("Testing mr replace into DB " + iter) + + try { + res = db.foo.mapReduce( map , reduce , { out : { replace: outCollStr, db: outDbStr } } ) + gotAGoodOne = true; + } + catch ( e ){ + if ( 
__mrerror__ && __mrerror__.cause && __mrerror__.cause.assertionCode == 13388 ){ + // TODO: SERVER-2396 + sleep( 1000 ); + continue; + } + printjson( __mrerror__ ); + throw e; + } + printjson(res); + + outDb = s.getDB(outDbStr); + outColl = outDb[outCollStr]; + + obj = outColl.convertToSingleObject("value"); + assert.eq( 25600 , obj.count , "Received wrong result " + obj.count ); + + print("checking result field"); + assert.eq(res.result.collection, outCollStr, "Wrong collection " + res.result.collection); + assert.eq(res.result.db, outDbStr, "Wrong db " + res.result.db); +} + +assert( gotAGoodOne , "no good for out db" ) s.stop() + diff --git a/jstests/sharding/count1.js b/jstests/sharding/count1.js index ed69d1f..cc3f712 100644 --- a/jstests/sharding/count1.js +++ b/jstests/sharding/count1.js @@ -27,14 +27,16 @@ db.foo.save( { _id : 6 , name : "allan" } ) assert.eq( 6 , db.foo.find().count() , "basic count" ); -s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); -s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); -s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); +s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); // [Minkey -> allan) , * [allan -> ..) +s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); // * [allan -> sara) , [sara -> Maxkey) +s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); // [alan -> joe) , [joe -> sara] + +s.printChunks() assert.eq( 6 , db.foo.find().count() , "basic count after split " ); assert.eq( 6 , db.foo.find().sort( { name : 1 } ).count() , "basic count after split sorted " ); -s.adminCommand( { movechunk : "test.foo" , find : { name : "joe" } , to : secondary.getMongo().name } ); +s.adminCommand( { movechunk : "test.foo" , find : { name : "allan" } , to : secondary.getMongo().name } ); assert.eq( 3 , primary.foo.find().toArray().length , "primary count" ); assert.eq( 3 , secondary.foo.find().toArray().length , "secondary count" ); diff --git a/jstests/sharding/cursor1.js b/jstests/sharding/cursor1.js index 2a30936..f6cb9e4 100644 --- a/jstests/sharding/cursor1.js +++ b/jstests/sharding/cursor1.js @@ -53,7 +53,7 @@ sleep( 6000 ) assert( cur.next() , "T3" ) assert( cur.next() , "T4" ); sleep( 22000 ) -assert.throws( function(){ cur.next(); } , "T5" ) +assert.throws( function(){ cur.next(); } , null , "T5" ) after = db.runCommand( { "cursorInfo" : 1 , "setTimeout" : 10000 } ) // 10 seconds gc(); gc() diff --git a/jstests/sharding/features1.js b/jstests/sharding/features1.js index 05b8b8c..c22f094 100644 --- a/jstests/sharding/features1.js +++ b/jstests/sharding/features1.js @@ -81,10 +81,10 @@ assert.eq( 1 , db.foo3.count() , "eval pre1" ); assert.eq( 1 , db.foo2.count() , "eval pre2" ); assert.eq( 8 , db.eval( function(){ return db.foo3.findOne().a; } ), "eval 1 " ); -assert.throws( function(){ db.eval( function(){ return db.foo2.findOne().a; } ) } , "eval 2" ) +assert.throws( function(){ db.eval( function(){ return db.foo2.findOne().a; } ) } , null , "eval 2" ) assert.eq( 1 , db.eval( function(){ return db.foo3.count(); } ), "eval 3 " ); -assert.throws( function(){ db.eval( function(){ return db.foo2.count(); } ) } , "eval 4" ) +assert.throws( function(){ db.eval( function(){ return db.foo2.count(); } ) } , null , "eval 4" ) // ---- unique shard key ---- @@ -105,6 +105,14 @@ assert.eq( 2 , b.foo4.getIndexes().length , "ub2" ); assert( a.foo4.getIndexes()[1].unique , "ua3" ); assert( b.foo4.getIndexes()[1].unique , "ub3" ); +assert.eq( 2 , db.foo4.count() , 
"uc1" ) +db.foo4.save( { num : 7 } ) +assert.eq( 3 , db.foo4.count() , "uc2" ) +db.foo4.save( { num : 7 } ) +gle = db.getLastErrorObj(); +assert( gle.err , "uc3" ) +assert.eq( 3 , db.foo4.count() , "uc4" ) + // --- don't let you convertToCapped ---- assert( ! db.foo4.isCapped() , "ca1" ); assert( ! a.foo4.isCapped() , "ca2" ); @@ -152,12 +160,22 @@ assert.throws( function(){ db.foo6.group( { key : { a : 1 } , initial : { count // ---- can't shard non-empty collection without index ----- db.foo8.save( { a : 1 } ); +db.getLastError(); assert( ! s.admin.runCommand( { shardcollection : "test.foo8" , key : { a : 1 } } ).ok , "non-empty collection" ); + +// ---- can't shard non-empty collection with null values in shard key ---- + +db.foo9.save( { b : 1 } ); +db.getLastError(); +db.foo9.ensureIndex( { a : 1 } ); +assert( ! s.admin.runCommand( { shardcollection : "test.foo9" , key : { a : 1 } } ).ok , "entry with null value" ); + + // --- listDatabases --- r = db.getMongo().getDBs() -assert.eq( 4 , r.databases.length , "listDatabases 1 : " + tojson( r ) ) +assert.eq( 3 , r.databases.length , "listDatabases 1 : " + tojson( r ) ) assert.lt( 10000 , r.totalSize , "listDatabases 2 : " + tojson( r ) ); s.stop() diff --git a/jstests/sharding/features2.js b/jstests/sharding/features2.js index dfb2883..b2070ea 100644 --- a/jstests/sharding/features2.js +++ b/jstests/sharding/features2.js @@ -92,8 +92,10 @@ r = function( key , values ){ doMR = function( n ){ print(n); - - var res = db.mr.mapReduce( m , r ); + + // on-disk + + var res = db.mr.mapReduce( m , r , "smr1_out" ); printjson( res ); assert.eq( new NumberLong(4) , res.counts.input , "MR T0 " + n ); @@ -103,11 +105,26 @@ doMR = function( n ){ var z = {}; x.find().forEach( function(a){ z[a._id] = a.value.count; } ); assert.eq( 3 , Object.keySet( z ).length , "MR T2 " + n ); - assert.eq( 2 , z.a , "MR T2 " + n ); - assert.eq( 3 , z.b , "MR T2 " + n ); - assert.eq( 3 , z.c , "MR T2 " + n ); + assert.eq( 2 , z.a , "MR T3 " + n ); + assert.eq( 3 , z.b , "MR T4 " + n ); + assert.eq( 3 , z.c , "MR T5 " + n ); x.drop(); + + // inline + + var res = db.mr.mapReduce( m , r , { out : { inline : 1 } } ); + printjson( res ); + assert.eq( new NumberLong(4) , res.counts.input , "MR T6 " + n ); + + var z = {}; + res.find().forEach( function(a){ z[a._id] = a.value.count; } ); + printjson( z ); + assert.eq( 3 , Object.keySet( z ).length , "MR T7 " + n ) ; + assert.eq( 2 , z.a , "MR T8 " + n ); + assert.eq( 3 , z.b , "MR T9 " + n ); + assert.eq( 3 , z.c , "MR TA " + n ); + } doMR( "before" ); @@ -124,7 +141,7 @@ s.adminCommand({movechunk:'test.mr', find:{x:3}, to: s.getServer('test').name } doMR( "after extra split" ); -cmd = { mapreduce : "mr" , map : "emit( " , reduce : "fooz + " }; +cmd = { mapreduce : "mr" , map : "emit( " , reduce : "fooz + " , out : "broken1" }; x = db.runCommand( cmd ); y = s._connections[0].getDB( "test" ).runCommand( cmd ); diff --git a/jstests/sharding/features3.js b/jstests/sharding/features3.js index b15ccd3..b28d88e 100644 --- a/jstests/sharding/features3.js +++ b/jstests/sharding/features3.js @@ -1,4 +1,3 @@ - s = new ShardingTest( "features3" , 2 , 1 , 1 ); s.adminCommand( { enablesharding : "test" } ); @@ -25,7 +24,7 @@ assert.eq( N / 2 , x.shards.shard0001.count , "count on shard0001" ) start = new Date() print( "about to fork shell: " + Date() ) -join = startParallelShell( "db.foo.find( function(){ x = \"\"; for ( i=0; i<10000; i++ ){ x+=i; } return true; } ).itcount()" ) +join = startParallelShell( "db.foo.find( function(){ x = ''; 
for ( i=0; i<10000; i++ ){ x+=i; } return true; } ).itcount()" ) print( "after forking shell: " + Date() ) function getMine( printInprog ){ diff --git a/jstests/sharding/geo_near_random1.js b/jstests/sharding/geo_near_random1.js new file mode 100644 index 0000000..6ffd4b2 --- /dev/null +++ b/jstests/sharding/geo_near_random1.js @@ -0,0 +1,37 @@ +// this tests all points using $near +load("jstests/libs/geo_near_random.js"); + +var testName = "geo_near_random1"; +var s = new ShardingTest( testName , 3 ); + +db = s.getDB("test"); // global db + +var test = new GeoNearRandomTest(testName); + +s.adminCommand({enablesharding:'test'}); +s.adminCommand({shardcollection: ('test.' + testName), key: {_id:1} }); + +test.insertPts(50); + +for (var i = (test.nPts/10); i < test.nPts; i+= (test.nPts/10)){ + s.adminCommand({split: ('test.' + testName), middle: {_id: i} }); + try { + s.adminCommand({moveChunk: ('test.' + testName), find: {_id: i-1}, to: ('shard000' + (i%3))}); + } catch (e) { + // ignore this error + if (! e.match(/that chunk is already on that shard/)){ + throw e; + } + } +} + +printShardingSizes() + +var opts = {sharded: true} +test.testPt([0,0], opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); + +s.stop() diff --git a/jstests/sharding/geo_near_random2.js b/jstests/sharding/geo_near_random2.js new file mode 100644 index 0000000..4871e1e --- /dev/null +++ b/jstests/sharding/geo_near_random2.js @@ -0,0 +1,44 @@ +// this tests 1% of all points using $near and $nearSphere +load("jstests/libs/geo_near_random.js"); + +var testName = "geo_near_random2"; +var s = new ShardingTest( testName , 3 ); + +db = s.getDB("test"); // global db + +var test = new GeoNearRandomTest(testName); + +s.adminCommand({enablesharding:'test'}); +s.adminCommand({shardcollection: ('test.' + testName), key: {_id:1} }); + +test.insertPts(5000); + +for (var i = (test.nPts/10); i < test.nPts; i+= (test.nPts/10)){ + s.adminCommand({split: ('test.' + testName), middle: {_id: i} }); + try { + s.adminCommand({moveChunk: ('test.' + testName), find: {_id: i-1}, to: ('shard000' + (i%3))}); + } catch (e) { + // ignore this error + if (! e.match(/that chunk is already on that shard/)){ + throw e; + } + } +} + +printShardingSizes() + +opts = {sphere:0, nToTest:test.nPts*0.01, sharded:true}; +test.testPt([0,0], opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); + +opts.sphere = 1 +test.testPt([0,0], opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); + +s.stop() diff --git a/jstests/sharding/key_many.js b/jstests/sharding/key_many.js index 1e0ba9d..3a8203f 100644 --- a/jstests/sharding/key_many.js +++ b/jstests/sharding/key_many.js @@ -20,7 +20,7 @@ s = new ShardingTest( "key_many" , 2 ); s.adminCommand( { enablesharding : "test" } ) db = s.getDB( "test" ); primary = s.getServer( "test" ).getDB( "test" ); -seconday = s.getOther( primary ).getDB( "test" ); +secondary = s.getOther( primary ).getDB( "test" ); function makeObjectDotted( v ){ var o = {}; @@ -97,12 +97,12 @@ for ( var i=0; i allan) , * [allan -> ..) 
+s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); // * [allan -> sara) , [sara -> Maxkey) +s.adminCommand( { split : "test.foo" , find : { name : "joe" } } ); // [alan -> joe) , [joe -> sara] -s.adminCommand( { movechunk : "test.foo" , find : { name : "joe" } , to : seconday.getMongo().name } ); +s.adminCommand( { movechunk : "test.foo" , find : { name : "allan" } , to : seconday.getMongo().name } ); s.printChunks(); @@ -39,6 +39,11 @@ assert.eq( 6 , db.foo.find().sort( { name : 1 } ).count() , "total count with co assert.eq( "allan,bob,eliot,joe,mark,sara" , db.foo.find().sort( { name : 1 } ).toArray().map( function(z){ return z.name; } ) , "sort 1" ); assert.eq( "sara,mark,joe,eliot,bob,allan" , db.foo.find().sort( { name : -1 } ).toArray().map( function(z){ return z.name; } ) , "sort 2" ); +// make sure we can't foce a split on an extreme key +// [allan->joe) +assert.throws( function(){ s.adminCommand( { split : "test.foo" , middle : { name : "allan" } } ) } ); +assert.throws( function(){ s.adminCommand( { split : "test.foo" , middle : { name : "joe" } } ) } ); + s.stop(); diff --git a/jstests/sharding/limit_push.js b/jstests/sharding/limit_push.js new file mode 100644 index 0000000..75ad271 --- /dev/null +++ b/jstests/sharding/limit_push.js @@ -0,0 +1,47 @@ +// This test is to ensure that limit() clauses are pushed down to the shards and evaluated +// See: http://jira.mongodb.org/browse/SERVER-1896 + +s = new ShardingTest( "limit_push", 2, 1, 1 ); + +db = s.getDB( "test" ); + +// Create some data +for (i=0; i < 100; i++) { db.limit_push.insert({ _id : i, x: i}); } +db.limit_push.ensureIndex( { x : 1 } ); +assert.eq( 100 , db.limit_push.find().length() , "Incorrect number of documents" ); + +// Shard the collection +s.adminCommand( { enablesharding : "test" } ); +s.adminCommand( { shardcollection : "test.limit_push" , key : { x : 1 } } ); + +// Now split the and move the data between the shards +s.adminCommand( { split : "test.limit_push", middle : { x : 50 }} ); +s.adminCommand( { moveChunk: "test.limit_push", find : { x : 51}, to : "shard0000" }) + +// Check that the chunck have split correctly +assert.eq( 2 , s.config.chunks.count() , "wrong number of chunks"); + +// The query is asking for the maximum value below a given value +// db.limit_push.find( { x : { $lt : 60} } ).sort( { x:-1} ).limit(1) +q = { x : { $lt : 60} }; + +// Make sure the basic queries are correct +assert.eq( 60 , db.limit_push.find( q ).count() , "Did not find 60 documents" ); +//rs = db.limit_push.find( q ).sort( { x:-1} ).limit(1) +//assert.eq( rs , { _id : "1" , x : 59 } , "Did not find document with value 59" ); + +// Now make sure that the explain shos that each shard is returning a single document as indicated +// by the "n" element for each shard +exp = db.limit_push.find( q ).sort( { x:-1} ).limit(1).explain(); +printjson( exp ) + +assert.eq("ParallelSort", exp.clusteredType, "Not a ParallelSort"); + +var k = 0; +for (var j in exp.shards) { + assert.eq( 1 , exp.shards[j][0].n, "'n' is not 1 from shard000" + k.toString()); + k++ +} + +s.stop(); + diff --git a/jstests/sharding/migrateBig.js b/jstests/sharding/migrateBig.js new file mode 100644 index 0000000..f6ba18a --- /dev/null +++ b/jstests/sharding/migrateBig.js @@ -0,0 +1,45 @@ + +s = new ShardingTest( "migrateBig" , 2 , 0 , 1 , { chunksize : 1 } ); + +s.adminCommand( { enablesharding : "test" } ); +s.adminCommand( { shardcollection : "test.foo" , key : { x : 1 } } ); + +db = s.getDB( "test" ) +coll = db.foo + +big = "" +while ( 
big.length < 10000 ) + big += "eliot" + +for ( x=0; x<100; x++ ) + coll.insert( { x : x , big : big } ) + +s.adminCommand( { split : "test.foo" , middle : { x : 33 } } ) +s.adminCommand( { split : "test.foo" , middle : { x : 66 } } ) +s.adminCommand( { movechunk : "test.foo" , find : { x : 90 } , to : s.getOther( s.getServer( "test" ) ).name } ) + +db.printShardingStatus() + +print( "YO : " + s.getServer( "test" ).host ) +direct = new Mongo( s.getServer( "test" ).host ) +print( "direct : " + direct ) + +directDB = direct.getDB( "test" ) + +for ( done=0; done<2*1024*1024; done+=big.length ){ + directDB.foo.insert( { x : 50 + Math.random() , big : big } ) + directDB.getLastError(); +} + +db.printShardingStatus() + +assert.throws( function(){ s.adminCommand( { movechunk : "test.foo" , find : { x : 50 } , to : s.getOther( s.getServer( "test" ) ).name } ); } , [] , "move should fail" ) + +for ( i=0; i<20; i+= 2 ) + s.adminCommand( { split : "test.foo" , middle : { x : i } } ) + +db.printShardingStatus() + +assert.soon( function(){ var x = s.chunkDiff( "foo" , "test" ); print( "chunk diff: " + x ); return x < 2; } , "no balance happened" , 120 * 1000 , 2000 ) + +s.stop() diff --git a/jstests/sharding/multi_mongos1.js b/jstests/sharding/multi_mongos1.js new file mode 100644 index 0000000..cf9ebde --- /dev/null +++ b/jstests/sharding/multi_mongos1.js @@ -0,0 +1,70 @@ +// multi_mongos.js + +// setup sharding with two mongos, s1 and s2 +s1 = new ShardingTest( "multi_mongos1" , 2 , 1 , 2 ); +s2 = s1._mongos[1]; + +s1.adminCommand( { enablesharding : "test" } ); +s1.adminCommand( { shardcollection : "test.foo" , key : { num : 1 } } ); + +s1.config.databases.find().forEach( printjson ) + +viaS1 = s1.getDB( "test" ).foo; +viaS2 = s2.getDB( "test" ).foo; + +primary = s1.getServer( "test" ).getDB( "test" ).foo; +secondary = s1.getOther( primary.name ).getDB( "test" ).foo; + +N = 4; +for (i=1; i<=N; i++) { + viaS1.save( { num : i } ); +} + +// initial checks + +// both mongos see all elements +assert.eq( N , viaS1.find().toArray().length , "normal A" ); +assert.eq( N , viaS2.find().toArray().length , "other A" ); + +// all elements are in one of the shards +assert.eq( N , primary.count() , "p1" ) +assert.eq( 0 , secondary.count() , "s1" ) +assert.eq( 1 , s1.onNumShards( "foo" ) , "on 1 shards" ); + +// +// STEP 1 (builds a bit of context so there should probably not be a step 2 in this same test) +// where we try to issue a move chunk from a mongos that's stale +// followed by a split on a valid chunk, albeit one with not the highest lastmod + +// split in [Minkey->1), [1->N), [N,Maxkey) +s1.adminCommand( { split : "test.foo" , middle : { num : 1 } } ); +s1.adminCommand( { split : "test.foo" , middle : { num : N } } ); + +// s2 is now stale w.r.t boundaries around { num: 1 } +res = s2.getDB( "admin" ).runCommand( { movechunk : "test.foo" , find : { num : 1 } , to : s1.getOther( s1.getServer( "test" ) ).name } ); +assert.eq( 0 , res.ok , "a move with stale boundaries should not have succeeded" + tojson(res) ); + +// s2 must have reloaded as a result of a failed move; retrying should work
res = s2.getDB( "admin" ).runCommand( { movechunk : "test.foo" , find : { num : 1 } , to : s1.getOther( s1.getServer( "test" ) ).name } ); +assert.eq( 1 , res.ok , "mongos did not reload after a failed migrate" + tojson(res) ); + +// s1 is not stale about the boundaries of [MinKey->1) +// but we'll try to split a chunk whose lastmod.major was not touched by the previous move +// in 1.6, that chunk would be with [Minkey->1)
(where { num: -1 } falls) +// after 1.6, it would be with [N->Maxkey] (where { num: N+1 } falls) +// s.printShardingStatus() +res = s1.getDB( "admin" ).runCommand( { split : "test.foo" , middle : { num : N+1 } } ); // replace with { num: -1 } instead in 1.6 +assert.eq( 1, res.ok , "split over accurate boundaries should have succeeded" + tojson(res) ); + +// { num : 4 } is on primary +// { num : 1 , 2 , 3 } are on secondary +assert.eq( 1 , primary.find().toArray().length , "wrong count on primary" ); +assert.eq( 3 , secondary.find().toArray().length , "wrong count on secondary" ); +assert.eq( N , primary.find().itcount() + secondary.find().itcount() , "wrong total count" ) + +assert.eq( N , viaS1.find().toArray().length , "normal B" ); +assert.eq( N , viaS2.find().toArray().length , "other B" ); + +printjson( primary._db._adminCommand( "shardingState" ) ); + +s1.stop(); \ No newline at end of file diff --git a/jstests/sharding/rename.js b/jstests/sharding/rename.js index aa6137d..fa27611 100644 --- a/jstests/sharding/rename.js +++ b/jstests/sharding/rename.js @@ -24,3 +24,4 @@ assert.eq(db.bar.findOne(), {_id:3}, '3.1'); assert.eq(db.bar.count(), 1, '3.2'); assert.eq(db.foo.count(), 0, '3.3'); +s.stop() \ No newline at end of file diff --git a/jstests/sharding/shard1.js b/jstests/sharding/shard1.js index 1783238..ae382e4 100644 --- a/jstests/sharding/shard1.js +++ b/jstests/sharding/shard1.js @@ -21,6 +21,7 @@ assert.eq( 3 , db.foo.find().length() , "after partitioning count failed" ); s.adminCommand( shardCommand ); cconfig = s.config.collections.findOne( { _id : "test.foo" } ); +assert( cconfig , "why no collection entry for test.foo" ) delete cconfig.lastmod delete cconfig.dropped assert.eq( cconfig , { _id : "test.foo" , key : { num : 1 } , unique : false } , "Sharded content" ); diff --git a/jstests/sharding/shard3.js b/jstests/sharding/shard3.js index e57dc1e..7132563 100644 --- a/jstests/sharding/shard3.js +++ b/jstests/sharding/shard3.js @@ -41,9 +41,10 @@ printjson( primary._db._adminCommand( "shardingState" ) ); // --- filtering --- -function doCounts( name , total ){ +function doCounts( name , total , onlyItCounts ){ total = total || ( primary.count() + secondary.count() ); - assert.eq( total , a.count() , name + " count" ); + if ( !
onlyItCounts ) + assert.eq( total , a.count() , name + " count" ); assert.eq( total , a.find().sort( { n : 1 } ).itcount() , name + " itcount - sort n" ); assert.eq( total , a.find().itcount() , name + " itcount" ); assert.eq( total , a.find().sort( { _id : 1 } ).itcount() , name + " itcount - sort _id" ); @@ -51,8 +52,12 @@ function doCounts( name , total ){ } var total = doCounts( "before wrong save" ) -//secondary.save( { num : -3 } ); -//doCounts( "after wrong save" , total ) +secondary.save( { num : -3 } ); +doCounts( "after wrong save" , total , true ) +e = a.find().explain(); +assert.eq( 3 , e.n , "ex1" ) +assert.eq( 4 , e.nscanned , "ex2" ) +assert.eq( 1 , e.nChunkSkips , "ex3" ) // --- move all to 1 --- print( "MOVE ALL TO 1" ); @@ -89,27 +94,18 @@ s.printCollectionInfo( "test.foo" , "after counts" ); assert.eq( 0 , primary.count() , "p count after drop" ) assert.eq( 0 , secondary.count() , "s count after drop" ) +// NOTE +// the following bypasses the sharding layer and writes straight to the servers +// this is not supported at all but we'd like to leave this backdoor for now primary.save( { num : 1 } ); secondary.save( { num : 4 } ); - assert.eq( 1 , primary.count() , "p count after drop and save" ) assert.eq( 1 , secondary.count() , "s count after drop and save " ) +print("*** makes sure that sharded access respects the drop command" ); -print("*** makes sure that sharding knows where things live" ); - -assert.eq( 1 , a.count() , "a count after drop and save" ) -s.printCollectionInfo( "test.foo" , "after a count" ); -assert.eq( 1 , b.count() , "b count after drop and save" ) -s.printCollectionInfo( "test.foo" , "after b count" ); - -assert( a.findOne( { num : 1 } ) , "a drop1" ); -assert.isnull( a.findOne( { num : 4 } ) , "a drop1" ); - -s.printCollectionInfo( "test.foo" , "after a findOne tests" ); - -assert( b.findOne( { num : 1 } ) , "b drop1" ); -assert.isnull( b.findOne( { num : 4 } ) , "b drop1" ); +assert.isnull( a.findOne() , "lookup via mongos 'a' accessed dropped data" ); +assert.isnull( b.findOne() , "lookup via mongos 'b' accessed dropped data" ); s.printCollectionInfo( "test.foo" , "after b findOne tests" ); @@ -130,6 +126,8 @@ s.printCollectionInfo( "test.foo" , "after dropDatabase setup3" ); print( "*** ready to call dropDatabase" ) res = s.getDB( "test" ).dropDatabase(); assert.eq( 1 , res.ok , "dropDatabase failed : " + tojson( res ) ); +// Waiting for SERVER-2253 +// assert.eq( 0 , s.config.databases.count( { _id: "test" } ) , "database 'test' was dropped but still appears in configDB" ); s.printShardingStatus(); s.printCollectionInfo( "test.foo" , "after dropDatabase call 1" ); diff --git a/jstests/sharding/shard_insert_getlasterror_w2.js b/jstests/sharding/shard_insert_getlasterror_w2.js new file mode 100644 index 0000000..c722f21 --- /dev/null +++ b/jstests/sharding/shard_insert_getlasterror_w2.js @@ -0,0 +1,89 @@ +// replica set as solo shard +// getLastError(2) fails on about every 170 inserts on my Macbook laptop -Tony +// TODO: Add assertion code that catches hang + +load('jstests/libs/grid.js') + +function go() { + + var N = 2000 + + // ~1KB string + var Text = '' + for (var i = 0; i < 40; i++) + Text += 'abcdefghijklmnopqrstuvwxyz' + + // Create replica set with 3 servers + var repset1 = new ReplicaSet('repset1', 3) .begin() + + // Add data to it + var conn1a = repset1.getMaster() + var db1a = conn1a.getDB('test') + for (var i = 0; i < N; i++) { + db1a['foo'].insert({x: i, text: Text}) + db1a.getLastError(2) // wait to be copied to at least one 
secondary + } + + // Create 3 sharding config servers + var configsetSpec = new ConfigSet(3) + var configsetConns = configsetSpec.begin() + + // Create sharding router (mongos) + var routerSpec = new Router(configsetSpec) + var routerConn = routerSpec.begin() + var dba = routerConn.getDB('admin') + var db = routerConn.getDB('test') + + // Add repset1 as only shard + addShard (routerConn, repset1.getURL()) + + // Enable sharding on test db and its collection foo + enableSharding (routerConn, 'test') + db['foo'].ensureIndex({x: 1}) + shardCollection (routerConn, 'test', 'foo', {x: 1}) + + sleep(30000) + printjson (db['foo'].stats()) + dba.printShardingStatus() + printjson (db['foo'].count()) + + // Test case where GLE should return an error + db.foo.insert({_id:'a', x:1}); + db.foo.insert({_id:'a', x:1}); + var x = db.getLastErrorObj(2, 30000) + assert.neq(x.err, null, tojson(x)); + + // Add more data + for (var i = N; i < 2*N; i++) { + db['foo'].insert({x: i, text: Text}) + var x = db.getLastErrorObj(2, 30000) // wait to be copied to at least one secondary + if (i % 30 == 0) print(i) + if (i % 100 == 0 || x.err != null) printjson(x); + assert.eq(x.err, null, tojson(x)); + } + + // take down the slave and make sure it fails over + repset1.stop(1); + repset1.stop(2); + db.getMongo().setSlaveOk(); + print("trying some queries"); + assert.soon(function() { try { + db.foo.find().next(); + } + catch(e) { + print(e); + return false; + } + return true; + }); + + // Done + routerSpec.end() + configsetSpec.end() + repset1.stopSet() + + print('shard_insert_getlasterror_w2.js SUCCESS') +} + +//Uncomment below to execute +go() diff --git a/jstests/sharding/sort1.js b/jstests/sharding/sort1.js index 0edb7a7..e2b287e 100644 --- a/jstests/sharding/sort1.js +++ b/jstests/sharding/sort1.js @@ -2,7 +2,7 @@ s = new ShardingTest( "sort1" , 2 , 0 , 2 ) s.adminCommand( { enablesharding : "test" } ); -s.adminCommand( { shardcollection : "test.data" , key : { num : 1 } } ); +s.adminCommand( { shardcollection : "test.data" , key : { 'sub.num' : 1 } } ); db = s.getDB( "test" ); @@ -11,16 +11,16 @@ N = 100 forward = [] backward = [] for ( i=0; i= 2 ) + break; + print("32bit.js PASS #" + pass); + pass++; + + t = mydb.colltest_32bit; + + print("seed=" + seed); + + t.insert({x:1}); + t.ensureIndex({a:1}); + t.ensureIndex({b:1}, true); + t.ensureIndex({x:1}); + if( Math.random() < 0.3 ) + t.ensureIndex({c:1}); + t.ensureIndex({d:1}); + t.ensureIndex({e:1}); + t.ensureIndex({f:1}); + + big = 'a b'; + big = big + big; + k = big; + big = big + big; + big = big + big; + big = big + big; + + a = 0; + c = 'kkk'; + var start = new Date(); + while( 1 ) { + b = Math.random(seed); + d = c + -a; + f = Math.random(seed) + a; + a++; + cc = big; + if( Math.random(seed) < .1 ) + cc = null; + t.insert({a:a,b:b,c:cc,d:d,f:f}); + if( Math.random(seed) < 0.01 ) { + + if( mydb.getLastError() ) { + /* presumably we have mmap error on 32 bit. 
try a few more manipulations attempting to break things */ + t.insert({a:33,b:44,c:55,d:66,f:66}); + t.insert({a:33,b:44000,c:55,d:66}); + t.insert({a:33,b:440000,c:55}); + t.insert({a:33,b:4400000}); + t.update({a:20},{'$set':{c:'abc'}}); + t.update({a:21},{'$set':{c:'aadsfbc'}}); + t.update({a:22},{'$set':{c:'c'}}); + t.update({a:23},{'$set':{b:cc}}); + t.remove({a:22}); + break; + } + + t.remove({a:a}); + t.remove({b:Math.random(seed)}); + t.insert({e:1}); + t.insert({f:'aaaaaaaaaa'}); + + if( Math.random() < 0.00001 ) { print("remove cc"); t.remove({c:cc}); } + if( Math.random() < 0.0001 ) { print("update cc"); t.update({c:cc},{'$set':{c:1}},false,true); } + if( Math.random() < 0.00001 ) { print("remove e"); t.remove({e:1}); } + } + if (a == 20000 ) { + var delta_ms = (new Date())-start; + // 2MM / 20000 = 100. 1000ms/sec. + var eta_secs = delta_ms * (100 / 1000); + print("32bit.js eta_secs:" + eta_secs); + if( eta_secs > 1000 ) { + print("32bit.js machine is slow, stopping early. a:" + a); + mydb.dropDatabase(); + return; + } + } + if( a % 100000 == 0 ) { + print(a); + // on 64 bit we won't error out, so artificially stop. on 32 bit we will hit mmap limit ~1.6MM but may + // vary by a factor of 2x by platform + if( a >= 2200000 ) { + mydb.dropDatabase(); + return; + } + } + } + print("count: " + t.count()); + + var res = t.validate(); + if( !res.valid ) { + print("32bit.js FAIL validating"); + print(res.result); + printjson(res); + //mydb.dropDatabase(); + throw "fail validating 32bit.js"; + } + + mydb.dropDatabase(); + } + + print("32bit.js SUCCESS"); +} + +if (!db._adminCommand("buildInfo").debug && !db.runCommand( { serverStatus : 1 , repl : 1 } ).repl ){ + /* this test is slow, so don't run during the day */ + print("\n32bit.js running - this test is slow so only runs at night."); + f(); +} +else { + print("32bit.js skipping this test - debug server build would be too slow"); +} diff --git a/jstests/slowNightly/btreedel.js b/jstests/slowNightly/btreedel.js new file mode 100644 index 0000000..824eb3e --- /dev/null +++ b/jstests/slowNightly/btreedel.js @@ -0,0 +1,43 @@ +// btreedel.js + +t = db.foo; +t.remove({}); + +for (var i = 0; i < 1000000; i++) { + t.insert({ _id: i, x: 'a b' }); +} + +print("1 insert done count: " + t.count()); + +var c = t.find({y:null}).sort({ _id: 1 }); +for (var j = 0; j < 400000; j++) { + c.next(); + if (j % 200000 == 0) + printjson(c.next()); +} +printjson(c.next()); + +var d = t.find({ _id: { $gt: 300000} }).sort({ _id: -1 }); +d.next(); + +print("2"); + +t.remove({ _id: { $gt: 200000, $lt: 600000} }); + +print("3"); +print(d.hasNext()); + +n = 0; +last = {}; +printjson(c.next()); +while (c.hasNext()) { + n++; + last = c.next(); +} + +print("4. 
n:" + n); +printjson(last); + +assert(n > 100000); + +print("btreedel.js success"); diff --git a/jstests/slowNightly/capped4.js b/jstests/slowNightly/capped4.js index 01af8f2..27d138c 100644 --- a/jstests/slowNightly/capped4.js +++ b/jstests/slowNightly/capped4.js @@ -31,4 +31,4 @@ assert( t.validate().valid, "G" ); db._adminCommand("closeAllDatabases"); -//assert( db.serverStatus().cursors.totalOpen == 0, "cursors open and shouldn't be"); +assert( db.serverStatus().cursors.totalOpen == 0, "cursors open and shouldn't be"); diff --git a/jstests/slowNightly/command_line_parsing.js b/jstests/slowNightly/command_line_parsing.js new file mode 100644 index 0000000..38c7324 --- /dev/null +++ b/jstests/slowNightly/command_line_parsing.js @@ -0,0 +1,9 @@ +// validate command line parameter parsing + +port = allocatePorts( 1 )[ 0 ]; +var baseName = "jstests_slowNightly_command_line_parsing"; + +// test notablescan +var m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--notablescan" ); +m.getDB( baseName ).getCollection( baseName ).save( {a:1} ); +assert.throws( function() { m.getDB( baseName ).getCollection( baseName ).find( {a:1} ).toArray() } ); diff --git a/jstests/slowNightly/dur_big_atomic_update.js b/jstests/slowNightly/dur_big_atomic_update.js new file mode 100644 index 0000000..ffb0d83 --- /dev/null +++ b/jstests/slowNightly/dur_big_atomic_update.js @@ -0,0 +1,31 @@ +// @file dur_big_atomic_update.js +// +// this tests writing 1GB in an atomic update to make sure we commit periodically + +var path = "/data/db/dur_big_atomic_update"; + +conn = startMongodEmpty("--port", 30001, "--dbpath", path, "--dur", "--durOptions", 8); +d = conn.getDB("test"); +d.foo.drop(); + +for (var i=0; i<1024; i++){ + d.foo.insert({_id:i}); +} + +big_string = 'x'; +while (big_string.length < 1024*1024) { + big_string += big_string; +} + +d.foo.update({$atomic:1}, {$set: {big_string: big_string}}, false, /*multi*/true); +err = d.getLastErrorObj(); + +assert(err.err == null); +assert(err.n == 1024); + +// free up space +d.dropDatabase(); + +stopMongod(30001); + +print("dur big atomic update SUCCESS"); diff --git a/jstests/slowNightly/dur_passthrough.js b/jstests/slowNightly/dur_passthrough.js new file mode 100644 index 0000000..22482e0 --- /dev/null +++ b/jstests/slowNightly/dur_passthrough.js @@ -0,0 +1,89 @@ +// runs the toplevel jstests with --dur +// +// TODO(mathias) use paranoid mode (--durOptions 8) once we are reasonably sure it will pass + +// DEBUG : set this variable to debug by skipping to a specific test to start with and go from there +//var skippingTo = /null.js/; +var skippingTo = false; + +conn = startMongodEmpty("--port", 30100, "--dbpath", "/data/db/dur_passthrough", "--dur", "--smallfiles"); +db = conn.getDB("test"); + +function durPassThrough() { + + var runnerStart = new Date() + + var ran = {}; + + /** run a test. won't run more than once. logs if fails and then throws. 
+ */ + function runTest(x) { + function _run(x) { + if (/[\/\\]_/.test(x.name) || + !/\.js$/.test(x.name) || + /repair/.test(x.name) || +// /numberlong/.test(x.name) || + false // placeholder so all real tests end in || + ) { + print("dur_passthrough.js >>>> skipping " + x.name); + return; + } + print(); + print("dur_passthrough.js run " + x.name); + print("dur_passthrough.js end " + x.name + ' ' + Date.timeFunc(function () { load(x.name); }, 1) + "ms"); + print(); + } + if (ran[x.name]) + return; + ran[x.name] = true; + try { + _run(x); + } + catch (e) { + print("\n\n\n\ndur_passthrough.js FAIL " + x.name + "\n\n\n"); + throw e; + } + } + + var files = listFiles("jstests"); + + if( !skippingTo ) { + // run something that will almost surely pass and is fast just to make sure our framework + // here is really working + runTest({ name: 'jstests/basic1.js' }); + + // run "suspicious" tests early. these are tests that have ever failed in buildbot. we run them + // early and try to get a fail fast + runTest({ name: 'jstests/shellstartparallel.js' }); + runTest({ name: 'jstests/cursora.js' }); + + // run the shell-oriented tests early. if the shell is broken the other tests aren't meaningful + runTest({ name: 'jstests/run_program1.js' }); + runTest({ name: 'jstests/shellspawn.js' }); + runTest({ name: 'jstests/shellkillop.js' }); + } + + files = files.sort(compareOn('name')); + files.forEach( + function (x) { + if (skippingTo && !skippingTo.test(x.name)) { + print("dur_passthrough.js temp skip " + x.name); + return; + } + skippingTo = false; + + // to keep memory usage low on 32 bit: + db.adminCommand("closeAllDatabases"); + + runTest(x); + } + ); + + print("dur_passthrough.js stopMongod"); + stopMongod(30100); + var runnerEnd = new Date(); + print("dur_passthrough.js total runner time: " + ((runnerEnd.getTime() - runnerStart.getTime()) / 1000) + "secs") +} + +durPassThrough(); +print("dur_passthrough.js SUCCESS"); diff --git a/jstests/slowNightly/dur_remove_old_journals.js b/jstests/slowNightly/dur_remove_old_journals.js new file mode 100644 index 0000000..3c57c12 --- /dev/null +++ b/jstests/slowNightly/dur_remove_old_journals.js @@ -0,0 +1,53 @@ +// this test makes sure that old journal files are removed + +// tunables +STRING_SIZE = 1024*1024; +NUM_TO_INSERT = 2.5*1024; +PATH = "/data/db/dur_remove_old_journals"; +SYNC_DELAY = 5; // must be a number + +conn = startMongodEmpty("--port", 30001, "--dbpath", PATH, "--dur", "--smallfiles", "--syncdelay", ''+SYNC_DELAY); +db = conn.getDB("test"); + +longString = 'x'; +while (longString.length < STRING_SIZE) + longString += longString; + +numInserted = 0; +while (numInserted < NUM_TO_INSERT){ + db.foo.insert({_id: numInserted++, s:longString}); + + + if (numInserted % 100 == 0){ + print("numInserted: " + numInserted); + db.adminCommand({fsync:1}); + db.foo.remove(); + db.adminCommand({fsync:1}); + } +} + +sleepSecs = SYNC_DELAY + 15 // long enough for data file flushing and journal keep time +print("\nWaiting " + sleepSecs + " seconds...\n"); +sleep(sleepSecs*1000); + + +files = listFiles(PATH + "/journal") +printjson(files); + +var nfiles = 0; +files.forEach(function (file) { + assert.eq('string', typeof (file.name)); // sanity checking + if (/prealloc/.test(file.name)) { + ; + } + else { + nfiles++; + assert(!(/j\._[01]/.test(file.name)), "Old journal file still exists: " + file.name); + } +}) + +assert.eq(2, nfiles); // j._2 and lsn + +stopMongod(30001); + +print("*** success ***"); diff --git a/jstests/slowNightly/geo_near_random1.js 
b/jstests/slowNightly/geo_near_random1.js new file mode 100644 index 0000000..ad67bdc --- /dev/null +++ b/jstests/slowNightly/geo_near_random1.js @@ -0,0 +1,13 @@ +// this tests all points using $near +load("jstests/libs/geo_near_random.js"); + +var test = new GeoNearRandomTest("nightly.geo_near_random1"); + +test.insertPts(200); + +test.testPt([0,0]); +test.testPt(test.mkPt()); +test.testPt(test.mkPt()); +test.testPt(test.mkPt()); +test.testPt(test.mkPt()); + diff --git a/jstests/slowNightly/geo_near_random2.js b/jstests/slowNightly/geo_near_random2.js new file mode 100644 index 0000000..d7dbc97 --- /dev/null +++ b/jstests/slowNightly/geo_near_random2.js @@ -0,0 +1,21 @@ +// this tests 1% of all points using $near and $nearSphere +load("jstests/libs/geo_near_random.js"); + +var test = new GeoNearRandomTest("nightly.geo_near_random2"); + +test.insertPts(10000); + +opts = {sphere:0, nToTest:test.nPts*0.01}; +test.testPt([0,0], opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); + +opts.sphere = 1 +test.testPt([0,0], opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); + diff --git a/jstests/slowNightly/index_check9.js b/jstests/slowNightly/index_check9.js new file mode 100644 index 0000000..6634d06 --- /dev/null +++ b/jstests/slowNightly/index_check9.js @@ -0,0 +1,118 @@ +Random.setRandomSeed(); + +t = db.test_index_check9; + +function doIt() { + +t.drop(); + +function sort() { + var sort = {}; + for( var i = 0; i < n; ++i ) { + sort[ fields[ i ] ] = Random.rand() > 0.5 ? 1 : -1; + } + return sort; +} + +var fields = [ 'a', 'b', 'c', 'd', 'e' ]; +n = Random.randInt( 5 ) + 1; +var idx = sort(); + +var chars = "abcdefghijklmnopqrstuvwxyz"; +var alphas = [] +for( var i = 0; i < n; ++i ) { + alphas.push( Random.rand() > 0.5 ); +} + +t.ensureIndex( idx ); + +function obj() { + var ret = {}; + for( var i = 0; i < n; ++i ) { + ret[ fields[ i ] ] = r( alphas[ i ] ); + } + return ret; +} + +function r( alpha ) { + if ( !alpha ) { + return Random.randInt( 10 ); + } else { + var len = Random.randInt( 10 ); + buf = ""; + for( var i = 0; i < len; ++i ) { + buf += chars.charAt( Random.randInt( chars.length ) ); + } + return buf; + } +} + +function check() { + var v = t.validate(); + if ( !t.valid ) { + printjson( t ); + assert( t.valid ); + } + var spec = {}; + for( var i = 0; i < n; ++i ) { + if ( Random.rand() > 0.5 ) { + var bounds = [ r( alphas[ i ] ), r( alphas[ i ] ) ]; + if ( bounds[ 0 ] > bounds[ 1 ] ) { + bounds.reverse(); + } + var s = {}; + if ( Random.rand() > 0.5 ) { + s[ "$gte" ] = bounds[ 0 ]; + } else { + s[ "$gt" ] = bounds[ 0 ]; + } + if ( Random.rand() > 0.5 ) { + s[ "$lte" ] = bounds[ 1 ]; + } else { + s[ "$lt" ] = bounds[ 1 ]; + } + spec[ fields[ i ] ] = s; + } else { + var vals = [] + for( var j = 0; j < Random.randInt( 15 ); ++j ) { + vals.push( r( alphas[ i ] ) ); + } + spec[ fields[ i ] ] = { $in: vals }; + } + } + s = sort(); + c1 = t.find( spec, { _id:null } ).sort( s ).hint( idx ).toArray(); + c2 = t.find( spec ).sort( s ).explain().nscanned; + c3 = t.find( spec, { _id:null } ).sort( s ).hint( {$natural:1} ).toArray(); + // assert.eq( c1, c3, "spec: " + tojson( spec ) + ", sort: " + tojson( s ) ); + // assert.eq( c1.length, c2 ); + assert.eq( c1, c3 ); +} + +for( var i = 0; i < 10000; ++i ) { + t.save( obj() ); + if( Random.rand() > 0.999 ) { + print( i ); + check(); + } +} + +for( var i = 0; i 
< 100000; ++i ) { + if ( Random.rand() > 0.9 ) { + t.save( obj() ); + } else { + t.remove( obj() ); // improve + } + if( Random.rand() > 0.999 ) { + print( i ); + check(); + } +} + +check(); + +} + +for( var z = 0; z < 5; ++z ) { + doIt(); +} \ No newline at end of file diff --git a/jstests/slowNightly/large_chunk.js b/jstests/slowNightly/large_chunk.js new file mode 100644 index 0000000..6cf40e3 --- /dev/null +++ b/jstests/slowNightly/large_chunk.js @@ -0,0 +1,51 @@ +// Where we test operations dealing with large chunks + +// Starts a new sharding environment limiting the chunksize to 2GB. +// Note that early splitting will start with a 1/4 of max size currently. +s = new ShardingTest( "large_chunk" , 2 , 2 , 1 , { chunksize : 2000 } ); + +// take the balancer out of the equation +s.config.settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true ); +s.config.settings.find().forEach( printjson ) +db = s.getDB( "test" ); + +// +// Step 1 - Test moving a large chunk +// + +// Turn on sharding on the 'test.foo' collection and generate a large chunk +s.adminCommand( { enablesharding : "test" } ); +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); + +bigString = "" +while ( bigString.length < 10000 ) + bigString += "asdasdasdasdadasdasdasdasdasdasdasdasda"; + +inserted = 0; +num = 0; +while ( inserted < ( 400 * 1024 * 1024 ) ){ + db.foo.insert( { _id : num++ , s : bigString } ); + inserted += bigString.length; +} +db.getLastError(); +assert.eq( 1 , s.config.chunks.count() , "step 1 - need one large chunk" ); + +primary = s.getServer( "test" ).getDB( "test" ); +secondary = s.getOther( primary ).getDB( "test" ); + +// Make sure that we don't move that chunk if it goes past what we consider the maximum chunk size +print("Checkpoint 1a") +max = 200 * 1024 * 1024; +moveChunkCmd = { movechunk : "test.foo" , find : { _id : 1 } , to : secondary.getMongo().name , maxChunkSizeBytes : max }; +assert.throws( function() { s.adminCommand( moveChunkCmd ); } ); + +// Move the chunk +print("checkpoint 1b"); +before = s.config.chunks.find().toArray(); +s.adminCommand( { movechunk : "test.foo" , find : { _id : 1 } , to : secondary.getMongo().name } ); +after = s.config.chunks.find().toArray(); +assert.neq( before[0].shard , after[0].shard , "move chunk did not work" ); + +s.config.changelog.find().forEach( printjson ) + +s.stop(); \ No newline at end of file diff --git a/jstests/slowNightly/moveprimary-replset.js b/jstests/slowNightly/moveprimary-replset.js new file mode 100755 index 0000000..0b6a78b --- /dev/null +++ b/jstests/slowNightly/moveprimary-replset.js @@ -0,0 +1,67 @@ +// Move db between replica set shards -Tony + +load('jstests/libs/grid.js') + +function go() { + +var N = 10000 + +// Create replica set of one server +var repset1 = new ReplicaSet('repset1', 1) .begin() +var conn1a = repset1.getMaster() +var db1a = conn1a.getDB('test') + +// Add data to it +for (var i = 1; i <= N; i++) db1a['foo'].insert({x: i}) + +// Add another server to replica set +var conn1b = repset1.addServer() +conn1b.setSlaveOk() +var db1b = conn1b.getDB('test') + +// Check that new server received replicated data +assert (db1b['foo'].count() == N, 'data did not replicate') + +// Create sharding config servers +var configset = new ConfigSet(3) +configset.begin() + +// Create sharding router (mongos) +var router = new Router(configset) +var routerConn = router.begin() +var db = routerConn.getDB('test') + +// Add repset1 as only shard +addShard (routerConn, repset1.getURL()) + +// Add data 
via router and check it +db['foo'].update({}, {$set: {y: 'hello'}}, false, true) +assert (db['foo'].count({y: 'hello'}) == N, + 'updating and counting docs via router (mongos) failed') + +// Create another replica set +var repset2 = new ReplicaSet('repset2', 2) .begin() +var conn2a = repset2.getMaster() + +// Add repset2 as second shard +addShard (routerConn, repset2.getURL()) + +routerConn.getDB('admin').printShardingStatus() +printjson (conn2a.getDBs()) + +// Move test db from repset1 to repset2 +moveDB (routerConn, 'test', repset2.getURL()) + +routerConn.getDB('admin').printShardingStatus() +printjson (conn2a.getDBs()) + +//Done +router.end() +configset.end() +repset2.stopSet() +repset1.stopSet() + +print('moveprimary-replset.js SUCCESS') +} + +go() diff --git a/jstests/slowNightly/newcollection2.js b/jstests/slowNightly/newcollection2.js new file mode 100644 index 0000000..6bf2495 --- /dev/null +++ b/jstests/slowNightly/newcollection2.js @@ -0,0 +1,11 @@ +// Allocate collection forcing just a small size remainder in 2nd extent + +port = allocatePorts( 1 )[ 0 ] +var baseName = "jstests_disk_newcollection2"; +var m = startMongod( "--noprealloc", "--smallfiles", "--port", port, "--dbpath", "/data/db/" + baseName ); +db = m.getDB( "test" ); + +db.createCollection( baseName, {size:0x1FFC0000-0x10-8192} ); +var v = db[ baseName ].validate(); +printjson( v ); +assert( v.valid ); diff --git a/jstests/slowNightly/run_sharding_passthrough.js b/jstests/slowNightly/run_sharding_passthrough.js deleted file mode 100644 index fda982b..0000000 --- a/jstests/slowNightly/run_sharding_passthrough.js +++ /dev/null @@ -1,94 +0,0 @@ -s = new ShardingTest( "auto1" , 2 , 1 , 1 ); -s.adminCommand( { enablesharding : "test" } ); -db=s.getDB("test"); - -var files = listFiles("jstests"); - -var runnerStart = new Date() - -files.forEach( - function(x) { - -// /(basic|update).*\.js$/ - if ( /[\/\\]_/.test(x.name) || - ! /\.js$/.test(x.name ) ){ - print(" >>>>>>>>>>>>>>> skipping " + x.name); - return; - } - - // Notes: - - // apply_ops1: nothing works, dunno why yet. SERVER-1439 - - // copydb, copydb2: copyDatabase seems not to work at all in - // the ShardingTest setup. SERVER-1440 - - // cursor8: cursorInfo different/meaningless(?) in mongos - // closeAllDatabases may not work through mongos - // SERVER-1441 - // deal with cursorInfo in mongos SERVER-1442 - - // dbcase: Database names are case-insensitive under ShardingTest? - // SERVER-1443 - - // These are all SERVER-1444 - // count5: limit() and maybe skip() may be unreliable - // geo3: limit() not working, I think - // or4: skip() not working? - - // shellkillop: dunno yet. SERVER-1445 - - // These should simply not be run under sharding: - // dbadmin: Uncertain Cut-n-pasting its contents into mongo worked. - // error1: getpreverror not supported under sharding - // fsync, fsync2: isn't supported through mongos - // remove5: getpreverror, I think.
don't run - // update4: getpreverror don't run - - // Around July 20, command passthrough went away, and these - // commands weren't implemented: - // clean cloneCollectionAsCapped copydbgetnonce dataSize - // datasize dbstats deleteIndexes dropIndexes forceerror - // getnonce logout medianKey profile reIndex repairDatabase - // reseterror splitVector validate - - /* missing commands : - * forceerror and switchtoclienterrors - * cloneCollectionAsCapped - * splitvector - * profile (apitest_db, cursor6, evalb) - * copydbgetnonce - * dbhash - * medianKey - * clean (apitest_dbcollection) - * logout and getnonce - */ - if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile1|dbhash|median|apitest_dbcollection|evalb|auth1|auth2)\.js$/.test(x.name)) { - print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) - return; - } - // These are bugs (some might be fixed now): - if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4)\.js$/.test(x.name)) { - print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) - return; - } - // These aren't supposed to get run under sharding: - if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4)\.js$/.test(x.name)) { - print(" >>>>>>>>>>>>>>> skipping test that would fail under sharding " + x.name) - return; - } - - print(" *******************************************"); - print(" Test : " + x.name + " ..."); - print(" " + Date.timeFunc( - function() { - load(x.name); - }, 1) + "ms"); - - } -); - - -var runnerEnd = new Date() - -print( "total runner time: " + ( ( runnerEnd.getTime() - runnerStart.getTime() ) / 1000 ) + "secs" ) diff --git a/jstests/slowNightly/sharding_balance1.js b/jstests/slowNightly/sharding_balance1.js index 840aaff..9379c4f 100644 --- a/jstests/slowNightly/sharding_balance1.js +++ b/jstests/slowNightly/sharding_balance1.js @@ -1,7 +1,7 @@ // sharding_balance1.js -s = new ShardingTest( "slow_sharding_balance1" , 2 , 2 , 1 , { chunksize : 1 } ) +s = new ShardingTest( "slow_sharding_balance1" , 2 , 1 , 1 , { chunksize : 1 } ) s.adminCommand( { enablesharding : "test" } ); diff --git a/jstests/slowNightly/sharding_balance2.js b/jstests/slowNightly/sharding_balance2.js index c94e256..3296ff6 100644 --- a/jstests/slowNightly/sharding_balance2.js +++ b/jstests/slowNightly/sharding_balance2.js @@ -1,6 +1,6 @@ // sharding_balance2.js -s = new ShardingTest( "slow_sharding_balance2" , 2 , 2 , 1 , { chunksize : 1 , manualAddShard : true } ) +s = new ShardingTest( "slow_sharding_balance2" , 2 , 1 , 1 , { chunksize : 1 , manualAddShard : true } ) names = s.getConnNames(); for ( var i=0; i .99 ){ db.getLastError() - check(); // SERVER-1430 TODO + check( "random late check" ); // SERVER-1430 } - var x = dist(); + var x = s.chunkCounts( "foo" ) if ( Math.random() > .999 ) printjson( x ) return Math.max( x.shard0000 , x.shard0001 ) - Math.min( x.shard0000 , x.shard0001 ); } function sum(){ - var x = dist(); + var x = s.chunkCounts( "foo" ) return x.shard0000 + x.shard0001; } diff --git a/jstests/slowNightly/sharding_balance_randomorder1.js b/jstests/slowNightly/sharding_balance_randomorder1.js new file mode 100644 index 0000000..05eabc6 --- /dev/null +++ b/jstests/slowNightly/sharding_balance_randomorder1.js @@ -0,0 +1,54 @@ +// sharding_balance1.js + +s = new ShardingTest( "sharding_balance_randomorder1" , 2 , 2 , 1 , { chunksize : 1 } ) + +s.adminCommand( { enablesharding : "test" } ); + +s.config.settings.find().forEach( 
printjson ) + +db = s.getDB( "test" ); + +bigString = "" +while ( bigString.length < 10000 ) + bigString += "asdasdasdasdadasdasdasdasdasdasdasdasda"; + +inserted = 0; +num = 0; +while ( inserted < ( 20 * 1024 * 1024 ) ){ + db.foo.insert( { _id : Math.random() , s : bigString } ); + inserted += bigString.length; +} + +db.getLastError(); +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); +assert.lt( 20 , s.config.chunks.count() , "setup2" ); + +function diff(){ + var x = s.chunkCounts( "foo" ); + printjson( x ) + return Math.max( x.shard0000 , x.shard0001 ) - Math.min( x.shard0000 , x.shard0001 ); +} + +function sum(){ + var x = s.chunkCounts( "foo" ); + return x.shard0000 + x.shard0001; +} + +assert.lt( 20 , diff() , "big differential here" ); +print( diff() ) + +assert.soon( function(){ + var d = diff(); + return d < 5; +} , "balance didn't happen" , 1000 * 60 * 3 , 5000 ); + +var chunkCount = sum(); +s.adminCommand( { removeshard: "shard0000" } ); + +assert.soon( function(){ + printjson(s.chunkCounts( "foo" )); + s.config.shards.find().forEach(function(z){printjson(z);}); + return chunkCount == s.config.chunks.count({shard: "shard0001"}); +} , "removeshard didn't happen" , 1000 * 60 * 3 , 5000 ); + +s.stop(); diff --git a/jstests/slowNightly/sharding_cursors1.js b/jstests/slowNightly/sharding_cursors1.js index 307e8d7..de59b0d 100644 --- a/jstests/slowNightly/sharding_cursors1.js +++ b/jstests/slowNightly/sharding_cursors1.js @@ -1,4 +1,4 @@ -s = new ShardingTest( "cursors1" , 2 , 0 , 1 , { chunksize : 1 } ) +s = new ShardingTest( "sharding_cursors1" , 2 , 0 , 1 , { chunksize : 1 } ) s.adminCommand( { enablesharding : "test" } ); @@ -17,6 +17,10 @@ toInsert = ( 1 * 1000 * 1000 ); for (var i=0; i < toInsert; i++ ){ db.foo.insert( { i: i, r: Math.random(), s: bigString } ); assert.eq(db.getLastError(), null, 'no error'); //SERVER-1541 + + if ( i % 1000 == 999 ) { + print( "already inserted " + ( i + 1 ) ); + } } inserted = toInsert; diff --git a/jstests/slowNightly/sharding_multiple_collections.js b/jstests/slowNightly/sharding_multiple_collections.js new file mode 100644 index 0000000..61d9911 --- /dev/null +++ b/jstests/slowNightly/sharding_multiple_collections.js @@ -0,0 +1,53 @@ +// multcollections.js + +s = new ShardingTest( "multcollections" , 2 , 1 , 1 , { chunksize : 1 } ); + +s.adminCommand( { enablesharding : "test" } ); + +db = s.getDB( "test" ) + +N = 100000 + +S = "" +while ( S.length < 500 ) + S += "123123312312"; + +for ( i=0; i>>>>>>>>>>>>>> skipping " + x.name); + return; + } + + // Notes: + + // apply_ops1: nothing works, dunno why yet. SERVER-1439 + + // copydb, copydb2: copyDatabase seems not to work at all in + // the ShardingTest setup. SERVER-1440 + + // cursor8: cursorInfo different/meaningless(?) in mongos + // closeAllDatabases may not work through mongos + // SERVER-1441 + // deal with cursorInfo in mongos SERVER-1442 + + // dbcase: Database names are case-insensitive under ShardingTest? + // SERVER-1443 + + // These are all SERVER-1444 + // count5: limit() and maybe skip() may be unreliable + // geo3: limit() not working, I think + // or4: skip() not working? + + // shellkillop: dunno yet. SERVER-1445 + + // These should simply not be run under sharding: + // dbadmin: Uncertain Cut-n-pasting its contents into mongo worked. + // error1: getpreverror not supported under sharding + // fsync, fsync2: isn't supported through mongos + // remove5: getpreverror, I think. 
don't run + // update4: getpreverror don't run + + // Around July 20, command passthrough went away, and these + // commands weren't implemented: + // clean cloneCollectionAsCapped copydbgetnonce dataSize + // datasize dbstats deleteIndexes dropIndexes forceerror + // getnonce logout medianKey profile reIndex repairDatabase + // reseterror splitVector validate + + /* missing commands : + * forceerror and switchtoclienterrors + * cloneCollectionAsCapped + * splitvector + * profile (apitest_db, cursor6, evalb) + * copydbgetnonce + * dbhash + * medianKey + * clean (apitest_dbcollection) + * logout and getnonce + */ + if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile1|dbhash|median|apitest_dbcollection|evalb|evald|eval_nolock|auth1|auth2|unix_socket\d*)\.js$/.test(x.name)) { + print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) + return; + } + // These are bugs (some might be fixed now): + if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4)\.js$/.test(x.name)) { + print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) + return; + } + // These aren't supposed to get run under sharding: + if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4|notablescan|check_shard_index|mr_replaceIntoDB)\.js$/.test(x.name)) { + print(" >>>>>>>>>>>>>>> skipping test that would fail under sharding " + x.name) + return; + } + + print(" *******************************************"); + print(" Test : " + x.name + " ..."); + print(" " + Date.timeFunc( + function() { + load(x.name); + }, 1) + "ms"); + + } +); + + +var runnerEnd = new Date() + +print( "total runner time: " + ( ( runnerEnd.getTime() - runnerStart.getTime() ) / 1000 ) + "secs" ) diff --git a/jstests/slowNightly/sharding_rs1.js b/jstests/slowNightly/sharding_rs1.js index b7d90ba..4ad126e 100644 --- a/jstests/slowNightly/sharding_rs1.js +++ b/jstests/slowNightly/sharding_rs1.js @@ -43,10 +43,19 @@ function diff(){ assert.lt( 20 , diff() , "big differential here" ); print( diff() ) +{ + // quick test for SERVER-2686 + var mydbs = db.getMongo().getDBs().databases; + for ( var i=0; i .5 ) + t.remove( { _id : i } ) + else + t.insert( { _id : i , s : s } ) + } + + //printjson( t.stats() ); + + assert.eq( orig.storageSize , t.stats().storageSize , "B" + j ) +} diff --git a/jstests/slowWeekly/dur_passthrough.js b/jstests/slowWeekly/dur_passthrough.js new file mode 100644 index 0000000..1840fb7 --- /dev/null +++ b/jstests/slowWeekly/dur_passthrough.js @@ -0,0 +1,44 @@ +// +// simple runner to run toplevel tests in jstests +// + +//TODO(mathias) add --master or make another test +//conn = startMongodEmpty("--port", 30200, "--dbpath", "/data/db/dur_passthrough", "--dur", "--smallfiles", "--durOptions", "24"); +conn = startMongodEmpty("--port", 30200, "--dbpath", "/data/db/dur_passthrough", "--dur", "--smallfiles", "--durOptions", "8"); +db = conn.getDB("test"); + +var files = listFiles("jstests"); +files = files.sort(compareOn('name')); + +var runnerStart = new Date() + +files.forEach( + function (x) { + + if (/[\/\\]_/.test(x.name) || + !/\.js$/.test(x.name) || + /repair/.test(x.name) || // fails on recovery + /shellkillop/.test(x.name) || // takes forever and don't test anything new + false // placeholder so all real tests end in || + ) + { + print(" >>>>>>>>>>>>>>> skipping " + x.name); + return; + } + + print(); + print(" *******************************************"); + print(" Test : " + x.name + " ..."); + 
print(" " + Date.timeFunc(function () { load(x.name); }, 1) + "ms"); + + } +); + +stopMongod(30200); + +var runnerEnd = new Date() + +print( "total runner time: " + ( ( runnerEnd.getTime() - runnerStart.getTime() ) / 1000 ) + "secs" ) + +//TODO(mathias): test recovery here + diff --git a/jstests/slowWeekly/geo_near_random1.js b/jstests/slowWeekly/geo_near_random1.js new file mode 100644 index 0000000..5ddfd26 --- /dev/null +++ b/jstests/slowWeekly/geo_near_random1.js @@ -0,0 +1,13 @@ +// this tests all points using $near +load("jstests/libs/geo_near_random.js"); + +var test = new GeoNearRandomTest("weekly.geo_near_random1"); + +test.insertPts(1000); + +test.testPt([0,0]); +test.testPt(test.mkPt()); +test.testPt(test.mkPt()); +test.testPt(test.mkPt()); +test.testPt(test.mkPt()); + diff --git a/jstests/slowWeekly/geo_near_random2.js b/jstests/slowWeekly/geo_near_random2.js new file mode 100644 index 0000000..9e93657 --- /dev/null +++ b/jstests/slowWeekly/geo_near_random2.js @@ -0,0 +1,21 @@ +// this tests 1% of all points using $near and $nearSphere +load("jstests/libs/geo_near_random.js"); + +var test = new GeoNearRandomTest("weekly.geo_near_random2"); + +test.insertPts(50000); + +opts = {sphere:0, nToTest:test.nPts*0.01}; +test.testPt([0,0], opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); +test.testPt(test.mkPt(), opts); + +opts.sphere = 1 +test.testPt([0,0], opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); +test.testPt(test.mkPt(0.8), opts); + diff --git a/jstests/slowWeekly/indexbg_dur.js b/jstests/slowWeekly/indexbg_dur.js new file mode 100644 index 0000000..5fbe0e7 --- /dev/null +++ b/jstests/slowWeekly/indexbg_dur.js @@ -0,0 +1,67 @@ +/** + * Kill mongod during a background index build and ensure that the bad index + * can be dropped on restart. + */ + +function countFields( x ) { + var count = 0; + for( var i in x ) { + ++count; + } + return count; +} + +size = 100000; +while( 1 ) { + print( "size: " + size ); + + var testname = "index_build"; + var path = "/data/db/" + testname+"_dur"; + conn = startMongodEmpty("--port", 30001, "--dbpath", path, "--dur", "--smallfiles", "--durOptions", 8); + t = conn.getDB( testname ).getCollection( testname ); + + for( var i = 0; i < size; ++i ) { + t.save( {i:i} ); + } + t.getDB().getLastError(); + x = startMongoProgramNoConnect( "mongo", "--eval", "db.getSisterDB( '" + testname + "' )." + testname + ".ensureIndex( {i:1}, {background:true} );", conn.host ); + sleep( 1000 ); + stopMongod( 30001, /* signal */ 9 ); + waitProgram( x ); + + conn = startMongodNoReset("--port", 30001, "--dbpath", path, "--dur", "--smallfiles", "--durOptions", 8); + t = conn.getDB( testname ).getCollection( testname ); + + var statsSize = countFields( t.stats().indexSizes ); + var nsSize = conn.getDB( testname ).system.indexes.count( {ns:testname+'.'+testname} ); + + // If index build completed before the kill, try again with more data. 
+ if ( !( statsSize == 1 && nsSize == 2 ) ) { + print( "statsSize: " + statsSize + ", nsSize: " + nsSize + ", retrying with more data" ); + stopMongod( 30001 ); + size *= 2; + continue; + } + + assert.eq( "index not found", t.dropIndex( "i_1" ).errmsg ); + + var statsSize = countFields( t.stats().indexSizes ); + var nsSize = conn.getDB( testname ).system.indexes.count( {ns:testname+'.'+testname} ); + + assert.eq( statsSize, nsSize ); + assert( t.validate().valid ); + // TODO check that index namespace is cleaned up as well once that is implemented + + t.ensureIndex( {i:1} ); + var statsSize = countFields( t.stats().indexSizes ); + var nsSize = conn.getDB( testname ).system.indexes.count( {ns:testname+'.'+testname} ); + + assert.eq( 2, statsSize ); + assert.eq( 2, nsSize ); + + exp = t.find( {i:20} ).explain(); + assert.eq( 1, exp.n ); + assert.eq( 'BtreeCursor i_1', exp.cursor ); + + break; +} diff --git a/jstests/slowWeekly/query_yield1.js b/jstests/slowWeekly/query_yield1.js index e996b53..1a95b87 100644 --- a/jstests/slowWeekly/query_yield1.js +++ b/jstests/slowWeekly/query_yield1.js @@ -2,10 +2,10 @@ t = db.query_yield1; t.drop() -N = 10000; +N = 20000; i = 0; -q = function(){ var x=this.n; for ( var i=0; i<500; i++ ){ x = x * 2; } return false; } +q = function(){ var x=this.n; for ( var i=0; i<250; i++ ){ x = x * 2; } return false; } while ( true ){ function fill(){ @@ -59,7 +59,7 @@ while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ assert.eq( 1 , x.inprog.length , "nothing in prog" ); } - assert.gt( 50 , me ); + assert.gt( 200 , me , "took too long for me to run" ); if ( x.inprog.length == 0 ) break; diff --git a/jstests/slowWeekly/query_yield2.js b/jstests/slowWeekly/query_yield2.js index e13fabe..dd7e5d9 100644 --- a/jstests/slowWeekly/query_yield2.js +++ b/jstests/slowWeekly/query_yield2.js @@ -2,10 +2,10 @@ t = db.query_yield2; t.drop() -N = 100; +N = 200; i = 0; -q = function(){ var x=this.n; for ( var i=0; i<50000; i++ ){ x = x * 2; } return false; } +q = function(){ var x=this.n; for ( var i=0; i<25000; i++ ){ x = x * 2; } return false; } while ( true ){ function fill(){ @@ -59,7 +59,7 @@ while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ assert.eq( 1 , x.inprog.length , "nothing in prog" ); } - assert.gt( 75 , me ); + assert.gt( 100 , me ); if ( x.inprog.length == 0 ) break; diff --git a/jstests/slowWeekly/update_yield1.js b/jstests/slowWeekly/update_yield1.js index 2e63690..7e95855 100644 --- a/jstests/slowWeekly/update_yield1.js +++ b/jstests/slowWeekly/update_yield1.js @@ -27,7 +27,7 @@ while ( true ){ timeUpdate(); time = timeUpdate(); print( N + "\t" + time ); - if ( time > 2000 ) + if ( time > 8000 ) break; N *= 2; @@ -47,13 +47,14 @@ num = 0; start = new Date(); while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ var me = Date.timeFunc( function(){ t.findOne(); } ); + if (me > 50) print("time: " + me); if ( num++ == 0 ){ var x = db.currentOp() assert.eq( 1 , x.inprog.length , "nothing in prog" ); } - assert.gt( 50 , me ); + assert.gt( 2000 , me ); } join(); @@ -65,14 +66,16 @@ assert.eq( 0 , x.inprog.length , "weird 2" ); join = startParallelShell( "db.update_yield1.update( { $atomic : true } , { $inc : { n : 1 } } , false , true ); db.getLastError()" ); -assert.soon( - function(){ - return db.currentOp().inprog.length > 0; - } , "never doing update 2" -); +sleep(1000); // wait for shell startup ops to finish + +var x = db.currentOp(); +printjson(x); +assert.eq(1, x.inprog.length, "never doing update 2"); +assert.eq("update", 
x.inprog[0].op); + +t.findOne(); // should wait for update to finish -t.findOne(); var x = db.currentOp() -assert.eq( 0 , x.inprog.length , "should have been atomic" ); +assert.eq( [] , x.inprog , "should have been atomic" ); join(); diff --git a/jstests/sort2.js b/jstests/sort2.js index facd64c..1e21414 100644 --- a/jstests/sort2.js +++ b/jstests/sort2.js @@ -1,6 +1,6 @@ // test sorting, mainly a test ver simple with no index -t = db.sorrrt2; +t = db.sort2; t.drop(); t.save({x:1, y:{a:5,b:4}}); diff --git a/jstests/splitvector.js b/jstests/splitvector.js index 8d86319..da93486 100644 --- a/jstests/splitvector.js +++ b/jstests/splitvector.js @@ -11,7 +11,7 @@ // e.g. 20000 // @param maxChunkSize is in MBs. // -assertChunkSizes = function ( splitVec , numDocs , maxChunkSize ){ +assertChunkSizes = function ( splitVec , numDocs , maxChunkSize , msg ){ splitVec = [{ x: -1 }].concat( splitVec ); splitVec.push( { x: numDocs+1 } ); for ( i=0; i b.t ) + return 1; + + return a.i - b.i; +} + +for ( i=0; i +#include "boost/thread/once.hpp" #include #include #include @@ -107,16 +109,17 @@ namespace mongo { const int VERSION_MINOR = 5; enum ExitCode { - EXIT_CLEAN = 0 , - EXIT_BADOPTIONS = 2 , + EXIT_CLEAN = 0 , + EXIT_BADOPTIONS = 2 , EXIT_REPLICATION_ERROR = 3 , EXIT_NEED_UPGRADE = 4 , + EXIT_SHARDING_ERROR = 5 , EXIT_KILL = 12 , - EXIT_ABRUBT = 14 , + EXIT_ABRUPT = 14 , EXIT_NTSERVICE_ERROR = 20 , EXIT_JAVA = 21 , - EXIT_OOM_MALLOC = 42 , - EXIT_OOM_REALLOC = 43 , + EXIT_OOM_MALLOC = 42 , + EXIT_OOM_REALLOC = 43 , EXIT_FS = 45 , EXIT_CLOCK_SKEW = 47 , EXIT_NET_ERROR = 48 , @@ -126,7 +129,7 @@ namespace mongo { }; - void dbexit( ExitCode returnCode, const char *whyMsg = ""); + void dbexit( ExitCode returnCode, const char *whyMsg = "", bool tryToGetLock = false); /** this is here so you can't just type exit() to quit the program @@ -135,10 +138,7 @@ namespace mongo { */ void exit( ExitCode returnCode ); bool inShutdown(); - -} // namespace mongo -namespace mongo { using namespace boost::filesystem; void asserted(const char *msg, const char *file, unsigned line); } @@ -156,10 +156,6 @@ namespace mongo { void sayDbContext(const char *msg = 0); void rawOut( const string &s ); -} // namespace mongo - -namespace mongo { - typedef char _TCHAR; using boost::uint32_t; diff --git a/rpm/init.d-mongod b/rpm/init.d-mongod index 5ee8379..b7d4567 100644 --- a/rpm/init.d-mongod +++ b/rpm/init.d-mongod @@ -61,10 +61,11 @@ case "$1" in restart ;; condrestart) - [ -f /var/lock/subsys/mongodb ] && restart || : + [ -f /var/lock/subsys/mongod ] && restart || : ;; status) status $mongod + RETVAL=$? ;; *) echo "Usage: $0 {start|stop|status|restart|reload|force-reload|condrestart}" diff --git a/rpm/mongo.spec b/rpm/mongo.spec index 98f4d39..5ef543b 100644 --- a/rpm/mongo.spec +++ b/rpm/mongo.spec @@ -1,5 +1,5 @@ Name: mongo -Version: 1.6.5 +Version: 1.8.0 Release: mongodb_1%{?dist} Summary: mongo client shell and tools License: AGPL 3.0 @@ -105,6 +105,7 @@ fi %{_bindir}/mongoimport %{_bindir}/mongorestore %{_bindir}/mongostat +%{_bindir}/bsondump %{_mandir}/man1/mongo.1* %{_mandir}/man1/mongod.1* diff --git a/rpm/mongod.conf b/rpm/mongod.conf index 99346ef..1530199 100644 --- a/rpm/mongod.conf +++ b/rpm/mongod.conf @@ -78,14 +78,3 @@ dbpath=/var/lib/mongo # or #master = true #source = slave.example.com - -# Address of a server to pair with. -#pairwith = -# Address of arbiter server. -#arbiter = -# Automatically resync if slave data is stale -#autoresync -# Custom size for replication operation log. 
-#oplogSize = -# Size limit for in-memory storage of op ids. -#opIdMem = diff --git a/s/balance.cpp b/s/balance.cpp index 33cafdf..ee0c992 100644 --- a/s/balance.cpp +++ b/s/balance.cpp @@ -1,4 +1,4 @@ -// balance.cpp +//@file balance.cpp /** * Copyright (C) 2008 10gen Inc. @@ -31,10 +31,10 @@ #include "grid.h" namespace mongo { - + Balancer balancer; - Balancer::Balancer() : _balancedLastTime(0), _policy( new BalancerPolicy ){} + Balancer::Balancer() : _balancedLastTime(0), _policy( new BalancerPolicy ) {} Balancer::~Balancer() { delete _policy; @@ -43,15 +43,15 @@ namespace mongo { int Balancer::_moveChunks( const vector* candidateChunks ) { int movedCount = 0; - for ( vector::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ){ + for ( vector::const_iterator it = candidateChunks->begin(); it != candidateChunks->end(); ++it ) { const CandidateChunk& chunkInfo = *it->get(); DBConfigPtr cfg = grid.getDBConfig( chunkInfo.ns ); assert( cfg ); - + ChunkManagerPtr cm = cfg->getChunkManager( chunkInfo.ns ); assert( cm ); - + const BSONObj& chunkToMove = chunkInfo.chunk; ChunkPtr c = cm->findChunk( chunkToMove["min"].Obj() ); if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) || c->getMax().woCompare( chunkToMove["max"].Obj() ) ) { @@ -61,62 +61,65 @@ namespace mongo { c = cm->findChunk( chunkToMove["min"].Obj() ); if ( c->getMin().woCompare( chunkToMove["min"].Obj() ) || c->getMax().woCompare( chunkToMove["max"].Obj() ) ) { - log() << "chunk mismatch after reload, ignoring will retry issue cm: " + log() << "chunk mismatch after reload, ignoring will retry issue cm: " << c->getMin() << " min: " << chunkToMove["min"].Obj() << endl; continue; } } - - string errmsg; - if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , errmsg ) ){ + + BSONObj res; + if ( c->moveAndCommit( Shard::make( chunkInfo.to ) , Chunk::MaxChunkSize , res ) ) { movedCount++; continue; } - log() << "MOVE FAILED **** " << errmsg << "\n" - << " from: " << chunkInfo.from << " to: " << chunkInfo.to << " chunk: " << chunkToMove << endl; + // the move requires acquiring the collection metadata's lock, which can fail + log() << "balacer move failed: " << res << " from: " << chunkInfo.from << " to: " << chunkInfo.to + << " chunk: " << chunkToMove << endl; + + if ( res["chunkTooBig"].trueValue() ) { + // reload just to be safe + cm = cfg->getChunkManager( chunkInfo.ns ); + assert( cm ); + c = cm->findChunk( chunkToMove["min"].Obj() ); + + log() << "forcing a split because migrate failed for size reasons" << endl; + + res = BSONObj(); + c->singleSplit( true , res ); + log() << "forced split results: " << res << endl; + + // TODO: if the split fails, mark as jumbo SERVER-2571 + } } return movedCount; } - - void Balancer::_ping(){ - assert( _myid.size() && _started ); - try { - ScopedDbConnection conn( configServer.getPrimary() ); - _ping( conn.conn() ); - conn.done(); - } - catch ( std::exception& e ){ - log() << "bare ping failed: " << e.what() << endl; - } - - } - void Balancer::_ping( DBClientBase& conn ){ + void Balancer::_ping( DBClientBase& conn ) { WriteConcern w = conn.getWriteConcern(); conn.setWriteConcern( W_NONE ); - conn.update( ShardNS::mongos , - BSON( "_id" << _myid ) , - BSON( "$set" << BSON( "ping" << DATENOW << "up" << (int)(time(0)-_started) ) ) , - true ); + conn.update( ShardNS::mongos , + BSON( "_id" << _myid ) , + BSON( "$set" << BSON( "ping" << DATENOW << "up" << (int)(time(0)-_started) ) ) , + true ); conn.setWriteConcern( w); } - - bool Balancer::_checkOIDs(){ + + bool 
Balancer::_checkOIDs() { vector all; Shard::getAllShards( all ); - + map oids; - - for ( vector::iterator i=all.begin(); i!=all.end(); ++i ){ + + for ( vector::iterator i=all.begin(); i!=all.end(); ++i ) { Shard s = *i; BSONObj f = s.runCommand( "admin" , "features" ); - if ( f["oidMachine"].isNumber() ){ + if ( f["oidMachine"].isNumber() ) { int x = f["oidMachine"].numberInt(); - if ( oids.count(x) == 0 ){ + if ( oids.count(x) == 0 ) { oids[x] = s; } else { @@ -133,7 +136,7 @@ namespace mongo { return true; } - void Balancer::_doBalanceRound( DBClientBase& conn, vector* candidateChunks ){ + void Balancer::_doBalanceRound( DBClientBase& conn, vector* candidateChunks ) { assert( candidateChunks ); // @@ -143,8 +146,8 @@ namespace mongo { auto_ptr cursor = conn.query( ShardNS::collection , BSONObj() ); vector< string > collections; - while ( cursor->more() ){ - BSONObj col = cursor->next(); + while ( cursor->more() ) { + BSONObj col = cursor->nextSafe(); // sharded collections will have a shard "key". if ( ! col["key"].eoo() ) @@ -164,7 +167,7 @@ namespace mongo { // // TODO: skip unresponsive shards and mark information as stale. // - + vector allShards; Shard::getAllShards( allShards ); if ( allShards.size() < 2) { @@ -172,14 +175,16 @@ namespace mongo { return; } - map< string, BSONObj > shardLimitsMap; - for ( vector::const_iterator it = allShards.begin(); it != allShards.end(); ++it ){ + map< string, BSONObj > shardLimitsMap; + for ( vector::const_iterator it = allShards.begin(); it != allShards.end(); ++it ) { const Shard& s = *it; ShardStatus status = s.getStatus(); - BSONObj limitsObj = BSON( ShardFields::maxSize( s.getMaxSize() ) << - ShardFields::currSize( status.mapped() ) << - ShardFields::draining( s.isDraining()) ); + BSONObj limitsObj = BSON( ShardFields::maxSize( s.getMaxSize() ) << + LimitsFields::currSize( status.mapped() ) << + ShardFields::draining( s.isDraining() ) << + LimitsFields::hasOpsQueued( status.hasOpsQueued() ) + ); shardLimitsMap[ s.getName() ] = limitsObj; } @@ -193,8 +198,8 @@ namespace mongo { map< string,vector > shardToChunksMap; cursor = conn.query( ShardNS::chunk , QUERY( "ns" << ns ).sort( "min" ) ); - while ( cursor->more() ){ - BSONObj chunk = cursor->next(); + while ( cursor->more() ) { + BSONObj chunk = cursor->nextSafe(); vector& chunks = shardToChunksMap[chunk["shard"].String()]; chunks.push_back( chunk.getOwned() ); } @@ -204,8 +209,8 @@ namespace mongo { log(1) << "skipping empty collection (" << ns << ")"; continue; } - - for ( vector::iterator i=allShards.begin(); i!=allShards.end(); ++i ){ + + for ( vector::iterator i=allShards.begin(); i!=allShards.end(); ++i ) { // this just makes sure there is an entry in shardToChunksMap for every shard Shard s = *i; shardToChunksMap[s.getName()].size(); @@ -216,75 +221,109 @@ namespace mongo { } } - void Balancer::run(){ + bool Balancer::_init() { + try { + + log() << "about to contact config servers and shards" << endl; + + // contact the config server and refresh shard information + // checks that each shard is indeed a different process (no hostname mixup) + // these checks are redundant in that they're redone at every new round but we want to do them initially here + // so to catch any problem soon + Shard::reloadShardInfo(); + _checkOIDs(); + + log() << "config servers and shards contacted successfully" << endl; - { // init stuff, don't want to do at static init StringBuilder buf; buf << getHostNameCached() << ":" << cmdLine.port; _myid = buf.str(); - log(1) << "balancer myid: " << _myid << endl; - 
_started = time(0); - Shard::reloadShardInfo(); + log() << "balancer id: " << _myid << " started at " << time_t_to_String_short(_started) << endl; + + return true; + } - - _ping(); - _checkOIDs(); + catch ( std::exception& ) { + log( LL_WARNING ) << "could not initialize balancer, please check that all shards and config servers are up" << endl; + return false; + + } + } + + void Balancer::run() { + + // this is the body of a BackgroundJob so if we throw here we're basically ending the balancer thread prematurely + while ( ! inShutdown() ) { + + if ( ! _init() ) { + log() << "will retry to initialize balancer in one minute" << endl; + sleepsecs( 60 ); + continue; + } + + break; + } + + // getConnectioString and the constructor of a DistributedLock do not throw, which is what we expect on while + // on the balancer thread ConnectionString config = configServer.getConnectionString(); DistributedLock balanceLock( config , "balancer" ); - while ( ! inShutdown() ){ - + while ( ! inShutdown() ) { + try { + + // first make sure we should even be running + if ( ! grid.shouldBalance() ) { + log(1) << "skipping balancing round because balancing is disabled" << endl; + sleepsecs( 30 ); + continue; + } + + ScopedDbConnection conn( config ); - _ping( conn.conn() ); - if ( ! _checkOIDs() ){ + _ping( conn.conn() ); + if ( ! _checkOIDs() ) { uassert( 13258 , "oids broken after resetting!" , _checkOIDs() ); } - + // use fresh shard state - Shard::reloadShardInfo(); + Shard::reloadShardInfo(); dist_lock_try lk( &balanceLock , "doing balance round" ); - if ( ! lk.got() ){ - log(1) << "skipping balancing round during ongoing split or move activity." << endl; + if ( ! lk.got() ) { + log(1) << "skipping balancing round because another balancer is active" << endl; conn.done(); sleepsecs( 30 ); // no need to wake up soon continue; } - - if ( ! grid.shouldBalance() ) { - log(1) << "skipping balancing round because balancing is disabled" << endl;; - conn.done(); - - sleepsecs( 30 ); - continue; - } - log(1) << "*** start balancing round" << endl; + log(1) << "*** start balancing round" << endl; vector candidateChunks; _doBalanceRound( conn.conn() , &candidateChunks ); if ( candidateChunks.size() == 0 ) { log(1) << "no need to move any chunk" << endl; - } else { + } + else { _balancedLastTime = _moveChunks( &candidateChunks ); } - log(1) << "*** end of balancing round" << endl; + log(1) << "*** end of balancing round" << endl; conn.done(); sleepsecs( _balancedLastTime ? 5 : 10 ); } - catch ( std::exception& e ){ + catch ( std::exception& e ) { log() << "caught exception while doing balance: " << e.what() << endl; // Just to match the opening statement if in log level 1 - log(1) << "*** End of balancing round" << endl; + log(1) << "*** End of balancing round" << endl; sleepsecs( 30 ); // sleep a fair amount b/c of error continue; diff --git a/s/balance.h b/s/balance.h index cafae11..0ad2647 100644 --- a/s/balance.h +++ b/s/balance.h @@ -1,4 +1,4 @@ -// balance.h +//@file balance.h /** * Copyright (C) 2008 10gen Inc. @@ -24,7 +24,16 @@ #include "balancer_policy.h" namespace mongo { - + + /** + * The balancer is a background task that tries to keep the number of chunks across all servers of the cluster even. Although + * every mongos will have one balancer running, only one of them will be active at the any given point in time. The balancer + * uses a 'DistributedLock' for that coordination. + * + * The balancer does act continuously but in "rounds". 
At a given round, it would decide if there is an imbalance by + * checking the difference in chunks between the most and least loaded shards. It would issue a request for a chunk + * migration per round, if it found so. + */ class Balancer : public BackgroundJob { public: Balancer(); @@ -34,47 +43,63 @@ namespace mongo { virtual void run(); - virtual string name() { return "Balancer"; } + virtual string name() const { return "Balancer"; } private: typedef BalancerPolicy::ChunkInfo CandidateChunk; typedef shared_ptr CandidateChunkPtr; + // hostname:port of my mongos + string _myid; + + // time the Balancer started running + time_t _started; + + // number of moved chunks in last round + int _balancedLastTime; + + // decide which chunks to move; owned here. + BalancerPolicy* _policy; + + /** + * Checks that the balancer can connect to all servers it needs to do its job. + * + * @return true if balancing can be started + * + * This method throws on a network exception + */ + bool _init(); + /** - * Gathers all the necessary information about shards and chunks, and - * decides whether there are candidate chunks to be moved. + * Gathers all the necessary information about shards and chunks, and decides whether there are candidate chunks to + * be moved. + * + * @param conn is the connection with the config server(s) + * @param candidateChunks (IN/OUT) filled with candidate chunks, one per collection, that could possibly be moved */ void _doBalanceRound( DBClientBase& conn, vector* candidateChunks ); /** - * Execute the chunk migrations described in 'candidateChunks' and - * returns the number of chunks effectively moved. + * Issues chunk migration request, one at a time. + * + * @param candidateChunks possible chunks to move + * @return number of chunks effectively moved */ int _moveChunks( const vector* candidateChunks ); /** - * Check the health of the master configuration server + * Marks this balancer as being live on the config server(s). + * + * @param conn is the connection with the config server(s) */ - void _ping(); void _ping( DBClientBase& conn ); /** - * @return true if everything is ok + * @return true if all the servers listed in configdb as being shards are reachable and are distinct processes */ bool _checkOIDs(); - // internal state - - string _myid; // hostname:port of my mongos - time_t _started; // time Balancer starte running - int _balancedLastTime; // number of moved chunks in last round - BalancerPolicy* _policy; // decide which chunks to move; owned here. 
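Because the new Balancer::run() above is spread over several hunks, here is a condensed outline of one balancing round using the same names (grid, configServer, dist_lock_try, _doBalanceRound, _moveChunks). This is a simplified sketch of the control flow only; init retries, OID checks and shard-info reloads are omitted.

    // Condensed sketch of the per-round control flow in Balancer::run() above.
    DistributedLock balanceLock( configServer.getConnectionString() , "balancer" );
    while ( ! inShutdown() ) {
        if ( ! grid.shouldBalance() ) { sleepsecs( 30 ); continue; }      // balancing switched off

        ScopedDbConnection conn( configServer.getConnectionString() );
        _ping( conn.conn() );                                             // mark this balancer as live

        dist_lock_try lk( &balanceLock , "doing balance round" );
        if ( ! lk.got() ) { conn.done(); sleepsecs( 30 ); continue; }     // another mongos holds the lock

        vector<CandidateChunkPtr> candidateChunks;
        _doBalanceRound( conn.conn() , &candidateChunks );                // at most one candidate per collection
        _balancedLastTime = _moveChunks( &candidateChunks );              // migrations issued one at a time

        conn.done();
        sleepsecs( _balancedLastTime ? 5 : 10 );                          // nap less if something actually moved
    }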
- - // non-copyable, non-assignable - - Balancer(const Balancer&); - Balancer operator=(const Balancer&); }; - + extern Balancer balancer; } diff --git a/s/balancer_policy.cpp b/s/balancer_policy.cpp index 98619c0..2098a1f 100644 --- a/s/balancer_policy.cpp +++ b/s/balancer_policy.cpp @@ -28,54 +28,62 @@ namespace mongo { - BalancerPolicy::ChunkInfo* BalancerPolicy::balance( const string& ns, - const ShardToLimitsMap& shardToLimitsMap, - const ShardToChunksMap& shardToChunksMap, - int balancedLastTime ){ + // limits map fields + BSONField LimitsFields::currSize( "currSize" ); + BSONField LimitsFields::hasOpsQueued( "hasOpsQueued" ); + + BalancerPolicy::ChunkInfo* BalancerPolicy::balance( const string& ns, + const ShardToLimitsMap& shardToLimitsMap, + const ShardToChunksMap& shardToChunksMap, + int balancedLastTime ) { pair min("",numeric_limits::max()); pair max("",0); vector drainingShards; - - for (ShardToChunksIter i = shardToChunksMap.begin(); i!=shardToChunksMap.end(); ++i ){ - // Find whether this shard has reached its size cap or whether it is being removed. + for (ShardToChunksIter i = shardToChunksMap.begin(); i!=shardToChunksMap.end(); ++i ) { + + // Find whether this shard's capacity or availability are exhausted const string& shard = i->first; BSONObj shardLimits; ShardToLimitsIter it = shardToLimitsMap.find( shard ); if ( it != shardToLimitsMap.end() ) shardLimits = it->second; const bool maxedOut = isSizeMaxed( shardLimits ); const bool draining = isDraining( shardLimits ); + const bool opsQueued = hasOpsQueued( shardLimits ); - // Check whether this shard is a better chunk receiver then the current one. - // Maxed out shards or draining shards cannot be considered receivers. + // Is this shard a better chunk receiver then the current one? + // Shards that would be bad receiver candidates: + // + maxed out shards + // + draining shards + // + shards with operations queued for writeback const unsigned size = i->second.size(); - if ( ! maxedOut && ! draining ){ - if ( size < min.second ){ + if ( ! maxedOut && ! draining && ! opsQueued ) { + if ( size < min.second ) { min = make_pair( shard , size ); } } // Check whether this shard is a better chunk donor then the current one. // Draining shards take a lower priority than overloaded shards. - if ( size > max.second ){ - max = make_pair( shard , size ); + if ( size > max.second ) { + max = make_pair( shard , size ); } - if ( draining && (size > 0)){ + if ( draining && (size > 0)) { drainingShards.push_back( shard ); } } - // If there is no candidate chunk receiver -- they may have all been maxed out, - // draining, ... -- there's not much that the policy can do. - if ( min.second == numeric_limits::max() ){ + // If there is no candidate chunk receiver -- they may have all been maxed out, + // draining, ... -- there's not much that the policy can do. + if ( min.second == numeric_limits::max() ) { log() << "no availalable shards to take chunks" << endl; return NULL; } - + log(1) << "collection : " << ns << endl; log(1) << "donor : " << max.second << " chunks on " << max.first << endl; log(1) << "receiver : " << min.second << " chunks on " << min.first << endl; - if ( ! drainingShards.empty() ){ + if ( ! drainingShards.empty() ) { string drainingStr; joinStringDelim( drainingShards, &drainingStr, ',' ); log(1) << "draining : " << ! drainingShards.empty() << "(" << drainingShards.size() << ")" << endl; @@ -86,34 +94,36 @@ namespace mongo { const int imbalance = max.second - min.second; const int threshold = balancedLastTime ? 
2 : 8; string from, to; - if ( imbalance >= threshold ){ + if ( imbalance >= threshold ) { from = max.first; to = min.first; - } else if ( ! drainingShards.empty() ){ + } + else if ( ! drainingShards.empty() ) { from = drainingShards[ rand() % drainingShards.size() ]; to = min.first; - } else { - // Everything is balanced here! + } + else { + // Everything is balanced here! return NULL; } const vector& chunksFrom = shardToChunksMap.find( from )->second; const vector& chunksTo = shardToChunksMap.find( to )->second; BSONObj chunkToMove = pickChunk( chunksFrom , chunksTo ); - log() << "chose [" << from << "] to [" << to << "] " << chunkToMove << endl; + log() << "chose [" << from << "] to [" << to << "] " << chunkToMove << endl; return new ChunkInfo( ns, to, from, chunkToMove ); } - BSONObj BalancerPolicy::pickChunk( const vector& from, const vector& to ){ + BSONObj BalancerPolicy::pickChunk( const vector& from, const vector& to ) { // It is possible for a donor ('from') shard to have less chunks than a recevier one ('to') - // if the donor is in draining mode. - + // if the donor is in draining mode. + if ( to.size() == 0 ) return from[0]; - + if ( from[0]["min"].Obj().woCompare( to[to.size()-1]["max"].Obj() , BSONObj() , false ) == 0 ) return from[0]; @@ -123,174 +133,41 @@ namespace mongo { return from[0]; } - bool BalancerPolicy::isSizeMaxed( BSONObj limits ){ - // If there's no limit information for the shard, assume it can be a chunk receiver + bool BalancerPolicy::isSizeMaxed( BSONObj limits ) { + // If there's no limit information for the shard, assume it can be a chunk receiver // (i.e., there's not bound on space utilization) - if ( limits.isEmpty() ){ + if ( limits.isEmpty() ) { return false; } long long maxUsage = limits[ ShardFields::maxSize.name() ].Long(); - if ( maxUsage == 0 ){ + if ( maxUsage == 0 ) { return false; } - long long currUsage = limits[ ShardFields::currSize.name() ].Long(); - if ( currUsage < maxUsage ){ + long long currUsage = limits[ LimitsFields::currSize.name() ].Long(); + if ( currUsage < maxUsage ) { return false; } return true; } - bool BalancerPolicy::isDraining( BSONObj limits ){ + bool BalancerPolicy::isDraining( BSONObj limits ) { BSONElement draining = limits[ ShardFields::draining.name() ]; - if ( draining.eoo() || ! draining.Bool() ){ + if ( draining.eoo() || ! draining.trueValue() ) { return false; } return true; } - class PolicyObjUnitTest : public UnitTest { - public: - - typedef ShardFields sf; // convenience alias - - void caseSizeMaxedShard(){ - BSONObj shard0 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) ); - assert( ! BalancerPolicy::isSizeMaxed( shard0 ) ); - - BSONObj shard1 = BSON( sf::maxSize(100LL) << sf::currSize(80LL) ); - assert( ! BalancerPolicy::isSizeMaxed( shard1 ) ); - - BSONObj shard2 = BSON( sf::maxSize(100LL) << sf::currSize(110LL) ); - assert( BalancerPolicy::isSizeMaxed( shard2 ) ); - - BSONObj empty; - assert( ! BalancerPolicy::isSizeMaxed( empty ) ); - } - - void caseDrainingShard(){ - BSONObj shard0 = BSON( sf::draining(true) ); - assert( BalancerPolicy::isDraining( shard0 ) ); - - BSONObj shard1 = BSON( sf::draining(false) ); - assert( ! BalancerPolicy::isDraining( shard1 ) ); - - BSONObj empty; - assert( ! 
BalancerPolicy::isDraining( empty ) ); - } - - void caseBalanceNormal(){ - // 2 chunks and 0 chunk shards - BalancerPolicy::ShardToChunksMap chunkMap; - vector chunks; - chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << - "max" << BSON( "x" << 49 ))); - chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << - "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); - chunkMap["shard0"] = chunks; - chunks.clear(); - chunkMap["shard1"] = chunks; - - // no limits - BalancerPolicy::ShardToLimitsMap limitsMap; - BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(false) ); - BSONObj limits1 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) << sf::draining(false) ); - limitsMap["shard0"] = limits0; - limitsMap["shard1"] = limits1; - - BalancerPolicy::ChunkInfo* c = NULL; - c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 1 ); - assert( c != NULL ); - } - - void caseBalanceDraining(){ - // one normal, one draining - // 2 chunks and 0 chunk shards - BalancerPolicy::ShardToChunksMap chunkMap; - vector chunks; - chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << - "max" << BSON( "x" << 49 ))); - chunkMap["shard0"] = chunks; - chunks.clear(); - chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << - "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); - chunkMap["shard1"] = chunks; - - // shard0 is draining - BalancerPolicy::ShardToLimitsMap limitsMap; - BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(true) ); - BSONObj limits1 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) << sf::draining(false) ); - limitsMap["shard0"] = limits0; - limitsMap["shard1"] = limits1; - - BalancerPolicy::ChunkInfo* c = NULL; - c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); - assert( c != NULL ); - assert( c->to == "shard1" ); - assert( c->from == "shard0" ); - assert( ! 
c->chunk.isEmpty() ); - } - - void caseBalanceEndedDraining(){ - // 2 chunks and 0 chunk (drain completed) shards - BalancerPolicy::ShardToChunksMap chunkMap; - vector chunks; - chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << - "max" << BSON( "x" << 49 ))); - chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << - "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); - chunkMap["shard0"] = chunks; - chunks.clear(); - chunkMap["shard1"] = chunks; - - // no limits - BalancerPolicy::ShardToLimitsMap limitsMap; - BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(false) ); - BSONObj limits1 = BSON( sf::maxSize(0LL) << sf::currSize(0LL) << sf::draining(true) ); - limitsMap["shard0"] = limits0; - limitsMap["shard1"] = limits1; - - BalancerPolicy::ChunkInfo* c = NULL; - c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); - assert( c == NULL ); - } - - void caseBalanceImpasse(){ - // one maxed out, one draining - // 2 chunks and 0 chunk shards - BalancerPolicy::ShardToChunksMap chunkMap; - vector chunks; - chunks.push_back(BSON( "min" << BSON( "x" << BSON( "$minKey"<<1) ) << - "max" << BSON( "x" << 49 ))); - chunkMap["shard0"] = chunks; - chunks.clear(); - chunks.push_back(BSON( "min" << BSON( "x" << 49 ) << - "max" << BSON( "x" << BSON( "$maxkey"<<1 )))); - chunkMap["shard1"] = chunks; - - // shard0 is draining, shard1 is maxed out - BalancerPolicy::ShardToLimitsMap limitsMap; - BSONObj limits0 = BSON( sf::maxSize(0LL) << sf::currSize(2LL) << sf::draining(true) ); - BSONObj limits1 = BSON( sf::maxSize(1LL) << sf::currSize(1LL) << sf::draining(false) ); - limitsMap["shard0"] = limits0; - limitsMap["shard1"] = limits1; - - BalancerPolicy::ChunkInfo* c = NULL; - c = BalancerPolicy::balance( "ns", limitsMap, chunkMap, 0 ); - assert( c == NULL ); - } - - void run(){ - caseSizeMaxedShard(); - caseDrainingShard(); - caseBalanceNormal(); - caseBalanceDraining(); - caseBalanceImpasse(); - log(1) << "policyObjUnitTest passed" << endl; + bool BalancerPolicy::hasOpsQueued( BSONObj limits ) { + BSONElement opsQueued = limits[ LimitsFields::hasOpsQueued.name() ]; + if ( opsQueued.eoo() || ! opsQueued.trueValue() ) { + return false; } - } policyObjUnitTest; + return true; + } } // namespace mongo diff --git a/s/balancer_policy.h b/s/balancer_policy.h index 3622edc..cef5aa6 100644 --- a/s/balancer_policy.h +++ b/s/balancer_policy.h @@ -1,4 +1,4 @@ -// balancer_policy.h +// @file balancer_policy.h /** * Copyright (C) 2010 10gen Inc. @@ -29,20 +29,20 @@ namespace mongo { /** * Returns a suggested chunk to move whithin a collection's shards, given information about - * space usage and number of chunks for that collection. If the policy doesn't recommend + * space usage and number of chunks for that collection. If the policy doesn't recommend * moving, it returns NULL. * * @param ns is the collections namepace. - * @param shardLimitMap is a map from shardId to an object that describes (for now) space + * @param shardLimitMap is a map from shardId to an object that describes (for now) space * cap and usage. E.g.: { "maxSize" : , "usedSize" : }. * @param shardToChunksMap is a map from shardId to chunks that live there. A chunk's format - * is { }. + * is { }. * @param balancedLastTime is the number of chunks effectively moved in the last round. * @returns NULL or ChunkInfo of the best move to make towards balacing the collection. 
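The PolicyObjUnitTest cases removed above double as usage documentation for balance(). Reconstructed from the deleted caseBalanceNormal scenario, a self-contained call looks roughly like this (the namespace "test.foo" and the shard names are illustrative only):

    // Illustrative call modeled on the removed caseBalanceNormal test:
    // shard0 holds 2 chunks, shard1 holds none, no size/draining limits.
    BalancerPolicy::ShardToChunksMap chunkMap;
    vector<BSONObj> chunks;
    chunks.push_back( BSON( "min" << BSON( "x" << BSON( "$minKey" << 1 ) ) <<
                            "max" << BSON( "x" << 49 ) ) );
    chunks.push_back( BSON( "min" << BSON( "x" << 49 ) <<
                            "max" << BSON( "x" << BSON( "$maxKey" << 1 ) ) ) );
    chunkMap["shard0"] = chunks;
    chunkMap["shard1"] = vector<BSONObj>();

    BalancerPolicy::ShardToLimitsMap limitsMap;    // empty limits => not maxed out, not draining, no ops queued
    limitsMap["shard0"] = BSONObj();
    limitsMap["shard1"] = BSONObj();

    // imbalance is 2 - 0 = 2; with balancedLastTime != 0 the threshold is 2, so a move is suggested
    BalancerPolicy::ChunkInfo* c = BalancerPolicy::balance( "test.foo" , limitsMap , chunkMap , 1 );
    assert( c != NULL && c->from == "shard0" && c->to == "shard1" );
    delete c;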
*/ typedef map< string,BSONObj > ShardToLimitsMap; typedef map< string,vector > ShardToChunksMap; - static ChunkInfo* balance( const string& ns, const ShardToLimitsMap& shardToLimitsMap, + static ChunkInfo* balance( const string& ns, const ShardToLimitsMap& shardToLimitsMap, const ShardToChunksMap& shardToChunksMap, int balancedLastTime ); // below exposed for testing purposes only -- treat it as private -- @@ -57,11 +57,16 @@ namespace mongo { static bool isSizeMaxed( BSONObj shardLimits ); /** - * Returns true if 'shardLimist' contains a field "draining". Expects the optional field + * Returns true if 'shardLimist' contains a field "draining". Expects the optional field * "isDraining" on 'shrdLimits'. */ static bool isDraining( BSONObj shardLimits ); + /** + * Returns true if a shard currently has operations in any of its writeback queues + */ + static bool hasOpsQueued( BSONObj shardLimits ); + private: // Convenience types typedef ShardToChunksMap::const_iterator ShardToChunksIter; @@ -76,7 +81,16 @@ namespace mongo { const BSONObj chunk; ChunkInfo( const string& a_ns , const string& a_to , const string& a_from , const BSONObj& a_chunk ) - : ns( a_ns ) , to( a_to ) , from( a_from ), chunk( a_chunk ){} + : ns( a_ns ) , to( a_to ) , from( a_from ), chunk( a_chunk ) {} + }; + + /** + * Field names used in the 'limits' map. + */ + struct LimitsFields { + // we use 'draining' and 'maxSize' from the 'shards' collection plus the following + static BSONField currSize; // currently used disk space in bytes + static BSONField hasOpsQueued; // writeback queue is not empty? }; } // namespace mongo diff --git a/s/chunk.cpp b/s/chunk.cpp index 87d7747..1c72535 100644 --- a/s/chunk.cpp +++ b/s/chunk.cpp @@ -1,4 +1,4 @@ -// shard.cpp +// @file chunk.cpp /** * Copyright (C) 2008 10gen Inc. 
@@ -17,63 +17,62 @@ */ #include "pch.h" -#include "chunk.h" -#include "config.h" -#include "grid.h" -#include "../util/unittest.h" + #include "../client/connpool.h" -#include "../client/distlock.h" #include "../db/queryutil.h" +#include "../util/unittest.h" + +#include "chunk.h" +#include "config.h" #include "cursors.h" +#include "grid.h" #include "strategy.h" +#include "client.h" namespace mongo { - inline bool allOfType(BSONType type, const BSONObj& o){ + inline bool allOfType(BSONType type, const BSONObj& o) { BSONObjIterator it(o); - while(it.more()){ + while(it.more()) { if (it.next().type() != type) return false; } return true; } - RWLock chunkSplitLock("rw:chunkSplitLock"); - // ------- Shard -------- - int Chunk::MaxChunkSize = 1024 * 1024 * 200; - - Chunk::Chunk( ChunkManager * manager ) - : _manager(manager), - _lastmod(0), _modified(false), _dataWritten(0) - {} + string Chunk::chunkMetadataNS = "config.chunks"; + + int Chunk::MaxChunkSize = 1024 * 1024 * 64; + + Chunk::Chunk( ChunkManager * manager ) : _manager(manager), _lastmod(0) { + _setDataWritten(); + } Chunk::Chunk(ChunkManager * info , const BSONObj& min, const BSONObj& max, const Shard& shard) - : _manager(info), _min(min), _max(max), _shard(shard), - _lastmod(0), _modified(false), _dataWritten(0) - {} + : _manager(info), _min(min), _max(max), _shard(shard), _lastmod(0) { + _setDataWritten(); + } + + void Chunk::_setDataWritten() { + _dataWritten = rand() % ( MaxChunkSize / 5 ); + } string Chunk::getns() const { assert( _manager ); - return _manager->getns(); + return _manager->getns(); } - void Chunk::setShard( const Shard& s ){ - _shard = s; - _manager->_migrationNotification(this); - _modified = true; - } - - bool Chunk::contains( const BSONObj& obj ) const{ + bool Chunk::contains( const BSONObj& obj ) const { return _manager->getShardKey().compare( getMin() , obj ) <= 0 && _manager->getShardKey().compare( obj , getMax() ) < 0; } bool ChunkRange::contains(const BSONObj& obj) const { - // same as Chunk method - return + // same as Chunk method + return _manager->getShardKey().compare( getMin() , obj ) <= 0 && _manager->getShardKey().compare( obj , getMax() ) < 0; } @@ -85,324 +84,288 @@ namespace mongo { bool Chunk::maxIsInf() const { return _manager->getShardKey().globalMax().woCompare( getMax() ) == 0; } - - BSONObj Chunk::pickSplitPoint() const{ - int sort = 0; - - if ( minIsInf() ){ - sort = 1; - } - else if ( maxIsInf() ){ - sort = -1; - } - - if ( sort ){ - ShardConnection conn( getShard().getConnString() , _manager->getns() ); - Query q; - if ( sort == 1 ) - q.sort( _manager->getShardKey().key() ); - else { - BSONObj k = _manager->getShardKey().key(); - BSONObjBuilder r; - - BSONObjIterator i(k); - while( i.more() ) { - BSONElement e = i.next(); - uassert( 10163 , "can only handle numbers here - which i think is correct" , e.isNumber() ); - r.append( e.fieldName() , -1 * e.number() ); - } - - q.sort( r.obj() ); - } - BSONObj end = conn->findOne( _manager->getns() , q ); - conn.done(); - if ( ! 
end.isEmpty() ) - return _manager->getShardKey().extractKey( end ); + BSONObj Chunk::_getExtremeKey( int sort ) const { + ShardConnection conn( getShard().getConnString() , _manager->getns() ); + Query q; + if ( sort == 1 ) { + q.sort( _manager->getShardKey().key() ); } - - BSONObj cmd = BSON( "medianKey" << _manager->getns() - << "keyPattern" << _manager->getShardKey().key() - << "min" << getMin() - << "max" << getMax() ); + else { + // need to invert shard key pattern to sort backwards + // TODO: make a helper in ShardKeyPattern? - ScopedDbConnection conn( getShard().getConnString() ); - BSONObj result; - if ( ! conn->runCommand( "admin" , cmd , result ) ){ - stringstream ss; - ss << "medianKey command failed: " << result; - uassert( 10164 , ss.str() , 0 ); + BSONObj k = _manager->getShardKey().key(); + BSONObjBuilder r; + + BSONObjIterator i(k); + while( i.more() ) { + BSONElement e = i.next(); + uassert( 10163 , "can only handle numbers here - which i think is correct" , e.isNumber() ); + r.append( e.fieldName() , -1 * e.number() ); + } + + q.sort( r.obj() ); } - BSONObj median = result.getObjectField( "median" ).getOwned(); + // find the extreme key + BSONObj end = conn->findOne( _manager->getns() , q ); conn.done(); + if ( end.isEmpty() ) + return BSONObj(); + + return _manager->getShardKey().extractKey( end ); + } - if (median == getMin()){ - Query q; - q.minKey(_min).maxKey(_max); - q.sort(_manager->getShardKey().key()); + void Chunk::pickMedianKey( BSONObj& medianKey ) const { + // Ask the mongod holding this chunk to figure out the split points. + ScopedDbConnection conn( getShard().getConnString() ); + BSONObj result; + BSONObjBuilder cmd; + cmd.append( "splitVector" , _manager->getns() ); + cmd.append( "keyPattern" , _manager->getShardKey().key() ); + cmd.append( "min" , getMin() ); + cmd.append( "max" , getMax() ); + cmd.appendBool( "force" , true ); + BSONObj cmdObj = cmd.obj(); - median = conn->findOne(_manager->getns(), q); - median = _manager->getShardKey().extractKey( median ); + if ( ! conn->runCommand( "admin" , cmdObj , result )) { + conn.done(); + ostringstream os; + os << "splitVector command (median key) failed: " << result; + uassert( 13503 , os.str() , 0 ); } - - if ( median < getMin() || median >= getMax() ){ - stringstream ss; - ss << "medianKey returned value out of range. " - << " cmd: " << cmd - << " result: " << result; - uasserted( 13394 , ss.str() ); + + BSONObjIterator it( result.getObjectField( "splitKeys" ) ); + if ( it.more() ) { + medianKey = it.next().Obj().getOwned(); } - - return median; + + conn.done(); } - void Chunk::pickSplitVector( vector* splitPoints ) const { + void Chunk::pickSplitVector( vector& splitPoints , int chunkSize /* bytes */, int maxPoints, int maxObjs ) const { // Ask the mongod holding this chunk to figure out the split points. ScopedDbConnection conn( getShard().getConnString() ); BSONObj result; BSONObjBuilder cmd; cmd.append( "splitVector" , _manager->getns() ); cmd.append( "keyPattern" , _manager->getShardKey().key() ); - cmd.append( "maxChunkSize" , Chunk::MaxChunkSize / (1<<20) ); + cmd.append( "min" , getMin() ); + cmd.append( "max" , getMax() ); + cmd.append( "maxChunkSizeBytes" , chunkSize ); + cmd.append( "maxSplitPoints" , maxPoints ); + cmd.append( "maxChunkObjects" , maxObjs ); BSONObj cmdObj = cmd.obj(); - if ( ! conn->runCommand( "admin" , cmdObj , result )){ + if ( ! 
conn->runCommand( "admin" , cmdObj , result )) { + conn.done(); ostringstream os; os << "splitVector command failed: " << result; uassert( 13345 , os.str() , 0 ); - } + } BSONObjIterator it( result.getObjectField( "splitKeys" ) ); - while ( it.more() ){ - splitPoints->push_back( it.next().Obj().getOwned() ); + while ( it.more() ) { + splitPoints.push_back( it.next().Obj().getOwned() ); } conn.done(); } - ChunkPtr Chunk::split(){ - vector splitPoints; - splitPoints.push_back( pickSplitPoint() ); - return multiSplit( splitPoints ); + ChunkPtr Chunk::singleSplit( bool force , BSONObj& res ) { + vector splitPoint; + + // if splitting is not obligatory we may return early if there are not enough data + // we cap the number of objects that would fall in the first half (before the split point) + // the rationale is we'll find a split point without traversing all the data + if ( ! force ) { + vector candidates; + const int maxPoints = 2; + const int maxObjs = 250000; + pickSplitVector( candidates , getManager()->getCurrentDesiredChunkSize() , maxPoints , maxObjs ); + if ( candidates.size() <= 1 ) { + // no split points means there isn't enough data to split on + // 1 split point means we have between half the chunk size to full chunk size + // so we shouldn't split + log(1) << "chunk not full enough to trigger auto-split" << endl; + return ChunkPtr(); + } + + splitPoint.push_back( candidates.front() ); + + } + else { + // if forcing a split, use the chunk's median key + BSONObj medianKey; + pickMedianKey( medianKey ); + if ( ! medianKey.isEmpty() ) + splitPoint.push_back( medianKey ); + } + + // We assume that if the chunk being split is the first (or last) one on the collection, this chunk is + // likely to see more insertions. Instead of splitting mid-chunk, we use the very first (or last) key + // as a split point. + if ( minIsInf() ) { + splitPoint.clear(); + BSONObj key = _getExtremeKey( 1 ); + if ( ! key.isEmpty() ) { + splitPoint.push_back( key ); + } + + } + else if ( maxIsInf() ) { + splitPoint.clear(); + BSONObj key = _getExtremeKey( -1 ); + if ( ! key.isEmpty() ) { + splitPoint.push_back( key ); + } + } + + // Normally, we'd have a sound split point here if the chunk is not empty. It's also a good place to + // sanity check. + if ( splitPoint.empty() || _min == splitPoint.front() || _max == splitPoint.front() ) { + log() << "want to split chunk, but can't find split point chunk " << toString() + << " got: " << ( splitPoint.empty() ? 
"" : splitPoint.front().toString() ) << endl; + return ChunkPtr(); + } + + return multiSplit( splitPoint , res ); } - - ChunkPtr Chunk::multiSplit( const vector& m ){ - const size_t maxSplitPoints = 256; + + ChunkPtr Chunk::multiSplit( const vector& m , BSONObj& res ) { + const size_t maxSplitPoints = 8192; uassert( 10165 , "can't split as shard doesn't have a manager" , _manager ); uassert( 13332 , "need a split key to split chunk" , !m.empty() ); uassert( 13333 , "can't split a chunk in that many parts", m.size() < maxSplitPoints ); - uassert( 13003 , "can't split a chunk with only one distinct value" , _min.woCompare(_max) ); + uassert( 13003 , "can't split a chunk with only one distinct value" , _min.woCompare(_max) ); - DistributedLock lockSetup( ConnectionString( modelServer() , ConnectionString::SYNC ) , getns() ); - dist_lock_try dlk( &lockSetup , string("split-") + toString() ); - uassert( 10166 , "locking namespace failed" , dlk.got() ); - - { - ShardChunkVersion onServer = getVersionOnConfigServer(); - ShardChunkVersion mine = _lastmod; - if ( onServer > mine ){ - stringstream ss; - ss << "mulitSplit failing because config not up to date" - << " onServer: " << onServer.toString() - << " mine: " << mine.toString(); - - //reload config - grid.getDBConfig(_manager->_ns)->getChunkManager(_manager->_ns, true); - - uasserted( 13387 , ss.str() ); - } - } + ScopedDbConnection conn( getShard().getConnString() ); - BSONObjBuilder detail; - appendShortVersion( "before" , detail ); - log(1) << "before split on " << m.size() << " points " << toString() << endl; + BSONObjBuilder cmd; + cmd.append( "splitChunk" , _manager->getns() ); + cmd.append( "keyPattern" , _manager->getShardKey().key() ); + cmd.append( "min" , getMin() ); + cmd.append( "max" , getMax() ); + cmd.append( "from" , getShard().getConnString() ); + cmd.append( "splitKeys" , m ); + cmd.append( "shardId" , genID() ); + cmd.append( "configdb" , configServer.modelServer() ); + BSONObj cmdObj = cmd.obj(); - // Iterate over the split points in 'm', splitting off a new chunk per entry. That chunk's range - // covers until the next entry in 'm' or _max . - vector newChunks; - vector::const_iterator i = m.begin(); - BSONObj nextPoint = i->getOwned(); - _modified = true; - do { - BSONObj splitPoint = nextPoint; - log(4) << "splitPoint: " << splitPoint << endl; - nextPoint = (++i != m.end()) ? i->getOwned() : _max.getOwned(); - log(4) << "nextPoint: " << nextPoint << endl; - - if ( nextPoint <= splitPoint) { - stringstream ss; - ss << "multiSplit failing because keys min: " << splitPoint << " and max: " << nextPoint - << " do not define a valid chunk"; - uasserted( 13395, ss.str() ); - } + if ( ! conn->runCommand( "admin" , cmdObj , res )) { + warning() << "splitChunk failed - cmd: " << cmdObj << " result: " << res << endl; + conn.done(); + + // reloading won't stricly solve all problems, e.g. the collection's metdata lock can be taken + // but we issue here so that mongos may refresh wihtout needing to be written/read against + _manager->_reload(); + + return ChunkPtr(); + } - ChunkPtr c( new Chunk( _manager, splitPoint , nextPoint , _shard) ); - c->_modified = true; - newChunks.push_back( c ); - } while ( i != m.end() ); + conn.done(); + _manager->_reload(); - // Have the chunk manager reflect the key change for the first chunk and create an entry for every - // new chunk spawned by it. + // The previous multisplit logic adjusted the boundaries of 'this' chunk. 
Any call to 'this' object hereafter + // will see a different _max for the chunk. + // TODO Untie this dependency since, for metadata purposes, the reload() above already fixed boundaries { rwlock lk( _manager->_lock , true ); setMax(m[0].getOwned()); DEV assert( shared_from_this() ); _manager->_chunkMap[_max] = shared_from_this(); - - for ( vector::const_iterator it = newChunks.begin(); it != newChunks.end(); ++it ){ - ChunkPtr s = *it; - _manager->_chunkMap[s->getMax()] = s; - } - } - - log(1) << "after split adjusted range: " << toString() << endl; - for ( vector::const_iterator it = newChunks.begin(); it != newChunks.end(); ++it ){ - ChunkPtr s = *it; - log(1) << "after split created new chunk: " << s->toString() << endl; - } - - // Save the new key boundaries in the configDB. - _manager->save( false ); - - // Log all these changes in the configDB's log. We log a simple split differently than a multi-split. - if ( newChunks.size() == 1) { - appendShortVersion( "left" , detail ); - newChunks[0]->appendShortVersion( "right" , detail ); - configServer.logChange( "split" , _manager->getns(), detail.obj() ); - - } else { - BSONObj beforeDetailObj = detail.obj(); - BSONObj firstDetailObj = beforeDetailObj.getOwned(); - const int newChunksSize = newChunks.size(); - - BSONObjBuilder firstDetail; - firstDetail.appendElements( beforeDetailObj ); - firstDetail.append( "number" , 0 ); - firstDetail.append( "of" , newChunksSize ); - appendShortVersion( "chunk" , firstDetail ); - configServer.logChange( "multi-split" , _manager->getns() , firstDetail.obj() ); - - for ( int i=0; i < newChunksSize; i++ ){ - BSONObjBuilder chunkDetail; - chunkDetail.appendElements( beforeDetailObj ); - chunkDetail.append( "number", i+1 ); - chunkDetail.append( "of" , newChunksSize ); - newChunks[i]->appendShortVersion( "chunk" , chunkDetail ); - configServer.logChange( "multi-split" , _manager->getns() , chunkDetail.obj() ); - } } - return newChunks[0]; + // return the second half, if a single split, or the first new chunk, if a multisplit. + return _manager->findChunk( m[0] ); } - bool Chunk::moveAndCommit( const Shard& to , string& errmsg ){ + bool Chunk::moveAndCommit( const Shard& to , long long chunkSize /* bytes */, BSONObj& res ) { uassert( 10167 , "can't move shard to its current location!" 
, getShard() != to ); - + log() << "moving chunk ns: " << _manager->getns() << " moving ( " << toString() << ") " << _shard.toString() << " -> " << to.toString() << endl; - + Shard from = _shard; - + ScopedDbConnection fromconn( from); - BSONObj res; bool worked = fromconn->runCommand( "admin" , - BSON( "moveChunk" << _manager->getns() << - "from" << from.getConnString() << - "to" << to.getConnString() << - "min" << _min << - "max" << _max << - "shardId" << genID() << - "configdb" << configServer.modelServer() - ) , + BSON( "moveChunk" << _manager->getns() << + "from" << from.getConnString() << + "to" << to.getConnString() << + "min" << _min << + "max" << _max << + "maxChunkSizeBytes" << chunkSize << + "shardId" << genID() << + "configdb" << configServer.modelServer() + ) , res - ); - + ); + fromconn.done(); - if ( worked ){ - _manager->_reload(); - return true; - } - - errmsg = res["errmsg"].String(); - errmsg += " " + res.toString(); - return false; + // if succeeded, needs to reload to pick up the new location + // if failed, mongos may be stale + // reload is excessive here as the failure could be simply because collection metadata is taken + _manager->_reload(); + + return worked; } - - bool Chunk::splitIfShould( long dataWritten ){ + + bool Chunk::splitIfShould( long dataWritten ) { LastError::Disabled d( lastError.get() ); + try { - return _splitIfShould( dataWritten ); - } - catch ( std::exception& e ){ - log( LL_ERROR ) << "splitIfShould failed: " << e.what() << endl; - return false; - } - } + _dataWritten += dataWritten; + int splitThreshold = getManager()->getCurrentDesiredChunkSize(); + if ( minIsInf() || maxIsInf() ) { + splitThreshold = (int) ((double)splitThreshold * .9); + } - bool Chunk::_splitIfShould( long dataWritten ){ - _dataWritten += dataWritten; - - // split faster in early chunks helps spread out an initial load better - int splitThreshold; - const int minChunkSize = 1 << 20; // 1 MBytes - int numChunks = getManager()->numChunks(); - if ( numChunks < 10 ){ - splitThreshold = max( MaxChunkSize / 4 , minChunkSize ); - } else if ( numChunks < 20 ){ - splitThreshold = max( MaxChunkSize / 2 , minChunkSize ); - } else { - splitThreshold = max( MaxChunkSize , minChunkSize ); - } - - if ( minIsInf() || maxIsInf() ){ - splitThreshold = (int) ((double)splitThreshold * .9); - } + if ( _dataWritten < splitThreshold / 5 ) + return false; - if ( _dataWritten < splitThreshold / 5 ) - return false; - - if ( ! 
chunkSplitLock.lock_try(0) ) - return false; - - rwlock lk( chunkSplitLock , 1 , true ); + log(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten << " splitThreshold: " << splitThreshold << endl; - log(3) << "\t splitIfShould : " << *this << endl; + _dataWritten = 0; // reset so we check often enough - _dataWritten = 0; - - BSONObj splitPoint = pickSplitPoint(); - if ( splitPoint.isEmpty() || _min == splitPoint || _max == splitPoint) { - log() << "SHARD PROBLEM** shard is too big, but can't split: " << toString() << endl; - return false; - } + BSONObj res; + ChunkPtr newShard = singleSplit( false /* does not force a split if not enough data */ , res ); + if ( newShard.get() == NULL ) { + // singleSplit would have issued a message if we got here + _dataWritten = 0; // this means there wasn't enough data to split, so don't want to try again until considerable more data + return false; + } - long size = getPhysicalSize(); - if ( size < splitThreshold ) - return false; - - log() << "autosplitting " << _manager->getns() << " size: " << size << " shard: " << toString() - << " on: " << splitPoint << "(splitThreshold " << splitThreshold << ")" << endl; + log() << "autosplitted " << _manager->getns() << " shard: " << toString() + << " on: " << newShard->getMax() << "(splitThreshold " << splitThreshold << ")" +#ifdef _DEBUG + << " size: " << getPhysicalSize() // slow - but can be usefule when debugging +#endif + << endl; - vector splitPoints; - splitPoints.push_back( splitPoint ); - ChunkPtr newShard = multiSplit( splitPoints ); + moveIfShould( newShard ); - moveIfShould( newShard ); - - return true; + return true; + + } + catch ( std::exception& e ) { + // if the collection lock is taken (e.g. we're migrating), it is fine for the split to fail. + warning() << "could have autosplit on collection: " << _manager->getns() << " but: " << e.what() << endl; + return false; + } } - bool Chunk::moveIfShould( ChunkPtr newChunk ){ + bool Chunk::moveIfShould( ChunkPtr newChunk ) { ChunkPtr toMove; - - if ( newChunk->countObjects(2) <= 1 ){ + + if ( newChunk->countObjects(2) <= 1 ) { toMove = newChunk; } - else if ( this->countObjects(2) <= 1 ){ + else if ( this->countObjects(2) <= 1 ) { DEV assert( shared_from_this() ); toMove = shared_from_this(); } @@ -412,45 +375,46 @@ namespace mongo { } assert( toMove ); - - Shard newLocation = Shard::pick(); - if ( getShard() == newLocation ){ - // if this is the best server, then we shouldn't do anything! - log(1) << "not moving chunk: " << toString() << " b/c would move to same place " << newLocation.toString() << " -> " << getShard().toString() << endl; + + Shard newLocation = Shard::pick( getShard() ); + if ( getShard() == newLocation ) { + // if this is the best shard, then we shouldn't do anything (Shard::pick already logged our shard). 
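A quick, hedged restatement of the autosplit arithmetic in splitIfShould() above, using the new 64 MB default for Chunk::MaxChunkSize; the values below only mirror the thresholds already visible in this patch and are not part of the change itself.

    // Roughly when does an autosplit attempt fire? (restating splitIfShould() above)
    const int maxChunkSize = 1024 * 1024 * 64;      // Chunk::MaxChunkSize default (64 MB)
    int splitThreshold     = maxChunkSize / 4;      // young collection (< 10 chunks): desired size quartered, ~16 MB
    // extreme (minKey/maxKey) chunks use 90% of that: (int)((double)splitThreshold * .9), ~14.4 MB
    long writesBeforeCheck = splitThreshold / 5;    // ~3.3 MB written to a chunk before split points are even requested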
+ log(1) << "recently split chunk: " << toString() << "already in the best shard" << endl; return 0; } log() << "moving chunk (auto): " << toMove->toString() << " to: " << newLocation.toString() << " #objects: " << toMove->countObjects() << endl; - string errmsg; - massert( 10412 , (string)"moveAndCommit failed: " + errmsg , - toMove->moveAndCommit( newLocation , errmsg ) ); - + BSONObj res; + massert( 10412 , + str::stream() << "moveAndCommit failed: " << res , + toMove->moveAndCommit( newLocation , MaxChunkSize , res ) ); + return true; } - long Chunk::getPhysicalSize() const{ + long Chunk::getPhysicalSize() const { ScopedDbConnection conn( getShard().getConnString() ); - + BSONObj result; - uassert( 10169 , "datasize failed!" , conn->runCommand( "admin" , - BSON( "datasize" << _manager->getns() - << "keyPattern" << _manager->getShardKey().key() - << "min" << getMin() - << "max" << getMax() - << "maxSize" << ( MaxChunkSize + 1 ) - << "estimate" << true - ) , result ) ); - + uassert( 10169 , "datasize failed!" , conn->runCommand( "admin" , + BSON( "datasize" << _manager->getns() + << "keyPattern" << _manager->getShardKey().key() + << "min" << getMin() + << "max" << getMax() + << "maxSize" << ( MaxChunkSize + 1 ) + << "estimate" << true + ) , result ) ); + conn.done(); return (long)result["size"].number(); } - int Chunk::countObjects(int maxCount) const { + int Chunk::countObjects(int maxCount) const { static const BSONObj fields = BSON("_id" << 1 ); ShardConnection conn( getShard() , _manager->getns() ); - + // not using regular count as this is more flexible and supports $min/$max Query q = Query().minKey(_min).maxKey(_max); int n; @@ -458,33 +422,33 @@ namespace mongo { auto_ptr c = conn->query(_manager->getns(), q, maxCount, 0, &fields); assert( c.get() ); n = c->itcount(); - } + } conn.done(); return n; } - void Chunk::appendShortVersion( const char * name , BSONObjBuilder& b ){ + void Chunk::appendShortVersion( const char * name , BSONObjBuilder& b ) { BSONObjBuilder bb( b.subobjStart( name ) ); bb.append( "min" , _min ); bb.append( "max" , _max ); bb.done(); } - - bool Chunk::operator==( const Chunk& s ) const{ - return + + bool Chunk::operator==( const Chunk& s ) const { + return _manager->getShardKey().compare( _min , s._min ) == 0 && _manager->getShardKey().compare( _max , s._max ) == 0 ; } - void Chunk::serialize(BSONObjBuilder& to,ShardChunkVersion myLastMod){ - + void Chunk::serialize(BSONObjBuilder& to,ShardChunkVersion myLastMod) { + to.append( "_id" , genID( _manager->getns() , _min ) ); - if ( myLastMod.isSet() ){ + if ( myLastMod.isSet() ) { to.appendTimestamp( "lastmod" , myLastMod ); } - else if ( _lastmod.isSet() ){ + else if ( _lastmod.isSet() ) { assert( _lastmod > 0 && _lastmod < 1000 ); to.appendTimestamp( "lastmod" , _lastmod ); } @@ -503,15 +467,15 @@ namespace mongo { buf << ns << "-"; BSONObjIterator i(o); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); buf << e.fieldName() << "_" << e.toString(false, true); } return buf.str(); } - - void Chunk::unserialize(const BSONObj& from){ + + void Chunk::unserialize(const BSONObj& from) { string ns = from.getStringField( "ns" ); _shard.reset( from.getStringField( "shard" ) ); @@ -520,15 +484,15 @@ namespace mongo { BSONElement e = from["minDotted"]; - if (e.eoo()){ + if (e.eoo()) { _min = from.getObjectField( "min" ).getOwned(); _max = from.getObjectField( "max" ).getOwned(); - } + } else { // TODO delete this case after giving people a chance to migrate _min = e.embeddedObject().getOwned(); _max = 
from.getObjectField( "maxDotted" ).getOwned(); } - + uassert( 10170 , "Chunk needs a ns" , ! ns.empty() ); uassert( 13327 , "Chunk ns must match server ns" , ns == _manager->getns() ); @@ -538,26 +502,13 @@ namespace mongo { uassert( 10173 , "Chunk needs a max" , ! _max.isEmpty() ); } - string Chunk::modelServer() const { - // TODO: this could move around? - return configServer.modelServer(); - } - - ShardChunkVersion Chunk::getVersionOnConfigServer() const { - ScopedDbConnection conn( modelServer() ); - BSONObj o = conn->findOne( ShardNS::chunk , BSON( "_id" << genID() ) ); - conn.done(); - return o["lastmod"]; - } - string Chunk::toString() const { stringstream ss; ss << "ns:" << _manager->getns() << " at: " << _shard.toString() << " lastmod: " << _lastmod.toString() << " min: " << _min << " max: " << _max; return ss.str(); } - - - ShardKeyPattern Chunk::skey() const{ + + ShardKeyPattern Chunk::skey() const { return _manager->getShardKey(); } @@ -565,75 +516,66 @@ namespace mongo { AtomicUInt ChunkManager::NextSequenceNumber = 1; - ChunkManager::ChunkManager( DBConfig * config , string ns , ShardKeyPattern pattern , bool unique ) : - _config( config ) , _ns( ns ) , - _key( pattern ) , _unique( unique ) , - _sequenceNumber( ++NextSequenceNumber ), _lock("rw:ChunkManager") - { - _reload_inlock(); - - if ( _chunkMap.empty() ){ - ChunkPtr c( new Chunk(this, _key.globalMin(), _key.globalMax(), config->getPrimary()) ); - c->setModified( true ); - - _chunkMap[c->getMax()] = c; - _chunkRanges.reloadAll(_chunkMap); - - _shards.insert(c->getShard()); - - save_inlock( true ); - log() << "no chunks for:" << ns << " so creating first: " << c->toString() << endl; - } + ChunkManager::ChunkManager( string ns , ShardKeyPattern pattern , bool unique ) : + _ns( ns ) , _key( pattern ) , _unique( unique ) , _lock("rw:ChunkManager"), + _nsLock( ConnectionString( configServer.modelServer() , ConnectionString::SYNC ) , ns ) { + _reload_inlock(); // will set _sequenceNumber } - - ChunkManager::~ChunkManager(){ + + ChunkManager::~ChunkManager() { _chunkMap.clear(); _chunkRanges.clear(); _shards.clear(); } - - void ChunkManager::_reload(){ + + void ChunkManager::_reload() { rwlock lk( _lock , true ); _reload_inlock(); } - void ChunkManager::_reload_inlock(){ + void ChunkManager::_reload_inlock() { int tries = 3; - while (tries--){ + while (tries--) { _chunkMap.clear(); _chunkRanges.clear(); _shards.clear(); _load(); - if (_isValid()){ + if (_isValid()) { _chunkRanges.reloadAll(_chunkMap); + + // The shard versioning mechanism hinges on keeping track of the number of times we reloaded ChunkManager's. + // Increasing this number here will prompt checkShardVersion() to refresh the connection-level versions to + // the most up to date value. + _sequenceNumber = ++NextSequenceNumber; + return; } - if (_chunkMap.size() < 10){ + if (_chunkMap.size() < 10) { _printChunks(); } + sleepmillis(10 * (3-tries)); - sleepsecs(10); } - msgasserted(13282, "Couldn't load a valid config for " + _ns + " after 3 tries. Giving up"); - + + msgasserted(13282, "Couldn't load a valid config for " + _ns + " after 3 attempts. Please try again."); + } - void ChunkManager::_load(){ - static Chunk temp(0); - - ScopedDbConnection conn( temp.modelServer() ); + void ChunkManager::_load() { + ScopedDbConnection conn( configServer.modelServer() ); - auto_ptr cursor = conn->query(temp.getNS(), QUERY("ns" << _ns).sort("lastmod",1), 0, 0, 0, 0, - (DEBUG_BUILD ? 2 : 1000000)); // batch size. 
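For reference, the _id produced by Chunk::genID() above concatenates the namespace and the chunk's min-key fields. A hypothetical example follows (namespace and key value made up, and assuming the two-argument overload used by serialize()):

    // genID( ns , min ) builds "<ns>-<field>_<value>[<field>_<value>...]".
    string id = Chunk::genID( "test.users" , BSON( "x" << 42 ) );
    // id is roughly "test.users-x_42"; the lowest chunk of the key space would look like "test.users-x_MinKey"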
Try to induce potential race conditions in debug builds + // TODO really need the sort? + auto_ptr cursor = conn->query( Chunk::chunkMetadataNS, QUERY("ns" << _ns).sort("lastmod",1), 0, 0, 0, 0, + (DEBUG_BUILD ? 2 : 1000000)); // batch size. Try to induce potential race conditions in debug builds assert( cursor.get() ); - while ( cursor->more() ){ + while ( cursor->more() ) { BSONObj d = cursor->next(); - if ( d["isMaxMarker"].trueValue() ){ + if ( d["isMaxMarker"].trueValue() ) { continue; } - + ChunkPtr c( new Chunk( this ) ); c->unserialize( d ); @@ -655,10 +597,10 @@ namespace mongo { ENSURE(allOfType(MaxKey, prior(_chunkMap.end())->second->getMax())); // Make sure there are no gaps or overlaps - for (ChunkMap::const_iterator it=boost::next(_chunkMap.begin()), end=_chunkMap.end(); it != end; ++it){ + for (ChunkMap::const_iterator it=boost::next(_chunkMap.begin()), end=_chunkMap.end(); it != end; ++it) { ChunkMap::const_iterator last = prior(it); - if (!(it->second->getMin() == last->second->getMax())){ + if (!(it->second->getMin() == last->second->getMax())) { PRINT(it->second->toString()); PRINT(it->second->getMin()); PRINT(last->second->getMax()); @@ -677,54 +619,101 @@ namespace mongo { } } - bool ChunkManager::hasShardKey( const BSONObj& obj ){ + bool ChunkManager::hasShardKey( const BSONObj& obj ) { return _key.hasShardKey( obj ); } - ChunkPtr ChunkManager::findChunk( const BSONObj & obj , bool retry ){ + void ChunkManager::createFirstChunk( const Shard& shard ) { + assert( _chunkMap.size() == 0 ); + + ChunkPtr c( new Chunk(this, _key.globalMin(), _key.globalMax(), shard ) ); + + // this is the first chunk; start the versioning from scratch + ShardChunkVersion version; + version.incMajor(); + + // build update for the chunk collection + BSONObjBuilder chunkBuilder; + c->serialize( chunkBuilder , version ); + BSONObj chunkCmd = chunkBuilder.obj(); + + log() << "about to create first chunk for: " << _ns << endl; + + ScopedDbConnection conn( configServer.modelServer() ); + BSONObj res; + conn->update( Chunk::chunkMetadataNS, QUERY( "_id" << c->genID() ), chunkCmd, true, false ); + + string errmsg = conn->getLastError(); + if ( errmsg.size() ) { + stringstream ss; + ss << "saving first chunk failed. cmd: " << chunkCmd << " result: " << errmsg; + log( LL_ERROR ) << ss.str() << endl; + msgasserted( 13592 , ss.str() ); // assert(13592) + } + + conn.done(); + + // every instance of ChunkManager has a unique sequence number; callers of ChunkManager may + // inquiry about whether there were changes in chunk configuration (see re/load() calls) since + // the last access to ChunkManager by checking the sequence number + _sequenceNumber = ++NextSequenceNumber; + + _chunkMap[c->getMax()] = c; + _chunkRanges.reloadAll(_chunkMap); + _shards.insert(c->getShard()); + c->setLastmod(version); + + // the ensure index will have the (desired) indirect effect of creating the collection on the + // assigned shard, as it sets up the index over the sharding keys. 
+ ensureIndex_inlock(); + + log() << "successfully created first chunk for " << c->toString() << endl; + } + + ChunkPtr ChunkManager::findChunk( const BSONObj & obj , bool retry ) { BSONObj key = _key.extractKey(obj); - + { - rwlock lk( _lock , false ); - + rwlock lk( _lock , false ); + BSONObj foo; ChunkPtr c; { ChunkMap::iterator it = _chunkMap.upper_bound(key); - if (it != _chunkMap.end()){ + if (it != _chunkMap.end()) { foo = it->first; c = it->second; } } - - if ( c ){ + + if ( c ) { if ( c->contains( obj ) ) return c; - + PRINT(foo); PRINT(*c); PRINT(key); - + _reload_inlock(); massert(13141, "Chunk map pointed to incorrect chunk", false); } } - if ( retry ){ + if ( retry ) { stringstream ss; ss << "couldn't find a chunk aftry retry which should be impossible extracted: " << key; throw UserException( 8070 , ss.str() ); } - + log() << "ChunkManager: couldn't find chunk for: " << key << " going to retry" << endl; _reload_inlock(); return findChunk( obj , true ); } ChunkPtr ChunkManager::findChunkOnServer( const Shard& shard ) const { - rwlock lk( _lock , false ); - - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + rwlock lk( _lock , false ); + + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { ChunkPtr c = i->second; if ( c->getShard() == shard ) return c; @@ -733,20 +722,33 @@ namespace mongo { return ChunkPtr(); } - void ChunkManager::getShardsForQuery( set& shards , const BSONObj& query ){ - rwlock lk( _lock , false ); + void ChunkManager::getShardsForQuery( set& shards , const BSONObj& query ) { + rwlock lk( _lock , false ); DEV PRINT(query); //TODO look into FieldRangeSetOr FieldRangeOrSet fros(_ns.c_str(), query, false); - uassert(13088, "no support for special queries yet", fros.getSpecial().empty()); + + const string special = fros.getSpecial(); + if (special == "2d") { + BSONForEach(field, query) { + if (getGtLtOp(field) == BSONObj::opNEAR) { + uassert(13501, "use geoNear command rather than $near query", false); + // TODO: convert to geoNear rather than erroring out + } + // $within queries are fine + } + } + else if (!special.empty()) { + uassert(13502, "unrecognized special query type: " + special, false); + } do { boost::scoped_ptr frs (fros.topFrs()); { // special case if most-significant field isn't in query FieldRange range = frs->range(_key.key().firstElement().fieldName()); - if ( !range.nontrivial() ){ + if ( !range.nontrivial() ) { DEV PRINT(range.nontrivial()); getAllShards(shards); return; @@ -754,7 +756,7 @@ namespace mongo { } BoundList ranges = frs->indexBounds(_key.key(), 1); - for (BoundList::const_iterator it=ranges.begin(), end=ranges.end(); it != end; ++it){ + for (BoundList::const_iterator it=ranges.begin(), end=ranges.end(); it != end; ++it) { BSONObj minObj = it->first.replaceFieldNames(_key.key()); BSONObj maxObj = it->second.replaceFieldNames(_key.key()); @@ -765,35 +767,36 @@ namespace mongo { min = _chunkRanges.upper_bound(minObj); max = _chunkRanges.upper_bound(maxObj); - assert(min != _chunkRanges.ranges().end()); + massert( 13507 , str::stream() << "invalid chunk config minObj: " << minObj , min != _chunkRanges.ranges().end()); // make max non-inclusive like end iterators if(max != _chunkRanges.ranges().end()) ++max; - for (ChunkRangeMap::const_iterator it=min; it != max; ++it){ + for (ChunkRangeMap::const_iterator it=min; it != max; ++it) { shards.insert(it->second->getShard()); } // once we know we need to visit all shards no need to keep looping //if (shards.size() == 
_shards.size()) - //return; + //return; } if (fros.moreOrClauses()) fros.popOrClause(); - } while (fros.moreOrClauses()); + } + while (fros.moreOrClauses()); } - void ChunkManager::getShardsForRange(set& shards, const BSONObj& min, const BSONObj& max){ + void ChunkManager::getShardsForRange(set& shards, const BSONObj& min, const BSONObj& max) { uassert(13405, "min must have shard key", hasShardKey(min)); uassert(13406, "max must have shard key", hasShardKey(max)); ChunkRangeMap::const_iterator it = _chunkRanges.upper_bound(min); ChunkRangeMap::const_iterator end = _chunkRanges.lower_bound(max); - for (; it!=end; ++ it){ + for (; it!=end; ++ it) { shards.insert(it->second->getShard()); // once we know we need to visit all shards no need to keep looping @@ -802,282 +805,165 @@ namespace mongo { } } - void ChunkManager::getAllShards( set& all ){ - rwlock lk( _lock , false ); + void ChunkManager::getAllShards( set& all ) { + rwlock lk( _lock , false ); all.insert(_shards.begin(), _shards.end()); } - - void ChunkManager::ensureIndex_inlock(){ + + void ChunkManager::ensureIndex_inlock() { //TODO in parallel? - for ( set::const_iterator i=_shards.begin(); i!=_shards.end(); ++i ){ + for ( set::const_iterator i=_shards.begin(); i!=_shards.end(); ++i ) { ScopedDbConnection conn( i->getConnString() ); - conn->ensureIndex( getns() , getShardKey().key() , _unique ); + conn->ensureIndex( getns() , getShardKey().key() , _unique , "" , false /* do not cache ensureIndex SERVER-1691 */ ); conn.done(); } } - - void ChunkManager::drop( ChunkManagerPtr me ){ - rwlock lk( _lock , true ); + + void ChunkManager::drop( ChunkManagerPtr me ) { + rwlock lk( _lock , true ); configServer.logChange( "dropCollection.start" , _ns , BSONObj() ); - - DistributedLock lockSetup( ConnectionString( configServer.modelServer() , ConnectionString::SYNC ) , getns() ); - dist_lock_try dlk( &lockSetup , "drop" ); - uassert( 13331 , "locking namespace failed" , dlk.got() ); - + + dist_lock_try dlk( &_nsLock , "drop" ); + uassert( 13331 , "collection's metadata is undergoing changes. Please try again." , dlk.got() ); + uassert( 10174 , "config servers not all up" , configServer.allUp() ); - + set seen; - + log(1) << "ChunkManager::drop : " << _ns << endl; // lock all shards so no one can do a split/migrate - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { ChunkPtr c = i->second; seen.insert( c->getShard() ); } - - log(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl; + + log(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl; // wipe my meta-data _chunkMap.clear(); _chunkRanges.clear(); _shards.clear(); - + // delete data from mongod - for ( set::iterator i=seen.begin(); i!=seen.end(); i++ ){ + for ( set::iterator i=seen.begin(); i!=seen.end(); i++ ) { ScopedDbConnection conn( *i ); conn->dropCollection( _ns ); conn.done(); } - - log(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl; - // clean up database meta-data - uassert( 10176 , "no sharding data?" 
, _config->removeSharding( _ns ) ); - + log(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl; + // remove chunk data - static Chunk temp(0); - ScopedDbConnection conn( temp.modelServer() ); - conn->remove( temp.getNS() , BSON( "ns" << _ns ) ); + ScopedDbConnection conn( configServer.modelServer() ); + conn->remove( Chunk::chunkMetadataNS , BSON( "ns" << _ns ) ); conn.done(); - log(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl; - - for ( set::iterator i=seen.begin(); i!=seen.end(); i++ ){ + log(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl; + + for ( set::iterator i=seen.begin(); i!=seen.end(); i++ ) { ScopedDbConnection conn( *i ); BSONObj res; if ( ! setShardVersion( conn.conn() , _ns , 0 , true , res ) ) - throw UserException( 8071 , (string)"OH KNOW, cleaning up after drop failed: " + res.toString() ); + throw UserException( 8071 , str::stream() << "cleaning up after drop failed: " << res ); conn.done(); } - log(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl; + log(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl; configServer.logChange( "dropCollection" , _ns , BSONObj() ); } - - void ChunkManager::save( bool major ){ - rwlock lk( _lock , true ); - save_inlock( major ); - } - - void ChunkManager::save_inlock( bool major ){ - - ShardChunkVersion a = getVersion_inlock(); - assert( a > 0 || _chunkMap.size() <= 1 ); - ShardChunkVersion nextChunkVersion = a; - nextChunkVersion.inc( major ); - - vector toFix; - vector newVersions; - - BSONObjBuilder cmdBuilder; - BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) ); - - - int numOps = 0; - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ - ChunkPtr c = i->second; - if ( ! c->getModified() ) - continue; - - numOps++; - _sequenceNumber = ++NextSequenceNumber; - - ShardChunkVersion myVersion = nextChunkVersion; - nextChunkVersion.incMinor(); - toFix.push_back( c ); - newVersions.push_back( myVersion ); - - BSONObjBuilder op; - op.append( "op" , "u" ); - op.appendBool( "b" , true ); - op.append( "ns" , ShardNS::chunk ); - - BSONObjBuilder n( op.subobjStart( "o" ) ); - c->serialize( n , myVersion ); - n.done(); - - BSONObjBuilder q( op.subobjStart( "o2" ) ); - q.append( "_id" , c->genID() ); - q.done(); - - updates.append( op.obj() ); - } - - if ( numOps == 0 ) - return; - - updates.done(); - - if ( a > 0 || _chunkMap.size() > 1 ){ - BSONArrayBuilder temp( cmdBuilder.subarrayStart( "preCondition" ) ); - BSONObjBuilder b; - b.append( "ns" , ShardNS::chunk ); - b.append( "q" , BSON( "query" << BSON( "ns" << _ns ) << "orderby" << BSON( "lastmod" << -1 ) ) ); - { - BSONObjBuilder bb( b.subobjStart( "res" ) ); - bb.appendTimestamp( "lastmod" , a ); - bb.done(); - } - temp.append( b.obj() ); - temp.done(); - } - BSONObj cmd = cmdBuilder.obj(); - - log(7) << "ChunkManager::save update: " << cmd << endl; - - ScopedDbConnection conn( Chunk(0).modelServer() ); - BSONObj res; - bool ok = conn->runCommand( "config" , cmd , res ); - conn.done(); - - if ( ! ok ){ - stringstream ss; - ss << "saving chunks failed. 
cmd: " << cmd << " result: " << res; - log( LL_ERROR ) << ss.str() << endl; - msgasserted( 13327 , ss.str() ); - } - - for ( unsigned i=0; i_lastmod = newVersions[i]; - toFix[i]->setModified( false ); - } - - massert( 10417 , "how did version get smalled" , getVersion_inlock() >= a ); - - ensureIndex_inlock(); // TODO: this is too aggressive - but not really sooo bad - } - void ChunkManager::maybeChunkCollection() { uassert( 13346 , "can't pre-split already splitted collection" , (_chunkMap.size() == 1) ); ChunkPtr soleChunk = _chunkMap.begin()->second; vector splitPoints; - soleChunk->pickSplitVector( &splitPoints ); - if ( splitPoints.empty() ){ + soleChunk->pickSplitVector( splitPoints , Chunk::MaxChunkSize ); + if ( splitPoints.empty() ) { log(1) << "not enough data to warrant chunking " << getns() << endl; return; } - soleChunk->multiSplit( splitPoints ); - } - - ShardChunkVersion ChunkManager::getVersionOnConfigServer() const { - static Chunk temp(0); - - ScopedDbConnection conn( temp.modelServer() ); - - auto_ptr cursor = conn->query(temp.getNS(), QUERY("ns" << _ns).sort("lastmod",1), 1 ); - assert( cursor.get() ); - BSONObj o; - if ( cursor->more() ) - o = cursor->next(); - conn.done(); - - return o["lastmod"]; + BSONObj res; + ChunkPtr p; + p = soleChunk->multiSplit( splitPoints , res ); + if ( p.get() == NULL ) { + log( LL_WARNING ) << "could not split '" << getns() << "': " << res << endl; + return; + } } - ShardChunkVersion ChunkManager::getVersion( const Shard& shard ) const{ - rwlock lk( _lock , false ); + ShardChunkVersion ChunkManager::getVersion( const Shard& shard ) const { + rwlock lk( _lock , false ); // TODO: cache or something? - + ShardChunkVersion max = 0; - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { ChunkPtr c = i->second; DEV assert( c ); if ( c->getShard() != shard ) continue; - if ( c->_lastmod > max ) - max = c->_lastmod; - } + if ( c->getLastmod() > max ) + max = c->getLastmod(); + } return max; } - ShardChunkVersion ChunkManager::getVersion() const{ - rwlock lk( _lock , false ); - return getVersion_inlock(); - } - - ShardChunkVersion ChunkManager::getVersion_inlock() const{ + ShardChunkVersion ChunkManager::getVersion() const { + rwlock lk( _lock , false ); + ShardChunkVersion max = 0; - - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { ChunkPtr c = i->second; - if ( c->_lastmod > max ) - max = c->_lastmod; - } + if ( c->getLastmod() > max ) + max = c->getLastmod(); + } return max; } string ChunkManager::toString() const { - rwlock lk( _lock , false ); + rwlock lk( _lock , false ); stringstream ss; ss << "ChunkManager: " << _ns << " key:" << _key.toString() << '\n'; - for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){ + for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { const ChunkPtr c = i->second; ss << "\t" << c->toString() << '\n'; } return ss.str(); } - void ChunkManager::_migrationNotification(Chunk* c){ - _chunkRanges.reloadRange(_chunkMap, c->getMin(), c->getMax()); - _shards.insert(c->getShard()); - } - - - void ChunkRangeManager::assertValid() const{ + void ChunkRangeManager::assertValid() const { if (_ranges.empty()) return; try { // No Nulls - for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it){ + for 
(ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it) { assert(it->second); } - + // Check endpoints assert(allOfType(MinKey, _ranges.begin()->second->getMin())); assert(allOfType(MaxKey, prior(_ranges.end())->second->getMax())); // Make sure there are no gaps or overlaps - for (ChunkRangeMap::const_iterator it=boost::next(_ranges.begin()), end=_ranges.end(); it != end; ++it){ + for (ChunkRangeMap::const_iterator it=boost::next(_ranges.begin()), end=_ranges.end(); it != end; ++it) { ChunkRangeMap::const_iterator last = prior(it); assert(it->second->getMin() == last->second->getMax()); } // Check Map keys - for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it){ + for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it) { assert(it->first == it->second->getMax()); } // Make sure we match the original chunks const ChunkMap chunks = _ranges.begin()->second->getManager()->_chunkMap; - for ( ChunkMap::const_iterator i=chunks.begin(); i!=chunks.end(); ++i ){ + for ( ChunkMap::const_iterator i=chunks.begin(); i!=chunks.end(); ++i ) { const ChunkPtr chunk = i->second; ChunkRangeMap::const_iterator min = _ranges.upper_bound(chunk->getMin()); @@ -1090,8 +976,9 @@ namespace mongo { assert(min->second->contains( chunk->getMin() )); assert(min->second->contains( chunk->getMax() ) || (min->second->getMax() == chunk->getMax())); } - - } catch (...) { + + } + catch (...) { log( LL_ERROR ) << "\t invalid ChunkRangeMap! printing ranges:" << endl; for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it) @@ -1101,15 +988,15 @@ namespace mongo { } } - void ChunkRangeManager::reloadRange(const ChunkMap& chunks, const BSONObj& min, const BSONObj& max){ - if (_ranges.empty()){ + void ChunkRangeManager::reloadRange(const ChunkMap& chunks, const BSONObj& min, const BSONObj& max) { + if (_ranges.empty()) { reloadAll(chunks); return; } - + ChunkRangeMap::iterator low = _ranges.upper_bound(min); ChunkRangeMap::iterator high = _ranges.lower_bound(max); - + assert(low != _ranges.end()); assert(high != _ranges.end()); assert(low->second); @@ -1135,10 +1022,10 @@ namespace mongo { // merge low-end if possible low = _ranges.upper_bound(min); assert(low != _ranges.end()); - if (low != _ranges.begin()){ + if (low != _ranges.begin()) { shared_ptr a = prior(low)->second; shared_ptr b = low->second; - if (a->getShard() == b->getShard()){ + if (a->getShard() == b->getShard()) { shared_ptr cr (new ChunkRange(*a, *b)); _ranges.erase(prior(low)); _ranges.erase(low); // invalidates low @@ -1150,10 +1037,10 @@ namespace mongo { // merge high-end if possible high = _ranges.lower_bound(max); - if (high != prior(_ranges.end())){ + if (high != prior(_ranges.end())) { shared_ptr a = high->second; shared_ptr b = boost::next(high)->second; - if (a->getShard() == b->getShard()){ + if (a->getShard() == b->getShard()) { shared_ptr cr (new ChunkRange(*a, *b)); _ranges.erase(boost::next(high)); _ranges.erase(high); //invalidates high @@ -1164,15 +1051,15 @@ namespace mongo { DEV assertValid(); } - void ChunkRangeManager::reloadAll(const ChunkMap& chunks){ + void ChunkRangeManager::reloadAll(const ChunkMap& chunks) { _ranges.clear(); _insertRange(chunks.begin(), chunks.end()); DEV assertValid(); } - void ChunkRangeManager::_insertRange(ChunkMap::const_iterator begin, const ChunkMap::const_iterator end){ - while (begin != end){ + void ChunkRangeManager::_insertRange(ChunkMap::const_iterator begin, const 
ChunkMap::const_iterator end) { + while (begin != end) { ChunkMap::const_iterator first = begin; Shard shard = first->second->getShard(); while (begin != end && (begin->second->getShard() == shard)) @@ -1182,32 +1069,50 @@ namespace mongo { _ranges[cr->getMax()] = cr; } } - + + int ChunkManager::getCurrentDesiredChunkSize() const { + // split faster in early chunks helps spread out an initial load better + const int minChunkSize = 1 << 20; // 1 MBytes + + int splitThreshold = Chunk::MaxChunkSize; + + int nc = numChunks(); + + if ( nc < 10 ) { + splitThreshold = max( splitThreshold / 4 , minChunkSize ); + } + else if ( nc < 20 ) { + splitThreshold = max( splitThreshold / 2 , minChunkSize ); + } + + return splitThreshold; + } + class ChunkObjUnitTest : public UnitTest { public: - void runShard(){ + void runShard() { ChunkPtr c; assert( ! c ); c.reset( new Chunk( 0 ) ); assert( c ); } - - void runShardChunkVersion(){ + + void runShardChunkVersion() { vector all; all.push_back( ShardChunkVersion(1,1) ); all.push_back( ShardChunkVersion(1,2) ); all.push_back( ShardChunkVersion(2,1) ); all.push_back( ShardChunkVersion(2,2) ); - - for ( unsigned i=0; i ChunkMap; typedef map,BSONObjCmp> ChunkRangeMap; - + + typedef shared_ptr ChunkManagerPtr; + /** config.chunks { ns : "alleyinsider.fs.chunks" , min : {} , max : {} , server : "localhost:30001" } - + x is in a shard iff min <= x < max - */ + */ class Chunk : boost::noncopyable, public boost::enable_shared_from_this { public: - Chunk( ChunkManager * info ); Chunk( ChunkManager * info , const BSONObj& min, const BSONObj& max, const Shard& shard); - - const BSONObj& getMin() const { return _min; } - const BSONObj& getMax() const { return _max; } - - void setMin(const BSONObj& o){ - _min = o; - } - void setMax(const BSONObj& o){ - _max = o; - } - - string getns() const; - Shard getShard() const { return _shard; } + // + // serialization support + // - void setShard( const Shard& shard ); - - bool contains( const BSONObj& obj ) const; + void serialize(BSONObjBuilder& to, ShardChunkVersion myLastMod=0); + void unserialize(const BSONObj& from); - string toString() const; + // + // chunk boundary support + // - friend ostream& operator << (ostream& out, const Chunk& c){ return (out << c.toString()); } + const BSONObj& getMin() const { return _min; } + const BSONObj& getMax() const { return _max; } + void setMin(const BSONObj& o) { _min = o; } + void setMax(const BSONObj& o) { _max = o; } - bool operator==(const Chunk& s) const; - - bool operator!=(const Chunk& s) const{ - return ! 
( *this == s ); - } - // if min/max key is pos/neg infinity bool minIsInf() const; bool maxIsInf() const; - BSONObj pickSplitPoint() const; - ChunkPtr split(); + bool contains( const BSONObj& obj ) const; - void pickSplitVector( vector* splitPoints ) const; - ChunkPtr multiSplit( const vector& splitPoints ); + string genID() const; + static string genID( const string& ns , const BSONObj& min ); + + // + // chunk version support + // + + void appendShortVersion( const char * name , BSONObjBuilder& b ); + + ShardChunkVersion getLastmod() const { return _lastmod; } + void setLastmod( ShardChunkVersion v ) { _lastmod = v; } + + // + // split support + // - /** - * @return size of shard in bytes - * talks to mongod to do this - */ - long getPhysicalSize() const; - - int countObjects(int maxcount=0) const; - /** * if the amount of data written nears the max size of a shard * then we check the real size, and if its too big, we split + * @return if something was split */ bool splitIfShould( long dataWritten ); - - /* + + /** + * Splits this chunk at a non-specificed split key to be chosen by the mongod holding this chunk. + * + * @param force if set to true, will split the chunk regardless if the split is really necessary size wise + * if set to false, will only split if the chunk has reached the currently desired maximum size + * @param res the object containing details about the split execution + * @return if found a key, return a pointer to the first chunk, otherwise return a null pointer + */ + ChunkPtr singleSplit( bool force , BSONObj& res ); + + /** + * Splits this chunk at the given key (or keys) + * + * @param splitPoints the vector of keys that should be used to divide this chunk + * @param res the object containing details about the split execution + * @return shared pointer to the first new Chunk or null pointer if failed + */ + ChunkPtr multiSplit( const vector& splitPoints , BSONObj& res ); + + /** + * Asks the mongod holding this chunk to find a key that approximately divides this chunk in two + * + * @param medianKey the key that divides this chunk, if there is one, or empty + */ + void pickMedianKey( BSONObj& medianKey ) const; + + /** + * @param splitPoints vector to be filled in + * @param chunkSize chunk size to target in bytes + * @param maxPoints limits the number of split points that are needed, zero is max (optional) + * @param maxObjs limits the number of objects in each chunk, zero is as max (optional) + */ + void pickSplitVector( vector& splitPoints , int chunkSize , int maxPoints = 0, int maxObjs = 0) const; + + // + // migration support + // + + /** * moves either this shard or newShard if it makes sense too + * * @return whether or not a shard was moved */ bool moveIfShould( ChunkPtr newShard = ChunkPtr() ); - bool moveAndCommit( const Shard& to , string& errmsg ); + /** + * Issues a migrate request for this chunk + * + * @param to shard to move this chunk to + * @param chunSize maximum number of bytes beyond which the migrate should no go trhough + * @param res the object containing details about the migrate execution + * @return true if move was successful + */ + bool moveAndCommit( const Shard& to , long long chunkSize , BSONObj& res ); - const char * getNS(){ return "config.chunks"; } - void serialize(BSONObjBuilder& to, ShardChunkVersion myLastMod=0); - void unserialize(const BSONObj& from); - string modelServer() const; - - void appendShortVersion( const char * name , BSONObjBuilder& b ); + /** + * @return size of shard in bytes + * talks to mongod to do this + 
*/ + long getPhysicalSize() const; + + // + // chunk size support + int countObjects(int maxcount=0) const; + + // + // public constants + // + + static string chunkMetadataNS; static int MaxChunkSize; - string genID() const; - static string genID( const string& ns , const BSONObj& min ); + // + // accessors and helpers + // - const ChunkManager* getManager() const { return _manager; } - - bool getModified() { return _modified; } - void setModified( bool modified ) { _modified = modified; } + string toString() const; - ShardChunkVersion getVersionOnConfigServer() const; - private: + friend ostream& operator << (ostream& out, const Chunk& c) { return (out << c.toString()); } + bool operator==(const Chunk& s) const; + bool operator!=(const Chunk& s) const { return ! ( *this == s ); } - bool _splitIfShould( long dataWritten ); + string getns() const; + const char * getNS() { return "config.chunks"; } + Shard getShard() const { return _shard; } + const ChunkManager* getManager() const { return _manager; } + private: // main shard info - + ChunkManager * _manager; - ShardKeyPattern skey() const; BSONObj _min; BSONObj _max; Shard _shard; ShardChunkVersion _lastmod; - bool _modified; - // transient stuff long _dataWritten; - + // methods, etc.. - - void _split( BSONObj& middle ); - friend class ChunkManager; - friend class ShardObjUnitTest; + /** + * if sort 1, return lowest key + * if sort -1, return highest key + * will return empty object if have none + */ + BSONObj _getExtremeKey( int sort ) const; + + /** initializes _dataWritten with a random value so that a mongos restart wouldn't cause delay in splitting */ + void _setDataWritten(); + + ShardKeyPattern skey() const; }; - class ChunkRange{ + class ChunkRange { public: - const ChunkManager* getManager() const{ return _manager; } - Shard getShard() const{ return _shard; } + const ChunkManager* getManager() const { return _manager; } + Shard getShard() const { return _shard; } const BSONObj& getMin() const { return _min; } const BSONObj& getMax() const { return _max; } @@ -181,11 +234,10 @@ namespace mongo { : _manager(begin->second->getManager()) , _shard(begin->second->getShard()) , _min(begin->second->getMin()) - , _max(prior(end)->second->getMax()) - { + , _max(prior(end)->second->getMax()) { assert( begin != end ); - DEV while (begin != end){ + DEV while (begin != end) { assert(begin->second->getManager() == _manager); assert(begin->second->getShard() == _shard); ++begin; @@ -197,14 +249,13 @@ namespace mongo { : _manager(min.getManager()) , _shard(min.getShard()) , _min(min.getMin()) - , _max(max.getMax()) - { + , _max(max.getMax()) { assert(min.getShard() == max.getShard()); assert(min.getManager() == max.getManager()); assert(min.getMax() == max.getMin()); } - friend ostream& operator<<(ostream& out, const ChunkRange& cr){ + friend ostream& operator<<(ostream& out, const ChunkRange& cr) { return (out << "ChunkRange(min=" << cr._min << ", max=" << cr._max << ", shard=" << cr._shard <<")"); } @@ -239,7 +290,7 @@ namespace mongo { }; /* config.sharding - { ns: 'alleyinsider.fs.chunks' , + { ns: 'alleyinsider.fs.chunks' , key: { ts : 1 } , shards: [ { min: 1, max: 100, server: a } , { min: 101, max: 200 , server : b } ] } @@ -247,75 +298,61 @@ namespace mongo { class ChunkManager { public: - ChunkManager( DBConfig * config , string ns , ShardKeyPattern pattern , bool unique ); + ChunkManager( string ns , ShardKeyPattern pattern , bool unique ); virtual ~ChunkManager(); string getns() const { return _ns; } - + int numChunks() const { 
rwlock lk( _lock , false ); return _chunkMap.size(); } bool hasShardKey( const BSONObj& obj ); + void createFirstChunk( const Shard& shard ); ChunkPtr findChunk( const BSONObj& obj , bool retry = false ); ChunkPtr findChunkOnServer( const Shard& shard ) const; - - ShardKeyPattern& getShardKey(){ return _key; } + const ShardKeyPattern& getShardKey() const { return _key; } - bool isUnique(){ return _unique; } + bool isUnique() const { return _unique; } void maybeChunkCollection(); - + void getShardsForQuery( set& shards , const BSONObj& query ); void getAllShards( set& all ); void getShardsForRange(set& shards, const BSONObj& min, const BSONObj& max); // [min, max) - void save( bool major ); - string toString() const; ShardChunkVersion getVersion( const Shard& shard ) const; ShardChunkVersion getVersion() const; - /** - * actually does a query on the server - * doesn't look at any local data - */ - ShardChunkVersion getVersionOnConfigServer() const; - /** * this is just an increasing number of how many ChunkManagers we have so we know if something has been updated */ - unsigned long long getSequenceNumber(){ - return _sequenceNumber; - } - - void getInfo( BSONObjBuilder& b ){ + unsigned long long getSequenceNumber() const { return _sequenceNumber; } + + void getInfo( BSONObjBuilder& b ) { b.append( "key" , _key.key() ); b.appendBool( "unique" , _unique ); } - + /** * @param me - so i don't get deleted before i'm done */ void drop( ChunkManagerPtr me ); void _printChunks() const; - + + int getCurrentDesiredChunkSize() const; + private: - void _reload(); void _reload_inlock(); void _load(); - void save_inlock( bool major ); - ShardChunkVersion getVersion_inlock() const; void ensureIndex_inlock(); - - DBConfig * _config; + string _ns; ShardKeyPattern _key; bool _unique; - - map _maxMarkers; ChunkMap _chunkMap; ChunkRangeManager _chunkRanges; @@ -323,11 +360,9 @@ namespace mongo { set _shards; unsigned long long _sequenceNumber; - - mutable RWLock _lock; - // This should only be called from Chunk after it has been migrated - void _migrationNotification(Chunk* c); + mutable RWLock _lock; + DistributedLock _nsLock; friend class Chunk; friend class ChunkRangeManager; // only needed for CRM::assertValid() @@ -362,12 +397,14 @@ namespace mongo { /* struct chunk_lock { chunk_lock( const Chunk* c ){ - + } - + Chunk _c; }; */ inline string Chunk::genID() const { return genID(_manager->getns(), _min); } + bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version , bool authoritative , BSONObj& result ); + } // namespace mongo diff --git a/s/client.cpp b/s/client.cpp new file mode 100644 index 0000000..b8559b6 --- /dev/null +++ b/s/client.cpp @@ -0,0 +1,292 @@ +// s/client.cpp + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
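Aside: the reworked ChunkManager::getCurrentDesiredChunkSize() above lowers the split threshold while a collection still has few chunks (a quarter of Chunk::MaxChunkSize below 10 chunks, half of it below 20, never under 1 MB), so an initial load spreads across shards sooner. A minimal standalone sketch of that heuristic follows; the function and parameter names are illustrative only and a hypothetical maxChunkSizeBytes stands in for Chunk::MaxChunkSize, this is not the mongos API itself.

    #include <algorithm>
    #include <iostream>

    // Illustrative only: mirrors the threshold logic described for
    // ChunkManager::getCurrentDesiredChunkSize(); not a mongod/mongos function.
    int desiredChunkSizeBytes( int numChunks , int maxChunkSizeBytes ) {
        const int minChunkSize = 1 << 20;              // 1 MB floor, as in the patch
        int splitThreshold = maxChunkSizeBytes;
        if ( numChunks < 10 )
            splitThreshold = std::max( splitThreshold / 4 , minChunkSize );
        else if ( numChunks < 20 )
            splitThreshold = std::max( splitThreshold / 2 , minChunkSize );
        return splitThreshold;
    }

    int main() {
        const int maxChunk = 64 * 1024 * 1024;         // assume a 64 MB max chunk size
        std::cout << desiredChunkSizeBytes( 3 , maxChunk )  << "\n";  // 16 MB while tiny
        std::cout << desiredChunkSizeBytes( 15 , maxChunk ) << "\n";  // 32 MB
        std::cout << desiredChunkSizeBytes( 50 , maxChunk ) << "\n";  // full 64 MB once grown
        return 0;
    }

The effect is that early chunks split aggressively and the threshold relaxes back to the configured maximum as the collection accumulates chunks.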
+ */ + +#include "pch.h" +#include "server.h" + +#include "../db/commands.h" +#include "../db/dbmessage.h" +#include "../db/stats/counters.h" + +#include "../client/connpool.h" + +#include "client.h" +#include "request.h" +#include "config.h" +#include "chunk.h" +#include "stats.h" +#include "cursors.h" +#include "grid.h" +#include "s/writeback_listener.h" + +namespace mongo { + + ClientInfo::ClientInfo( int clientId ) : _id( clientId ) { + _cur = &_a; + _prev = &_b; + _autoSplitOk = true; + newRequest(); + } + + ClientInfo::~ClientInfo() { + if ( _lastAccess ) { + scoped_lock lk( _clientsLock ); + Cache::iterator i = _clients.find( _id ); + if ( i != _clients.end() ) { + _clients.erase( i ); + } + } + } + + void ClientInfo::addShard( const string& shard ) { + _cur->insert( shard ); + _sinceLastGetError.insert( shard ); + } + + void ClientInfo::newRequest( AbstractMessagingPort* p ) { + + if ( p ) { + HostAndPort r = p->remote(); + if ( _remote.port() == -1 ) + _remote = r; + else if ( _remote != r ) { + stringstream ss; + ss << "remotes don't match old [" << _remote.toString() << "] new [" << r.toString() << "]"; + throw UserException( 13134 , ss.str() ); + } + } + + _lastAccess = (int) time(0); + + set * temp = _cur; + _cur = _prev; + _prev = temp; + _cur->clear(); + } + + void ClientInfo::disconnect() { + _lastAccess = 0; + } + + ClientInfo * ClientInfo::get( int clientId , bool create ) { + + if ( ! clientId ) + clientId = getClientId(); + + if ( ! clientId ) { + ClientInfo * info = _tlInfo.get(); + if ( ! info ) { + info = new ClientInfo( 0 ); + _tlInfo.reset( info ); + } + info->newRequest(); + return info; + } + + scoped_lock lk( _clientsLock ); + Cache::iterator i = _clients.find( clientId ); + if ( i != _clients.end() ) + return i->second; + if ( ! create ) + return 0; + ClientInfo * info = new ClientInfo( clientId ); + _clients[clientId] = info; + return info; + } + + void ClientInfo::disconnect( int clientId ) { + if ( ! 
clientId ) + return; + + scoped_lock lk( _clientsLock ); + Cache::iterator i = _clients.find( clientId ); + if ( i == _clients.end() ) + return; + + ClientInfo* ci = i->second; + ci->disconnect(); + delete ci; + _clients.erase( i ); + } + + void ClientInfo::_addWriteBack( vector& all , const BSONObj& gle ) { + BSONElement w = gle["writeback"]; + + if ( w.type() != jstOID ) + return; + + BSONElement cid = gle["connectionId"]; + + if ( cid.eoo() ) { + error() << "getLastError writeback can't work because of version mis-match" << endl; + return; + } + + all.push_back( WBInfo( cid.numberLong() , w.OID() ) ); + } + + vector ClientInfo::_handleWriteBacks( vector& all , bool fromWriteBackListener ) { + vector res; + + if ( fromWriteBackListener ) { + LOG(1) << "not doing recusrive writeback" << endl; + return res; + } + + if ( all.size() == 0 ) + return res; + + for ( unsigned i=0; i * shards = getPrev(); + + if ( shards->size() == 0 ) { + result.appendNull( "err" ); + return true; + } + + vector writebacks; + + // handle single server + if ( shards->size() == 1 ) { + string theShard = *(shards->begin() ); + + ShardConnection conn( theShard , "" ); + + BSONObj res; + bool ok = conn->runCommand( "admin" , options , res ); + res = res.getOwned(); + conn.done(); + + + _addWriteBack( writebacks , res ); + + // hit other machines just to block + for ( set::const_iterator i=sinceLastGetError().begin(); i!=sinceLastGetError().end(); ++i ) { + string temp = *i; + if ( temp == theShard ) + continue; + + ShardConnection conn( temp , "" ); + _addWriteBack( writebacks , conn->getLastErrorDetailed() ); + conn.done(); + } + clearSinceLastGetError(); + + if ( writebacks.size() ){ + vector v = _handleWriteBacks( writebacks , fromWriteBackListener ); + if ( v.size() == 0 && fromWriteBackListener ) { + // ok + } + else { + assert( v.size() == 1 ); + result.appendElements( v[0] ); + result.appendElementsUnique( res ); + result.append( "initialGLEHost" , theShard ); + } + } + else { + result.append( "singleShard" , theShard ); + result.appendElements( res ); + } + + return ok; + } + + BSONArrayBuilder bbb( result.subarrayStart( "shards" ) ); + + long long n = 0; + + // hit each shard + vector errors; + vector errorObjects; + for ( set::iterator i = shards->begin(); i != shards->end(); i++ ) { + string theShard = *i; + bbb.append( theShard ); + ShardConnection conn( theShard , "" ); + BSONObj res; + bool ok = conn->runCommand( "admin" , options , res ); + _addWriteBack( writebacks, res ); + + string temp = DBClientWithCommands::getLastErrorString( res ); + if ( conn->type() != ConnectionString::SYNC && ( ok == false || temp.size() ) ) { + errors.push_back( temp ); + errorObjects.push_back( res ); + } + n += res["n"].numberLong(); + conn.done(); + } + + bbb.done(); + + result.appendNumber( "n" , n ); + + // hit other machines just to block + for ( set::const_iterator i=sinceLastGetError().begin(); i!=sinceLastGetError().end(); ++i ) { + string temp = *i; + if ( shards->count( temp ) ) + continue; + + ShardConnection conn( temp , "" ); + _addWriteBack( writebacks, conn->getLastErrorDetailed() ); + conn.done(); + } + clearSinceLastGetError(); + + if ( errors.size() == 0 ) { + result.appendNull( "err" ); + _handleWriteBacks( writebacks , fromWriteBackListener ); + return true; + } + + result.append( "err" , errors[0].c_str() ); + + { + // errs + BSONArrayBuilder all( result.subarrayStart( "errs" ) ); + for ( unsigned i=0; i ClientInfo::_tlInfo; + +} // namespace mongo diff --git a/s/client.h b/s/client.h new file mode 
100644 index 0000000..bd4295f --- /dev/null +++ b/s/client.h @@ -0,0 +1,120 @@ +// client.h + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "../pch.h" + +namespace mongo { + + /** + * holds information about a client connected to a mongos + * 1 per client socket + * currently implemented with a thread local + */ + class ClientInfo { + + typedef map Cache; + + public: + ClientInfo( int clientId ); + ~ClientInfo(); + + /** new request from client, adjusts internal state */ + void newRequest( AbstractMessagingPort* p = 0 ); + + /** client disconnected */ + void disconnect(); + + /** + * @return remote socket address of the client + */ + HostAndPort getRemote() const { return _remote; } + + /** + * notes that this client use this shard + * keeps track of all shards accessed this request + */ + void addShard( const string& shard ); + + /** + * gets shards used on the previous request + */ + set * getPrev() const { return _prev; }; + + /** + * gets all shards we've accessed since the last time we called clearSinceLastGetError + */ + const set& sinceLastGetError() const { return _sinceLastGetError; } + + /** + * clears list of shards we've talked to + */ + void clearSinceLastGetError() { _sinceLastGetError.clear(); } + + /** + * calls getLastError + * resets shards since get last error + * @return if the command was ok or if there was an error + */ + bool getLastError( const BSONObj& options , BSONObjBuilder& result , bool fromWriteBackListener = false ); + + /** @return if its ok to auto split from this client */ + bool autoSplitOk() const { return _autoSplitOk; } + + void noAutoSplit() { _autoSplitOk = false; } + + static ClientInfo * get( int clientId = 0 , bool create = true ); + static void disconnect( int clientId ); + + private: + + struct WBInfo { + WBInfo( ConnectionId c , OID o ) : connectionId( c ) , id( o ) {} + ConnectionId connectionId; + OID id; + }; + + // for getLastError + void _addWriteBack( vector& all , const BSONObj& o ); + vector _handleWriteBacks( vector& all , bool fromWriteBackListener ); + + + int _id; // unique client id + HostAndPort _remote; // server:port of remote socket end + + // we use _a and _b to store shards we've talked to on the current request and the previous + // we use 2 so we can flip for getLastError type operations + + set _a; // actual set for _cur or _prev + set _b; // " + + set * _cur; // pointer to _a or _b depending on state + set * _prev; // "" + + + set _sinceLastGetError; // all shards accessed since last getLastError + + int _lastAccess; + bool _autoSplitOk; + + static mongo::mutex _clientsLock; + static Cache& _clients; + static boost::thread_specific_ptr _tlInfo; + }; + + +} diff --git a/s/commands_admin.cpp b/s/commands_admin.cpp index 551b8a9..532161a 100644 --- a/s/commands_admin.cpp +++ b/s/commands_admin.cpp @@ -29,6 +29,7 @@ #include "../util/message.h" #include "../util/processinfo.h" #include "../util/stringutils.h" +#include "../util/version.h" 
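Aside: ClientInfo above keeps two shard sets and flips them on every new request, so that a later getLastError can consult the shards touched by the operation that just finished. A minimal standalone sketch of that double-buffering idea follows; the class and member names are illustrative, not the mongos types.

    #include <algorithm>
    #include <iostream>
    #include <set>
    #include <string>

    // Illustrative only: models the _a/_b swap in ClientInfo, where the "current"
    // set collects shards for the request in flight and the "previous" set holds
    // the shards of the request that just completed.
    class ShardTracker {
    public:
        ShardTracker() : _cur( &_a ) , _prev( &_b ) {}

        void newRequest() {                  // called at the start of each client request
            std::swap( _cur , _prev );       // what was current becomes "previous"
            _cur->clear();                   // start collecting for the new request
        }

        void addShard( const std::string& s ) { _cur->insert( s ); }

        const std::set<std::string>& getPrev() const { return *_prev; }

    private:
        std::set<std::string> _a , _b;
        std::set<std::string> *_cur , *_prev;
    };

    int main() {
        ShardTracker t;
        t.newRequest();
        t.addShard( "shard0000" );
        t.addShard( "shard0001" );
        t.newRequest();                      // a getLastError would now inspect getPrev()
        const std::set<std::string>& prev = t.getPrev();
        for ( std::set<std::string>::const_iterator i = prev.begin(); i != prev.end(); ++i )
            std::cout << *i << "\n";
        return 0;
    }

Two sets are enough because only the immediately preceding request matters for getLastError-style operations; anything older is covered separately by the sinceLastGetError set.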
#include "../client/connpool.h" @@ -41,6 +42,8 @@ #include "grid.h" #include "strategy.h" #include "stats.h" +#include "writeback_listener.h" +#include "client.h" namespace mongo { @@ -48,7 +51,7 @@ namespace mongo { class GridAdminCmd : public Command { public: - GridAdminCmd( const char * n ) : Command( n , false, tolowerString(n).c_str() ){ + GridAdminCmd( const char * n ) : Command( n , false, tolowerString(n).c_str() ) { } virtual bool slaveOk() const { return true; @@ -58,7 +61,7 @@ namespace mongo { } // all grid commands are designed not to lock - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } }; // --------------- misc commands ---------------------- @@ -69,31 +72,34 @@ namespace mongo { virtual void help( stringstream& help ) const { help << " shows status/reachability of servers in the cluster"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { result.append("configserver", configServer.getPrimary().getConnString() ); result.append("isdbgrid", 1); return true; } } netstat; - + class ServerStatusCmd : public Command { public: - ServerStatusCmd() : Command( "serverStatus" , true ){ + ServerStatusCmd() : Command( "serverStatus" , true ) { _started = time(0); } - + virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return NONE; } - + virtual LockType locktype() const { return NONE; } + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + result.append( "host" , prettyHostName() ); + result.append("version", versionString); + result.append("process","mongos"); result.append("uptime",(double) (time(0)-_started)); result.appendDate( "localTime" , jsTime() ); { BSONObjBuilder t( result.subobjStart( "mem" ) ); - + ProcessInfo p; - if ( p.supported() ){ + if ( p.supported() ) { t.appendNumber( "resident" , p.getResidentSize() ); t.appendNumber( "virtual" , p.getVirtualMemorySize() ); t.appendBool( "supported" , true ); @@ -102,7 +108,7 @@ namespace mongo { result.append( "note" , "not all mem info support on this platform" ); t.appendBool( "supported" , false ); } - + t.done(); } @@ -112,7 +118,7 @@ namespace mongo { bb.append( "available" , connTicketHolder.available() ); bb.done(); } - + { BSONObjBuilder bb( result.subobjStart( "extra_info" ) ); bb.append("note", "fields vary by platform"); @@ -120,7 +126,7 @@ namespace mongo { p.getExtraInfo(bb); bb.done(); } - + result.append( "opcounters" , globalOpCounters.getObj() ); { BSONObjBuilder bb( result.subobjStart( "ops" ) ); @@ -130,7 +136,7 @@ namespace mongo { } result.append( "shardCursorType" , shardedCursorTypes.getObj() ); - + { BSONObjBuilder asserts( result.subobjStart( "asserts" ) ); asserts.append( "regular" , assertionCount.regular ); @@ -141,6 +147,13 @@ namespace mongo { asserts.done(); } + { + BSONObjBuilder bb( result.subobjStart( "network" ) ); + networkCounter.append( bb ); + bb.done(); + } + + return 1; } @@ -149,34 +162,34 @@ namespace mongo { class FsyncCommand : public GridAdminCmd { public: - FsyncCommand() : GridAdminCmd( "fsync" ){} - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - if ( cmdObj["lock"].trueValue() ){ + FsyncCommand() : GridAdminCmd( "fsync" ) {} + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + if ( cmdObj["lock"].trueValue() ) { errmsg = "can't 
do lock through mongos"; return false; } - + BSONObjBuilder sub; bool ok = true; int numFiles = 0; - + vector shards; Shard::getAllShards( shards ); - for ( vector::iterator i=shards.begin(); i!=shards.end(); i++ ){ + for ( vector::iterator i=shards.begin(); i!=shards.end(); i++ ) { Shard s = *i; BSONObj x = s.runCommand( "admin" , "fsync" ); sub.append( s.getName() , x ); - if ( ! x["ok"].trueValue() ){ + if ( ! x["ok"].trueValue() ) { ok = false; errmsg = x["errmsg"].String(); } - + numFiles += x["numFiles"].numberInt(); } - + result.append( "numFiles" , numFiles ); result.append( "all" , sub.obj() ); return ok; @@ -192,43 +205,43 @@ namespace mongo { help << " example: { moveprimary : 'foo' , to : 'localhost:9999' }"; // TODO: locking? } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string dbname = cmdObj.firstElement().valuestrsafe(); - if ( dbname.size() == 0 ){ + if ( dbname.size() == 0 ) { errmsg = "no db"; return false; } - if ( dbname == "config" ){ + if ( dbname == "config" ) { errmsg = "can't move config db"; return false; } DBConfigPtr config = grid.getDBConfig( dbname , false ); - if ( ! config ){ + if ( ! config ) { errmsg = "can't find db!"; return false; } string to = cmdObj["to"].valuestrsafe(); - if ( ! to.size() ){ + if ( ! to.size() ) { errmsg = "you have to specify where you want to move it"; return false; } Shard s = Shard::make( to ); - if ( config->getPrimary() == s.getConnString() ){ + if ( config->getPrimary() == s.getConnString() ) { errmsg = "thats already the primary"; return false; } - if ( ! grid.knowAboutShard( s.getConnString() ) ){ + if ( ! grid.knowAboutShard( s.getConnString() ) ) { errmsg = "that server isn't known to me"; return false; } - - log() << "movePrimary: moving " << dbname << " primary from: " << config->getPrimary().toString() + + log() << "movePrimary: moving " << dbname << " primary from: " << config->getPrimary().toString() << " to: " << s.toString() << endl; // TODO LOCKING: this is not safe with multiple mongos @@ -241,7 +254,7 @@ namespace mongo { bool worked = toconn->runCommand( dbname.c_str() , BSON( "clone" << config->getPrimary().getConnString() ) , cloneRes ); toconn.done(); - if ( ! worked ){ + if ( ! worked ) { log() << "clone failed" << cloneRes << endl; errmsg = "clone failed"; return false; @@ -264,25 +277,25 @@ namespace mongo { class EnableShardingCmd : public GridAdminCmd { public: - EnableShardingCmd() : GridAdminCmd( "enableSharding" ){} + EnableShardingCmd() : GridAdminCmd( "enableSharding" ) {} virtual void help( stringstream& help ) const { help - << "Enable sharding for a db. (Use 'shardcollection' command afterwards.)\n" - << " { enablesharding : \"\" }\n"; + << "Enable sharding for a db. 
(Use 'shardcollection' command afterwards.)\n" + << " { enablesharding : \"\" }\n"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string dbname = cmdObj.firstElement().valuestrsafe(); - if ( dbname.size() == 0 ){ + if ( dbname.size() == 0 ) { errmsg = "no db"; return false; } DBConfigPtr config = grid.getDBConfig( dbname ); - if ( config->isShardingEnabled() ){ + if ( config->isShardingEnabled() ) { errmsg = "already enabled"; return false; } - + log() << "enabling sharding on: " << dbname << endl; config->enableSharding(); @@ -295,46 +308,46 @@ namespace mongo { class ShardCollectionCmd : public GridAdminCmd { public: - ShardCollectionCmd() : GridAdminCmd( "shardCollection" ){} + ShardCollectionCmd() : GridAdminCmd( "shardCollection" ) {} virtual void help( stringstream& help ) const { help - << "Shard a collection. Requires key. Optional unique. Sharding must already be enabled for the database.\n" - << " { enablesharding : \"\" }\n"; + << "Shard a collection. Requires key. Optional unique. Sharding must already be enabled for the database.\n" + << " { enablesharding : \"\" }\n"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string ns = cmdObj.firstElement().valuestrsafe(); - if ( ns.size() == 0 ){ + if ( ns.size() == 0 ) { errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); - if ( ! config->isShardingEnabled() ){ + if ( ! config->isShardingEnabled() ) { errmsg = "sharding not enabled for db"; return false; } - if ( config->isSharded( ns ) ){ + if ( config->isSharded( ns ) ) { errmsg = "already sharded"; return false; } BSONObj key = cmdObj.getObjectField( "key" ); - if ( key.isEmpty() ){ + if ( key.isEmpty() ) { errmsg = "no shard key"; return false; } - BSONForEach(e, key){ - if (!e.isNumber() || e.number() != 1.0){ + BSONForEach(e, key) { + if (!e.isNumber() || e.number() != 1.0) { errmsg = "shard keys must all be ascending"; return false; } } - if ( ns.find( ".system." ) != string::npos ){ + if ( ns.find( ".system." ) != string::npos ) { errmsg = "can't shard system namespaces"; return false; } @@ -344,10 +357,10 @@ namespace mongo { // 1. A unique index must have the sharding key as its prefix. Otherwise maintainig uniqueness would // require coordinated access to all shards. Trying to shard a collection with such an index is not // allowed. - // + // // 2. Sharding a collection requires an index over the sharding key. That index must be create upfront. // The rationale is that sharding a non-empty collection would need to create the index and that could - // be slow. Requiring the index upfront allows the admin to plan before sharding and perhaps use + // be slow. Requiring the index upfront allows the admin to plan before sharding and perhaps use // background index construction. One exception to the rule: empty collections. It's fairly easy to // create the index as part of the sharding process. 
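Aside: the rules spelled out above mean shardCollection only accepts key patterns whose fields are all ascending (value 1) and rejects unique indexes that do not have the shard key as their prefix. A minimal standalone sketch of those two checks follows, using plain field/direction pairs instead of BSON; the helper names are illustrative and not part of mongos.

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    typedef std::vector< std::pair<std::string,int> > KeyPattern;  // field name -> direction

    // Rule from the patch: every shard key field must be ascending (1).
    bool allAscending( const KeyPattern& key ) {
        for ( size_t i = 0; i < key.size(); i++ )
            if ( key[i].second != 1 )
                return false;
        return true;
    }

    // Rule from the patch comment: a unique index is only acceptable if the shard
    // key is a prefix of that index; otherwise maintaining uniqueness would require
    // coordinated access to all shards.
    bool shardKeyIsPrefixOf( const KeyPattern& shardKey , const KeyPattern& uniqueIndex ) {
        if ( uniqueIndex.size() < shardKey.size() )
            return false;
        for ( size_t i = 0; i < shardKey.size(); i++ )
            if ( shardKey[i] != uniqueIndex[i] )
                return false;
        return true;
    }

    int main() {
        KeyPattern shardKey;
        shardKey.push_back( std::make_pair( std::string( "ts" ) , 1 ) );

        KeyPattern uniqueIdx;
        uniqueIdx.push_back( std::make_pair( std::string( "ts" ) , 1 ) );
        uniqueIdx.push_back( std::make_pair( std::string( "user" ) , 1 ) );

        std::cout << allAscending( shardKey ) << " "
                  << shardKeyIsPrefixOf( shardKey , uniqueIdx ) << std::endl;   // prints: 1 1
        return 0;
    }

The one exception noted in the comment, empty collections, is handled by mongos creating the shard-key index itself as part of the sharding process.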
// @@ -358,20 +371,20 @@ namespace mongo { bool hasShardIndex = false; ScopedDbConnection conn( config->getPrimary() ); - BSONObjBuilder b; - b.append( "ns" , ns ); + BSONObjBuilder b; + b.append( "ns" , ns ); auto_ptr cursor = conn->query( config->getName() + ".system.indexes" , b.obj() ); - while ( cursor->more() ){ + while ( cursor->more() ) { BSONObj idx = cursor->next(); // Is index key over the sharding key? Remember that. - if ( key.woCompare( idx["key"].embeddedObjectUserCheck() ) == 0 ){ + if ( key.woCompare( idx["key"].embeddedObjectUserCheck() ) == 0 ) { hasShardIndex = true; } // Not a unique index? Move on. - if ( idx["unique"].eoo() || ! idx["unique"].Bool() ) + if ( idx["unique"].eoo() || ! idx["unique"].trueValue() ) continue; // Shard key is prefix of unique index? Move on. @@ -384,17 +397,31 @@ namespace mongo { } BSONObj res = conn->findOne( config->getName() + ".system.namespaces" , BSON( "name" << ns ) ); - if ( res["options"].type() == Object && res["options"].embeddedObject()["capped"].trueValue() ){ + if ( res["options"].type() == Object && res["options"].embeddedObject()["capped"].trueValue() ) { errmsg = "can't shard capped collection"; conn.done(); return false; } - if ( ! hasShardIndex && ( conn->count( ns ) != 0 ) ){ + if ( hasShardIndex ) { + // make sure there are no null entries in the sharding index + BSONObjBuilder cmd; + cmd.append( "checkShardingIndex" , ns ); + cmd.append( "keyPattern" , key ); + BSONObj cmdObj = cmd.obj(); + if ( ! conn->runCommand( "admin" , cmdObj , res )) { + errmsg = res["errmsg"].str(); + conn.done(); + return false; + } + } + + if ( ! hasShardIndex && ( conn->count( ns ) != 0 ) ) { errmsg = "please create an index over the sharding key before sharding."; + conn.done(); return false; } - + conn.done(); } @@ -409,26 +436,26 @@ namespace mongo { class GetShardVersion : public GridAdminCmd { public: - GetShardVersion() : GridAdminCmd( "getShardVersion" ){} + GetShardVersion() : GridAdminCmd( "getShardVersion" ) {} virtual void help( stringstream& help ) const { help << " example: { getShardVersion : 'alleyinsider.foo' } "; } - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string ns = cmdObj.firstElement().valuestrsafe(); - if ( ns.size() == 0 ){ + if ( ns.size() == 0 ) { errmsg = "need to speciy fully namespace"; return false; } - + DBConfigPtr config = grid.getDBConfig( ns ); - if ( ! config->isSharded( ns ) ){ + if ( ! config->isSharded( ns ) ) { errmsg = "ns not sharded."; return false; } - + ChunkManagerPtr cm = config->getChunkManager( ns ); - if ( ! cm ){ + if ( ! 
cm ) { errmsg = "no chunk manager?"; return false; } @@ -439,144 +466,141 @@ namespace mongo { } } getShardVersionCmd; - class SplitCollectionHelper : public GridAdminCmd { + class SplitCollectionCmd : public GridAdminCmd { public: - SplitCollectionHelper( const char * name ) : GridAdminCmd( name ) , _name( name ){} + SplitCollectionCmd() : GridAdminCmd( "split" ) {} virtual void help( stringstream& help ) const { help - << " example: { split : 'alleyinsider.blog.posts' , find : { ts : 1 } } - split the shard that contains give key \n" - << " example: { split : 'alleyinsider.blog.posts' , middle : { ts : 1 } } - split the shard that contains the key with this as the middle \n" - << " NOTE: this does not move move the chunks, it merely creates a logical seperation \n" - ; + << " example: - split the shard that contains give key \n" + << " { split : 'alleyinsider.blog.posts' , find : { ts : 1 } }\n" + << " example: - split the shard that contains the key with this as the middle \n" + << " { split : 'alleyinsider.blog.posts' , middle : { ts : 1 } }\n" + << " NOTE: this does not move move the chunks, it merely creates a logical seperation \n" + ; } - virtual bool _split( BSONObjBuilder& result , string&errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ) = 0; - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { ShardConnection::sync(); string ns = cmdObj.firstElement().valuestrsafe(); - if ( ns.size() == 0 ){ + if ( ns.size() == 0 ) { errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); - if ( ! config->isSharded( ns ) ){ + if ( ! config->isSharded( ns ) ) { errmsg = "ns not sharded. 
have to shard before can split"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); - if ( find.isEmpty() ){ + if ( find.isEmpty() ) { find = cmdObj.getObjectField( "middle" ); - if ( find.isEmpty() ){ + if ( find.isEmpty() ) { errmsg = "need to specify find or middle"; return false; } } - - ChunkManagerPtr info = config->getChunkManager( ns ); - ChunkPtr old = info->findChunk( find ); - - return _split( result , errmsg , ns , info , old , cmdObj.getObjectField( "middle" ) ); - } - - protected: - string _name; - }; - - class SplitValueCommand : public SplitCollectionHelper { - public: - SplitValueCommand() : SplitCollectionHelper( "splitValue" ){} - virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ){ - - result << "shardinfo" << old->toString(); - - result.appendBool( "auto" , middle.isEmpty() ); - - if ( middle.isEmpty() ) - middle = old->pickSplitPoint(); - result.append( "middle" , middle ); - - return true; - } + ChunkManagerPtr info = config->getChunkManager( ns ); + ChunkPtr chunk = info->findChunk( find ); + BSONObj middle = cmdObj.getObjectField( "middle" ); - } splitValueCmd; + assert( chunk.get() ); + log() << "splitting: " << ns << " shard: " << chunk << endl; + BSONObj res; + ChunkPtr p; + if ( middle.isEmpty() ) { + p = chunk->singleSplit( true /* force a split even if not enough data */ , res ); - class SplitCollection : public SplitCollectionHelper { - public: - SplitCollection() : SplitCollectionHelper( "split" ){} - virtual bool _split( BSONObjBuilder& result , string& errmsg , const string& ns , ChunkManagerPtr manager , ChunkPtr old , BSONObj middle ){ - assert( old.get() ); - log() << "splitting: " << ns << " shard: " << old << endl; - - if ( middle.isEmpty() ) - old->split(); + } else { + // sanity check if the key provided is a valid split point + if ( ( middle == chunk->getMin() ) || ( middle == chunk->getMax() ) ) { + errmsg = "cannot split on initial or final chunk's key"; + return false; + } + vector splitPoints; splitPoints.push_back( middle ); - old->multiSplit( splitPoints ); + p = chunk->multiSplit( splitPoints , res ); } + if ( p.get() == NULL ) { + errmsg = "split failed"; + result.append( "cause" , res ); + return false; + } + config->getChunkManager( ns , true ); return true; } - - } splitCollectionCmd; class MoveChunkCmd : public GridAdminCmd { public: - MoveChunkCmd() : GridAdminCmd( "moveChunk" ){} + MoveChunkCmd() : GridAdminCmd( "moveChunk" ) {} virtual void help( stringstream& help ) const { help << "{ movechunk : 'test.foo' , find : { num : 1 } , to : 'localhost:30001' }"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { ShardConnection::sync(); Timer t; string ns = cmdObj.firstElement().valuestrsafe(); - if ( ns.size() == 0 ){ + if ( ns.size() == 0 ) { errmsg = "no ns"; return false; } DBConfigPtr config = grid.getDBConfig( ns ); - if ( ! config->isSharded( ns ) ){ + if ( ! config->isSharded( ns ) ) { errmsg = "ns not sharded. have to shard before can move a chunk"; return false; } BSONObj find = cmdObj.getObjectField( "find" ); - if ( find.isEmpty() ){ + if ( find.isEmpty() ) { errmsg = "need to specify find. see help"; return false; } string toString = cmdObj["to"].valuestrsafe(); - if ( ! toString.size() ){ + if ( ! 
toString.size() ) { errmsg = "you have to specify where you want to move the chunk"; return false; } - + Shard to = Shard::make( toString ); + // so far, chunk size serves test purposes; it may or may not become a supported parameter + long long maxChunkSizeBytes = cmdObj["maxChunkSizeBytes"].numberLong(); + if ( maxChunkSizeBytes == 0 ) { + maxChunkSizeBytes = Chunk::MaxChunkSize; + } + tlog() << "CMD: movechunk: " << cmdObj << endl; ChunkManagerPtr info = config->getChunkManager( ns ); ChunkPtr c = info->findChunk( find ); const Shard& from = c->getShard(); - if ( from == to ){ + if ( from == to ) { errmsg = "that chunk is already on that shard"; return false; } - - if ( ! c->moveAndCommit( to , errmsg ) ) + + BSONObj res; + if ( ! c->moveAndCommit( to , maxChunkSizeBytes , res ) ) { + errmsg = "move failed"; + result.append( "cause" , res ); return false; + } + + // pre-emptively reload the config to get new version info + config->getChunkManager( ns , true ); result.append( "millis" , t.millis() ); return true; @@ -591,12 +615,12 @@ namespace mongo { virtual void help( stringstream& help ) const { help << "list all shards of the system"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { ScopedDbConnection conn( configServer.getPrimary() ); vector all; auto_ptr cursor = conn->query( "config.shards" , BSONObj() ); - while ( cursor->more() ){ + while ( cursor->more() ) { BSONObj o = cursor->next(); all.push_back( o ); } @@ -608,27 +632,27 @@ namespace mongo { } } listShardsCmd; - /* a shard is a single mongod server or a replica pair. add it (them) to the cluster as a storage partition. */ + /* a shard is a single mongod server or a replica pair. add it (them) to the cluster as a storage partition. */ class AddShard : public GridAdminCmd { public: AddShard() : GridAdminCmd("addShard") { } virtual void help( stringstream& help ) const { help << "add a new shard to the system"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { errmsg.clear(); // get replica set component hosts ConnectionString servers = ConnectionString::parse( cmdObj.firstElement().valuestrsafe() , errmsg ); - if ( ! errmsg.empty() ){ + if ( ! errmsg.empty() ) { log() << "addshard request " << cmdObj << " failed:" << errmsg << endl; return false; } // using localhost in server names implies every other process must use locahost addresses too vector serverAddrs = servers.getServers(); - for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ){ - if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ){ + for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ) { + if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ) { errmsg = "can't use localhost as a shard since all shards need to communicate. " "either use all shards and configdbs in localhost or all in actual IPs " ; log() << "addshard request " << cmdObj << " failed: attempt to mix localhosts and IPs" << endl; @@ -636,7 +660,7 @@ namespace mongo { } // it's fine if mongods of a set all use default port - if ( ! serverAddrs[i].hasPort() ){ + if ( ! 
serverAddrs[i].hasPort() ) { serverAddrs[i].setPort( CmdLine::ShardServerPort ); } } @@ -645,15 +669,15 @@ namespace mongo { string name = ""; if ( cmdObj["name"].type() == String ) { name = cmdObj["name"].valuestrsafe(); - } + } // maxSize is the space usage cap in a shard in MBs long long maxSize = 0; - if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ){ + if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ) { maxSize = cmdObj[ ShardFields::maxSize.name() ].numberLong(); } - - if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ){ + + if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ) { log() << "addshard request " << cmdObj << " failed: " << errmsg << endl; return false; } @@ -673,10 +697,10 @@ namespace mongo { virtual void help( stringstream& help ) const { help << "remove a shard to the system."; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string target = cmdObj.firstElement().valuestrsafe(); Shard s = Shard::make( target ); - if ( ! grid.knowAboutShard( s.getConnString() ) ){ + if ( ! grid.knowAboutShard( s.getConnString() ) ) { errmsg = "unknown shard"; return false; } @@ -687,7 +711,7 @@ namespace mongo { BSONObj searchDoc = BSON( "_id" << s.getName() ); BSONObj drainingDoc = BSON( "_id" << s.getName() << ShardFields::draining(true) ); BSONObj shardDoc = conn->findOne( "config.shards", drainingDoc ); - if ( shardDoc.isEmpty() ){ + if ( shardDoc.isEmpty() ) { // TODO prevent move chunks to this shard. @@ -696,7 +720,7 @@ namespace mongo { conn->update( "config.shards" , searchDoc , newStatus, false /* do no upsert */); errmsg = conn->getLastError(); - if ( errmsg.size() ){ + if ( errmsg.size() ) { log() << "error starting remove shard: " << s.getName() << " err: " << errmsg << endl; return false; } @@ -704,7 +728,7 @@ namespace mongo { Shard::reloadShardInfo(); result.append( "msg" , "draining started successfully" ); - result.append( "state" , "started" ); + result.append( "state" , "started" ); result.append( "shard" , s.getName() ); conn.done(); return true; @@ -716,12 +740,12 @@ namespace mongo { long long chunkCount = conn->count( "config.chunks" , shardIDDoc ); BSONObj primaryDoc = BSON( "primary" << shardDoc[ "_id" ].str() ); long long dbCount = conn->count( "config.databases" , primaryDoc ); - if ( ( chunkCount == 0 ) && ( dbCount == 0 ) ){ - log() << "going to remove shard: " << s.getName() << endl; + if ( ( chunkCount == 0 ) && ( dbCount == 0 ) ) { + log() << "going to remove shard: " << s.getName() << endl; conn->remove( "config.shards" , searchDoc ); errmsg = conn->getLastError(); - if ( errmsg.size() ){ + if ( errmsg.size() ) { log() << "error concluding remove shard: " << s.getName() << " err: " << errmsg << endl; return false; } @@ -755,7 +779,7 @@ namespace mongo { class IsDbGridCmd : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } @@ -769,7 +793,7 @@ namespace mongo { class CmdIsMaster : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool slaveOk() const { return true; @@ -777,10 +801,11 @@ namespace mongo { virtual void help( stringstream& help ) const { help << "test if this is master half of a replica pair"; } - CmdIsMaster() : 
Command("ismaster") { } + CmdIsMaster() : Command("isMaster" , false , "ismaster") { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { - result.append("ismaster", 1.0 ); + result.appendBool("ismaster", true ); result.append("msg", "isdbgrid"); + result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize); return true; } } ismaster; @@ -794,23 +819,23 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual void help( stringstream &help ) const { help << "{whatsmyuri:1}"; - } + } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { result << "you" << ClientInfo::get()->getRemote(); return true; } } cmdWhatsMyUri; - + class CmdShardingGetPrevError : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool slaveOk() const { return true; @@ -827,7 +852,7 @@ namespace mongo { class CmdShardingGetLastError : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } virtual bool slaveOk() const { return true; @@ -836,196 +861,147 @@ namespace mongo { help << "check for an error on the last command executed"; } CmdShardingGetLastError() : Command("getLastError" , false , "getlasterror") { } - - void addWriteBack( vector& all , const BSONObj& o ){ - BSONElement e = o["writeback"]; - if ( e.type() == jstOID ) - all.push_back( e.OID() ); - } - - void handleWriteBacks( vector& all ){ - if ( all.size() == 0 ) - return; - - for ( unsigned i=0; imsg.size() && le->nPrev == 1 ){ + if ( le->msg.size() && le->nPrev == 1 ) { le->appendSelf( result ); return true; } } - + ClientInfo * client = ClientInfo::get(); - set * shards = client->getPrev(); - - if ( shards->size() == 0 ){ - result.appendNull( "err" ); - return true; - } + return client->getLastError( cmdObj , result ); + } + } cmdGetLastError; - //log() << "getlasterror enter: " << shards->size() << endl; + } + class CmdShardingResetError : public Command { + public: + CmdShardingResetError() : Command( "resetError" , false , "reseterror" ) {} - vector writebacks; - - // handle single server - if ( shards->size() == 1 ){ - string theShard = *(shards->begin() ); - result.append( "theshard" , theShard.c_str() ); - ShardConnection conn( theShard , "" ); - BSONObj res; - bool ok = conn->runCommand( dbName , cmdObj , res ); - //log() << "\t" << res << endl; - result.appendElements( res ); - conn.done(); - result.append( "singleShard" , theShard ); - addWriteBack( writebacks , res ); - - // hit other machines just to block - for ( set::const_iterator i=client->sinceLastGetError().begin(); i!=client->sinceLastGetError().end(); ++i ){ - string temp = *i; - if ( temp == theShard ) - continue; - - ShardConnection conn( temp , "" ); - addWriteBack( writebacks , conn->getLastErrorDetailed() ); - conn.done(); - } - client->clearSinceLastGetError(); - handleWriteBacks( writebacks ); - return ok; - } - - BSONArrayBuilder bbb( result.subarrayStart( "shards" ) ); - - long long n = 0; - - // hit each shard - vector errors; - for ( set::iterator i = shards->begin(); i != shards->end(); i++ ){ - string theShard = *i; - bbb.append( theShard ); - ShardConnection 
conn( theShard , "" ); - BSONObj res; - bool ok = conn->runCommand( dbName , cmdObj , res ); - addWriteBack( writebacks, res ); - string temp = DBClientWithCommands::getLastErrorString( res ); - if ( ok == false || temp.size() ) - errors.push_back( temp ); - n += res["n"].numberLong(); - conn.done(); - } - - bbb.done(); - - result.appendNumber( "n" , n ); - - // hit other machines just to block - for ( set::const_iterator i=client->sinceLastGetError().begin(); i!=client->sinceLastGetError().end(); ++i ){ - string temp = *i; - if ( shards->count( temp ) ) - continue; - - ShardConnection conn( temp , "" ); - addWriteBack( writebacks, conn->getLastErrorDetailed() ); - conn.done(); - } - client->clearSinceLastGetError(); + virtual LockType locktype() const { return NONE; } + virtual bool requiresAuth() { return false; } + virtual bool slaveOk() const { + return true; + } - if ( errors.size() == 0 ){ - result.appendNull( "err" ); - handleWriteBacks( writebacks ); - return true; - } - - result.append( "err" , errors[0].c_str() ); - - BSONObjBuilder all; - for ( unsigned i=0; ireset(); + + ClientInfo * client = ClientInfo::get(); + set * shards = client->getPrev(); + + for ( set::iterator i = shards->begin(); i != shards->end(); i++ ) { + string theShard = *i; + ShardConnection conn( theShard , "" ); + BSONObj res; + conn->runCommand( dbName , cmdObj , res ); + conn.done(); } - } cmdGetLastError; - - } - + + return true; + } + } cmdShardingResetError; + class CmdListDatabases : public Command { public: - CmdListDatabases() : Command("listDatabases", false , "listdatabases" ) {} + CmdListDatabases() : Command("listDatabases", true , "listdatabases" ) {} virtual bool logTheOp() { return false; } virtual bool slaveOk() const { return true; } virtual bool slaveOverrideOk() { return true; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const { help << "list databases on cluster"; } - + bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { vector shards; Shard::getAllShards( shards ); - + map sizes; map< string,shared_ptr > dbShardInfo; - for ( vector::iterator i=shards.begin(); i!=shards.end(); i++ ){ + for ( vector::iterator i=shards.begin(); i!=shards.end(); i++ ) { Shard s = *i; BSONObj x = s.runCommand( "admin" , "listDatabases" ); BSONObjIterator j( x["databases"].Obj() ); - while ( j.more() ){ + while ( j.more() ) { BSONObj theDB = j.next().Obj(); - + string name = theDB["name"].String(); long long size = theDB["sizeOnDisk"].numberLong(); long long& totalSize = sizes[name]; - if ( size == 1 ){ + if ( size == 1 ) { if ( totalSize <= 1 ) totalSize = 1; } else totalSize += size; - + shared_ptr& bb = dbShardInfo[name]; if ( ! 
bb.get() ) bb.reset( new BSONObjBuilder() ); bb->appendNumber( s.getName() , size ); } - + } - + long long totalSize = 0; BSONArrayBuilder bb( result.subarrayStart( "databases" ) ); - for ( map::iterator i=sizes.begin(); i!=sizes.end(); ++i ){ + for ( map::iterator i=sizes.begin(); i!=sizes.end(); ++i ) { string name = i->first; + + if ( name == "local" ) { + // we don't return local + // since all shards have their own independant local + continue; + } + long long size = i->second; totalSize += size; - + BSONObjBuilder temp; temp.append( "name" , name ); - temp.appendNumber( "size" , size ); + temp.appendNumber( "sizeOnDisk" , size ); temp.appendBool( "empty" , size == 1 ); temp.append( "shards" , dbShardInfo[name]->obj() ); - + bb.append( temp.obj() ); } + + if ( sizes.find( "config" ) == sizes.end() ){ + ScopedDbConnection conn( configServer.getPrimary() ); + BSONObj x; + if ( conn->simpleCommand( "config" , &x , "dbstats" ) ){ + BSONObjBuilder b; + b.append( "name" , "config" ); + b.appendBool( "empty" , false ); + if ( x["fileSize"].type() ) + b.appendAs( x["fileSize"] , "sizeOnDisk" ); + else + b.append( "sizeOnDisk" , 1 ); + bb.append( b.obj() ); + } + else { + bb.append( BSON( "name" << "config" ) ); + } + conn.done(); + } + bb.done(); result.appendNumber( "totalSize" , totalSize ); result.appendNumber( "totalSizeMb" , totalSize / ( 1024 * 1024 ) ); - + return 1; } @@ -1038,9 +1014,9 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual bool slaveOverrideOk() { return true; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const { help << "Not supported sharded"; } - + bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) { errmsg = "closeAllDatabases isn't supported through mongos"; return false; @@ -1048,4 +1024,22 @@ namespace mongo { } cmdCloseAllDatabases; + class CmdReplSetGetStatus : public Command { + public: + CmdReplSetGetStatus() : Command("replSetGetStatus"){} + virtual bool logTheOp() { return false; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + virtual void help( stringstream& help ) const { help << "Not supported through mongos"; } + + bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) { + if ( jsobj["forShell"].trueValue() ) + lastError.disableForCommand(); + + errmsg = "replSetGetStatus is not supported through mongos"; + return false; + } + } cmdReplSetGetStatus; + } // namespace mongo diff --git a/s/commands_public.cpp b/s/commands_public.cpp index 80d5cc9..02000a0 100644 --- a/s/commands_public.cpp +++ b/s/commands_public.cpp @@ -33,10 +33,10 @@ namespace mongo { namespace dbgrid_pub_cmds { - + class PublicGridCommand : public Command { public: - PublicGridCommand( const char* n, const char* oldname=NULL ) : Command( n, false, oldname ){ + PublicGridCommand( const char* n, const char* oldname=NULL ) : Command( n, false, oldname ) { } virtual bool slaveOk() const { return true; @@ -46,18 +46,18 @@ namespace mongo { } // all grid commands are designed not to lock - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } protected: - bool passthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ + bool passthrough( 
DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) { return _passthrough(conf->getName(), conf, cmdObj, result); } - bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ + bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) { return _passthrough("admin", conf, cmdObj, result); } - + private: - bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ){ + bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) { ShardConnection conn( conf->getPrimary() , "" ); BSONObj res; bool ok = conn->runCommand( db , cmdObj , res ); @@ -75,33 +75,33 @@ namespace mongo { virtual bool adminOnly() const { return false; } // all grid commands are designed not to lock - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } // default impl uses all shards for DB - virtual void getShards(const string& dbName , BSONObj& cmdObj, set& shards){ + virtual void getShards(const string& dbName , BSONObj& cmdObj, set& shards) { DBConfigPtr conf = grid.getDBConfig( dbName , false ); conf->getAllShards(shards); } - + virtual void aggregateResults(const vector& results, BSONObjBuilder& output) {} // don't override - virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& output, bool){ + virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& output, bool) { set shards; getShards(dbName, cmdObj, shards); list< shared_ptr > futures; - for ( set::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ){ + for ( set::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) { futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) ); } - + vector results; BSONObjBuilder subobj (output.subobjStart("raw")); BSONObjBuilder errors; - for ( list< shared_ptr >::iterator i=futures.begin(); i!=futures.end(); i++ ){ + for ( list< shared_ptr >::iterator i=futures.begin(); i!=futures.end(); i++ ) { shared_ptr res = *i; - if ( ! res->join() ){ + if ( ! res->join() ) { errors.appendAs(res->result()["errmsg"], res->getServer()); } results.push_back( res->result() ); @@ -111,11 +111,11 @@ namespace mongo { subobj.done(); BSONObj errobj = errors.done(); - if (! errobj.isEmpty()){ + if (! errobj.isEmpty()) { errmsg = errobj.toString(false, true); return false; } - + aggregateResults(results, output); return true; } @@ -126,39 +126,40 @@ namespace mongo { public: AllShardsCollectionCommand(const char* n, const char* oldname=NULL) : RunOnAllShardsCommand(n, oldname) {} - virtual void getShards(const string& dbName , BSONObj& cmdObj, set& shards){ + virtual void getShards(const string& dbName , BSONObj& cmdObj, set& shards) { string fullns = dbName + '.' + cmdObj.firstElement().valuestrsafe(); - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! 
conf->isSharded( fullns ) ) { shards.insert(conf->getShard(fullns)); - } else { + } + else { conf->getChunkManager(fullns)->getAllShards(shards); } } }; - + class NotAllowedOnShardedCollectionCmd : public PublicGridCommand { public: - NotAllowedOnShardedCollectionCmd( const char * n ) : PublicGridCommand( n ){} + NotAllowedOnShardedCollectionCmd( const char * n ) : PublicGridCommand( n ) {} virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) = 0; - - virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string fullns = getFullNS( dbName , cmdObj ); - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { return passthrough( conf , cmdObj , result ); } errmsg = "can't do command: " + name + " on sharded collection"; return false; } }; - + // ---- class DropIndexesCmd : public AllShardsCollectionCommand { @@ -194,7 +195,7 @@ namespace mongo { long long indexSize = 0; long long fileSize = 0; - for (vector::const_iterator it(results.begin()), end(results.end()); it != end; ++it){ + for (vector::const_iterator it(results.begin()), end(results.end()); it != end; ++it) { const BSONObj& b = *it; objects += b["objects"].numberLong(); dataSize += b["dataSize"].numberLong(); @@ -219,23 +220,24 @@ namespace mongo { class DropCmd : public PublicGridCommand { public: - DropCmd() : PublicGridCommand( "drop" ){} - bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + DropCmd() : PublicGridCommand( "drop" ) {} + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - + log() << "DROP: " << fullns << endl; - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { return passthrough( conf , cmdObj , result ); } - + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 10418 , "how could chunk manager be null!" , cm ); - + cm->drop( cm ); + uassert( 13512 , "drop collection attempted on non-sharded collection" , conf->removeSharding( fullns ) ); return 1; } @@ -243,25 +245,25 @@ namespace mongo { class DropDBCmd : public PublicGridCommand { public: - DropDBCmd() : PublicGridCommand( "dropDatabase" ){} - bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - + DropDBCmd() : PublicGridCommand( "dropDatabase" ) {} + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + BSONElement e = cmdObj.firstElement(); - - if ( ! e.isNumber() || e.number() != 1 ){ + + if ( ! e.isNumber() || e.number() != 1 ) { errmsg = "invalid params"; return 0; } - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - + log() << "DROP DATABASE: " << dbName << endl; - if ( ! conf ){ + if ( ! conf ) { result.append( "info" , "database didn't exist" ); return true; } - + if ( ! 
conf->dropDatabase( errmsg ) ) return false; @@ -272,8 +274,8 @@ namespace mongo { class RenameCollectionCmd : public PublicGridCommand { public: - RenameCollectionCmd() : PublicGridCommand( "renameCollection" ){} - bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + RenameCollectionCmd() : PublicGridCommand( "renameCollection" ) {} + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string fullnsFrom = cmdObj.firstElement().valuestrsafe(); string dbNameFrom = nsToDatabase( fullnsFrom.c_str() ); DBConfigPtr confFrom = grid.getDBConfig( dbNameFrom , false ); @@ -297,18 +299,19 @@ namespace mongo { class CopyDBCmd : public PublicGridCommand { public: - CopyDBCmd() : PublicGridCommand( "copydb" ){} - bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + CopyDBCmd() : PublicGridCommand( "copydb" ) {} + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string todb = cmdObj.getStringField("todb"); uassert(13402, "need a todb argument", !todb.empty()); - + DBConfigPtr confTo = grid.getDBConfig( todb ); uassert(13398, "cant copy to sharded DB", !confTo->isShardingEnabled()); string fromhost = cmdObj.getStringField("fromhost"); - if (!fromhost.empty()){ + if (!fromhost.empty()) { return adminPassthrough( confTo , cmdObj , result ); - } else { + } + else { string fromdb = cmdObj.getStringField("fromdb"); uassert(13399, "need a fromdb argument", !fromdb.empty()); @@ -317,7 +320,7 @@ namespace mongo { uassert(13401, "cant copy from sharded DB", !confFrom->isShardingEnabled()); BSONObjBuilder b; - BSONForEach(e, cmdObj){ + BSONForEach(e, cmdObj) { if (strcmp(e.fieldName(), "fromhost") != 0) b.append(e); } @@ -328,67 +331,67 @@ namespace mongo { } } - }copyDBCmd; + } copyDBCmd; class CountCmd : public PublicGridCommand { public: CountCmd() : PublicGridCommand("count") { } - bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool l){ + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool l) { string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - + BSONObj filter; if ( cmdObj["query"].isABSONObj() ) filter = cmdObj["query"].Obj(); - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { ShardConnection conn( conf->getPrimary() , fullns ); BSONObj temp; bool ok = conn->runCommand( dbName , cmdObj , temp ); conn.done(); - - if ( ok ){ + + if ( ok ) { result.append( temp["n"] ); return true; } - - if ( temp["code"].numberInt() != StaleConfigInContextCode ){ + + if ( temp["code"].numberInt() != StaleConfigInContextCode ) { errmsg = temp["errmsg"].String(); result.appendElements( temp ); return false; } - + // this collection got sharded ChunkManagerPtr cm = conf->getChunkManager( fullns , true ); - if ( ! cm ){ + if ( ! cm ) { errmsg = "should be sharded now"; result.append( "root" , temp ); return false; } } - + long long total = 0; map shardCounts; - + ChunkManagerPtr cm = conf->getChunkManager( fullns ); - while ( true ){ - if ( ! cm ){ + while ( true ) { + if ( ! 
cm ) { // probably unsharded now return run( dbName , cmdObj , errmsg , result , l ); } - + set shards; cm->getShardsForQuery( shards , filter ); assert( shards.size() ); - + bool hadToBreak = false; - for (set::iterator it=shards.begin(), end=shards.end(); it != end; ++it){ + for (set::iterator it=shards.begin(), end=shards.end(); it != end; ++it) { ShardConnection conn(*it, fullns); - if ( conn.setVersion() ){ + if ( conn.setVersion() ) { total = 0; shardCounts.clear(); cm = conf->getChunkManager( fullns ); @@ -396,19 +399,19 @@ namespace mongo { hadToBreak = true; break; } - + BSONObj temp; bool ok = conn->runCommand( dbName , BSON( "count" << collection << "query" << filter ) , temp ); conn.done(); - - if ( ok ){ + + if ( ok ) { long long mine = temp["n"].numberLong(); total += mine; shardCounts[it->getName()] = mine; continue; } - - if ( StaleConfigInContextCode == temp["code"].numberInt() ){ + + if ( StaleConfigInContextCode == temp["code"].numberInt() ) { // my version is old total = 0; shardCounts.clear(); @@ -425,7 +428,7 @@ namespace mongo { if ( ! hadToBreak ) break; } - + total = applySkipLimit( total , cmdObj ); result.appendNumber( "n" , total ); BSONObjBuilder temp( result.subobjStart( "shards" ) ); @@ -439,13 +442,13 @@ namespace mongo { class CollectionStats : public PublicGridCommand { public: CollectionStats() : PublicGridCommand("collStats", "collstats") { } - bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { result.append( "ns" , fullns ); result.appendBool("sharded", false); result.append( "primary" , conf->getPrimary().getName() ); @@ -458,17 +461,17 @@ namespace mongo { set servers; cm->getAllShards(servers); - + BSONObjBuilder shardStats; long long count=0; long long size=0; long long storageSize=0; int nindexes=0; bool warnedAboutIndexes = false; - for ( set::iterator i=servers.begin(); i!=servers.end(); i++ ){ + for ( set::iterator i=servers.begin(); i!=servers.end(); i++ ) { ScopedDbConnection conn( *i ); BSONObj res; - if ( ! conn->runCommand( dbName , cmdObj , res ) ){ + if ( ! conn->runCommand( dbName , cmdObj , res ) ) { errmsg = "failed on shard: " + res.toString(); return false; } @@ -480,19 +483,19 @@ namespace mongo { int myIndexes = res["nindexes"].numberInt(); - if ( nindexes == 0 ){ + if ( nindexes == 0 ) { nindexes = myIndexes; } - else if ( nindexes == myIndexes ){ + else if ( nindexes == myIndexes ) { // no-op } else { // hopefully this means we're building an index - + if ( myIndexes > nindexes ) nindexes = myIndexes; - - if ( ! warnedAboutIndexes ){ + + if ( ! 
warnedAboutIndexes ) { result.append( "warning" , "indexes don't all match - ok if ensureIndex is running" ); warnedAboutIndexes = true; } @@ -510,7 +513,7 @@ namespace mongo { result.append("nchunks", cm->numChunks()); result.append("shards", shardStats.obj()); - + return true; } } collectionStatsCmd; @@ -518,19 +521,19 @@ namespace mongo { class FindAndModifyCmd : public PublicGridCommand { public: FindAndModifyCmd() : PublicGridCommand("findAndModify", "findandmodify") { } - bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { return passthrough( conf , cmdObj , result); } - + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 13002 , "how could chunk manager be null!" , cm ); - + BSONObj filter = cmdObj.getObjectField("query"); uassert(13343, "query for sharded findAndModify must have shardkey", cm->hasShardKey(filter)); @@ -542,11 +545,11 @@ namespace mongo { bool ok = conn->runCommand( conf->getName() , cmdObj , res ); conn.done(); - if (ok || (strcmp(res["errmsg"].valuestrsafe(), "No matching object found") != 0)){ + if (ok || (strcmp(res["errmsg"].valuestrsafe(), "No matching object found") != 0)) { result.appendElements(res); return ok; } - + return true; } @@ -555,18 +558,18 @@ namespace mongo { class DataSizeCmd : public PublicGridCommand { public: DataSizeCmd() : PublicGridCommand("dataSize", "datasize") { } - bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string fullns = cmdObj.firstElement().String(); - + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { return passthrough( conf , cmdObj , result); } - + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 13407 , "how could chunk manager be null!" , cm ); - + BSONObj min = cmdObj.getObjectField( "min" ); BSONObj max = cmdObj.getObjectField( "max" ); BSONObj keyPattern = cmdObj.getObjectField( "keyPattern" ); @@ -580,13 +583,13 @@ namespace mongo { set shards; cm->getShardsForRange(shards, min, max); - for ( set::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ){ + for ( set::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ) { ScopedDbConnection conn( *i ); BSONObj res; bool ok = conn->runCommand( conf->getName() , cmdObj , res ); conn.done(); - - if ( ! ok ){ + + if ( ! ok ) { result.appendElements( res ); return false; } @@ -607,64 +610,64 @@ namespace mongo { class ConvertToCappedCmd : public NotAllowedOnShardedCollectionCmd { public: - ConvertToCappedCmd() : NotAllowedOnShardedCollectionCmd("convertToCapped"){} - - virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ){ + ConvertToCappedCmd() : NotAllowedOnShardedCollectionCmd("convertToCapped") {} + + virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) { return dbName + "." 
+ cmdObj.firstElement().valuestrsafe(); } - + } convertToCappedCmd; class GroupCmd : public NotAllowedOnShardedCollectionCmd { public: - GroupCmd() : NotAllowedOnShardedCollectionCmd("group"){} - - virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ){ + GroupCmd() : NotAllowedOnShardedCollectionCmd("group") {} + + virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) { return dbName + "." + cmdObj.firstElement().embeddedObjectUserCheck()["ns"].valuestrsafe(); } - + } groupCmd; class DistinctCmd : public PublicGridCommand { public: - DistinctCmd() : PublicGridCommand("distinct"){} + DistinctCmd() : PublicGridCommand("distinct") {} virtual void help( stringstream &help ) const { help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { return passthrough( conf , cmdObj , result ); } - + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 10420 , "how could chunk manager be null!" , cm ); BSONObj query = getQuery(cmdObj); set shards; cm->getShardsForQuery(shards, query); - + set all; int size = 32; - - for ( set::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ){ + + for ( set::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ) { ShardConnection conn( *i , fullns ); BSONObj res; bool ok = conn->runCommand( conf->getName() , cmdObj , res ); conn.done(); - - if ( ! ok ){ + + if ( ! ok ) { result.appendElements( res ); return false; } - + BSONObjIterator it( res["values"].embeddedObject() ); - while ( it.more() ){ + while ( it.more() ) { BSONElement nxt = it.next(); BSONObjBuilder temp(32); temp.appendAs( nxt , "" ); @@ -672,13 +675,13 @@ namespace mongo { } } - + BSONObjBuilder b( size ); int n=0; - for ( set::iterator i = all.begin() ; i != all.end(); i++ ){ - b.appendAs( i->firstElement() , b.numStr( n++ ).c_str() ); + for ( set::iterator i = all.begin() ; i != all.end(); i++ ) { + b.appendAs( i->firstElement() , b.numStr( n++ ) ); } - + result.appendArray( "values" , b.obj() ); return true; } @@ -686,11 +689,11 @@ namespace mongo { class FileMD5Cmd : public PublicGridCommand { public: - FileMD5Cmd() : PublicGridCommand("filemd5"){} + FileMD5Cmd() : PublicGridCommand("filemd5") {} virtual void help( stringstream &help ) const { help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; } - bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string fullns = dbName; fullns += "."; { @@ -702,17 +705,17 @@ namespace mongo { fullns += ".chunks"; DBConfigPtr conf = grid.getDBConfig( dbName , false ); - - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { return passthrough( conf , cmdObj , result ); } - + ChunkManagerPtr cm = conf->getChunkManager( fullns ); massert( 13091 , "how could chunk manager be null!" 
, cm ); uassert( 13092 , "GridFS chunks collection can only be sharded on files_id", cm->getShardKey().key() == BSON("files_id" << 1)); ChunkPtr chunk = cm->findChunk( BSON("files_id" << cmdObj.firstElement()) ); - + ShardConnection conn( chunk->getShard() , fullns ); BSONObj res; bool ok = conn->runCommand( conf->getName() , cmdObj , res ); @@ -723,104 +726,254 @@ namespace mongo { } } fileMD5Cmd; + class Geo2dFindNearCmd : public PublicGridCommand { + public: + Geo2dFindNearCmd() : PublicGridCommand( "geoNear" ) {} + void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; } + + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + string collection = cmdObj.firstElement().valuestrsafe(); + string fullns = dbName + "." + collection; + + DBConfigPtr conf = grid.getDBConfig( dbName , false ); + + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { + return passthrough( conf , cmdObj , result ); + } + + ChunkManagerPtr cm = conf->getChunkManager( fullns ); + massert( 13500 , "how could chunk manager be null!" , cm ); + + BSONObj query = getQuery(cmdObj); + set shards; + cm->getShardsForQuery(shards, query); + + int limit = 100; + if (cmdObj["num"].isNumber()) + limit = cmdObj["num"].numberInt(); + + list< shared_ptr > futures; + BSONArrayBuilder shardArray; + for ( set::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) { + futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) ); + shardArray.append(i->getName()); + } + + multimap results; // TODO: maybe use merge-sort instead + string nearStr; + double time = 0; + double btreelocs = 0; + double nscanned = 0; + double objectsLoaded = 0; + for ( list< shared_ptr >::iterator i=futures.begin(); i!=futures.end(); i++ ) { + shared_ptr res = *i; + if ( ! 
res->join() ) { + errmsg = res->result()["errmsg"].String(); + return false; + } + + nearStr = res->result()["near"].String(); + time += res->result()["stats"]["time"].Number(); + btreelocs += res->result()["stats"]["btreelocs"].Number(); + nscanned += res->result()["stats"]["nscanned"].Number(); + objectsLoaded += res->result()["stats"]["objectsLoaded"].Number(); + + BSONForEach(obj, res->result()["results"].embeddedObject()) { + results.insert(make_pair(obj["dis"].Number(), obj.embeddedObject().getOwned())); + } + + // TODO: maybe shrink results if size() > limit + } + + result.append("ns" , fullns); + result.append("near", nearStr); + + int outCount = 0; + double totalDistance = 0; + double maxDistance = 0; + { + BSONArrayBuilder sub (result.subarrayStart("results")); + for (multimap::const_iterator it(results.begin()), end(results.end()); it!= end && outCount < limit; ++it, ++outCount) { + totalDistance += it->first; + maxDistance = it->first; // guaranteed to be highest so far + + sub.append(it->second); + } + sub.done(); + } + + { + BSONObjBuilder sub (result.subobjStart("stats")); + sub.append("time", time); + sub.append("btreelocs", btreelocs); + sub.append("nscanned", nscanned); + sub.append("objectsLoaded", objectsLoaded); + sub.append("avgDistance", totalDistance / outCount); + sub.append("maxDistance", maxDistance); + sub.append("shards", shardArray.arr()); + sub.done(); + } + + return true; + } + } geo2dFindNearCmd; + class MRCmd : public PublicGridCommand { public: - MRCmd() : PublicGridCommand( "mapreduce" ){} - - string getTmpName( const string& coll ){ + MRCmd() : PublicGridCommand( "mapreduce" ) {} + + string getTmpName( const string& coll ) { static int inc = 1; stringstream ss; ss << "tmp.mrs." << coll << "_" << time(0) << "_" << inc++; return ss.str(); } - BSONObj fixForShards( const BSONObj& orig , const string& output ){ + BSONObj fixForShards( const BSONObj& orig , const string& output, BSONObj& customOut , string& badShardedField ) { BSONObjBuilder b; BSONObjIterator i( orig ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); string fn = e.fieldName(); - if ( fn == "map" || - fn == "mapreduce" || - fn == "reduce" || - fn == "query" || - fn == "sort" || - fn == "scope" || - fn == "verbose" ){ + if ( fn == "map" || + fn == "mapreduce" || + fn == "mapparams" || + fn == "reduce" || + fn == "query" || + fn == "sort" || + fn == "scope" || + fn == "verbose" ) { b.append( e ); } - else if ( fn == "keeptemp" || - fn == "out" || - fn == "finalize" ){ + else if ( fn == "out" || + fn == "finalize" ) { // we don't want to copy these + if (fn == "out" && e.type() == Object) { + // check if there is a custom output + BSONObj out = e.embeddedObject(); + if (out.hasField("db")) + customOut = out; + } } else { - uassert( 10177 , (string)"don't know mr field: " + fn , 0 ); + badShardedField = fn; + return BSONObj(); } } b.append( "out" , output ); return b.obj(); } - - bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { Timer t; string collection = cmdObj.firstElement().valuestrsafe(); string fullns = dbName + "." + collection; + const string shardedOutputCollection = getTmpName( collection ); + + string badShardedField; + BSONObj customOut; + BSONObj shardedCommand = fixForShards( cmdObj , shardedOutputCollection, customOut , badShardedField ); + + bool customOutDB = ! 
customOut.isEmpty() && customOut.hasField( "db" ); + DBConfigPtr conf = grid.getDBConfig( dbName , false ); - if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ){ + if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) { + if ( customOutDB ) { + errmsg = "can't use out 'db' with non-sharded db"; + return false; + } return passthrough( conf , cmdObj , result ); } - + + if ( badShardedField.size() ) { + errmsg = str::stream() << "unknown m/r field for sharding: " << badShardedField; + return false; + } + BSONObjBuilder timingBuilder; ChunkManagerPtr cm = conf->getChunkManager( fullns ); BSONObj q; - if ( cmdObj["query"].type() == Object ){ + if ( cmdObj["query"].type() == Object ) { q = cmdObj["query"].embeddedObjectUserCheck(); } - + set shards; cm->getShardsForQuery( shards , q ); - - const string shardedOutputCollection = getTmpName( collection ); - - BSONObj shardedCommand = fixForShards( cmdObj , shardedOutputCollection ); - + + BSONObjBuilder finalCmd; finalCmd.append( "mapreduce.shardedfinish" , cmdObj ); finalCmd.append( "shardedOutputCollection" , shardedOutputCollection ); - list< shared_ptr > futures; - - for ( set::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ){ - futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand ) ); - } - BSONObjBuilder shardresults; - for ( list< shared_ptr >::iterator i=futures.begin(); i!=futures.end(); i++ ){ - shared_ptr res = *i; - if ( ! res->join() ){ - errmsg = "mongod mr failed: "; - errmsg += res->result().toString(); - return 0; + { + // we need to use our connections to the shard + // so filtering is done correctly for un-owned docs + // so we allocate them in our thread + // and hand off + + vector< shared_ptr > shardConns; + + list< shared_ptr > futures; + + for ( set::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) { + shared_ptr temp( new ShardConnection( i->getConnString() , fullns ) ); + assert( temp->get() ); + futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand , temp->get() ) ); + shardConns.push_back( temp ); + } + + bool failed = false; + + BSONObjBuilder shardresults; + for ( list< shared_ptr >::iterator i=futures.begin(); i!=futures.end(); i++ ) { + shared_ptr res = *i; + if ( ! res->join() ) { + error() << "sharded m/r failed on shard: " << res->getServer() << " error: " << res->result() << endl; + result.append( "cause" , res->result() ); + errmsg = "mongod mr failed: "; + errmsg += res->result().toString(); + failed = true; + continue; + } + shardresults.append( res->getServer() , res->result() ); } - shardresults.append( res->getServer() , res->result() ); + + for ( unsigned i=0; idone(); + + if ( failed ) + return 0; + + finalCmd.append( "shards" , shardresults.obj() ); + timingBuilder.append( "shards" , t.millis() ); } - - finalCmd.append( "shards" , shardresults.obj() ); - timingBuilder.append( "shards" , t.millis() ); Timer t2; - ShardConnection conn( conf->getPrimary() , fullns ); + // by default the target database is same as input + Shard outServer = conf->getPrimary(); + string outns = fullns; + if ( customOutDB ) { + // have to figure out shard for the output DB + BSONElement elmt = customOut.getField("db"); + string outdb = elmt.valuestrsafe(); + outns = outdb + "." 
+ collection; + DBConfigPtr conf2 = grid.getDBConfig( outdb , true ); + outServer = conf2->getPrimary(); + } + log() << "customOut: " << customOut << " outServer: " << outServer << endl; + + ShardConnection conn( outServer , outns ); BSONObj finalResult; bool ok = conn->runCommand( dbName , finalCmd.obj() , finalResult ); conn.done(); - if ( ! ok ){ + if ( ! ok ) { errmsg = "final reduce failed: "; errmsg += finalResult.toString(); return 0; @@ -830,22 +983,22 @@ namespace mongo { result.appendElements( finalResult ); result.append( "timeMillis" , t.millis() ); result.append( "timing" , timingBuilder.obj() ); - + return 1; } } mrCmd; - + class ApplyOpsCmd : public PublicGridCommand { public: - ApplyOpsCmd() : PublicGridCommand( "applyOps" ){} - - virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + ApplyOpsCmd() : PublicGridCommand( "applyOps" ) {} + + virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { errmsg = "applyOps not allowed through mongos"; return false; } - + } applyOpsCmd; - + } } diff --git a/s/config.cpp b/s/config.cpp index 1ad15d5..35a3be2 100644 --- a/s/config.cpp +++ b/s/config.cpp @@ -25,17 +25,17 @@ #include "../db/pdfile.h" #include "../db/cmdline.h" -#include "server.h" -#include "config.h" #include "chunk.h" +#include "config.h" #include "grid.h" +#include "server.h" namespace mongo { int ConfigServer::VERSION = 3; Shard Shard::EMPTY; - string ShardNS::shard = "config.shards"; + string ShardNS::shard = "config.shards"; string ShardNS::database = "config.databases"; string ShardNS::collection = "config.collections"; string ShardNS::chunk = "config.chunks"; @@ -45,42 +45,41 @@ namespace mongo { BSONField ShardFields::draining("draining"); BSONField ShardFields::maxSize ("maxSize"); - BSONField ShardFields::currSize("currSize"); OID serverID; /* --- DBConfig --- */ - DBConfig::CollectionInfo::CollectionInfo( DBConfig * db , const BSONObj& in ){ + DBConfig::CollectionInfo::CollectionInfo( const BSONObj& in ) { _dirty = false; _dropped = in["dropped"].trueValue(); if ( in["key"].isABSONObj() ) - shard( db , in["_id"].String() , in["key"].Obj() , in["unique"].trueValue() ); + shard( in["_id"].String() , in["key"].Obj() , in["unique"].trueValue() ); } - void DBConfig::CollectionInfo::shard( DBConfig * db , const string& ns , const ShardKeyPattern& key , bool unique ){ - _cm.reset( new ChunkManager( db, ns , key , unique ) ); + void DBConfig::CollectionInfo::shard( const string& ns , const ShardKeyPattern& key , bool unique ) { + _cm.reset( new ChunkManager( ns , key , unique ) ); _dirty = true; _dropped = false; } - void DBConfig::CollectionInfo::unshard(){ + void DBConfig::CollectionInfo::unshard() { _cm.reset(); _dropped = true; _dirty = true; } - - void DBConfig::CollectionInfo::save( const string& ns , DBClientBase* conn ){ + + void DBConfig::CollectionInfo::save( const string& ns , DBClientBase* conn ) { BSONObj key = BSON( "_id" << ns ); - + BSONObjBuilder val; val.append( "_id" , ns ); val.appendDate( "lastmod" , time(0) ); val.appendBool( "dropped" , _dropped ); if ( _cm ) _cm->getInfo( val ); - + conn->update( ShardNS::collection , key , val.obj() , true ); string err = conn->getLastError(); uassert( 13473 , (string)"failed to save collection (" + ns + "): " + err , err.size() == 0 ); @@ -88,14 +87,14 @@ namespace mongo { _dirty = false; } - bool DBConfig::isSharded( const string& ns ){ + bool DBConfig::isSharded( const string& ns ) { if ( ! 
_shardingEnabled ) return false; scoped_lock lk( _lock ); return _isSharded( ns ); } - bool DBConfig::_isSharded( const string& ns ){ + bool DBConfig::_isSharded( const string& ns ) { if ( ! _shardingEnabled ) return false; Collections::iterator i = _collections.find( ns ); @@ -105,25 +104,28 @@ namespace mongo { } - const Shard& DBConfig::getShard( const string& ns ){ + const Shard& DBConfig::getShard( const string& ns ) { if ( isSharded( ns ) ) return Shard::EMPTY; - + uassert( 10178 , "no primary!" , _primary.ok() ); return _primary; } - - void DBConfig::enableSharding(){ + + void DBConfig::enableSharding() { if ( _shardingEnabled ) return; + + assert( _name != "config" ); + scoped_lock lk( _lock ); - _shardingEnabled = true; + _shardingEnabled = true; _save(); } - - ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ){ + + ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ) { uassert( 8042 , "db doesn't have sharding enabled" , _shardingEnabled ); - + scoped_lock lk( _lock ); CollectionInfo& ci = _collections[ns]; @@ -131,35 +133,48 @@ namespace mongo { log() << "enable sharding on: " << ns << " with shard key: " << fieldsAndOrder << endl; - ci.shard( this , ns , fieldsAndOrder , unique ); - ci.getCM()->maybeChunkCollection(); - + // From this point on, 'ns' is going to be treated as a sharded collection. We assume this is the first + // time it is seen by the sharded system and thus create the first chunk for the collection. All the remaining + // chunks will be created as a by-product of splitting. + ci.shard( ns , fieldsAndOrder , unique ); + ChunkManagerPtr cm = ci.getCM(); + uassert( 13449 , "collections already sharded" , (cm->numChunks() == 0) ); + cm->createFirstChunk( getPrimary() ); _save(); - return ci.getCM(); + + try { + cm->maybeChunkCollection(); + } + catch ( UserException& e ) { + // failure to chunk is not critical enough to abort the command (and undo the _save()'d configDB state) + log() << "couldn't chunk recently created collection: " << ns << " " << e << endl; + } + + return cm; } - bool DBConfig::removeSharding( const string& ns ){ - if ( ! _shardingEnabled ){ + bool DBConfig::removeSharding( const string& ns ) { + if ( ! _shardingEnabled ) { return false; } - + scoped_lock lk( _lock ); - + Collections::iterator i = _collections.find( ns ); if ( i == _collections.end() ) return false; - + CollectionInfo& ci = _collections[ns]; if ( ! 
ci.isSharded() ) return false; - + ci.unshard(); _save(); return true; } - - ChunkManagerPtr DBConfig::getChunkManager( const string& ns , bool shouldReload ){ + + ChunkManagerPtr DBConfig::getChunkManager( const string& ns , bool shouldReload ) { scoped_lock lk( _lock ); if ( shouldReload ) @@ -170,93 +185,80 @@ namespace mongo { return ci.getCM(); } - void DBConfig::setPrimary( string s ){ + void DBConfig::setPrimary( string s ) { scoped_lock lk( _lock ); _primary.reset( s ); _save(); } - - void DBConfig::serialize(BSONObjBuilder& to){ + + void DBConfig::serialize(BSONObjBuilder& to) { to.append("_id", _name); to.appendBool("partitioned", _shardingEnabled ); to.append("primary", _primary.getName() ); } - - bool DBConfig::unserialize(const BSONObj& from){ + + void DBConfig::unserialize(const BSONObj& from) { log(1) << "DBConfig unserialize: " << _name << " " << from << endl; assert( _name == from["_id"].String() ); _shardingEnabled = from.getBoolField("partitioned"); _primary.reset( from.getStringField("primary") ); - // this is a temporary migration thing + // In the 1.5.x series, we used to have collection metadata nested in the database entry. The 1.6.x series + // had migration code that ported that info to where it belongs now: the 'collections' collection. We now + // just assert that we're not migrating from a 1.5.x directly into a 1.7.x without first converting. BSONObj sharded = from.getObjectField( "sharded" ); - if ( sharded.isEmpty() ) - return false; - - BSONObjIterator i(sharded); - while ( i.more() ){ - BSONElement e = i.next(); - uassert( 10182 , "sharded things have to be objects" , e.type() == Object ); - - BSONObj c = e.embeddedObject(); - uassert( 10183 , "key has to be an object" , c["key"].type() == Object ); - - _collections[e.fieldName()].shard( this , e.fieldName() , c["key"].Obj() , c["unique"].trueValue() ); - } - return true; + if ( ! sharded.isEmpty() ) + uasserted( 13509 , "can't migrate from 1.5.x release to the current one; need to upgrade to 1.6.x first"); } - bool DBConfig::load(){ + bool DBConfig::load() { scoped_lock lk( _lock ); return _load(); } - bool DBConfig::_load(){ + bool DBConfig::_load() { ScopedDbConnection conn( configServer.modelServer() ); - - BSONObj o = conn->findOne( ShardNS::database , BSON( "_id" << _name ) ); + BSONObj o = conn->findOne( ShardNS::database , BSON( "_id" << _name ) ); - if ( o.isEmpty() ){ + if ( o.isEmpty() ) { conn.done(); return false; } - - if ( unserialize( o ) ) - _save(); - + + unserialize( o ); + BSONObjBuilder b; b.appendRegex( "_id" , (string)"^" + _name + "." ); - auto_ptr cursor = conn->query( ShardNS::collection ,b.obj() ); assert( cursor.get() ); - while ( cursor->more() ){ + while ( cursor->more() ) { BSONObj o = cursor->next(); - _collections[o["_id"].String()] = CollectionInfo( this , o ); + _collections[o["_id"].String()] = CollectionInfo( o ); } - - conn.done(); + + conn.done(); return true; } - void DBConfig::_save(){ + void DBConfig::_save() { ScopedDbConnection conn( configServer.modelServer() ); - + BSONObj n; { BSONObjBuilder b; serialize(b); n = b.obj(); } - + conn->update( ShardNS::database , BSON( "_id" << _name ) , n , true ); string err = conn->getLastError(); uassert( 13396 , (string)"DBConfig save failed: " + err , err.size() == 0 ); - - for ( Collections::iterator i=_collections.begin(); i!=_collections.end(); ++i ){ + + for ( Collections::iterator i=_collections.begin(); i!=_collections.end(); ++i ) { if ( ! 
i->second.isDirty() ) continue; i->second.save( i->first , conn.get() ); @@ -265,18 +267,17 @@ namespace mongo { conn.done(); } - - bool DBConfig::reload(){ + bool DBConfig::reload() { scoped_lock lk( _lock ); return _reload(); } - - bool DBConfig::_reload(){ + + bool DBConfig::_reload() { // TODO: i don't think is 100% correct return _load(); } - - bool DBConfig::dropDatabase( string& errmsg ){ + + bool DBConfig::dropDatabase( string& errmsg ) { /** * 1) make sure everything is up * 2) update config server @@ -287,81 +288,88 @@ namespace mongo { log() << "DBConfig::dropDatabase: " << _name << endl; configServer.logChange( "dropDatabase.start" , _name , BSONObj() ); - + // 1 - if ( ! configServer.allUp( errmsg ) ){ + if ( ! configServer.allUp( errmsg ) ) { log(1) << "\t DBConfig::dropDatabase not all up" << endl; return 0; } - + // 2 grid.removeDB( _name ); { ScopedDbConnection conn( configServer.modelServer() ); conn->remove( ShardNS::database , BSON( "_id" << _name ) ); + errmsg = conn->getLastError(); + if ( ! errmsg.empty() ) { + log() << "could not drop '" << _name << "': " << errmsg << endl; + conn.done(); + return false; + } + conn.done(); } - if ( ! configServer.allUp( errmsg ) ){ + if ( ! configServer.allUp( errmsg ) ) { log() << "error removing from config server even after checking!" << endl; return 0; } log(1) << "\t removed entry from config server for: " << _name << endl; - + set allServers; // 3 - while ( true ){ - int num; + while ( true ) { + int num = 0; if ( ! _dropShardedCollections( num , allServers , errmsg ) ) return 0; log() << " DBConfig::dropDatabase: " << _name << " dropped sharded collections: " << num << endl; if ( num == 0 ) break; } - + // 4 { ScopedDbConnection conn( _primary ); BSONObj res; - if ( ! conn->dropDatabase( _name , &res ) ){ + if ( ! conn->dropDatabase( _name , &res ) ) { errmsg = res.toString(); return 0; } conn.done(); } - + // 5 - for ( set::iterator i=allServers.begin(); i!=allServers.end(); i++ ){ + for ( set::iterator i=allServers.begin(); i!=allServers.end(); i++ ) { ScopedDbConnection conn( *i ); BSONObj res; - if ( ! conn->dropDatabase( _name , &res ) ){ + if ( ! 
conn->dropDatabase( _name , &res ) ) { errmsg = res.toString(); return 0; } - conn.done(); + conn.done(); } - + log(1) << "\t dropped primary db for: " << _name << endl; configServer.logChange( "dropDatabase" , _name , BSONObj() ); return true; } - bool DBConfig::_dropShardedCollections( int& num, set& allServers , string& errmsg ){ + bool DBConfig::_dropShardedCollections( int& num, set& allServers , string& errmsg ) { num = 0; set seen; - while ( true ){ + while ( true ) { Collections::iterator i = _collections.begin(); - for ( ; i != _collections.end(); ++i ){ + for ( ; i != _collections.end(); ++i ) { if ( i->second.isSharded() ) break; } - + if ( i == _collections.end() ) break; - if ( seen.count( i->first ) ){ + if ( seen.count( i->first ) ) { errmsg = "seen a collection twice!"; return false; } @@ -371,19 +379,20 @@ namespace mongo { i->second.getCM()->getAllShards( allServers ); i->second.getCM()->drop( i->second.getCM() ); - + uassert( 10176 , str::stream() << "shard state missing for " << i->first , removeSharding( i->first ) ); + num++; uassert( 10184 , "_dropShardedCollections too many collections - bailing" , num < 100000 ); log(2) << "\t\t dropped " << num << " so far" << endl; } - + return true; } - - void DBConfig::getAllShards(set& shards) const{ + + void DBConfig::getAllShards(set& shards) const { shards.insert(getPrimary()); - for (Collections::const_iterator it(_collections.begin()), end(_collections.end()); it != end; ++it){ - if (it->second.isSharded()){ + for (Collections::const_iterator it(_collections.begin()), end(_collections.end()); it != end; ++it) { + if (it->second.isSharded()) { it->second.getCM()->getAllShards(shards); } // TODO: handle collections on non-primary shard } @@ -391,20 +400,20 @@ namespace mongo { /* --- ConfigServer ---- */ - ConfigServer::ConfigServer() : DBConfig( "config" ){ + ConfigServer::ConfigServer() : DBConfig( "config" ) { _shardingEnabled = false; } - + ConfigServer::~ConfigServer() { } - bool ConfigServer::init( string s ){ + bool ConfigServer::init( string s ) { vector configdbs; splitStringDelim( s, &configdbs, ',' ); return init( configdbs ); } - bool ConfigServer::init( vector configHosts ){ + bool ConfigServer::init( vector configHosts ) { uassert( 10187 , "need configdbs" , configHosts.size() ); string hn = getHostName(); @@ -412,19 +421,19 @@ namespace mongo { sleepsecs(5); dbexit( EXIT_BADOPTIONS ); } - + set hosts; - for ( size_t i=0; i::iterator i=hosts.begin(); i!=hosts.end(); i++ ){ + + for ( set::iterator i=hosts.begin(); i!=hosts.end(); i++ ) { string host = *i; bool ok = false; - for ( int x=10; x>0; x-- ){ - if ( ! hostbyname( host.c_str() ).empty() ){ + for ( int x=10; x>0; x-- ) { + if ( ! 
hostbyname( host.c_str() ).empty() ) { ok = true; break; } @@ -436,10 +445,10 @@ namespace mongo { } _config = configHosts; - + string fullString; joinStringDelim( configHosts, &fullString, ',' ); - _primary.setAddress( fullString , true ); + _primary.setAddress( ConnectionString( fullString , ConnectionString::SYNC ) ); log(1) << " config string : " << fullString << endl; return true; @@ -448,14 +457,14 @@ namespace mongo { bool ConfigServer::checkConfigServersConsistent( string& errmsg , int tries ) const { if ( _config.size() == 1 ) return true; - + if ( tries <= 0 ) return false; - + unsigned firstGood = 0; int up = 0; vector res; - for ( unsigned i=0; i<_config.size(); i++ ){ + for ( unsigned i=0; i<_config.size(); i++ ) { BSONObj x; try { ScopedDbConnection conn( _config[i] ); @@ -469,125 +478,125 @@ namespace mongo { } conn.done(); } - catch ( std::exception& ){ - log(LL_WARNING) << " couldn't check on config server:" << _config[i] << " ok for now" << endl; + catch ( SocketException& e ) { + warning() << " couldn't check on config server:" << _config[i] << " ok for now : " << e.toString() << endl; } res.push_back(x); } - if ( up == 0 ){ + if ( up == 0 ) { errmsg = "no config servers reachable"; return false; } - if ( up == 1 ){ + if ( up == 1 ) { log( LL_WARNING ) << "only 1 config server reachable, continuing" << endl; return true; } BSONObj base = res[firstGood]; - for ( unsigned i=firstGood+1; igetLastError(); conn.done(); return true; } - catch ( DBException& ){ + catch ( DBException& ) { log() << "ConfigServer::allUp : " << _primary.toString() << " seems down!" << endl; errmsg = _primary.toString() + " seems down"; return false; } - + } - - int ConfigServer::dbConfigVersion(){ + + int ConfigServer::dbConfigVersion() { ScopedDbConnection conn( _primary ); int version = dbConfigVersion( conn.conn() ); conn.done(); return version; } - - int ConfigServer::dbConfigVersion( DBClientBase& conn ){ + + int ConfigServer::dbConfigVersion( DBClientBase& conn ) { auto_ptr c = conn.query( "config.version" , BSONObj() ); int version = 0; - if ( c->more() ){ + if ( c->more() ) { BSONObj o = c->next(); version = o["version"].numberInt(); uassert( 10189 , "should only have 1 thing in config.version" , ! c->more() ); } else { - if ( conn.count( ShardNS::shard ) || conn.count( ShardNS::database ) ){ + if ( conn.count( ShardNS::shard ) || conn.count( ShardNS::database ) ) { version = 1; } } - + return version; } - - void ConfigServer::reloadSettings(){ + + void ConfigServer::reloadSettings() { set got; - + ScopedDbConnection conn( _primary ); auto_ptr c = conn->query( ShardNS::settings , BSONObj() ); assert( c.get() ); - while ( c->more() ){ + while ( c->more() ) { BSONObj o = c->next(); string name = o["_id"].valuestrsafe(); got.insert( name ); - if ( name == "chunksize" ){ + if ( name == "chunksize" ) { log(1) << "MaxChunkSize: " << o["value"] << endl; Chunk::MaxChunkSize = o["value"].numberInt() * 1024 * 1024; } - else if ( name == "balancer" ){ + else if ( name == "balancer" ) { // ones we ignore here } else { @@ -595,12 +604,12 @@ namespace mongo { } } - if ( ! got.count( "chunksize" ) ){ + if ( ! 
got.count( "chunksize" ) ) { conn->insert( ShardNS::settings , BSON( "_id" << "chunksize" << "value" << (Chunk::MaxChunkSize / ( 1024 * 1024 ) ) ) ); } - - + + // indexes try { conn->ensureIndex( ShardNS::chunk , BSON( "ns" << 1 << "min" << 1 ) , true ); @@ -608,66 +617,86 @@ namespace mongo { conn->ensureIndex( ShardNS::chunk , BSON( "ns" << 1 << "lastmod" << 1 ) , true ); conn->ensureIndex( ShardNS::shard , BSON( "host" << 1 ) , true ); } - catch ( std::exception& e ){ + catch ( std::exception& e ) { log( LL_WARNING ) << "couldn't create indexes on config db: " << e.what() << endl; } conn.done(); } - string ConfigServer::getHost( string name , bool withPort ){ - if ( name.find( ":" ) != string::npos ){ + string ConfigServer::getHost( string name , bool withPort ) { + if ( name.find( ":" ) != string::npos ) { if ( withPort ) return name; return name.substr( 0 , name.find( ":" ) ); } - if ( withPort ){ + if ( withPort ) { stringstream ss; ss << name << ":" << CmdLine::ConfigServerPort; return ss.str(); } - + return name; } - void ConfigServer::logChange( const string& what , const string& ns , const BSONObj& detail ){ - assert( _primary.ok() ); + /* must never throw */ + void ConfigServer::logChange( const string& what , const string& ns , const BSONObj& detail ) { + string changeID; - static bool createdCapped = false; - static AtomicUInt num; - - ScopedDbConnection conn( _primary ); - - if ( ! createdCapped ){ - try { - conn->createCollection( "config.changelog" , 1024 * 1024 * 10 , true ); - } - catch ( UserException& e ){ - log(1) << "couldn't create changelog (like race condition): " << e << endl; - // don't care + try { + // get this entry's ID so we can use on the exception code path too + stringstream id; + static AtomicUInt num; + id << getHostNameCached() << "-" << terseCurrentTime() << "-" << num++; + changeID = id.str(); + + // send a copy of the message to the log in case it doesn't manage to reach config.changelog + Client* c = currentClient.get(); + BSONObj msg = BSON( "_id" << changeID << "server" << getHostNameCached() << "clientAddr" << (c ? c->clientAddress(true) : "N/A") + << "time" << DATENOW << "what" << what << "ns" << ns << "details" << detail ); + log() << "about to log metadata event: " << msg << endl; + + assert( _primary.ok() ); + + ScopedDbConnection conn( _primary ); + + static bool createdCapped = false; + if ( ! 
createdCapped ) { + try { + conn->createCollection( "config.changelog" , 1024 * 1024 * 10 , true ); + } + catch ( UserException& e ) { + log(1) << "couldn't create changelog (like race condition): " << e << endl; + // don't care + } + createdCapped = true; } - createdCapped = true; + + conn->insert( "config.changelog" , msg ); + + conn.done(); + } - - stringstream id; - id << getHostNameCached() << "-" << terseCurrentTime() << "-" << num++; - BSONObj msg = BSON( "_id" << id.str() << "server" << getHostNameCached() << "time" << DATENOW << - "what" << what << "ns" << ns << "details" << detail ); - log() << "config change: " << msg << endl; + catch ( std::exception& e ) { + // if we got here, it means the config change is only in the log; it didn't make it to config.changelog + log() << "not logging config change: " << changeID << " " << e.what() << endl; + } + } + void ConfigServer::replicaSetChange( const ReplicaSetMonitor * monitor ) { try { - conn->insert( "config.changelog" , msg ); + ScopedDbConnection conn( configServer.getConnectionString() ); + conn->update( ShardNS::shard , BSON( "_id" << monitor->getName() ) , BSON( "$set" << BSON( "host" << monitor->getServerAddress() ) ) ); + conn.done(); } - catch ( std::exception& e ){ - log() << "not logging config change: " << e.what() << endl; + catch ( DBException & ) { + error() << "RSChangeWatcher: could not update config db for set: " << monitor->getName() << " to: " << monitor->getServerAddress() << endl; } - - conn.done(); } - DBConfigPtr configServerPtr (new ConfigServer()); - ConfigServer& configServer = dynamic_cast(*configServerPtr); + DBConfigPtr configServerPtr (new ConfigServer()); + ConfigServer& configServer = dynamic_cast(*configServerPtr); -} +} diff --git a/s/config.h b/s/config.h index 5bff03f..0636835 100644 --- a/s/config.h +++ b/s/config.h @@ -26,14 +26,16 @@ #include "../db/namespace.h" #include "../client/dbclient.h" #include "../client/model.h" -#include "shardkey.h" + +#include "chunk.h" #include "shard.h" +#include "shardkey.h" namespace mongo { struct ShardNS { static string shard; - + static string database; static string collection; static string chunk; @@ -46,11 +48,10 @@ namespace mongo { * Field names used in the 'shards' collection. */ struct ShardFields { - static BSONField draining; - static BSONField maxSize; - static BSONField currSize; + static BSONField draining; // is it draining chunks? 
+ static BSONField maxSize; // max allowed disk space usage }; - + class ConfigServer; class DBConfig; @@ -59,93 +60,95 @@ namespace mongo { extern DBConfigPtr configServerPtr; extern ConfigServer& configServer; - class ChunkManager; - typedef shared_ptr ChunkManagerPtr; - /** * top level configuration for a database */ class DBConfig { struct CollectionInfo { - CollectionInfo(){ + CollectionInfo() { _dirty = false; _dropped = false; } - - CollectionInfo( DBConfig * db , const BSONObj& in ); - + + CollectionInfo( const BSONObj& in ); + bool isSharded() const { return _cm.get(); } - + ChunkManagerPtr getCM() const { return _cm; } - void shard( DBConfig * db , const string& ns , const ShardKeyPattern& key , bool unique ); + void shard( const string& ns , const ShardKeyPattern& key , bool unique ); void unshard(); bool isDirty() const { return _dirty; } bool wasDropped() const { return _dropped; } - + void save( const string& ns , DBClientBase* conn ); - + private: ChunkManagerPtr _cm; bool _dirty; bool _dropped; }; - + typedef map Collections; - + public: - DBConfig( string name ) - : _name( name ) , - _primary("config","") , - _shardingEnabled(false), - _lock("DBConfig"){ + DBConfig( string name ) + : _name( name ) , + _primary("config","") , + _shardingEnabled(false), + _lock("DBConfig") { assert( name.size() ); } - virtual ~DBConfig(){} - - string getName(){ return _name; }; + virtual ~DBConfig() {} + + string getName() { return _name; }; /** * @return if anything in this db is partitioned or not */ - bool isShardingEnabled(){ + bool isShardingEnabled() { return _shardingEnabled; } - + void enableSharding(); ChunkManagerPtr shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ); - + + /** + @return true if there was sharding info to remove + */ + bool removeSharding( const string& ns ); + /** * @return whether or not the 'ns' collection is partitioned */ bool isSharded( const string& ns ); - + ChunkManagerPtr getChunkManager( const string& ns , bool reload = false ); - + /** * @return the correct for shard for the ns * if the namespace is sharded, will return NULL */ const Shard& getShard( const string& ns ); - + const Shard& getPrimary() const { uassert( 8041 , (string)"no primary shard configured for db: " + _name , _primary.ok() ); return _primary; } - + void setPrimary( string s ); bool load(); bool reload(); - + bool dropDatabase( string& errmsg ); // model stuff @@ -153,16 +156,13 @@ namespace mongo { // lockless loading void serialize(BSONObjBuilder& to); - /** - * if i need save in new format - */ - bool unserialize(const BSONObj& from); + void unserialize(const BSONObj& from); void getAllShards(set& shards) const; protected: - /** + /** lockless */ bool _isSharded( const string& ns ); @@ -173,24 +173,16 @@ namespace mongo { bool _reload(); void _save(); - - /** - @return true if there was sharding info to remove - */ - bool removeSharding( const string& ns ); - string _name; // e.g. "alleyinsider" Shard _primary; // e.g. localhost , mongo.foo.com:9999 bool _shardingEnabled; - + //map _sharded; // { "alleyinsider.blog.posts" : { ts : 1 } , ... ] - all ns that are sharded //map _shards; // this will only have entries for things that have been looked at Collections _collections; mongo::mutex _lock; // TODO: change to r/w lock ?? 
- - friend class ChunkManager; }; class ConfigServer : public DBConfig { @@ -198,38 +190,42 @@ namespace mongo { ConfigServer(); ~ConfigServer(); - + bool ok( bool checkConsistency = false ); - - virtual string modelServer(){ + + virtual string modelServer() { uassert( 10190 , "ConfigServer not setup" , _primary.ok() ); return _primary.getConnString(); } - + /** - call at startup, this will initiate connection to the grid db + call at startup, this will initiate connection to the grid db */ bool init( vector configHosts ); - + bool init( string s ); bool allUp(); bool allUp( string& errmsg ); - + int dbConfigVersion(); int dbConfigVersion( DBClientBase& conn ); - + void reloadSettings(); /** * @return 0 = ok, otherwise error # */ int checkConfigVersion( bool upgrade ); - + /** - * log a change to config.changes + * Create a metadata change log entry in the config.changelog collection. + * * @param what e.g. "split" , "migrate" - * @param msg any more info + * @param ns to which collection the metadata change is being applied + * @param msg additional info about the metadata change + * + * This call is guaranteed never to throw. */ void logChange( const string& what , const string& ns , const BSONObj& detail = BSONObj() ); @@ -237,8 +233,10 @@ namespace mongo { return ConnectionString( _primary.getConnString() , ConnectionString::SYNC ); } + void replicaSetChange( const ReplicaSetMonitor * monitor ); + static int VERSION; - + /** * check to see if all config servers have the same state diff --git a/s/config_migrate.cpp b/s/config_migrate.cpp index 1a42144..57890a0 100644 --- a/s/config_migrate.cpp +++ b/s/config_migrate.cpp @@ -30,12 +30,12 @@ namespace mongo { - int ConfigServer::checkConfigVersion( bool upgrade ){ + int ConfigServer::checkConfigVersion( bool upgrade ) { int cur = dbConfigVersion(); if ( cur == VERSION ) return 0; - - if ( cur == 0 ){ + + if ( cur == 0 ) { ScopedDbConnection conn( _primary ); conn->insert( "config.version" , BSON( "_id" << 1 << "version" << VERSION ) ); pool.flush(); @@ -43,20 +43,20 @@ namespace mongo { conn.done(); return 0; } - - if ( cur == 2 ){ + + if ( cur == 2 ) { // need to upgrade assert( VERSION == 3 ); - if ( ! upgrade ){ + if ( ! 
upgrade ) { log() << "newer version of mongo meta data\n" << "need to --upgrade after shutting all mongos down" << endl; return -9; } - + ScopedDbConnection conn( _primary ); - + // do a backup string backupName; { @@ -67,20 +67,20 @@ namespace mongo { log() << "backing up config to: " << backupName << endl; conn->copyDatabase( "config" , backupName ); - map hostToShard; + map hostToShard; set shards; // shards { unsigned n = 0; auto_ptr c = conn->query( ShardNS::shard , BSONObj() ); - while ( c->more() ){ + while ( c->more() ) { BSONObj o = c->next(); string host = o["host"].String(); string name = ""; - + BSONElement id = o["_id"]; - if ( id.type() == String ){ + if ( id.type() == String ) { name = id.String(); } else { @@ -88,18 +88,18 @@ namespace mongo { ss << "shard" << hostToShard.size(); name = ss.str(); } - + hostToShard[host] = name; shards.insert( name ); n++; } - + assert( n == hostToShard.size() ); assert( n == shards.size() ); - + conn->remove( ShardNS::shard , BSONObj() ); - - for ( map::iterator i=hostToShard.begin(); i != hostToShard.end(); i++ ){ + + for ( map::iterator i=hostToShard.begin(); i != hostToShard.end(); i++ ) { conn->insert( ShardNS::shard , BSON( "_id" << i->second << "host" << i->first ) ); } } @@ -109,27 +109,27 @@ namespace mongo { auto_ptr c = conn->query( ShardNS::database , BSONObj() ); map newDBs; unsigned n = 0; - while ( c->more() ){ + while ( c->more() ) { BSONObj old = c->next(); n++; - - if ( old["name"].eoo() ){ + + if ( old["name"].eoo() ) { // already done newDBs[old["_id"].String()] = old; continue; } - + BSONObjBuilder b(old.objsize()); b.appendAs( old["name"] , "_id" ); - + BSONObjIterator i(old); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); if ( strcmp( "_id" , e.fieldName() ) == 0 || - strcmp( "name" , e.fieldName() ) == 0 ){ + strcmp( "name" , e.fieldName() ) == 0 ) { continue; } - + b.append( e ); } @@ -139,45 +139,45 @@ namespace mongo { } assert( n == newDBs.size() ); - + conn->remove( ShardNS::database , BSONObj() ); - - for ( map::iterator i=newDBs.begin(); i!=newDBs.end(); i++ ){ + + for ( map::iterator i=newDBs.begin(); i!=newDBs.end(); i++ ) { conn->insert( ShardNS::database , i->second ); } - + } - + // chunks { unsigned num = 0; map chunks; auto_ptr c = conn->query( ShardNS::chunk , BSONObj() ); - while ( c->more() ){ + while ( c->more() ) { BSONObj x = c->next(); BSONObjBuilder b; string id = Chunk::genID( x["ns"].String() , x["min"].Obj() ); b.append( "_id" , id ); - + BSONObjIterator i(x); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); if ( strcmp( e.fieldName() , "_id" ) == 0 ) continue; b.append( e ); } - + BSONObj n = b.obj(); log() << x << "\n\t" << n << endl; chunks[id] = n; num++; } - + assert( num == chunks.size() ); - + conn->remove( ShardNS::chunk , BSONObj() ); - for ( map::iterator i=chunks.begin(); i!=chunks.end(); i++ ){ + for ( map::iterator i=chunks.begin(); i!=chunks.end(); i++ ) { conn->insert( ShardNS::chunk , i->second ); } @@ -188,7 +188,7 @@ namespace mongo { pool.flush(); return 1; } - + log() << "don't know how to upgrade " << cur << " to " << VERSION << endl; return -8; } diff --git a/s/cursors.cpp b/s/cursors.cpp index 6dd7a20..cf2735b 100644 --- a/s/cursors.cpp +++ b/s/cursors.cpp @@ -21,90 +21,90 @@ #include "../client/connpool.h" #include "../db/queryutil.h" #include "../db/commands.h" -#include "../util/background.h" +#include "../util/concurrency/task.h" namespace mongo { - + // -------- ShardedCursor ----------- - 
ShardedClientCursor::ShardedClientCursor( QueryMessage& q , ClusteredCursor * cursor ){ + ShardedClientCursor::ShardedClientCursor( QueryMessage& q , ClusteredCursor * cursor ) { assert( cursor ); _cursor = cursor; - + _skip = q.ntoskip; _ntoreturn = q.ntoreturn; - + _totalSent = 0; _done = false; _id = 0; - - if ( q.queryOptions & QueryOption_NoCursorTimeout ){ + + if ( q.queryOptions & QueryOption_NoCursorTimeout ) { _lastAccessMillis = 0; } - else + else _lastAccessMillis = Listener::getElapsedTimeMillis(); } - ShardedClientCursor::~ShardedClientCursor(){ + ShardedClientCursor::~ShardedClientCursor() { assert( _cursor ); delete _cursor; _cursor = 0; } - long long ShardedClientCursor::getId(){ - if ( _id <= 0 ){ + long long ShardedClientCursor::getId() { + if ( _id <= 0 ) { _id = cursorCache.genId(); assert( _id >= 0 ); } return _id; } - void ShardedClientCursor::accessed(){ + void ShardedClientCursor::accessed() { if ( _lastAccessMillis > 0 ) _lastAccessMillis = Listener::getElapsedTimeMillis(); } - long long ShardedClientCursor::idleTime( long long now ){ + long long ShardedClientCursor::idleTime( long long now ) { if ( _lastAccessMillis == 0 ) return 0; return now - _lastAccessMillis; } - bool ShardedClientCursor::sendNextBatch( Request& r , int ntoreturn ){ + bool ShardedClientCursor::sendNextBatch( Request& r , int ntoreturn ) { uassert( 10191 , "cursor already done" , ! _done ); - + int maxSize = 1024 * 1024; if ( _totalSent > 0 ) maxSize *= 3; - + BufBuilder b(32768); - + int num = 0; bool sendMore = true; - while ( _cursor->more() ){ + while ( _cursor->more() ) { BSONObj o = _cursor->next(); b.appendBuf( (void*)o.objdata() , o.objsize() ); num++; - - if ( b.len() > maxSize ){ + + if ( b.len() > maxSize ) { break; } - if ( num == ntoreturn ){ + if ( num == ntoreturn ) { // soft limit aka batch size break; } - if ( ntoreturn != 0 && ( -1 * num + _totalSent ) == ntoreturn ){ + if ( ntoreturn != 0 && ( -1 * num + _totalSent ) == ntoreturn ) { // hard limit - total to send sendMore = false; break; } - if ( ntoreturn == 0 && _totalSent == 0 && num > 100 ){ + if ( ntoreturn == 0 && _totalSent == 0 && num > 100 ) { // first batch should be max 100 unless batch size specified break; } @@ -112,123 +112,141 @@ namespace mongo { bool hasMore = sendMore && _cursor->more(); log(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << getId() << " totalSent: " << _totalSent << endl; - + replyToQuery( 0 , r.p() , r.m() , b.buf() , b.len() , num , _totalSent , hasMore ? getId() : 0 ); _totalSent += num; _done = ! hasMore; - + return hasMore; } // ---- CursorCache ----- - + long long CursorCache::TIMEOUT = 600000; CursorCache::CursorCache() - :_mutex( "CursorCache" ), _shardedTotal(0){ + :_mutex( "CursorCache" ), _shardedTotal(0) { } - CursorCache::~CursorCache(){ + CursorCache::~CursorCache() { // TODO: delete old cursors? 
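// Illustrative sketch (not part of the patch): the batch cut-off rules used by
// ShardedClientCursor::sendNextBatch above, restated as a standalone helper.
// Names here (maxBatchBytes, shouldCloseBatch) are hypothetical.
#include <cstdint>

// 1 MB for the first batch, 3 MB once something has already been sent.
inline int64_t maxBatchBytes(int64_t totalSent) {
    return (totalSent > 0 ? 3 : 1) * 1024LL * 1024LL;
}

// bytes: reply size so far; num: docs in this batch; totalSent: docs sent in earlier
// batches; ntoreturn: requested number to return (0 means no explicit limit).
inline bool shouldCloseBatch(int64_t bytes, int num, int64_t totalSent, int ntoreturn, bool& sendMore) {
    if (bytes > maxBatchBytes(totalSent)) return true;               // reply size cap
    if (num == ntoreturn) return true;                               // soft limit, i.e. batch size
    if (ntoreturn != 0 && (-1 * num + totalSent) == ntoreturn) {     // hard limit - total to send
        sendMore = false;
        return true;
    }
    if (ntoreturn == 0 && totalSent == 0 && num > 100) return true;  // default first batch: max 100 docs
    return false;
}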
int logLevel = 1; if ( _cursors.size() || _refs.size() ) logLevel = 0; log( logLevel ) << " CursorCache at shutdown - " - << " sharded: " << _cursors.size() + << " sharded: " << _cursors.size() << " passthrough: " << _refs.size() << endl; } - ShardedClientCursorPtr CursorCache::get( long long id ){ + ShardedClientCursorPtr CursorCache::get( long long id ) const { + LOG(_myLogLevel) << "CursorCache::get id: " << id << endl; scoped_lock lk( _mutex ); - MapSharded::iterator i = _cursors.find( id ); - if ( i == _cursors.end() ){ + MapSharded::const_iterator i = _cursors.find( id ); + if ( i == _cursors.end() ) { OCCASIONALLY log() << "Sharded CursorCache missing cursor id: " << id << endl; return ShardedClientCursorPtr(); } i->second->accessed(); return i->second; } - - void CursorCache::store( ShardedClientCursorPtr cursor ){ + + void CursorCache::store( ShardedClientCursorPtr cursor ) { + LOG(_myLogLevel) << "CursorCache::store cursor " << " id: " << cursor->getId() << endl; assert( cursor->getId() ); scoped_lock lk( _mutex ); _cursors[cursor->getId()] = cursor; _shardedTotal++; } - void CursorCache::remove( long long id ){ + void CursorCache::remove( long long id ) { assert( id ); scoped_lock lk( _mutex ); _cursors.erase( id ); } - - void CursorCache::storeRef( const string& server , long long id ){ + + void CursorCache::storeRef( const string& server , long long id ) { + LOG(_myLogLevel) << "CursorCache::storeRef server: " << server << " id: " << id << endl; assert( id ); scoped_lock lk( _mutex ); _refs[id] = server; } - - long long CursorCache::genId(){ - while ( true ){ + + string CursorCache::getRef( long long id ) const { + LOG(_myLogLevel) << "CursorCache::getRef id: " << id << endl; + assert( id ); + scoped_lock lk( _mutex ); + MapNormal::const_iterator i = _refs.find( id ); + if ( i == _refs.end() ) + return ""; + return i->second; + } + + + long long CursorCache::genId() { + while ( true ) { long long x = security.getNonce(); if ( x == 0 ) continue; if ( x < 0 ) x *= -1; - + scoped_lock lk( _mutex ); MapSharded::iterator i = _cursors.find( x ); if ( i != _cursors.end() ) continue; - + MapNormal::iterator j = _refs.find( x ); if ( j != _refs.end() ) continue; - + return x; } } - void CursorCache::gotKillCursors(Message& m ){ + void CursorCache::gotKillCursors(Message& m ) { int *x = (int *) m.singleData()->_data; x++; // reserved int n = *x++; - if ( n > 2000 ){ + if ( n > 2000 ) { log( n < 30000 ? 
LL_WARNING : LL_ERROR ) << "receivedKillCursors, n=" << n << endl; } uassert( 13286 , "sent 0 cursors to kill" , n >= 1 ); uassert( 13287 , "too many cursors to kill" , n < 30000 ); - + long long * cursors = (long long *)x; - for ( int i=0; isecond; _refs.erase( j ); } - + + LOG(_myLogLevel) << "CursorCache::found gotKillCursors id: " << id << " server: " << server << endl; + assert( server.size() ); ScopedDbConnection conn( server ); conn->killCursor( id ); @@ -236,7 +254,7 @@ namespace mongo { } } - void CursorCache::appendInfo( BSONObjBuilder& result ){ + void CursorCache::appendInfo( BSONObjBuilder& result ) const { scoped_lock lk( _mutex ); result.append( "sharded" , (int)_cursors.size() ); result.appendNumber( "shardedEver" , _shardedTotal ); @@ -244,12 +262,12 @@ namespace mongo { result.append( "totalOpen" , (int)(_cursors.size() + _refs.size() ) ); } - void CursorCache::doTimeouts(){ + void CursorCache::doTimeouts() { long long now = Listener::getElapsedTimeMillis(); scoped_lock lk( _mutex ); - for ( MapSharded::iterator i=_cursors.begin(); i!=_cursors.end(); ++i ){ + for ( MapSharded::iterator i=_cursors.begin(); i!=_cursors.end(); ++i ) { long long idleFor = i->second->idleTime( now ); - if ( idleFor < TIMEOUT ){ + if ( idleFor < TIMEOUT ) { continue; } log() << "killing old cursor " << i->second->getId() << " idle for: " << idleFor << "ms" << endl; // TODO: make log(1) @@ -258,18 +276,19 @@ namespace mongo { } CursorCache cursorCache; - - class CursorTimeoutThread : public PeriodicBackgroundJob { + + int CursorCache::_myLogLevel = 3; + + class CursorTimeoutTask : public task::Task { public: - CursorTimeoutThread() : PeriodicBackgroundJob( 4000 ){} - virtual string name() { return "cursorTimeout"; } - virtual void runLoop(){ + virtual string name() const { return "cursorTimeout"; } + virtual void doWork() { cursorCache.doTimeouts(); } - } cursorTimeoutThread; + } cursorTimeoutTask; - void CursorCache::startTimeoutThread(){ - cursorTimeoutThread.go(); + void CursorCache::startTimeoutThread() { + task::repeat( &cursorTimeoutTask , 400 ); } class CmdCursorInfo : public Command { @@ -280,7 +299,7 @@ namespace mongo { help << " example: { cursorInfo : 1 }"; } virtual LockType locktype() const { return NONE; } - bool run(const string&, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string&, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { cursorCache.appendInfo( result ); if ( jsobj["setTimeout"].isNumber() ) CursorCache::TIMEOUT = jsobj["setTimeout"].numberLong(); diff --git a/s/cursors.h b/s/cursors.h index 53c5b64..7b54af6 100644 --- a/s/cursors.h +++ b/s/cursors.h @@ -16,7 +16,7 @@ */ -#pragma once +#pragma once #include "../pch.h" @@ -35,21 +35,21 @@ namespace mongo { virtual ~ShardedClientCursor(); long long getId(); - + /** * @return whether there is more data left */ - bool sendNextBatch( Request& r ){ return sendNextBatch( r , _ntoreturn ); } + bool sendNextBatch( Request& r ) { return sendNextBatch( r , _ntoreturn ); } bool sendNextBatch( Request& r , int ntoreturn ); - + void accessed(); /** @return idle time in ms */ long long idleTime( long long now ); protected: - + ClusteredCursor * _cursor; - + int _skip; int _ntoreturn; @@ -62,10 +62,10 @@ namespace mongo { }; typedef boost::shared_ptr ShardedClientCursorPtr; - + class CursorCache { public: - + static long long TIMEOUT; typedef map MapSharded; @@ -73,29 +73,34 @@ namespace mongo { CursorCache(); ~CursorCache(); - - ShardedClientCursorPtr get( long 
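// Illustrative sketch (not part of the patch): the idle-cursor sweep performed by
// CursorCache::doTimeouts above, written against a plain std::map. The types and
// the sweepIdleCursors name are stand-ins, not the mongos types.
#include <cstdint>
#include <map>

struct IdleInfo { int64_t lastAccessMillis; };   // stand-in for per-cursor access state

// Erase every entry idle for at least timeoutMillis; lastAccessMillis == 0 means "never times out".
inline void sweepIdleCursors(std::map<int64_t, IdleInfo>& cursors,
                             int64_t nowMillis, int64_t timeoutMillis) {
    for (std::map<int64_t, IdleInfo>::iterator it = cursors.begin(); it != cursors.end(); ) {
        int64_t idleFor = it->second.lastAccessMillis == 0 ? 0 : nowMillis - it->second.lastAccessMillis;
        if (idleFor < timeoutMillis)
            ++it;                    // recently used (or pinned): keep
        else
            cursors.erase(it++);     // timed out: drop the cursor entry
    }
}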
long id ); + + ShardedClientCursorPtr get( long long id ) const; void store( ShardedClientCursorPtr cursor ); void remove( long long id ); void storeRef( const string& server , long long id ); - void gotKillCursors(Message& m ); - - void appendInfo( BSONObjBuilder& result ); + /** @return the server for id or "" */ + string getRef( long long id ) const ; + void gotKillCursors(Message& m ); + + void appendInfo( BSONObjBuilder& result ) const ; + long long genId(); void doTimeouts(); void startTimeoutThread(); private: - mutex _mutex; + mutable mongo::mutex _mutex; MapSharded _cursors; MapNormal _refs; - + long long _shardedTotal; + + static int _myLogLevel; }; - + extern CursorCache cursorCache; } diff --git a/s/d_chunk_manager.cpp b/s/d_chunk_manager.cpp new file mode 100644 index 0000000..d4fea30 --- /dev/null +++ b/s/d_chunk_manager.cpp @@ -0,0 +1,328 @@ +// @file d_chunk_manager.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" + +#include "../client/connpool.h" +#include "../client/dbclientmockcursor.h" +#include "../db/instance.h" + +#include "d_chunk_manager.h" + +namespace mongo { + + ShardChunkManager::ShardChunkManager( const string& configServer , const string& ns , const string& shardName ) { + + // have to get a connection to the config db + // special case if i'm the configdb since i'm locked and if i connect to myself + // its a deadlock + scoped_ptr scoped; + scoped_ptr direct; + DBClientBase * conn; + if ( configServer.empty() ) { + direct.reset( new DBDirectClient() ); + conn = direct.get(); + } + else { + scoped.reset( new ScopedDbConnection( configServer ) ); + conn = scoped->get(); + } + + // get this collection's sharding key + BSONObj collectionDoc = conn->findOne( "config.collections", BSON( "_id" << ns ) ); + uassert( 13539 , str::stream() << ns << " does not exist" , !collectionDoc.isEmpty() ); + uassert( 13540 , str::stream() << ns << " collection config entry corrupted" , collectionDoc["dropped"].type() ); + uassert( 13541 , str::stream() << ns << " dropped. Re-shard collection first." 
, !collectionDoc["dropped"].Bool() ); + _fillCollectionKey( collectionDoc ); + + // query for all the chunks for 'ns' that live in this shard, sorting so we can efficiently bucket them + BSONObj q = BSON( "ns" << ns << "shard" << shardName ); + auto_ptr cursor = conn->query( "config.chunks" , Query(q).sort( "min" ) ); + _fillChunks( cursor.get() ); + _fillRanges(); + + if ( scoped.get() ) + scoped->done(); + + if ( _chunksMap.empty() ) + log() << "no chunk for collection " << ns << " on shard " << shardName << endl; + } + + ShardChunkManager::ShardChunkManager( const BSONObj& collectionDoc , const BSONArray& chunksArr ) { + _fillCollectionKey( collectionDoc ); + + scoped_ptr c ( new DBClientMockCursor( chunksArr ) ); + _fillChunks( c.get() ); + _fillRanges(); + } + + void ShardChunkManager::_fillCollectionKey( const BSONObj& collectionDoc ) { + BSONElement e = collectionDoc["key"]; + uassert( 13542 , str::stream() << "collection doesn't have a key: " << collectionDoc , ! e.eoo() && e.isABSONObj() ); + + BSONObj keys = e.Obj().getOwned(); + BSONObjBuilder b; + BSONForEach( key , keys ) { + b.append( key.fieldName() , 1 ); + } + _key = b.obj(); + } + + void ShardChunkManager::_fillChunks( DBClientCursorInterface* cursor ) { + assert( cursor ); + + ShardChunkVersion version; + while ( cursor->more() ) { + BSONObj d = cursor->next(); + _chunksMap.insert( make_pair( d["min"].Obj().getOwned() , d["max"].Obj().getOwned() ) ); + + ShardChunkVersion currVersion( d["lastmod"] ); + if ( currVersion > version ) { + version = currVersion; + } + } + _version = version; + } + + void ShardChunkManager::_fillRanges() { + if ( _chunksMap.empty() ) + return; + + // load the chunk information, coallesceing their ranges + // the version for this shard would be the highest version for any of the chunks + RangeMap::const_iterator it = _chunksMap.begin(); + BSONObj min,max; + while ( it != _chunksMap.end() ) { + BSONObj currMin = it->first; + BSONObj currMax = it->second; + ++it; + + // coallesce the chunk's bounds in ranges if they are adjacent chunks + if ( min.isEmpty() ) { + min = currMin; + max = currMax; + continue; + } + if ( max == currMin ) { + max = currMax; + continue; + } + + _rangesMap.insert( make_pair( min , max ) ); + + min = currMin; + max = currMax; + } + assert( ! min.isEmpty() ); + + _rangesMap.insert( make_pair( min , max ) ); + } + + static bool contains( const BSONObj& min , const BSONObj& max , const BSONObj& point ) { + return point.woCompare( min ) >= 0 && point.woCompare( max ) < 0; + } + + bool ShardChunkManager::belongsToMe( const BSONObj& obj ) const { + if ( _rangesMap.size() == 0 ) + return false; + + BSONObj x = obj.extractFields(_key); + + RangeMap::const_iterator it = _rangesMap.upper_bound( x ); + if ( it != _rangesMap.begin() ) + it--; + + bool good = contains( it->first , it->second , x ); + +#if 0 + if ( ! 
good ) { + log() << "bad: " << x << " " << it->first << " " << x.woCompare( it->first ) << " " << x.woCompare( it->second ) << endl; + for ( RangeMap::const_iterator i=_rangesMap.begin(); i!=_rangesMap.end(); ++i ) { + log() << "\t" << i->first << "\t" << i->second << "\t" << endl; + } + } +#endif + + return good; + } + + bool ShardChunkManager::getNextChunk( const BSONObj& lookupKey, BSONObj* foundMin , BSONObj* foundMax ) const { + assert( foundMin ); + assert( foundMax ); + *foundMin = BSONObj(); + *foundMax = BSONObj(); + + if ( _chunksMap.empty() ) { + return true; + } + + RangeMap::const_iterator it; + if ( lookupKey.isEmpty() ) { + it = _chunksMap.begin(); + *foundMin = it->first; + *foundMax = it->second; + return _chunksMap.size() == 1; + } + + it = _chunksMap.upper_bound( lookupKey ); + if ( it != _chunksMap.end() ) { + *foundMin = it->first; + *foundMax = it->second; + return false; + } + + return true; + } + + void ShardChunkManager::_assertChunkExists( const BSONObj& min , const BSONObj& max ) const { + RangeMap::const_iterator it = _chunksMap.find( min ); + if ( it == _chunksMap.end() ) { + uasserted( 13586 , str::stream() << "couldn't find chunk " << min << "->" << max ); + } + + if ( it->second.woCompare( max ) != 0 ) { + ostringstream os; + os << "ranges differ, " + << "requested: " << min << " -> " << max << " " + << "existing: " << (it == _chunksMap.end()) ? "" : it->first.toString() + " -> " + it->second.toString(); + uasserted( 13587 , os.str() ); + } + } + + ShardChunkManager* ShardChunkManager::cloneMinus( const BSONObj& min, const BSONObj& max, const ShardChunkVersion& version ) { + + // check that we have the exact chunk that'll be subtracted + _assertChunkExists( min , max ); + + auto_ptr p( new ShardChunkManager ); + p->_key = this->_key; + + if ( _chunksMap.size() == 1 ) { + // if left with no chunks, just reset version + uassert( 13590 , str::stream() << "setting version to " << version << " on removing last chunk", version == 0 ); + + p->_version = 0; + + } + else { + // can't move version backwards when subtracting chunks + // this is what guarantees that no read or write would be taken once we subtract data from the current shard + if ( version <= _version ) { + uasserted( 13585 , str::stream() << "version " << version.toString() << " not greater than " << _version.toString() ); + } + + p->_chunksMap = this->_chunksMap; + p->_chunksMap.erase( min ); + p->_version = version; + p->_fillRanges(); + } + + return p.release(); + } + + static bool overlap( const BSONObj& l1 , const BSONObj& h1 , const BSONObj& l2 , const BSONObj& h2 ) { + return ! ( ( h1.woCompare( l2 ) <= 0 ) || ( h2.woCompare( l1 ) <= 0 ) ); + } + + ShardChunkManager* ShardChunkManager::clonePlus( const BSONObj& min , const BSONObj& max , const ShardChunkVersion& version ) { + + // it is acceptable to move version backwards (e.g., undoing a migration that went bad during commit) + // but only cloning away the last chunk may reset the version to 0 + uassert( 13591 , "version can't be set to zero" , version > 0 ); + + if ( ! 
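// Illustrative sketch (not part of the patch): the ownership lookup in
// ShardChunkManager::belongsToMe above -- one upper_bound on the coalesced
// range map, then a step back. Plain integers; ownsKey is a hypothetical name.
#include <map>

inline bool ownsKey(const std::map<int, int>& ranges, int key) {
    if (ranges.empty())
        return false;
    std::map<int, int>::const_iterator it = ranges.upper_bound(key);  // first range with min > key
    if (it != ranges.begin())
        --it;                                                         // candidate range: the one just before it
    return key >= it->first && key < it->second;                      // half-open [min, max) containment
}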
_chunksMap.empty() ) { + + // check that there isn't any chunk on the interval to be added + RangeMap::const_iterator it = _chunksMap.lower_bound( max ); + if ( it != _chunksMap.begin() ) { + --it; + } + if ( overlap( min , max , it->first , it->second ) ) { + ostringstream os; + os << "ranges overlap, " + << "requested: " << min << " -> " << max << " " + << "existing: " << it->first.toString() + " -> " + it->second.toString(); + uasserted( 13588 , os.str() ); + } + } + + auto_ptr p( new ShardChunkManager ); + + p->_key = this->_key; + p->_chunksMap = this->_chunksMap; + p->_chunksMap.insert( make_pair( min.getOwned() , max.getOwned() ) ); + p->_version = version; + p->_fillRanges(); + + return p.release(); + } + + ShardChunkManager* ShardChunkManager::cloneSplit( const BSONObj& min , const BSONObj& max , const vector& splitKeys , + const ShardChunkVersion& version ) { + + // the version required in both resulting chunks could be simply an increment in the minor portion of the current version + // however, we are enforcing uniqueness over the attributes of the configdb collection 'chunks' + // so in practice, a migrate somewhere may force this split to pick up a version that has the major portion higher + // than the one that this shard has been using + // + // TODO drop the uniqueness constraint and tigthen the check below so that only the minor portion of version changes + if ( version <= _version ) { + uasserted( 13592 , str::stream() << "version " << version.toString() << " not greater than " << _version.toString() ); + } + + // check that we have the exact chunk that'll be split and that the split point is valid + _assertChunkExists( min , max ); + for ( vector::const_iterator it = splitKeys.begin() ; it != splitKeys.end() ; ++it ) { + if ( ! contains( min , max , *it ) ) { + uasserted( 13593 , str::stream() << "can split " << min << " -> " << max << " on " << *it ); + } + } + + auto_ptr p( new ShardChunkManager ); + + p->_key = this->_key; + p->_chunksMap = this->_chunksMap; + p->_version = version; // will increment second, third, ... chunks below + + BSONObj startKey = min; + for ( vector::const_iterator it = splitKeys.begin() ; it != splitKeys.end() ; ++it ) { + BSONObj split = *it; + p->_chunksMap[min] = split.getOwned(); + p->_chunksMap.insert( make_pair( split.getOwned() , max.getOwned() ) ); + p->_version.incMinor(); + startKey = split; + } + p->_fillRanges(); + + return p.release(); + } + + string ShardChunkManager::toString() const { + StringBuilder ss; + ss << " ShardChunkManager version: " << _version << " key: " << _key; + bool first = true; + for ( RangeMap::const_iterator i=_rangesMap.begin(); i!=_rangesMap.end(); ++i ) { + if ( first ) first = false; + else ss << " , "; + + ss << i->first << " -> " << i->second; + } + return ss.str(); + } + +} // namespace mongo diff --git a/s/d_chunk_manager.h b/s/d_chunk_manager.h new file mode 100644 index 0000000..9fb95e7 --- /dev/null +++ b/s/d_chunk_manager.h @@ -0,0 +1,150 @@ +// @file d_chunk_manager.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. 
+* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../pch.h" + +#include "../db/jsobj.h" +#include "util.h" + +namespace mongo { + + /** + * Controls the boundaries of all the chunks for a given collection that live in this shard. + * + * ShardChunkManager instances never change after construction. There are methods provided that would generate a + * new manager if new chunks are added, subtracted, or split. + * + * TODO + * The responsibility of maintaining the version for a shard is still shared between this class and its caller. The + * manager does check corner cases (e.g. cloning out the last chunk generates a manager with version 0) but ultimately + * still cannot be responsible to set all versions. Currently, they are a function of the global state as opposed to + * the per-shard one. + */ + class ShardChunkManager : public boost::noncopyable { + public: + + /** + * Loads the ShardChunkManager with all boundaries for chunks of a given collection that live in an given + * shard. + * + * @param configServer name of the server where the configDB currently is. Can be empty to indicate + * that the configDB is running locally + * @param ns namespace for the collections whose chunks we're interested + * @param shardName name of the shard that this chunk matcher should track + * + * This constructor throws if collection is dropped/malformed and on connectivity errors + */ + ShardChunkManager( const string& configServer , const string& ns , const string& shardName ); + + /** + * Same as the regular constructor but used in unittest (no access to configDB required). + * + * @param collectionDoc simulates config.collection's entry for one colleciton + * @param chunksDocs simulates config.chunks' entries for one collection's shard + */ + ShardChunkManager( const BSONObj& collectionDoc , const BSONArray& chunksDoc ); + + ~ShardChunkManager() {} + + /** + * Generates a new manager based on 'this's state minus a given chunk. + * + * @param min max chunk boundaries for the chunk to subtract + * @param version that the resulting manager should be at. The version has to be higher than the current one. + * When cloning away the last chunk, verstion must be 0. + * @return a new ShardChunkManager, to be owned by the caller + */ + ShardChunkManager* cloneMinus( const BSONObj& min , const BSONObj& max , const ShardChunkVersion& version ); + + /** + * Generates a new manager based on 'this's state plus a given chunk. + * + * @param min max chunk boundaries for the chunk to add + * @param version that the resulting manager should be at. It can never be 0, though (see CloneMinus). + * @return a new ShardChunkManager, to be owned by the caller + */ + ShardChunkManager* clonePlus( const BSONObj& min , const BSONObj& max , const ShardChunkVersion& version ); + + /** + * Generates a new manager by splitting an existing chunk at one or more points. + * + * @param min max boundaries of chunk to be split + * @param splitKeys points to split original chunk at + * @param version to be used in first chunk. The subsequent chunks would increment the minor version. + * @return a new ShardChunkManager with the chunk split, to be owned by the caller + */ + ShardChunkManager* cloneSplit( const BSONObj& min , const BSONObj& max , const vector& splitKeys , + const ShardChunkVersion& version ); + + /** + * Checks whether a document belongs to this shard. 
+ * + * @param obj document containing sharding keys (and, optionally, other attributes) + * @return true if shards hold the object + */ + bool belongsToMe( const BSONObj& obj ) const; + + /** + * Given a chunk's min key (or empty doc), gets the boundary of the chunk following that one (the first). + * + * @param lookupKey is the min key for a previously obtained chunk or the empty document + * @param foundMin IN/OUT min for chunk following the one starting at lookupKey + * @param foundMax IN/OUT max for the above chunk + * @return true if the chunk returned is the last one + */ + bool getNextChunk( const BSONObj& lookupKey, BSONObj* foundMin , BSONObj* foundMax ) const; + + // accessors + + ShardChunkVersion getVersion() const { return _version; } + BSONObj getKey() const { return _key.getOwned(); } + unsigned getNumChunks() const { return _chunksMap.size(); } + + string toString() const; + private: + // highest ShardChunkVersion for which this ShardChunkManager's information is accurate + ShardChunkVersion _version; + + // key pattern for chunks under this range + BSONObj _key; + + // a map from a min key into the chunk's (or range's) max boundary + typedef map< BSONObj, BSONObj , BSONObjCmp > RangeMap; + RangeMap _chunksMap; + + // a map from a min key into a range or continguous chunks + // redundant but we expect high chunk continguity, expecially in small installations + RangeMap _rangesMap; + + /** constructors helpers */ + void _fillCollectionKey( const BSONObj& collectionDoc ); + void _fillChunks( DBClientCursorInterface* cursor ); + void _fillRanges(); + + /** throws if the exact chunk is not in the chunks' map */ + void _assertChunkExists( const BSONObj& min , const BSONObj& max ) const; + + /** can only be used in the cloning calls */ + ShardChunkManager() {} + }; + + typedef shared_ptr ShardChunkManagerPtr; + +} // namespace mongo diff --git a/s/d_logic.cpp b/s/d_logic.cpp index 62288ed..c032883 100644 --- a/s/d_logic.cpp +++ b/s/d_logic.cpp @@ -1,4 +1,4 @@ -// d_logic.cpp +// @file d_logic.cpp /** * Copyright (C) 2008 10gen Inc. @@ -37,32 +37,32 @@ #include "shard.h" #include "d_logic.h" +#include "d_writeback.h" using namespace std; namespace mongo { - bool handlePossibleShardedMessage( Message &m, DbResponse* dbresponse ){ - if ( ! 
shardingState.enabled() ) - return false; + bool _handlePossibleShardedMessage( Message &m, DbResponse* dbresponse ) { + DEV assert( shardingState.enabled() ); int op = m.operation(); - if ( op < 2000 - || op >= 3000 - || op == dbGetMore // cursors are weird - ) + if ( op < 2000 + || op >= 3000 + || op == dbGetMore // cursors are weird + ) return false; - - DbMessage d(m); + + DbMessage d(m); const char *ns = d.getns(); string errmsg; - if ( shardVersionOk( ns , opIsWrite( op ) , errmsg ) ){ + if ( shardVersionOk( ns , opIsWrite( op ) , errmsg ) ) { return false; } log(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl; - - if ( doesOpGetAResponse( op ) ){ + + if ( doesOpGetAResponse( op ) ) { assert( dbresponse ); BufBuilder b( 32768 ); b.skip( sizeof( QueryResult ) ); @@ -70,7 +70,7 @@ namespace mongo { BSONObj obj = BSON( "$err" << errmsg ); b.appendBuf( obj.objdata() , obj.objsize() ); } - + QueryResult *qr = (QueryResult*)b.buf(); qr->_resultFlags() = ResultFlag_ErrSet | ResultFlag_ShardConfigStale; qr->len = b.len(); @@ -82,19 +82,19 @@ namespace mongo { Message * resp = new Message(); resp->setData( qr , true ); - + dbresponse->response = resp; dbresponse->responseTo = m.header()->id; return true; } - + OID writebackID; writebackID.init(); lastError.getSafe()->writeback( writebackID ); const OID& clientID = ShardedConnectionInfo::get(false)->getID(); massert( 10422 , "write with bad shard config and no server id!" , clientID.isSet() ); - + log(1) << "got write with an old config - writing back ns: " << ns << endl; if ( logLevel ) log(1) << debugString( m ) << endl; @@ -102,11 +102,12 @@ namespace mongo { b.appendBool( "writeBack" , true ); b.append( "ns" , ns ); b.append( "id" , writebackID ); + b.append( "connectionId" , cc().getConnectionId() ); b.appendTimestamp( "version" , shardingState.getVersion( ns ) ); b.appendTimestamp( "yourVersion" , ShardedConnectionInfo::get( true )->getVersion( ns ) ); b.appendBinData( "msg" , m.header()->len , bdtCustom , (char*)(m.singleData()) ); log(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl; - queueWriteBack( clientID.str() , b.obj() ); + writeBackManager.queueWriteBack( clientID.str() , b.obj() ); return true; } diff --git a/s/d_logic.h b/s/d_logic.h index a000f6b..718836c 100644 --- a/s/d_logic.h +++ b/s/d_logic.h @@ -1,4 +1,4 @@ -// d_logic.h +// @file d_logic.h /* * Copyright (C) 2010 10gen Inc. 
* @@ -19,38 +19,20 @@ #pragma once #include "../pch.h" + #include "../db/jsobj.h" + +#include "d_chunk_manager.h" #include "util.h" namespace mongo { - - class ShardingState; - - typedef ShardChunkVersion ConfigVersion; - typedef map NSVersionMap; - - // ----------- - class ChunkMatcher { - typedef map,BSONObjCmp> MyMap; - public: - - bool belongsToMe( const BSONObj& key , const DiskLoc& loc ) const; + class Database; + class DiskLoc; - private: - ChunkMatcher( ConfigVersion version ); - - void gotRange( const BSONObj& min , const BSONObj& max ); - - ConfigVersion _version; - BSONObj _key; - MyMap _map; - - friend class ShardingState; - }; + typedef ShardChunkVersion ConfigVersion; + typedef map NSVersionMap; - typedef shared_ptr ChunkMatcherPtr; - // -------------- // --- global state --- // -------------- @@ -58,100 +40,182 @@ namespace mongo { class ShardingState { public: ShardingState(); - + bool enabled() const { return _enabled; } const string& getConfigServer() const { return _configServer; } void enable( const string& server ); void gotShardName( const string& name ); - void gotShardHost( const string& host ); - + void gotShardHost( string host ); + + /** Reverts back to a state where this mongod is not sharded. */ + void resetShardingState(); + + // versioning support + bool hasVersion( const string& ns ); bool hasVersion( const string& ns , ConfigVersion& version ); - ConfigVersion& getVersion( const string& ns ); // TODO: this is dangeroues - void setVersion( const string& ns , const ConfigVersion& version ); - + const ConfigVersion getVersion( const string& ns ) const; + + /** + * Uninstalls the manager for a given collection. This should be used when the collection is dropped. + * + * NOTE: + * An existing collection with no chunks on this shard will have a manager on version 0, which is different than a + * a dropped collection, which will not have a manager. + * + * TODO + * When sharding state is enabled, absolutely all collections should have a manager. (The non-sharded ones are + * a be degenerate case of one-chunk collections). + * For now, a dropped collection and an non-sharded one are indistinguishable (SERVER-1849) + * + * @param ns the collection to be dropped + */ + void resetVersion( const string& ns ); + + /** + * Requests to access a collection at a certain version. If the collection's manager is not at that version it + * will try to update itself to the newest version. The request is only granted if the version is the current or + * the newest one. + * + * @param ns collection to be accessed + * @param version (IN) the client belive this collection is on and (OUT) the version the manager is actually in + * @return true if the access can be allowed at the provided version + */ + bool trySetVersion( const string& ns , ConfigVersion& version ); + void appendInfo( BSONObjBuilder& b ); - - ChunkMatcherPtr getChunkMatcher( const string& ns ); - + + // querying support + + bool needShardChunkManager( const string& ns ) const; + ShardChunkManagerPtr getShardChunkManager( const string& ns ); + + // chunk migrate and split support + + /** + * Creates and installs a new chunk manager for a given collection by "forgetting" about one of its chunks. + * The new manager uses the provided version, which has to be higher than the current manager's. + * One exception: if the forgotten chunk is the last one in this shard for the collection, version has to be 0. + * + * If it runs successfully, clients need to grab the new version to access the collection. 
+ * + * @param ns the collection + * @param min max the chunk to eliminate from the current manager + * @param version at which the new manager should be at + */ + void donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ); + + /** + * Creates and installs a new chunk manager for a given collection by reclaiming a previously donated chunk. + * The previous manager's version has to be provided. + * + * If it runs successfully, clients that became stale by the previous donateChunk will be able to access the + * collection again. + * + * @param ns the collection + * @param min max the chunk to reclaim and add to the current manager + * @param version at which the new manager should be at + */ + void undoDonateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ); + + /** + * Creates and installs a new chunk manager for a given collection by splitting one of its chunks in two or more. + * The version for the first split chunk should be provided. The subsequent chunks' version would be the latter with the + * minor portion incremented. + * + * The effect on clients will depend on the version used. If the major portion is the same as the current shards, + * clients shouldn't perceive the split. + * + * @param ns the collection + * @param min max the chunk that should be split + * @param splitKeys point in which to split + * @param version at which the new manager should be at + */ + void splitChunk( const string& ns , const BSONObj& min , const BSONObj& max , const vector& splitKeys , + ShardChunkVersion version ); + bool inCriticalMigrateSection(); + private: - bool _enabled; - + string _configServer; - + string _shardName; string _shardHost; - mongo::mutex _mutex; - NSVersionMap _versions; - map _chunks; + // protects state below + mutable mongo::mutex _mutex; + + // map from a namespace into the ensemble of chunk ranges that are stored in this mongod + // a ShardChunkManager carries all state we need for a collection at this shard, including its version information + typedef map ChunkManagersMap; + ChunkManagersMap _chunks; }; - + extern ShardingState shardingState; - // -------------- - // --- per connection --- - // -------------- - + /** + * one per connection from mongos + * holds version state for each namesapce + */ class ShardedConnectionInfo { public: ShardedConnectionInfo(); - + const OID& getID() const { return _id; } bool hasID() const { return _id.isSet(); } void setID( const OID& id ); - - ConfigVersion& getVersion( const string& ns ); // TODO: this is dangeroues + + const ConfigVersion getVersion( const string& ns ) const; void setVersion( const string& ns , const ConfigVersion& version ); - + static ShardedConnectionInfo* get( bool create ); static void reset(); - - bool inForceMode() const { - return _forceMode; + + bool inForceVersionOkMode() const { + return _forceVersionOk; } - - void enterForceMode(){ _forceMode = true; } - void leaveForceMode(){ _forceMode = false; } + + void enterForceVersionOkMode() { _forceVersionOk = true; } + void leaveForceVersionOkMode() { _forceVersionOk = false; } private: - + OID _id; NSVersionMap _versions; - bool _forceMode; + bool _forceVersionOk; // if this is true, then chunk version #s aren't check, and all ops are allowed static boost::thread_specific_ptr _tl; }; - struct ShardForceModeBlock { - ShardForceModeBlock(){ + struct ShardForceVersionOkModeBlock { + ShardForceVersionOkModeBlock() { info = ShardedConnectionInfo::get( false ); if ( info ) - 
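// Illustrative sketch (not part of the patch): how the major/minor parts of a
// chunk version are expected to move for the operations documented above.
// ToyChunkVersion is a stand-in, not the real ShardChunkVersion; resetting the
// minor part on incMajor is an assumption made only for this sketch.
#include <cassert>
#include <cstdint>

struct ToyChunkVersion {
    uint32_t major, minor;
    ToyChunkVersion() : major(0), minor(0) {}
    void incMajor() { ++major; minor = 0; }    // assumption: a migrate bumps the major part
    void incMinor() { ++minor; }               // a split bumps only the minor part
    bool operator>(const ToyChunkVersion& o) const {
        return major != o.major ? major > o.major : minor > o.minor;
    }
};

int main() {
    ToyChunkVersion current;                 // version currently installed for a collection
    current.major = 3;

    ToyChunkVersion donated = current;       // donateChunk: the new manager must carry a higher version,
    donated.incMajor();                      // forcing stale clients to refresh (see moveChunk further below)
    assert(donated > current);

    ToyChunkVersion split = donated;         // splitChunk: second, third, ... pieces only increment the
    split.incMinor();                        // minor part, so readers at the same major are unaffected
    assert(split > donated);
    return 0;
}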
info->enterForceMode(); + info->enterForceVersionOkMode(); } - ~ShardForceModeBlock(){ + ~ShardForceVersionOkModeBlock() { if ( info ) - info->leaveForceMode(); + info->leaveForceVersionOkMode(); } ShardedConnectionInfo * info; }; - + // ----------------- // --- core --- // ----------------- unsigned long long extractVersion( BSONElement e , string& errmsg ); - + /** * @return true if we have any shard info for the ns */ bool haveLocalShardingInfo( const string& ns ); - + /** * @return true if the current threads shard version is ok, or not in sharded version */ @@ -160,15 +224,18 @@ namespace mongo { /** * @return true if we took care of the message and nothing else should be done */ - bool handlePossibleShardedMessage( Message &m, DbResponse * dbresponse ); + struct DbResponse; - void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ); + bool _handlePossibleShardedMessage( Message &m, DbResponse * dbresponse ); - // ----------------- - // --- writeback --- - // ----------------- + /** What does this do? document please? */ + inline bool handlePossibleShardedMessage( Message &m, DbResponse * dbresponse ) { + if( !shardingState.enabled() ) + return false; + return _handlePossibleShardedMessage(m, dbresponse); + } - /* queue a write back on a remote server for a failed write */ - void queueWriteBack( const string& remote , const BSONObj& o ); + void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ); + void aboutToDeleteForSharding( const Database* db , const DiskLoc& dl ); } diff --git a/s/d_migrate.cpp b/s/d_migrate.cpp index 8e9584c..2878276 100644 --- a/s/d_migrate.cpp +++ b/s/d_migrate.cpp @@ -25,18 +25,24 @@ #include "pch.h" #include #include +#include #include "../db/commands.h" #include "../db/jsobj.h" #include "../db/dbmessage.h" #include "../db/query.h" #include "../db/cmdline.h" +#include "../db/queryoptimizer.h" +#include "../db/btree.h" +#include "../db/repl_block.h" +#include "../db/dur.h" #include "../client/connpool.h" #include "../client/distlock.h" #include "../util/queue.h" #include "../util/unittest.h" +#include "../util/processinfo.h" #include "shard.h" #include "d_logic.h" @@ -49,131 +55,185 @@ namespace mongo { class MoveTimingHelper { public: - MoveTimingHelper( const string& where , const string& ns , BSONObj min , BSONObj max ) - : _where( where ) , _ns( ns ){ - _next = 1; + MoveTimingHelper( const string& where , const string& ns , BSONObj min , BSONObj max , int total ) + : _where( where ) , _ns( ns ) , _next( 0 ) , _total( total ) { + _nextNote = 0; _b.append( "min" , min ); _b.append( "max" , max ); } - ~MoveTimingHelper(){ - configServer.logChange( (string)"moveChunk." + _where , _ns, _b.obj() ); + ~MoveTimingHelper() { + // even if logChange doesn't throw, bson does + // sigh + try { + if ( _next != _total ) { + note( "aborted" ); + } + configServer.logChange( (string)"moveChunk." 
+ _where , _ns, _b.obj() ); + } + catch ( const std::exception& e ) { + log( LL_WARNING ) << "couldn't record timing for moveChunk '" << _where << "': " << e.what() << endl; + } } - - void done( int step ){ - assert( step == _next++ ); - + + void done( int step ) { + assert( step == ++_next ); + assert( step <= _total ); + stringstream ss; ss << "step" << step; string s = ss.str(); - + CurOp * op = cc().curop(); if ( op ) op->setMessage( s.c_str() ); - else + else log( LL_WARNING ) << "op is null in MoveTimingHelper::done" << endl; - + _b.appendNumber( s , _t.millis() ); _t.reset(); + +#if 0 + // debugging for memory leak? + ProcessInfo pi; + ss << " v:" << pi.getVirtualMemorySize() + << " r:" << pi.getResidentSize(); + log() << ss.str() << endl; +#endif } - - + + + void note( const string& s ) { + string field = "note"; + if ( _nextNote > 0 ) { + StringBuilder buf; + buf << "note" << _nextNote; + field = buf.str(); + } + _nextNote++; + + _b.append( field , s ); + } + private: Timer _t; string _where; string _ns; - + int _next; - + int _total; // expected # of steps + int _nextNote; + BSONObjBuilder _b; + }; struct OldDataCleanup { + static AtomicUInt _numThreads; // how many threads are doing async cleanusp + string ns; BSONObj min; BSONObj max; set initial; - void doRemove(){ - ShardForceModeBlock sf; + + OldDataCleanup(){ + _numThreads++; + } + OldDataCleanup( const OldDataCleanup& other ) { + ns = other.ns; + min = other.min.getOwned(); + max = other.max.getOwned(); + initial = other.initial; + _numThreads++; + } + ~OldDataCleanup(){ + _numThreads--; + } + + void doRemove() { + ShardForceVersionOkModeBlock sf; writelock lk(ns); RemoveSaver rs("moveChunk",ns,"post-cleanup"); long long num = Helpers::removeRange( ns , min , max , true , false , cmdLine.moveParanoia ? 
&rs : 0 ); log() << "moveChunk deleted: " << num << endl; } + }; + AtomicUInt OldDataCleanup::_numThreads = 0; + static const char * const cleanUpThreadName = "cleanupOldData"; - - void _cleanupOldData( OldDataCleanup cleanup ){ + + void _cleanupOldData( OldDataCleanup cleanup ) { Client::initThread( cleanUpThreadName ); log() << " (start) waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << " # cursors:" << cleanup.initial.size() << endl; int loops = 0; Timer t; - while ( t.seconds() < 900 ){ // 15 minutes + while ( t.seconds() < 900 ) { // 15 minutes assert( dbMutex.getState() == 0 ); sleepmillis( 20 ); - + set now; - ClientCursor::find( cleanup.ns , now ); - + ClientCursor::find( cleanup.ns , now ); + set left; - for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ){ + for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ) { CursorId id = *i; if ( now.count(id) ) left.insert( id ); } - + if ( left.size() == 0 ) break; cleanup.initial = left; - - if ( ( loops++ % 200 ) == 0 ){ + + if ( ( loops++ % 200 ) == 0 ) { log() << " (looping " << loops << ") waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << " # cursors:" << cleanup.initial.size() << endl; - + stringstream ss; - for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ){ + for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ) { CursorId id = *i; ss << id << " "; } log() << " cursors: " << ss.str() << endl; } } - + cleanup.doRemove(); cc().shutdown(); } - void cleanupOldData( OldDataCleanup cleanup ){ + void cleanupOldData( OldDataCleanup cleanup ) { try { _cleanupOldData( cleanup ); } - catch ( std::exception& e ){ + catch ( std::exception& e ) { log() << " error cleaning old data:" << e.what() << endl; } - catch ( ... ){ + catch ( ... ) { log() << " unknown error cleaning old data" << endl; } } class ChunkCommandHelper : public Command { public: - ChunkCommandHelper( const char * name ) - : Command( name ){ + ChunkCommandHelper( const char * name ) + : Command( name ) { } - + virtual void help( stringstream& help ) const { - help << "internal should not be calling this directly" << endl; + help << "internal - should not be called directly" << endl; } virtual bool slaveOk() const { return false; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } }; - bool isInRange( const BSONObj& obj , const BSONObj& min , const BSONObj& max ){ + bool isInRange( const BSONObj& obj , const BSONObj& min , const BSONObj& max ) { BSONObj k = obj.extractFields( min, true ); return k.woCompare( min ) >= 0 && k.woCompare( max ) < 0; @@ -182,48 +242,57 @@ namespace mongo { class MigrateFromStatus { public: - - MigrateFromStatus() - : _mutex( "MigrateFromStatus" ){ + + MigrateFromStatus() : _m("MigrateFromStatus") { _active = false; _inCriticalSection = false; + _memoryUsed = 0; } - void start( string ns , const BSONObj& min , const BSONObj& max ){ + void start( string ns , const BSONObj& min , const BSONObj& max ) { + scoped_lock l(_m); // reads and writes _active + assert( ! _active ); - + assert( ! min.isEmpty() ); assert( ! 
max.isEmpty() ); assert( ns.size() ); - + _ns = ns; _min = min; _max = max; - - _deleted.clear(); - _reload.clear(); - + + assert( _cloneLocs.size() == 0 ); + assert( _deleted.size() == 0 ); + assert( _reload.size() == 0 ); + assert( _memoryUsed == 0 ); + _active = true; } - - void done(){ - if ( ! _active ) - return; - _active = false; - _inCriticalSection = false; - scoped_lock lk( _mutex ); + void done() { + readlock lk( _ns ); + _deleted.clear(); _reload.clear(); + _cloneLocs.clear(); + _memoryUsed = 0; + + scoped_lock l(_m); + _active = false; + _inCriticalSection = false; } - - void logOp( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ){ - if ( ! _active ) + + void logOp( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ) { + if ( ! _getActive() ) return; if ( _ns != ns ) return; - + + // no need to log if this is not an insertion, an update, or an actual deletion + // note: opstr 'db' isn't a deletion but a mention that a database exists (for replication + // machinery mostly) char op = opstr[0]; if ( op == 'n' || op =='c' || ( op == 'd' && opstr[1] == 'b' ) ) return; @@ -231,68 +300,68 @@ namespace mongo { BSONElement ide; if ( patt ) ide = patt->getField( "_id" ); - else + else ide = obj["_id"]; - - if ( ide.eoo() ){ + + if ( ide.eoo() ) { log( LL_WARNING ) << "logOpForSharding got mod with no _id, ignoring obj: " << obj << endl; return; } - + BSONObj it; - switch ( opstr[0] ){ - + switch ( opstr[0] ) { + case 'd': { - - if ( getThreadName() == cleanUpThreadName ){ + + if ( getThreadName() == cleanUpThreadName ) { // we don't want to xfer things we're cleaning // as then they'll be deleted on TO // which is bad return; } - + // can't filter deletes :( - scoped_lock lk( _mutex ); _deleted.push_back( ide.wrap() ); + _memoryUsed += ide.size() + 5; return; } - - case 'i': + + case 'i': it = obj; break; - - case 'u': - if ( ! Helpers::findById( cc() , _ns.c_str() , ide.wrap() , it ) ){ + + case 'u': + if ( ! Helpers::findById( cc() , _ns.c_str() , ide.wrap() , it ) ) { log( LL_WARNING ) << "logOpForSharding couldn't find: " << ide << " even though should have" << endl; return; } break; - + } - + if ( ! isInRange( it , _min , _max ) ) return; - - scoped_lock lk( _mutex ); + _reload.push_back( ide.wrap() ); + _memoryUsed += ide.size() + 5; } - void xfer( list * l , BSONObjBuilder& b , const char * name , long long& size , bool explode ){ + void xfer( list * l , BSONObjBuilder& b , const char * name , long long& size , bool explode ) { const long long maxSize = 1024 * 1024; - + if ( l->size() == 0 || size > maxSize ) return; - + BSONArrayBuilder arr(b.subarrayStart(name)); - - list::iterator i = l->begin(); - - while ( i != l->end() && size < maxSize ){ + + list::iterator i = l->begin(); + + while ( i != l->end() && size < maxSize ) { BSONObj t = *i; - if ( explode ){ + if ( explode ) { BSONObj it; - if ( Helpers::findById( cc() , _ns.c_str() , t, it ) ){ + if ( Helpers::findById( cc() , _ns.c_str() , t, it ) ) { arr.append( it ); size += it.objsize(); } @@ -303,12 +372,16 @@ namespace mongo { i = l->erase( i ); size += t.objsize(); } - + arr.done(); } - bool transferMods( string& errmsg , BSONObjBuilder& b ){ - if ( ! _active ){ + /** + * called from the dest of a migrate + * transfers mods from src to dest + */ + bool transferMods( string& errmsg , BSONObjBuilder& b ) { + if ( ! 
_getActive() ) { errmsg = "no active migration!"; return false; } @@ -318,8 +391,7 @@ { readlock rl( _ns ); Client::Context cx( _ns ); - - scoped_lock lk( _mutex ); + xfer( &_deleted , b , "deleted" , size , false ); xfer( &_reload , b , "reload" , size , true ); } @@ -329,45 +401,201 @@ return true; } - bool _inCriticalSection; + /** + * Get the disklocs that belong to the chunk migrated and sort them in _cloneLocs (to avoid seeking disk later) + * + * @param maxChunkSize number of bytes beyond which a chunk's base data (no indices) is considered too large to move + * @param errmsg filled with textual description of error if this call returns false + * @return false if approximate chunk size is too big to move or true otherwise + */ + bool storeCurrentLocs( long long maxChunkSize , string& errmsg , BSONObjBuilder& result ) { + readlock l( _ns ); + Client::Context ctx( _ns ); + NamespaceDetails *d = nsdetails( _ns.c_str() ); + if ( ! d ) { + errmsg = "ns not found, should be impossible"; + return false; + } + + BSONObj keyPattern; + // the copies are needed because the indexDetailsForRange destroys the input + BSONObj min = _min.copy(); + BSONObj max = _max.copy(); + IndexDetails *idx = indexDetailsForRange( _ns.c_str() , errmsg , min , max , keyPattern ); + if ( idx == NULL ) { + errmsg = "can't find index in storeCurrentLocs"; + return false; + } + + scoped_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout , + shared_ptr( new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 ) ) , + _ns ) ); + + // use the average object size to estimate how many objects a full chunk would carry + // do that while traversing the chunk's range using the sharding index, below + // there's a fair amount of slack before we determine a chunk is too large because object sizes will vary + unsigned long long maxRecsWhenFull; + long long avgRecSize; + const long long totalRecs = d->stats.nrecords; + if ( totalRecs > 0 ) { + avgRecSize = d->stats.datasize / totalRecs; + maxRecsWhenFull = maxChunkSize / avgRecSize; + maxRecsWhenFull = 130 * maxRecsWhenFull / 100; // slack + } + else { + avgRecSize = 0; + maxRecsWhenFull = numeric_limits::max(); + } + + // do a full traversal of the chunk and don't stop even if we think it is a large chunk + // we want the full record count so we can report it better in that case + bool isLargeChunk = false; + unsigned long long recCount = 0; + while ( cc->ok() ) { + DiskLoc dl = cc->currLoc(); + if ( ! isLargeChunk ) { + _cloneLocs.insert( dl ); + } + cc->advance(); + + // we can afford to yield here because any change to the base data that we might miss is already being + // queued and will be migrated in the 'transferMods' stage + if ( ! cc->yieldSometimes() ) { + break; + } + + if ( ++recCount > maxRecsWhenFull ) { + isLargeChunk = true; + } + } + + if ( isLargeChunk ) { + warning() << "can't move chunk of size (approx) " << recCount * avgRecSize + << " because maximum size allowed to move is " << maxChunkSize + << " ns: " << _ns << " " << _min << " -> " << _max + << endl; + result.appendBool( "chunkTooBig" , true ); + result.appendNumber( "chunkSize" , (long long)(recCount * avgRecSize) ); + errmsg = "chunk too big to move"; + return false; + } + + log() << "moveChunk number of documents: " << _cloneLocs.size() << endl; + return true; + } + + bool clone( string& errmsg , BSONObjBuilder& result ) { + if ( !
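// Illustrative sketch (not part of the patch): the "chunk too big to move" estimate
// computed in storeCurrentLocs above, isolated as plain arithmetic. The
// chunkTooBigToMove name and the example figures are hypothetical.
#include <cstdint>
#include <limits>

inline bool chunkTooBigToMove(uint64_t totalRecs, uint64_t collDataBytes,
                              uint64_t maxChunkSizeBytes, uint64_t recsInChunk) {
    uint64_t maxRecsWhenFull;
    if (totalRecs > 0) {
        uint64_t avgRecSize = collDataBytes / totalRecs;          // average document size in the collection
        maxRecsWhenFull = maxChunkSizeBytes / avgRecSize;         // documents a "full" chunk would hold
        maxRecsWhenFull = 130 * maxRecsWhenFull / 100;            // 30% slack, as in the code above
    }
    else {
        maxRecsWhenFull = std::numeric_limits<uint64_t>::max();   // empty collection: never too big
    }
    return recsInChunk > maxRecsWhenFull;
}

// e.g. 1,048,576 docs totalling 2 GiB -> avg 2 KiB/doc; with a 64 MiB max chunk size
// the cut-off is 130% of 32,768, i.e. about 42,600 documents in the chunk being moved.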
_getActive() ) { + errmsg = "not active"; + return false; + } + + readlock l( _ns ); + Client::Context ctx( _ns ); + + NamespaceDetails *d = nsdetails( _ns.c_str() ); + assert( d ); + + BSONArrayBuilder a( std::min( BSONObjMaxUserSize , (int)( ( 12 + d->averageObjectSize() )* _cloneLocs.size() ) ) ); + + set::iterator i = _cloneLocs.begin(); + for ( ; i!=_cloneLocs.end(); ++i ) { + DiskLoc dl = *i; + BSONObj o = dl.obj(); + + // use the builder size instead of accumulating 'o's size so that we take into consideration + // the overhead of BSONArray indices + if ( a.len() + o.objsize() + 1024 > BSONObjMaxUserSize ) { + break; + } + a.append( o ); + } + + result.appendArray( "objects" , a.arr() ); + _cloneLocs.erase( _cloneLocs.begin() , i ); + return true; + } + + void aboutToDelete( const Database* db , const DiskLoc& dl ) { + dbMutex.assertWriteLocked(); + + if ( ! _getActive() ) + return; + + if ( ! db->ownsNS( _ns ) ) + return; + + _cloneLocs.erase( dl ); + } + + long long mbUsed() const { return _memoryUsed / ( 1024 * 1024 ); } + + bool getInCriticalSection() const { scoped_lock l(_m); return _inCriticalSection; } + void setInCriticalSection( bool b ) { scoped_lock l(_m); _inCriticalSection = b; } + + bool isActive() const { return _getActive(); } private: - + mutable mongo::mutex _m; // protect _inCriticalSection and _active + bool _inCriticalSection; bool _active; string _ns; BSONObj _min; BSONObj _max; - list _reload; - list _deleted; + // disk locs yet to be transferred from here to the other side + // no locking needed because build by 1 thread in a read lock + // depleted by 1 thread in a read lock + // updates applied by 1 thread in a write lock + set _cloneLocs; + + list _reload; // objects that were modified that must be recloned + list _deleted; // objects deleted during clone that should be deleted later + long long _memoryUsed; // bytes in _reload + _deleted + + bool _getActive() const { scoped_lock l(_m); return _active; } + void _setActive( bool b ) { scoped_lock l(_m); _active = b; } - mongo::mutex _mutex; - } migrateFromStatus; - + struct MigrateStatusHolder { - MigrateStatusHolder( string ns , const BSONObj& min , const BSONObj& max ){ + MigrateStatusHolder( string ns , const BSONObj& min , const BSONObj& max ) { migrateFromStatus.start( ns , min , max ); } - ~MigrateStatusHolder(){ + ~MigrateStatusHolder() { migrateFromStatus.done(); } }; - void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ){ + void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ) { migrateFromStatus.logOp( opstr , ns , obj , patt ); } - class TransferModsCommand : public ChunkCommandHelper{ + void aboutToDeleteForSharding( const Database* db , const DiskLoc& dl ) { + migrateFromStatus.aboutToDelete( db , dl ); + } + + class TransferModsCommand : public ChunkCommandHelper { public: - TransferModsCommand() : ChunkCommandHelper( "_transferMods" ){} + TransferModsCommand() : ChunkCommandHelper( "_transferMods" ) {} - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { return migrateFromStatus.transferMods( errmsg, result ); } } transferModsCommand; + + class InitialCloneCommand : public ChunkCommandHelper { + public: + InitialCloneCommand() : ChunkCommandHelper( "_migrateClone" ) {} + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + return 
migrateFromStatus.clone( errmsg, result ); + } + } initialCloneCommand; + + /** * this is the main entry for moveChunk * called to initial a move @@ -376,20 +604,22 @@ namespace mongo { */ class MoveChunkCommand : public Command { public: - MoveChunkCommand() : Command( "moveChunk" ){} + MoveChunkCommand() : Command( "moveChunk" ) {} virtual void help( stringstream& help ) const { help << "should not be calling this directly" << endl; } virtual bool slaveOk() const { return false; } virtual bool adminOnly() const { return true; } - virtual LockType locktype() const { return NONE; } - - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + virtual LockType locktype() const { return NONE; } + + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { // 1. parse options // 2. make sure my view is complete and lock // 3. start migrate + // in a read lock, get all DiskLoc and sort so we can do as little seeking as possible + // tell to start transferring // 4. pause till migrate caught up // 5. LOCK // a) update my config, essentially locking @@ -398,10 +628,9 @@ namespace mongo { // d) logChange to config server // 6. wait for all current cursors to expire // 7. remove data locally - + // ------------------------------- - - + // 1. string ns = cmdObj.firstElement().str(); string to = cmdObj["to"].str(); @@ -409,38 +638,45 @@ namespace mongo { BSONObj min = cmdObj["min"].Obj(); BSONObj max = cmdObj["max"].Obj(); BSONElement shardId = cmdObj["shardId"]; - - if ( ns.empty() ){ + BSONElement maxSizeElem = cmdObj["maxChunkSizeBytes"]; + + if ( ns.empty() ) { errmsg = "need to specify namespace in command"; return false; } - - if ( to.empty() ){ - errmsg = "need to specify server to move shard to"; + + if ( to.empty() ) { + errmsg = "need to specify server to move chunk to"; return false; } - if ( from.empty() ){ - errmsg = "need to specify server to move shard from (redundat i know)"; + if ( from.empty() ) { + errmsg = "need to specify server to move chunk from"; return false; } - - if ( min.isEmpty() ){ + + if ( min.isEmpty() ) { errmsg = "need to specify a min"; return false; } - if ( max.isEmpty() ){ + if ( max.isEmpty() ) { errmsg = "need to specify a max"; return false; } - - if ( shardId.eoo() ){ + + if ( shardId.eoo() ) { errmsg = "need shardId"; return false; } - - if ( ! shardingState.enabled() ){ - if ( cmdObj["configdb"].type() != String ){ + + if ( maxSizeElem.eoo() || ! maxSizeElem.isNumber() ) { + errmsg = "need to specify maxChunkSizeBytes"; + return false; + } + const long long maxChunkSize = maxSizeElem.numberLong(); // in bytes + + if ( ! shardingState.enabled() ) { + if ( cmdObj["configdb"].type() != String ) { errmsg = "sharding not enabled"; return false; } @@ -449,78 +685,107 @@ namespace mongo { configServer.init( configdb ); } - MoveTimingHelper timing( "from" , ns , min , max ); + MoveTimingHelper timing( "from" , ns , min , max , 6 /* steps */); Shard fromShard( from ); Shard toShard( to ); - - log() << "got movechunk: " << cmdObj << endl; + + log() << "received moveChunk request: " << cmdObj << endl; timing.done(1); - // 2. - + + // 2. DistributedLock lockSetup( ConnectionString( shardingState.getConfigServer() , ConnectionString::SYNC ) , ns ); dist_lock_try dlk( &lockSetup , (string)"migrate-" + min.toString() ); - if ( ! dlk.got() ){ - errmsg = "someone else has the lock"; + if ( ! 
dlk.got() ) { + errmsg = "the collection's metadata lock is taken"; result.append( "who" , dlk.other() ); return false; } + BSONObj chunkInfo = BSON("min" << min << "max" << max << "from" << fromShard.getName() << "to" << toShard.getName()); + configServer.logChange( "moveChunk.start" , ns , chunkInfo ); + ShardChunkVersion maxVersion; string myOldShard; { ScopedDbConnection conn( shardingState.getConfigServer() ); - + BSONObj x = conn->findOne( ShardNS::chunk , Query( BSON( "ns" << ns ) ).sort( BSON( "lastmod" << -1 ) ) ); maxVersion = x["lastmod"]; - x = conn->findOne( ShardNS::chunk , shardId.wrap( "_id" ) ); - assert( x["shard"].type() ); - myOldShard = x["shard"].String(); - - if ( myOldShard != fromShard.getName() ){ - errmsg = "i'm out of date"; + BSONObj currChunk = conn->findOne( ShardNS::chunk , shardId.wrap( "_id" ) ); + assert( currChunk["shard"].type() ); + assert( currChunk["min"].type() ); + assert( currChunk["max"].type() ); + myOldShard = currChunk["shard"].String(); + conn.done(); + + BSONObj currMin = currChunk["min"].Obj(); + BSONObj currMax = currChunk["max"].Obj(); + if ( currMin.woCompare( min ) || currMax.woCompare( max ) ) { + errmsg = "boundaries are outdated (likely a split occurred)"; + result.append( "currMin" , currMin ); + result.append( "currMax" , currMax ); + result.append( "requestedMin" , min ); + result.append( "requestedMax" , max ); + + log( LL_WARNING ) << "aborted moveChunk because" << errmsg << ": " << min << "->" << max + << " is now " << currMin << "->" << currMax << endl; + return false; + } + + if ( myOldShard != fromShard.getName() ) { + errmsg = "location is outdated (likely balance or migrate occurred)"; result.append( "from" , fromShard.getName() ); result.append( "official" , myOldShard ); + + log( LL_WARNING ) << "aborted moveChunk because " << errmsg << ": chunk is at " << myOldShard + << " and not at " << fromShard.getName() << endl; return false; } - - if ( maxVersion < shardingState.getVersion( ns ) ){ - errmsg = "official version less than mine?";; + + if ( maxVersion < shardingState.getVersion( ns ) ) { + errmsg = "official version less than mine?"; result.appendTimestamp( "officialVersion" , maxVersion ); result.appendTimestamp( "myVersion" , shardingState.getVersion( ns ) ); + + log( LL_WARNING ) << "aborted moveChunk because " << errmsg << ": official " << maxVersion + << " mine: " << shardingState.getVersion(ns) << endl; return false; } - conn.done(); + // since this could be the first call that enable sharding we also make sure to have the chunk manager up to date + shardingState.gotShardName( myOldShard ); + ShardChunkVersion shardVersion; + shardingState.trySetVersion( ns , shardVersion /* will return updated */ ); + + log() << "moveChunk request accepted at version " << shardVersion << endl; } - + timing.done(2); - + // 3. MigrateStatusHolder statusHolder( ns , min , max ); { - dblock lk; - // this makes sure there wasn't a write inside the .cpp code we can miss - } - - { - - ScopedDbConnection conn( to ); - BSONObj res; - bool ok = conn->runCommand( "admin" , - BSON( "_recvChunkStart" << ns << - "from" << from << - "min" << min << - "max" << max << - "configServer" << configServer.modelServer() - ) , - res ); - conn.done(); + // this gets a read lock, so we know we have a checkpoint for mods + if ( ! migrateFromStatus.storeCurrentLocs( maxChunkSize , errmsg , result ) ) + return false; - if ( ! 
ok ){ - errmsg = "_recvChunkStart failed: "; + ScopedDbConnection connTo( to ); + BSONObj res; + bool ok = connTo->runCommand( "admin" , + BSON( "_recvChunkStart" << ns << + "from" << from << + "min" << min << + "max" << max << + "configServer" << configServer.modelServer() + ) , + res ); + connTo.done(); + + if ( ! ok ) { + errmsg = "moveChunk failed to engage TO-shard in the data transfer: "; assert( res["errmsg"].type() ); errmsg += res["errmsg"].String(); result.append( "cause" , res ); @@ -529,118 +794,275 @@ namespace mongo { } timing.done( 3 ); - - // 4. - for ( int i=0; i<86400; i++ ){ // don't want a single chunk move to take more than a day + + // 4. + for ( int i=0; i<86400; i++ ) { // don't want a single chunk move to take more than a day assert( dbMutex.getState() == 0 ); - sleepsecs( 1 ); + sleepsecs( 1 ); ScopedDbConnection conn( to ); BSONObj res; bool ok = conn->runCommand( "admin" , BSON( "_recvChunkStatus" << 1 ) , res ); res = res.getOwned(); conn.done(); - - log(0) << "_recvChunkStatus : " << res << endl; - - if ( ! ok || res["state"].String() == "fail" ){ - log( LL_ERROR ) << "_recvChunkStatus error : " << res << endl; - errmsg = "_recvChunkStatus error"; - result.append( "cause" ,res ); + + log(0) << "moveChunk data transfer progress: " << res << " my mem used: " << migrateFromStatus.mbUsed() << endl; + + if ( ! ok || res["state"].String() == "fail" ) { + log( LL_WARNING ) << "moveChunk error transfering data caused migration abort: " << res << endl; + errmsg = "data transfer error"; + result.append( "cause" , res ); return false; } if ( res["state"].String() == "steady" ) break; + if ( migrateFromStatus.mbUsed() > (500 * 1024 * 1024) ) { + // this is too much memory for us to use for this + // so we're going to abort the migrate + ScopedDbConnection conn( to ); + BSONObj res; + conn->runCommand( "admin" , BSON( "_recvChunkAbort" << 1 ) , res ); + res = res.getOwned(); + conn.done(); + error() << "aborting migrate because too much memory used res: " << res << endl; + errmsg = "aborting migrate because too much memory used"; + result.appendBool( "split" , true ); + return false; + } + killCurrentOp.checkForInterrupt(); } timing.done(4); // 5. - { + { // 5.a - migrateFromStatus._inCriticalSection = true; - ShardChunkVersion myVersion = maxVersion; + // we're under the collection lock here, so no other migrate can change maxVersion or ShardChunkManager state + migrateFromStatus.setInCriticalSection( true ); + ShardChunkVersion currVersion = maxVersion; + ShardChunkVersion myVersion = currVersion; myVersion.incMajor(); - + { - dblock lk; + writelock lk( ns ); assert( myVersion > shardingState.getVersion( ns ) ); - shardingState.setVersion( ns , myVersion ); - assert( myVersion == shardingState.getVersion( ns ) ); - log() << "moveChunk locking myself to: " << myVersion << endl; + + // bump the chunks manager's version up and "forget" about the chunk being moved + // this is not the commit point but in practice the state in this shard won't until the commit it done + shardingState.donateChunk( ns , min , max , myVersion ); } - + log() << "moveChunk setting version to: " << myVersion << endl; + // 5.b + // we're under the collection lock here, too, so we can undo the chunk donation because no other state change + // could be ongoing { BSONObj res; - ScopedDbConnection conn( to ); - bool ok = conn->runCommand( "admin" , - BSON( "_recvChunkCommit" << 1 ) , - res ); - conn.done(); - log() << "moveChunk commit result: " << res << endl; - if ( ! 
ok ){ - log() << "_recvChunkCommit failed: " << res << endl; + ScopedDbConnection connTo( to ); + bool ok = connTo->runCommand( "admin" , + BSON( "_recvChunkCommit" << 1 ) , + res ); + connTo.done(); + + if ( ! ok ) { + { + writelock lk( ns ); + + // revert the chunk manager back to the state before "forgetting" about the chunk + shardingState.undoDonateChunk( ns , min , max , currVersion ); + } + + log() << "movChunk migrate commit not accepted by TO-shard: " << res + << " resetting shard version to: " << currVersion << endl; + errmsg = "_recvChunkCommit failed!"; result.append( "cause" , res ); return false; } + + log() << "moveChunk migrate commit accepted by TO-shard: " << res << endl; } - + // 5.c - ScopedDbConnection conn( shardingState.getConfigServer() ); - - BSONObjBuilder temp; - temp.append( "shard" , toShard.getName() ); - temp.appendTimestamp( "lastmod" , myVersion ); - - conn->update( ShardNS::chunk , shardId.wrap( "_id" ) , BSON( "$set" << temp.obj() ) ); - - { - // update another random chunk - BSONObj x = conn->findOne( ShardNS::chunk , Query( BSON( "ns" << ns << "shard" << myOldShard ) ).sort( BSON( "lastmod" << -1 ) ) ); - if ( ! x.isEmpty() ){ - - BSONObjBuilder temp2; - myVersion.incMinor(); - - temp2.appendTimestamp( "lastmod" , myVersion ); - - shardingState.setVersion( ns , myVersion ); - - conn->update( ShardNS::chunk , x["_id"].wrap() , BSON( "$set" << temp2.obj() ) ); - - log() << "moveChunk updating self to: " << myVersion << endl; + + // version at which the next highest lastmod will be set + // if the chunk being moved is the last in the shard, nextVersion is that chunk's lastmod + // otherwise the highest version is from the chunk being bumped on the FROM-shard + ShardChunkVersion nextVersion; + + // we want to go only once to the configDB but perhaps change two chunks, the one being migrated and another + // local one (so to bump version for the entire shard) + // we use the 'applyOps' mechanism to group the two updates and make them safer + // TODO pull config update code to a module + + BSONObjBuilder cmdBuilder; + + BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) ); + { + // update for the chunk being moved + BSONObjBuilder op; + op.append( "op" , "u" ); + op.appendBool( "b" , false /* no upserting */ ); + op.append( "ns" , ShardNS::chunk ); + + BSONObjBuilder n( op.subobjStart( "o" ) ); + n.append( "_id" , Chunk::genID( ns , min ) ); + n.appendTimestamp( "lastmod" , myVersion /* same as used on donateChunk */ ); + n.append( "ns" , ns ); + n.append( "min" , min ); + n.append( "max" , max ); + n.append( "shard" , toShard.getName() ); + n.done(); + + BSONObjBuilder q( op.subobjStart( "o2" ) ); + q.append( "_id" , Chunk::genID( ns , min ) ); + q.done(); + + updates.append( op.obj() ); + } + + nextVersion = myVersion; + + // if we have chunks left on the FROM shard, update the version of one of them as well + // we can figure that out by grabbing the chunkManager installed on 5.a + // TODO expose that manager when installing it + + ShardChunkManagerPtr chunkManager = shardingState.getShardChunkManager( ns ); + if( chunkManager->getNumChunks() > 0 ) { + + // get another chunk on that shard + BSONObj lookupKey; + BSONObj bumpMin, bumpMax; + do { + chunkManager->getNextChunk( lookupKey , &bumpMin , &bumpMax ); + lookupKey = bumpMin; + } + while( bumpMin == min ); + + BSONObjBuilder op; + op.append( "op" , "u" ); + op.appendBool( "b" , false ); + op.append( "ns" , ShardNS::chunk ); + + nextVersion.incMinor(); // same as used on donateChunk + 
BSONObjBuilder n( op.subobjStart( "o" ) ); + n.append( "_id" , Chunk::genID( ns , bumpMin ) ); + n.appendTimestamp( "lastmod" , nextVersion ); + n.append( "ns" , ns ); + n.append( "min" , bumpMin ); + n.append( "max" , bumpMax ); + n.append( "shard" , fromShard.getName() ); + n.done(); + + BSONObjBuilder q( op.subobjStart( "o2" ) ); + q.append( "_id" , Chunk::genID( ns , bumpMin ) ); + q.done(); + + updates.append( op.obj() ); + + log() << "moveChunk updating self version to: " << nextVersion << " through " + << bumpMin << " -> " << bumpMax << " for collection '" << ns << "'" << endl; + + } + else { + + log() << "moveChunk moved last chunk out for collection '" << ns << "'" << endl; + } + + updates.done(); + + BSONArrayBuilder preCond( cmdBuilder.subarrayStart( "preCondition" ) ); + { + BSONObjBuilder b; + b.append( "ns" , ShardNS::chunk ); + b.append( "q" , BSON( "query" << BSON( "ns" << ns ) << "orderby" << BSON( "lastmod" << -1 ) ) ); + { + BSONObjBuilder bb( b.subobjStart( "res" ) ); + bb.appendTimestamp( "lastmod" , maxVersion ); + bb.done(); } - else { - //++myVersion; - shardingState.setVersion( ns , 0 ); + preCond.append( b.obj() ); + } + + preCond.done(); + + BSONObj cmd = cmdBuilder.obj(); + log(7) << "moveChunk update: " << cmd << endl; + + bool ok = false; + BSONObj cmdResult; + try { + ScopedDbConnection conn( shardingState.getConfigServer() ); + ok = conn->runCommand( "config" , cmd , cmdResult ); + conn.done(); + } + catch ( DBException& e ) { + ok = false; + BSONObjBuilder b; + e.getInfo().append( b ); + cmdResult = b.obj(); + } + + if ( ! ok ) { + + // this could be a blip in the connectivity + // wait out a few seconds and check if the commit request made it + // + // if the commit made it to the config, we'll see the chunk in the new shard and there's no action + // if the commit did not make it, currently the only way to fix this state is to bounce the mongod so + // that the old state (before migrating) be brought in + + warning() << "moveChunk commit outcome ongoing: " << cmd << " for command :" << cmdResult << endl; + sleepsecs( 10 ); + + try { + ScopedDbConnection conn( shardingState.getConfigServer() ); + + // look for the chunk in this shard whose version got bumped + // we assume that if that mod made it to the config, the applyOps was successful + BSONObj doc = conn->findOne( ShardNS::chunk , Query(BSON( "ns" << ns )).sort( BSON("lastmod" << -1))); + ShardChunkVersion checkVersion = doc["lastmod"]; + + if ( checkVersion == nextVersion ) { + log() << "moveChunk commit confirmed" << endl; + + } + else { + error() << "moveChunk commit failed: version is at" + << checkVersion << " instead of " << nextVersion << endl; + error() << "TERMINATING" << endl; + dbexit( EXIT_SHARDING_ERROR ); + } + + conn.done(); - log() << "moveChunk now i'm empty" << endl; + } + catch ( ... ) { + error() << "moveChunk failed to get confirmation of commit" << endl; + error() << "TERMINATING" << endl; + dbexit( EXIT_SHARDING_ERROR ); } } - conn.done(); - migrateFromStatus._inCriticalSection = false; + migrateFromStatus.setInCriticalSection( false ); + // 5.d - configServer.logChange( "moveChunk" , ns , BSON( "min" << min << "max" << max << - "from" << fromShard.getName() << - "to" << toShard.getName() ) ); + configServer.logChange( "moveChunk.commit" , ns , chunkInfo ); } - + migrateFromStatus.done(); timing.done(5); - - { // 6. + { + // 6. 
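// steps 6 and 7 below hand the donated range over to cleanup: ClientCursor::find() gathers the
// cursors currently open on this ns into c.initial; if any are open, cleanupOldData is forked on a
// background thread, presumably so the range is removed only after those cursors expire (per the
// step 6/7 comments earlier in this command), otherwise c.doRemove() deletes the migrated documents inline.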
OldDataCleanup c; c.ns = ns; c.min = min.getOwned(); c.max = max.getOwned(); ClientCursor::find( ns , c.initial ); - if ( c.initial.size() ){ + if ( c.initial.size() ) { log() << "forking for cleaning up chunk data" << endl; boost::thread t( boost::bind( &cleanupOldData , c ) ); } @@ -649,24 +1071,24 @@ namespace mongo { // 7. c.doRemove(); } - - + + } - timing.done(6); + timing.done(6); return true; - + } - + } moveChunkCmd; - bool ShardingState::inCriticalMigrateSection(){ - return migrateFromStatus._inCriticalSection; + bool ShardingState::inCriticalMigrateSection() { + return migrateFromStatus.getInCriticalSection(); } /* ----- below this are the "to" side commands - + command to initiate worker thread does initial clone @@ -679,71 +1101,74 @@ namespace mongo { class MigrateStatus { public: - - MigrateStatus(){ - active = false; - } - void prepare(){ + MigrateStatus() : m_active("MigrateStatus") { active = false; } + + void prepare() { + scoped_lock l(m_active); // reading and writing 'active' + assert( ! active ); state = READY; errmsg = ""; numCloned = 0; + clonedBytes = 0; numCatchup = 0; numSteady = 0; active = true; } - void go(){ + void go() { try { _go(); } - catch ( std::exception& e ){ + catch ( std::exception& e ) { state = FAIL; errmsg = e.what(); log( LL_ERROR ) << "migrate failed: " << e.what() << endl; } - catch ( ... ){ + catch ( ... ) { state = FAIL; errmsg = "UNKNOWN ERROR"; log( LL_ERROR ) << "migrate failed with unknown exception" << endl; } - active = false; + setActive( false ); } - - void _go(){ - assert( active ); + + void _go() { + assert( getActive() ); assert( state == READY ); assert( ! min.isEmpty() ); assert( ! max.isEmpty() ); - - MoveTimingHelper timing( "to" , ns , min , max ); - + + MoveTimingHelper timing( "to" , ns , min , max , 5 /* steps */ ); + ScopedDbConnection conn( from ); conn->getLastError(); // just test connection - { // 1. copy indexes + { + // 1. copy indexes auto_ptr indexes = conn->getIndexes( ns ); vector all; - while ( indexes->more() ){ + while ( indexes->more() ) { all.push_back( indexes->next().getOwned() ); } - + writelock lk( ns ); Client::Context ct( ns ); - + string system_indexes = cc().database()->name + ".system.indexes"; - for ( unsigned i=0; i cursor = conn->query( ns , Query().minKey( min ).maxKey( max ) , /* QueryOption_Exhaust */ 0 ); - assert( cursor.get() ); - while ( cursor->more() ){ - BSONObj o = cursor->next().getOwned(); - { - writelock lk( ns ); - Helpers::upsert( ns , o ); + + while ( true ) { + BSONObj res; + if ( ! conn->runCommand( "admin" , BSON( "_migrateClone" << 1 ) , res ) ) { + state = FAIL; + errmsg = "_migrateClone failed: "; + errmsg += res.toString(); + error() << errmsg << endl; + conn.done(); + return; + } + + BSONObj arr = res["objects"].Obj(); + int thisTime = 0; + + BSONObjIterator i( arr ); + while( i.more() ) { + BSONObj o = i.next().Obj(); + { + writelock lk( ns ); + Helpers::upsert( ns , o ); + } + thisTime++; + numCloned++; + clonedBytes += o.objsize(); } - numCloned++; + + if ( thisTime == 0 ) + break; } timing.done(3); } - - { // 4. do bulk of mods + + // if running on a replicated system, we'll need to flush the docs we cloned to the secondaries + ReplTime lastOpApplied; + + { + // 4. do bulk of mods state = CATCHUP; - while ( true ){ + while ( true ) { BSONObj res; - if ( ! conn->runCommand( "admin" , BSON( "_transferMods" << 1 ) , res ) ){ + if ( ! 
conn->runCommand( "admin" , BSON( "_transferMods" << 1 ) , res ) ) { state = FAIL; errmsg = "_transferMods failed: "; errmsg += res.toString(); @@ -784,18 +1234,26 @@ namespace mongo { } if ( res["size"].number() == 0 ) break; - - apply( res ); + + apply( res , &lastOpApplied ); + + if ( state == ABORT ) { + timing.note( "aborted" ); + return; + } } timing.done(4); } - - { // 5. wait for commit + + { + // 5. wait for commit + Timer timeWaitingForCommit; + state = STEADY; - while ( state == STEADY || state == COMMIT_START ){ + while ( state == STEADY || state == COMMIT_START ) { BSONObj res; - if ( ! conn->runCommand( "admin" , BSON( "_transferMods" << 1 ) , res ) ){ + if ( ! conn->runCommand( "admin" , BSON( "_transferMods" << 1 ) , res ) ) { log() << "_transferMods failed in STEADY state: " << res << endl; errmsg = res.toString(); state = FAIL; @@ -803,36 +1261,48 @@ namespace mongo { return; } - if ( res["size"].number() > 0 && apply( res ) ) + if ( res["size"].number() > 0 && apply( res , &lastOpApplied ) ) continue; - - if ( state == COMMIT_START ) + + if ( state == COMMIT_START && flushPendingWrites( lastOpApplied ) ) break; sleepmillis( 10 ); } - + + if ( state == ABORT ) { + timing.note( "aborted" ); + return; + } + + if ( timeWaitingForCommit.seconds() > 86400 ) { + state = FAIL; + errmsg = "timed out waiting for commit"; + return; + } + timing.done(5); } - + state = DONE; conn.done(); } - void status( BSONObjBuilder& b ){ - b.appendBool( "active" , active ); + void status( BSONObjBuilder& b ) { + b.appendBool( "active" , getActive() ); b.append( "ns" , ns ); b.append( "from" , from ); b.append( "min" , min ); b.append( "max" , max ); - + b.append( "state" , stateString() ); if ( state == FAIL ) b.append( "errmsg" , errmsg ); { BSONObjBuilder bb( b.subobjStart( "counts" ) ); bb.append( "cloned" , numCloned ); + bb.append( "clonedBytes" , clonedBytes ); bb.append( "catchup" , numCatchup ); bb.append( "steady" , numSteady ); bb.done(); @@ -841,17 +1311,22 @@ namespace mongo { } - bool apply( const BSONObj& xfer ){ + bool apply( const BSONObj& xfer , ReplTime* lastOpApplied ) { + ReplTime dummy; + if ( lastOpApplied == NULL ) { + lastOpApplied = &dummy; + } + bool didAnything = false; - - if ( xfer["deleted"].isABSONObj() ){ + + if ( xfer["deleted"].isABSONObj() ) { writelock lk(ns); Client::Context cx(ns); - + RemoveSaver rs( "moveChunk" , ns , "removedDuring" ); BSONObjIterator i( xfer["deleted"].Obj() ); - while ( i.more() ){ + while ( i.more() ) { BSONObj id = i.next().Obj(); // do not apply deletes if they do not belong to the chunk being migrated @@ -865,27 +1340,56 @@ namespace mongo { } Helpers::removeRange( ns , id , id, false , true , cmdLine.moveParanoia ? 
&rs : 0 ); + + *lastOpApplied = cx.getClient()->getLastOp(); didAnything = true; } } - - if ( xfer["reload"].isABSONObj() ){ + + if ( xfer["reload"].isABSONObj() ) { writelock lk(ns); Client::Context cx(ns); BSONObjIterator i( xfer["reload"].Obj() ); - while ( i.more() ){ + while ( i.more() ) { BSONObj it = i.next().Obj(); + Helpers::upsert( ns , it ); + + *lastOpApplied = cx.getClient()->getLastOp(); didAnything = true; } } return didAnything; } - - string stateString(){ - switch ( state ){ + + bool flushPendingWrites( const ReplTime& lastOpApplied ) { + // if replication is on, try to force enough secondaries to catch up + // TODO opReplicatedEnough should eventually honor priorities and geo-awareness + // for now, we try to replicate to a sensible number of secondaries + const int slaveCount = getSlaveCount() / 2 + 1; + if ( ! opReplicatedEnough( lastOpApplied , slaveCount ) ) { + log( LL_WARNING ) << "migrate commit attempt timed out contacting " << slaveCount + << " slaves for '" << ns << "' " << min << " -> " << max << endl; + return false; + } + log() << "migrate commit succeeded flushing to secondaries for '" << ns << "' " << min << " -> " << max << endl; + + { + readlock lk(ns); // commitNow() currently requires it + + // if durability is on, force a write to journal + if ( getDur().commitNow() ) { + log() << "migrate commit flushed to journal for '" << ns << "' " << min << " -> " << max << endl; + } + } + + return true; + } + + string stateString() { + switch ( state ) { case READY: return "ready"; case CLONE: return "clone"; case CATCHUP: return "catchup"; @@ -893,17 +1397,18 @@ namespace mongo { case COMMIT_START: return "commitStart"; case DONE: return "done"; case FAIL: return "fail"; + case ABORT: return "abort"; } assert(0); return ""; } - bool startCommit(){ + bool startCommit() { if ( state != STEADY ) return false; state = COMMIT_START; - - for ( int i=0; i<86400; i++ ){ + + for ( int i=0; i<86400; i++ ) { sleepmillis(1); if ( state == DONE ) return true; @@ -912,42 +1417,60 @@ namespace mongo { return false; } + void abort() { + state = ABORT; + errmsg = "aborted"; + } + + bool getActive() const { scoped_lock l(m_active); return active; } + void setActive( bool b ) { scoped_lock l(m_active); active = b; } + + mutable mongo::mutex m_active; bool active; - + string ns; string from; - + BSONObj min; BSONObj max; - + long long numCloned; + long long clonedBytes; long long numCatchup; long long numSteady; - enum State { READY , CLONE , CATCHUP , STEADY , COMMIT_START , DONE , FAIL } state; + enum State { READY , CLONE , CATCHUP , STEADY , COMMIT_START , DONE , FAIL , ABORT } state; string errmsg; - + } migrateStatus; - - void migrateThread(){ + + void migrateThread() { Client::initThread( "migrateThread" ); migrateStatus.go(); cc().shutdown(); } - + class RecvChunkStartCommand : public ChunkCommandHelper { public: - RecvChunkStartCommand() : ChunkCommandHelper( "_recvChunkStart" ){} + RecvChunkStartCommand() : ChunkCommandHelper( "_recvChunkStart" ) {} virtual LockType locktype() const { return WRITE; } // this is so don't have to do locking internally - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - - if ( migrateStatus.active ){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + + if ( migrateStatus.getActive() ) { errmsg = "migrate already in progress"; return false; } + if ( OldDataCleanup::_numThreads > 0 ) { + errmsg = + str::stream() + << "still waiting for a previous migrates data 
to get cleaned, can't accept new chunks, num threads: " + << OldDataCleanup::_numThreads; + return false; + } + if ( ! configServer.ok() ) configServer.init( cmdObj["configServer"].String() ); @@ -957,9 +1480,9 @@ namespace mongo { migrateStatus.from = cmdObj["from"].String(); migrateStatus.min = cmdObj["min"].Obj().getOwned(); migrateStatus.max = cmdObj["max"].Obj().getOwned(); - + boost::thread m( migrateThread ); - + result.appendBool( "started" , true ); return true; } @@ -968,20 +1491,20 @@ namespace mongo { class RecvChunkStatusCommand : public ChunkCommandHelper { public: - RecvChunkStatusCommand() : ChunkCommandHelper( "_recvChunkStatus" ){} + RecvChunkStatusCommand() : ChunkCommandHelper( "_recvChunkStatus" ) {} - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { migrateStatus.status( result ); return 1; } - + } recvChunkStatusCommand; class RecvChunkCommitCommand : public ChunkCommandHelper { public: - RecvChunkCommitCommand() : ChunkCommandHelper( "_recvChunkCommit" ){} - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + RecvChunkCommitCommand() : ChunkCommandHelper( "_recvChunkCommit" ) {} + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { bool ok = migrateStatus.startCommit(); migrateStatus.status( result ); return ok; @@ -989,10 +1512,22 @@ namespace mongo { } recvChunkCommitCommand; + class RecvChunkAbortCommand : public ChunkCommandHelper { + public: + RecvChunkAbortCommand() : ChunkCommandHelper( "_recvChunkAbort" ) {} + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + migrateStatus.abort(); + migrateStatus.status( result ); + return true; + } + + } recvChunkAboortCommand; + class IsInRangeTest : public UnitTest { public: - void run(){ + void run() { BSONObj min = BSON( "x" << 1 ); BSONObj max = BSON( "x" << 5 ); @@ -1002,6 +1537,8 @@ namespace mongo { assert( isInRange( BSON( "x" << 4 ) , min , max ) ); assert( ! isInRange( BSON( "x" << 5 ) , min , max ) ); assert( ! isInRange( BSON( "x" << 6 ) , min , max ) ); + + log(1) << "isInRangeTest passed" << endl; } } isInRangeTest; } diff --git a/s/d_split.cpp b/s/d_split.cpp index fdefc7e..0896803 100644 --- a/s/d_split.cpp +++ b/s/d_split.cpp @@ -1,4 +1,4 @@ -// d_split.cpp +// @file d_split.cpp /** * Copyright (C) 2008 10gen Inc. @@ -27,6 +27,13 @@ #include "../db/query.h" #include "../db/queryoptimizer.h" +#include "../client/connpool.h" +#include "../client/distlock.h" + +#include "chunk.h" // for static genID only +#include "config.h" +#include "d_logic.h" + namespace mongo { // TODO: Fold these checks into each command. 
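For orientation, a minimal client-side sketch of driving the splitVector command added below; the field values mirror the command's help text, while the connection target and the use of the "admin" database are assumptions of this sketch rather than anything this patch prescribes:

    // illustrative sketch only
    DBClientConnection conn;
    string connErr;
    if ( ! conn.connect( "shardhost:27018" , connErr ) )    // hypothetical shard address
        cout << "couldn't connect: " << connErr << endl;
    BSONObj res;
    conn.runCommand( "admin" ,                               // assumed invocation database
                     BSON( "splitVector" << "blog.post"
                           << "keyPattern" << BSON( "x" << 1 )
                           << "min" << BSON( "x" << 10 )
                           << "max" << BSON( "x" << 20 )
                           << "maxChunkSize" << 200 ) ,      // in MB, per the help text
                     res );
    // res["splitKeys"] holds the suggested split points, already formatted with field names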
@@ -43,19 +50,19 @@ namespace mongo { public: CmdMedianKey() : Command( "medianKey" ) {} virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << - "Internal command.\n" - "example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n" - "NOTE: This command may take a while to run"; + help << + "Internal command.\n" + "example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n" + "NOTE: This command may take a while to run"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { const char *ns = jsobj.getStringField( "medianKey" ); BSONObj min = jsobj.getObjectField( "min" ); BSONObj max = jsobj.getObjectField( "max" ); BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); - + Client::Context ctx( ns ); IndexDetails *id = cmdIndexDetailsForRange( ns, errmsg, min, max, keyPattern ); @@ -66,22 +73,22 @@ namespace mongo { int num = 0; NamespaceDetails *d = nsdetails(ns); int idxNo = d->idxNo(*id); - + // only yielding on firt half for now // after this it should be in ram, so 2nd should be fast { shared_ptr c( new BtreeCursor( d, idxNo, *id, min, max, false, 1 ) ); scoped_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); - while ( c->ok() ){ + while ( c->ok() ) { num++; c->advance(); if ( ! cc->yieldSometimes() ) break; } } - + num /= 2; - + BtreeCursor c( d, idxNo, *id, min, max, false, 1 ); for( ; num; c.advance(), --num ); @@ -99,15 +106,15 @@ namespace mongo { int x = median.woCompare( min , BSONObj() , false ); int y = median.woCompare( max , BSONObj() , false ); - if ( x == 0 || y == 0 ){ + if ( x == 0 || y == 0 ) { // its on an edge, ok } - else if ( x < 0 && y < 0 ){ + else if ( x < 0 && y < 0 ) { log( LL_ERROR ) << "median error (1) min: " << min << " max: " << max << " median: " << median << endl; errmsg = "median error 1"; return false; } - else if ( x > 0 && y > 0 ){ + else if ( x > 0 && y > 0 ) { log( LL_ERROR ) << "median error (2) min: " << min << " max: " << max << " median: " << median << endl; errmsg = "median error 2"; return false; @@ -117,95 +124,662 @@ namespace mongo { } } cmdMedianKey; - class SplitVector : public Command { - public: - SplitVector() : Command( "splitVector" , false ){} + class CheckShardingIndex : public Command { + public: + CheckShardingIndex() : Command( "checkShardingIndex" , false ) {} virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return READ; } virtual void help( stringstream &help ) const { - help << - "Internal command.\n" - "example: { splitVector : \"myLargeCollection\" , keyPattern : {x:1} , maxChunkSize : 200 }\n" - "maxChunkSize unit in MBs\n" - "NOTE: This command may take a while to run"; + help << "Internal command.\n"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ - const char* ns = jsobj.getStringField( "splitVector" ); + + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + + const char* ns = jsobj.getStringField( "checkShardingIndex" ); BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); - long long maxChunkSize = 0; - BSONElement maxSizeElem = jsobj[ "maxChunkSize" ]; - if ( ! 
maxSizeElem.eoo() ){ - maxChunkSize = maxSizeElem.numberLong() * 1<<20; - } else { - errmsg = "need to specify the desired max chunk size"; + // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern. + BSONObj min = jsobj.getObjectField( "min" ); + BSONObj max = jsobj.getObjectField( "max" ); + if ( min.isEmpty() && max.isEmpty() ) { + BSONObjBuilder minBuilder; + BSONObjBuilder maxBuilder; + BSONForEach(key, keyPattern) { + minBuilder.appendMinKey( key.fieldName() ); + maxBuilder.appendMaxKey( key.fieldName() ); + } + min = minBuilder.obj(); + max = maxBuilder.obj(); + } + else if ( min.isEmpty() || max.isEmpty() ) { + errmsg = "either provide both min and max or leave both empty"; return false; } - - Client::Context ctx( ns ); - BSONObjBuilder minBuilder; - BSONObjBuilder maxBuilder; - BSONForEach(key, keyPattern){ - minBuilder.appendMinKey( key.fieldName() ); - maxBuilder.appendMaxKey( key.fieldName() ); + Client::Context ctx( ns ); + NamespaceDetails *d = nsdetails( ns ); + if ( ! d ) { + errmsg = "ns not found"; + return false; } - BSONObj min = minBuilder.obj(); - BSONObj max = maxBuilder.obj(); IndexDetails *idx = cmdIndexDetailsForRange( ns , errmsg , min , max , keyPattern ); - if ( idx == NULL ){ + if ( idx == NULL ) { errmsg = "couldn't find index over splitting key"; return false; } - NamespaceDetails *d = nsdetails( ns ); - BtreeCursor c( d , d->idxNo(*idx) , *idx , min , max , false , 1 ); + BtreeCursor * bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 ); + shared_ptr c( bc ); + scoped_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); + if ( ! cc->ok() ) { + // range is empty + return true; + } - // We'll use the average object size and number of object to find approximately how many keys - // each chunk should have. We'll split a little smaller than the specificied by 'maxSize' - // assuming a recently sharded collectio is still going to grow. + // for now, the only check is that all shard keys are filled + // null is ok, + // TODO if $exist for nulls were picking the index, it could be used instead efficiently + while ( cc->ok() ) { + BSONObj currKey = c->currKey(); + + BSONObjIterator i( currKey ); + int n = 0; + while ( i.more() ) { + BSONElement key = i.next(); + n++; - const long long dataSize = d->datasize; - const long long recCount = d->nrecords; - long long keyCount = 0; - if (( dataSize > 0 ) && ( recCount > 0 )){ - const long long avgRecSize = dataSize / recCount; - keyCount = 90 * maxChunkSize / (100 * avgRecSize); + if ( key.type() && key.type() != jstNULL ) + continue; + + BSONObj obj = c->current(); + BSONObjIterator j( keyPattern ); + BSONElement real; + for ( int x=0; xprettyKey( currKey ) << " for doc: " << real["_id"]; + log() << "checkShardingIndex for '" << ns << "' failed: " << os.str() << endl; + + errmsg = os.str(); + return false; + } + cc->advance(); } - // We traverse the index and add the keyCount-th key to the result vector. If that key - // appeared in the vector before, we omit it. The assumption here is that all the - // instances of a key value live in the same chunk. + return true; + } + } cmdCheckShardingIndex; - Timer timer; - long long currCount = 0; - vector splitKeys; - BSONObj currKey; - while ( c.ok() ){ - currCount++; - if ( currCount > keyCount ){ - if ( ! 
currKey.isEmpty() && (currKey.woCompare( c.currKey() ) == 0 ) ) - continue; - - currKey = c.currKey(); - splitKeys.push_back( c.prettyKey( currKey ) ); - currCount = 0; + class SplitVector : public Command { + public: + SplitVector() : Command( "splitVector" , false ) {} + virtual bool slaveOk() const { return false; } + virtual LockType locktype() const { return READ; } + virtual void help( stringstream &help ) const { + help << + "Internal command.\n" + "examples:\n" + " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, maxChunkSize:200 }\n" + " maxChunkSize unit in MBs\n" + " May optionally specify 'maxSplitPoints' and 'maxChunkObjects' to avoid traversing the whole chunk\n" + " \n" + " { splitVector : \"blog.post\" , keyPattern:{x:1} , min:{x:10} , max:{x:20}, force: true }\n" + " 'force' will produce one split point even if data is small; defaults to false\n" + "NOTE: This command may take a while to run"; + } + + bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + + // + // 1.a We'll parse the parameters in two steps. First, make sure the we can use the split index to get + // a good approximation of the size of the chunk -- without needing to access the actual data. + // + + const char* ns = jsobj.getStringField( "splitVector" ); + BSONObj keyPattern = jsobj.getObjectField( "keyPattern" ); + + // If min and max are not provided use the "minKey" and "maxKey" for the sharding key pattern. + BSONObj min = jsobj.getObjectField( "min" ); + BSONObj max = jsobj.getObjectField( "max" ); + if ( min.isEmpty() && max.isEmpty() ) { + BSONObjBuilder minBuilder; + BSONObjBuilder maxBuilder; + BSONForEach(key, keyPattern) { + minBuilder.appendMinKey( key.fieldName() ); + maxBuilder.appendMaxKey( key.fieldName() ); } - c.advance(); + min = minBuilder.obj(); + max = maxBuilder.obj(); + } + else if ( min.isEmpty() || max.isEmpty() ) { + errmsg = "either provide both min and max or leave both empty"; + return false; } - ostringstream os; - os << "Finding the split vector for " << ns << " over "<< keyPattern; - logIfSlow( timer , os.str() ); + long long maxSplitPoints = 0; + BSONElement maxSplitPointsElem = jsobj[ "maxSplitPoints" ]; + if ( maxSplitPointsElem.isNumber() ) { + maxSplitPoints = maxSplitPointsElem.numberLong(); + } - // Warning: we are sending back an array of keys but are currently limited to - // 4MB work of 'result' size. This should be okay for now. + long long maxChunkObjects = 0; + BSONElement MaxChunkObjectsElem = jsobj[ "maxChunkObjects" ]; + if ( MaxChunkObjectsElem.isNumber() ) { + maxChunkObjects = MaxChunkObjectsElem.numberLong(); + } + + vector splitKeys; + + { + // Get the size estimate for this namespace + Client::Context ctx( ns ); + NamespaceDetails *d = nsdetails( ns ); + if ( ! d ) { + errmsg = "ns not found"; + return false; + } + + IndexDetails *idx = cmdIndexDetailsForRange( ns , errmsg , min , max , keyPattern ); + if ( idx == NULL ) { + errmsg = "couldn't find index over splitting key"; + return false; + } + + const long long recCount = d->stats.nrecords; + const long long dataSize = d->stats.datasize; + + // + // 1.b Now that we have the size estimate, go over the remaining parameters and apply any maximum size + // restrictions specified there. 
+ // + + // 'force'-ing a split is equivalent to having maxChunkSize be the size of the current chunk, i.e., the + // logic below will split that chunk in half + long long maxChunkSize = 0; + bool force = false; + { + BSONElement maxSizeElem = jsobj[ "maxChunkSize" ]; + BSONElement forceElem = jsobj[ "force" ]; + + if ( forceElem.trueValue() ) { + force = true; + maxChunkSize = dataSize; + + } + else if ( maxSizeElem.isNumber() ) { + maxChunkSize = maxSizeElem.numberLong() * 1<<20; + + } + else { + maxSizeElem = jsobj["maxChunkSizeBytes"]; + if ( maxSizeElem.isNumber() ) { + maxChunkSize = maxSizeElem.numberLong(); + } + } + + if ( maxChunkSize <= 0 ) { + errmsg = "need to specify the desired max chunk size (maxChunkSize or maxChunkSizeBytes)"; + return false; + } + } + + + // If there's not enough data for more than one chunk, no point continuing. + if ( dataSize < maxChunkSize || recCount == 0 ) { + vector emptyVector; + result.append( "splitKeys" , emptyVector ); + return true; + } + + log() << "request split points lookup for chunk " << ns << " " << min << " -->> " << max << endl; + + // We'll use the average object size and number of object to find approximately how many keys + // each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, if + // provided. + const long long avgRecSize = dataSize / recCount; + long long keyCount = maxChunkSize / (2 * avgRecSize); + if ( maxChunkObjects && ( maxChunkObjects < keyCount ) ) { + log() << "limiting split vector to " << maxChunkObjects << " (from " << keyCount << ") objects " << endl; + keyCount = maxChunkObjects; + } + + // + // 2. Traverse the index and add the keyCount-th key to the result vector. If that key + // appeared in the vector before, we omit it. The invariant here is that all the + // instances of a given key value live in the same chunk. + // + + Timer timer; + long long currCount = 0; + long long numChunks = 0; + + BtreeCursor * bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 ); + shared_ptr c( bc ); + scoped_ptr cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); + if ( ! cc->ok() ) { + errmsg = "can't open a cursor for splitting (desired range is possibly empty)"; + return false; + } + + // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed + // at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and + // split on the following key. + set tooFrequentKeys; + splitKeys.push_back( c->currKey().getOwned() ); + while ( 1 ) { + while ( cc->ok() ) { + currCount++; + BSONObj currKey = c->currKey(); + + DEV assert( currKey.woCompare( max ) <= 0 ); + + if ( currCount > keyCount ) { + // Do not use this split key if it is the same used in the previous split point. + if ( currKey.woCompare( splitKeys.back() ) == 0 ) { + tooFrequentKeys.insert( currKey.getOwned() ); + + } + else { + splitKeys.push_back( currKey.getOwned() ); + currCount = 0; + numChunks++; + + LOG(4) << "picked a split key: " << bc->prettyKey( currKey ) << endl; + } + + } + + cc->advance(); + + // Stop if we have enough split points. + if ( maxSplitPoints && ( numChunks >= maxSplitPoints ) ) { + log() << "max number of requested split points reached (" << numChunks + << ") before the end of chunk " << ns << " " << min << " -->> " << max + << endl; + break; + } + + if ( ! 
cc->yieldSometimes() ) { + // we were near the end and got pushed to it + // i think returning the splits we've already found is fine + + // don't use the btree cursor pointer to access keys beyond this point but ok + // to use it to format the keys we've got already + + break; + } + } + + if ( splitKeys.size() > 1 || ! force ) + break; + + force = false; + keyCount = currCount / 2; + currCount = 0; + log() << "splitVector doing another cycle because of force, keyCount now: " << keyCount << endl; + + c.reset( new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 ) ); + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); + } + + // + // 3. Format the result and issue any warnings about the data we gathered while traversing the + // index + // + + // Warn for keys that are more numerous than maxChunkSize allows. + for ( set<BSONObj>::const_iterator it = tooFrequentKeys.begin(); it != tooFrequentKeys.end(); ++it ) { + warning() << "chunk is larger than " << maxChunkSize + << " bytes because of key " << bc->prettyKey( *it ) << endl; + } + + // Remove the sentinel at the beginning before returning and add fieldnames. + splitKeys.erase( splitKeys.begin() ); + for ( vector<BSONObj>::iterator it = splitKeys.begin(); it != splitKeys.end() ; ++it ) { + *it = bc->prettyKey( *it ); + } + + if ( timer.millis() > cmdLine.slowMS ) { + warning() << "Finding the split vector for " << ns << " over "<< keyPattern + << " keyCount: " << keyCount << " numSplits: " << splitKeys.size() + << " lookedAt: " << currCount << " took " << timer.millis() << "ms" + << endl; + } + + // Warning: we are sending back an array of keys but are currently limited to + // 4MB worth of 'result' size. This should be okay for now. + + } result.append( "splitKeys" , splitKeys ); + return true; } } cmdSplitVector; + // ** temporary ** 2010-10-22 + // ChunkInfo is a helper to collect and log information about the chunks generated in splitChunk. + // It should hold the chunk state for this module only, while we don't have min/max key info per chunk on the + // mongod side. Do not build on this; it will go away. + struct ChunkInfo { + BSONObj min; + BSONObj max; + ShardChunkVersion lastmod; + + ChunkInfo() { } + ChunkInfo( BSONObj aMin , BSONObj aMax , ShardChunkVersion aVersion ) : min(aMin) , max(aMax) , lastmod(aVersion) {} + void appendShortVersion( const char* name, BSONObjBuilder& b ) const; + string toString() const; + }; + + void ChunkInfo::appendShortVersion( const char * name , BSONObjBuilder& b ) const { + BSONObjBuilder bb( b.subobjStart( name ) ); + bb.append( "min" , min ); + bb.append( "max" , max ); + bb.appendTimestamp( "lastmod" , lastmod ); + bb.done(); + } + + string ChunkInfo::toString() const { + ostringstream os; + os << "lastmod: " << lastmod.toString() << " min: " << min << " max: " << max << endl; + return os.str(); + } + // ** end temporary ** + + class SplitChunkCommand : public Command { + public: + SplitChunkCommand() : Command( "splitChunk" ) {} + virtual void help( stringstream& help ) const { + help << + "internal command usage only\n" + "example:\n" + " { splitChunk:\"db.foo\" , keyPattern: {a:1} , min : {a:100} , max: {a:200} , splitKeys : [ {a:150} , ... ] }"; + } + + virtual bool slaveOk() const { return false; } + virtual bool adminOnly() const { return true; } + virtual LockType locktype() const { return NONE; } + + bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + + // + // 1. 
check whether parameters passed to splitChunk are sound + // + + const string ns = cmdObj.firstElement().str(); + if ( ns.empty() ) { + errmsg = "need to specify namespace in command"; + return false; + } + + BSONObj keyPattern = cmdObj["keyPattern"].Obj(); + if ( keyPattern.isEmpty() ) { + errmsg = "need to specify the key pattern the collection is sharded over"; + return false; + } + + BSONObj min = cmdObj["min"].Obj(); + if ( min.isEmpty() ) { + errmsg = "need to specify the min key for the chunk"; + return false; + } + + BSONObj max = cmdObj["max"].Obj(); + if ( max.isEmpty() ) { + errmsg = "need to specify the max key for the chunk"; + return false; + } + + string from = cmdObj["from"].str(); + if ( from.empty() ) { + errmsg = "need to specify the server to split the chunk at"; + return false; + } + + BSONObj splitKeysElem = cmdObj["splitKeys"].Obj(); + if ( splitKeysElem.isEmpty() ) { + errmsg = "need to provide the split points to chunk over"; + return false; + } + vector<BSONObj> splitKeys; + BSONObjIterator it( splitKeysElem ); + while ( it.more() ) { + splitKeys.push_back( it.next().Obj().getOwned() ); + } + + BSONElement shardId = cmdObj["shardId"]; + if ( shardId.eoo() ) { + errmsg = "need to provide shardId"; + return false; + } + + // It is possible that this is the first sharded command this mongod is asked to perform. If so, + // start the sharding apparatus. We'd still be missing some more shard-related info but we'll get it + // in step 2. below. + if ( ! shardingState.enabled() ) { + if ( cmdObj["configdb"].type() != String ) { + errmsg = "sharding not enabled"; + return false; + } + string configdb = cmdObj["configdb"].String(); + shardingState.enable( configdb ); + configServer.init( configdb ); + } + + Shard myShard( from ); + + log() << "received splitChunk request: " << cmdObj << endl; + + // + // 2. lock the collection's metadata and get highest version for the current shard + // + + DistributedLock lockSetup( ConnectionString( shardingState.getConfigServer() , ConnectionString::SYNC) , ns ); + dist_lock_try dlk( &lockSetup, string("split-") + min.toString() ); + if ( ! dlk.got() ) { + errmsg = "the collection's metadata lock is taken"; + result.append( "who" , dlk.other() ); + return false; + } + + // TODO This is a check migrate does to the letter. Factor it out and share. 
2010-10-22 + + ShardChunkVersion maxVersion; + string shard; + ChunkInfo origChunk; + { + ScopedDbConnection conn( shardingState.getConfigServer() ); + + BSONObj x = conn->findOne( ShardNS::chunk , Query( BSON( "ns" << ns ) ).sort( BSON( "lastmod" << -1 ) ) ); + maxVersion = x["lastmod"]; + + BSONObj currChunk = conn->findOne( ShardNS::chunk , shardId.wrap( "_id" ) ).getOwned(); + assert( currChunk["shard"].type() ); + assert( currChunk["min"].type() ); + assert( currChunk["max"].type() ); + shard = currChunk["shard"].String(); + conn.done(); + + BSONObj currMin = currChunk["min"].Obj(); + BSONObj currMax = currChunk["max"].Obj(); + if ( currMin.woCompare( min ) || currMax.woCompare( max ) ) { + errmsg = "chunk boundaries are outdated (likely a split occurred)"; + result.append( "currMin" , currMin ); + result.append( "currMax" , currMax ); + result.append( "requestedMin" , min ); + result.append( "requestedMax" , max ); + + log( LL_WARNING ) << "aborted split because " << errmsg << ": " << min << "->" << max + << " is now " << currMin << "->" << currMax << endl; + return false; + } + + if ( shard != myShard.getName() ) { + errmsg = "location is outdated (likely balance or migrate occurred)"; + result.append( "from" , myShard.getName() ); + result.append( "official" , shard ); + + log( LL_WARNING ) << "aborted split because " << errmsg << ": chunk is at " << shard + << " and not at " << myShard.getName() << endl; + return false; + } + + if ( maxVersion < shardingState.getVersion( ns ) ) { + errmsg = "official version less than mine?"; + result.appendTimestamp( "officialVersion" , maxVersion ); + result.appendTimestamp( "myVersion" , shardingState.getVersion( ns ) ); + + log( LL_WARNING ) << "aborted split because " << errmsg << ": official " << maxVersion + << " mine: " << shardingState.getVersion(ns) << endl; + return false; + } + + origChunk.min = currMin.getOwned(); + origChunk.max = currMax.getOwned(); + origChunk.lastmod = currChunk["lastmod"]; + + // since this could be the first call that enable sharding we also make sure to have the chunk manager up to date + shardingState.gotShardName( shard ); + ShardChunkVersion shardVersion; + shardingState.trySetVersion( ns , shardVersion /* will return updated */ ); + + log() << "splitChunk accepted at version " << shardVersion << endl; + + } + + // + // 3. create the batch of updates to metadata ( the new chunks ) to be applied via 'applyOps' command + // + + BSONObjBuilder logDetail; + origChunk.appendShortVersion( "before" , logDetail ); + log(1) << "before split on " << origChunk << endl; + vector newChunks; + + ShardChunkVersion myVersion = maxVersion; + BSONObj startKey = min; + splitKeys.push_back( max ); // makes it easier to have 'max' in the next loop. remove later. 
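// the builders below assemble a single applyOps command for the config server, roughly of the form:
//   { applyOps : [ { op: "u" , b: true , ns: "config.chunks" ,
//                    o  : { _id: Chunk::genID(ns,startKey) , lastmod: <bumped version> ,
//                           ns: <ns> , min: <startKey> , max: <endKey> , shard: <shard> } ,
//                    o2 : { _id: Chunk::genID(ns,startKey) } } ,
//                  ... one entry per new chunk ... ] ,
//     preCondition : [ { ns: "config.chunks" ,
//                        q  : { query: { ns: <ns> } , orderby: { lastmod: -1 } } ,
//                        res: { lastmod: <maxVersion> } } ] }
// so all the chunk rewrites land together, and only if nobody else bumped the collection's highest
// lastmod in the meantime.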
+ + BSONObjBuilder cmdBuilder; + BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) ); + + for ( vector::const_iterator it = splitKeys.begin(); it != splitKeys.end(); ++it ) { + BSONObj endKey = *it; + + // splits only update the 'minor' portion of version + myVersion.incMinor(); + + // build an update operation against the chunks collection of the config database with + // upsert true + BSONObjBuilder op; + op.append( "op" , "u" ); + op.appendBool( "b" , true ); + op.append( "ns" , ShardNS::chunk ); + + // add the modified (new) chunk infomation as the update object + BSONObjBuilder n( op.subobjStart( "o" ) ); + n.append( "_id" , Chunk::genID( ns , startKey ) ); + n.appendTimestamp( "lastmod" , myVersion ); + n.append( "ns" , ns ); + n.append( "min" , startKey ); + n.append( "max" , endKey ); + n.append( "shard" , shard ); + n.done(); + + // add the chunk's _id as the query part of the update statement + BSONObjBuilder q( op.subobjStart( "o2" ) ); + q.append( "_id" , Chunk::genID( ns , startKey ) ); + q.done(); + + updates.append( op.obj() ); + + // remember this chunk info for logging later + newChunks.push_back( ChunkInfo( startKey , endKey, myVersion ) ); + + startKey = endKey; + } + + updates.done(); + + { + BSONArrayBuilder preCond( cmdBuilder.subarrayStart( "preCondition" ) ); + BSONObjBuilder b; + b.append( "ns" , ShardNS::chunk ); + b.append( "q" , BSON( "query" << BSON( "ns" << ns ) << "orderby" << BSON( "lastmod" << -1 ) ) ); + { + BSONObjBuilder bb( b.subobjStart( "res" ) ); + bb.appendTimestamp( "lastmod" , maxVersion ); + bb.done(); + } + preCond.append( b.obj() ); + preCond.done(); + } + + // + // 4. apply the batch of updates to metadata and to the chunk manager + // + + BSONObj cmd = cmdBuilder.obj(); + + LOG(1) << "splitChunk update: " << cmd << endl; + + bool ok; + BSONObj cmdResult; + { + ScopedDbConnection conn( shardingState.getConfigServer() ); + ok = conn->runCommand( "config" , cmd , cmdResult ); + conn.done(); + } + + if ( ! ok ) { + stringstream ss; + ss << "saving chunks failed. cmd: " << cmd << " result: " << cmdResult; + error() << ss.str() << endl; + msgasserted( 13593 , ss.str() ); // assert(13593) + } + + // install a chunk manager with knowledge about newly split chunks in this shard's state + splitKeys.pop_back(); // 'max' was used as sentinel + maxVersion.incMinor(); + shardingState.splitChunk( ns , min , max , splitKeys , maxVersion ); + + // + // 5. logChanges + // + + // single splits are logged different than multisplits + if ( newChunks.size() == 2 ) { + newChunks[0].appendShortVersion( "left" , logDetail ); + newChunks[1].appendShortVersion( "right" , logDetail ); + configServer.logChange( "split" , ns , logDetail.obj() ); + + } + else { + BSONObj beforeDetailObj = logDetail.obj(); + BSONObj firstDetailObj = beforeDetailObj.getOwned(); + const int newChunksSize = newChunks.size(); + + for ( int i=0; i < newChunksSize; i++ ) { + BSONObjBuilder chunkDetail; + chunkDetail.appendElements( beforeDetailObj ); + chunkDetail.append( "number", i ); + chunkDetail.append( "of" , newChunksSize ); + newChunks[i].appendShortVersion( "chunk" , chunkDetail ); + configServer.logChange( "multi-split" , ns , chunkDetail.obj() ); + } + } + + return true; + } + } cmdSplitChunk; + } // namespace mongo diff --git a/s/d_state.cpp b/s/d_state.cpp index 3f13b79..11fbcef 100644 --- a/s/d_state.cpp +++ b/s/d_state.cpp @@ -1,4 +1,4 @@ -// d_state.cpp +// @file d_state.cpp /** * Copyright (C) 2008 10gen Inc. 
@@ -44,12 +44,12 @@ using namespace std; namespace mongo { // -----ShardingState START ---- - + ShardingState::ShardingState() - : _enabled(false) , _mutex( "ShardingState" ){ + : _enabled(false) , _mutex( "ShardingState" ) { } - - void ShardingState::enable( const string& server ){ + + void ShardingState::enable( const string& server ) { _enabled = true; assert( server.size() ); if ( _configServer.size() == 0 ) @@ -58,69 +58,177 @@ namespace mongo { assert( server == _configServer ); } } - - void ShardingState::gotShardName( const string& name ){ - if ( _shardName.size() == 0 ){ + + void ShardingState::gotShardName( const string& name ) { + scoped_lock lk(_mutex); + if ( _shardName.size() == 0 ) { + // TODO SERVER-2299 verify the name is sound w.r.t IPs _shardName = name; return; } - + if ( _shardName == name ) return; stringstream ss; - ss << "gotShardName different than what i had before " - << " before [" << _shardName << "] " - << " got [" << name << "] " - ; + ss << "gotShardName different than what i had before " + << " before [" << _shardName << "] " + << " got [" << name << "] " + ; uasserted( 13298 , ss.str() ); } - - void ShardingState::gotShardHost( const string& host ){ - if ( _shardHost.size() == 0 ){ + + void ShardingState::gotShardHost( string host ) { + scoped_lock lk(_mutex); + size_t slash = host.find( '/' ); + if ( slash != string::npos ) + host = host.substr( 0 , slash ); + + if ( _shardHost.size() == 0 ) { _shardHost = host; return; } - + if ( _shardHost == host ) return; stringstream ss; - ss << "gotShardHost different than what i had before " - << " before [" << _shardHost << "] " - << " got [" << host << "] " - ; + ss << "gotShardHost different than what i had before " + << " before [" << _shardHost << "] " + << " got [" << host << "] " + ; uasserted( 13299 , ss.str() ); } - - bool ShardingState::hasVersion( const string& ns ){ + + void ShardingState::resetShardingState() { scoped_lock lk(_mutex); - NSVersionMap::const_iterator i = _versions.find(ns); - return i != _versions.end(); + + _enabled = false; + _configServer.clear(); + _shardName.clear(); + _shardHost.clear(); + _chunks.clear(); } - - bool ShardingState::hasVersion( const string& ns , ConfigVersion& version ){ + + // TODO we shouldn't need three ways for checking the version. Fix this. 
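// note on the methods below: per-namespace state lives in _chunks ( ns -> ShardChunkManagerPtr ),
// guarded by _mutex. donateChunk / undoDonateChunk / splitChunk never mutate a manager in place;
// they install a fresh clone built by cloneMinus / clonePlus / cloneSplit, so a caller holding an
// older ShardChunkManagerPtr keeps a consistent snapshot. getVersion() reports the installed
// manager's version, or 0 when no manager exists for the namespace yet.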
+ bool ShardingState::hasVersion( const string& ns ) { scoped_lock lk(_mutex); - NSVersionMap::const_iterator i = _versions.find(ns); - if ( i == _versions.end() ) + + ChunkManagersMap::const_iterator it = _chunks.find(ns); + return it != _chunks.end(); + } + + bool ShardingState::hasVersion( const string& ns , ConfigVersion& version ) { + scoped_lock lk(_mutex); + + ChunkManagersMap::const_iterator it = _chunks.find(ns); + if ( it == _chunks.end() ) return false; - version = i->second; + + ShardChunkManagerPtr p = it->second; + version = p->getVersion(); return true; } - - ConfigVersion& ShardingState::getVersion( const string& ns ){ + + const ConfigVersion ShardingState::getVersion( const string& ns ) const { scoped_lock lk(_mutex); - return _versions[ns]; + + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + if ( it != _chunks.end() ) { + ShardChunkManagerPtr p = it->second; + return p->getVersion(); + } + else { + return 0; + } } - - void ShardingState::setVersion( const string& ns , const ConfigVersion& version ){ - scoped_lock lk(_mutex); - ConfigVersion& me = _versions[ns]; - assert( version == 0 || version > me ); - me = version; + + void ShardingState::donateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ) { + scoped_lock lk( _mutex ); + + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + assert( it != _chunks.end() ) ; + ShardChunkManagerPtr p = it->second; + + // empty shards should have version 0 + version = ( p->getNumChunks() > 1 ) ? version : ShardChunkVersion( 0 , 0 ); + + ShardChunkManagerPtr cloned( p->cloneMinus( min , max , version ) ); + _chunks[ns] = cloned; + } + + void ShardingState::undoDonateChunk( const string& ns , const BSONObj& min , const BSONObj& max , ShardChunkVersion version ) { + scoped_lock lk( _mutex ); + + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + assert( it != _chunks.end() ) ; + ShardChunkManagerPtr p( it->second->clonePlus( min , max , version ) ); + _chunks[ns] = p; + } + + void ShardingState::splitChunk( const string& ns , const BSONObj& min , const BSONObj& max , const vector& splitKeys , + ShardChunkVersion version ) { + scoped_lock lk( _mutex ); + + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + assert( it != _chunks.end() ) ; + ShardChunkManagerPtr p( it->second->cloneSplit( min , max , splitKeys , version ) ); + _chunks[ns] = p; } - void ShardingState::appendInfo( BSONObjBuilder& b ){ + void ShardingState::resetVersion( const string& ns ) { + scoped_lock lk( _mutex ); + + _chunks.erase( ns ); + } + + bool ShardingState::trySetVersion( const string& ns , ConfigVersion& version /* IN-OUT */ ) { + + // fast path - requested version is at the same version as this chunk manager + // + // cases: + // + this shard updated the version for a migrate's commit (FROM side) + // a client reloaded chunk state from config and picked the newest version + // + two clients reloaded + // one triggered the 'slow path' (below) + // when the second's request gets here, the version is already current + { + scoped_lock lk( _mutex ); + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + if ( it != _chunks.end() && it->second->getVersion() == version ) + return true; + } + + // slow path - requested version is different than the current chunk manager's, if one exists, so must check for + // newest version in the config server + // + // cases: + // + a chunk moved TO here + // (we don't bump up the version on the TO side but the commit to config does use higher 
version) + // a client reloads from config an issued the request + // + there was a take over from a secondary + // the secondary had no state (managers) at all, so every client request will fall here + // + a stale client request a version that's not current anymore + + const string c = (_configServer == _shardHost) ? "" /* local */ : _configServer; + ShardChunkManagerPtr p( new ShardChunkManager( c , ns , _shardName ) ); + { + scoped_lock lk( _mutex ); + + // since we loaded the chunk manager unlocked, other thread may have done the same + // make sure we keep the freshest config info only + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + if ( it == _chunks.end() || p->getVersion() >= it->second->getVersion() ) { + _chunks[ns] = p; + } + + ShardChunkVersion oldVersion = version; + version = p->getVersion(); + return oldVersion == version; + } + } + + void ShardingState::appendInfo( BSONObjBuilder& b ) { b.appendBool( "enabled" , _enabled ); if ( ! _enabled ) return; @@ -131,117 +239,56 @@ namespace mongo { { BSONObjBuilder bb( b.subobjStart( "versions" ) ); - + scoped_lock lk(_mutex); - for ( NSVersionMap::iterator i=_versions.begin(); i!=_versions.end(); ++i ){ - bb.appendTimestamp( i->first.c_str() , i->second ); + + for ( ChunkManagersMap::iterator it = _chunks.begin(); it != _chunks.end(); ++it ) { + ShardChunkManagerPtr p = it->second; + bb.appendTimestamp( it->first , p->getVersion() ); } bb.done(); } } - ChunkMatcherPtr ShardingState::getChunkMatcher( const string& ns ){ + bool ShardingState::needShardChunkManager( const string& ns ) const { if ( ! _enabled ) - return ChunkMatcherPtr(); - - if ( ! ShardedConnectionInfo::get( false ) ) - return ChunkMatcherPtr(); + return false; - ConfigVersion version; - { - scoped_lock lk( _mutex ); - version = _versions[ns]; - - if ( ! version ) - return ChunkMatcherPtr(); - - ChunkMatcherPtr p = _chunks[ns]; - if ( p && p->_version >= version ) - return p; - } + if ( ! ShardedConnectionInfo::get( false ) ) + return false; - BSONObj q; - { - BSONObjBuilder b; - b.append( "ns" , ns.c_str() ); - b.append( "shard" , BSON( "$in" << BSON_ARRAY( _shardHost << _shardName ) ) ); - q = b.obj(); - } + return true; + } - auto_ptr scoped; - auto_ptr direct; - - DBClientBase * conn; + ShardChunkManagerPtr ShardingState::getShardChunkManager( const string& ns ) { + scoped_lock lk( _mutex ); - if ( _configServer == _shardHost ){ - direct.reset( new DBDirectClient() ); - conn = direct.get(); + ChunkManagersMap::const_iterator it = _chunks.find( ns ); + if ( it == _chunks.end() ) { + return ShardChunkManagerPtr(); } else { - scoped.reset( new ScopedDbConnection( _configServer ) ); - conn = scoped->get(); + return it->second; } - - auto_ptr cursor = conn->query( "config.chunks" , Query(q).sort( "min" ) ); - assert( cursor.get() ); - if ( ! cursor->more() ){ - if ( scoped.get() ) - scoped->done(); - return ChunkMatcherPtr(); - } - - ChunkMatcherPtr p( new ChunkMatcher( version ) ); - - BSONObj min,max; - while ( cursor->more() ){ - BSONObj d = cursor->next(); - - if ( min.isEmpty() ){ - min = d["min"].Obj().getOwned(); - max = d["max"].Obj().getOwned(); - continue; - } - - if ( max == d["min"].Obj() ){ - max = d["max"].Obj().getOwned(); - continue; - } - - p->gotRange( min.getOwned() , max.getOwned() ); - min = d["min"].Obj().getOwned(); - max = d["max"].Obj().getOwned(); - } - assert( ! 
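trySetVersion above is effectively a double-checked reload: the fast path compares the requested version with the cached manager under _mutex, and only on a mismatch is a fresh ShardChunkManager loaded from the config server, outside the lock; the result is installed only if it is at least as new as whatever another thread may have loaded in the meantime. A rough standalone sketch of that shape, with the config-server load reduced to a callback and versions to plain integers (names here are illustrative):

    #include <functional>
    #include <map>
    #include <memory>
    #include <mutex>
    #include <string>

    struct Snapshot { int version; };

    class VersionedCache {
    public:
        explicit VersionedCache(std::function<std::shared_ptr<Snapshot>(const std::string&)> load)
            : _load(std::move(load)) {}

        // Returns true if, after a possible reload, the cached version matches 'version'.
        bool trySetVersion(const std::string& ns, int& version /* in-out */) {
            {   // fast path: the requested version is already current
                std::lock_guard<std::mutex> lk(_mutex);
                auto it = _cache.find(ns);
                if (it != _cache.end() && it->second->version == version)
                    return true;
            }

            // slow path: reload without holding the lock (this may be expensive)
            std::shared_ptr<Snapshot> fresh = _load(ns);

            std::lock_guard<std::mutex> lk(_mutex);
            // another thread may have reloaded too; keep only the freshest info
            auto it = _cache.find(ns);
            if (it == _cache.end() || fresh->version >= it->second->version)
                _cache[ns] = fresh;

            int requested = version;
            version = fresh->version;   // report what the cache actually holds now
            return requested == version;
        }
    private:
        std::function<std::shared_ptr<Snapshot>(const std::string&)> _load;
        std::mutex _mutex;
        std::map<std::string, std::shared_ptr<Snapshot>> _cache;
    };

Returning the freshest version through the in-out parameter mirrors how the caller learns which version the shard actually ended up with.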
min.isEmpty() ); - p->gotRange( min.getOwned() , max.getOwned() ); - - if ( scoped.get() ) - scoped->done(); - - { - scoped_lock lk( _mutex ); - _chunks[ns] = p; - } - - return p; } ShardingState shardingState; // -----ShardingState END ---- - + // -----ShardedConnectionInfo START ---- boost::thread_specific_ptr ShardedConnectionInfo::_tl; - ShardedConnectionInfo::ShardedConnectionInfo(){ - _forceMode = false; + ShardedConnectionInfo::ShardedConnectionInfo() { + _forceVersionOk = false; _id.clear(); } - - ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ){ + + ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ) { ShardedConnectionInfo* info = _tl.get(); - if ( ! info && create ){ + if ( ! info && create ) { log(1) << "entering shard mode for connection" << endl; info = new ShardedConnectionInfo(); _tl.reset( info ); @@ -249,44 +296,50 @@ namespace mongo { return info; } - void ShardedConnectionInfo::reset(){ + void ShardedConnectionInfo::reset() { _tl.reset(); } - ConfigVersion& ShardedConnectionInfo::getVersion( const string& ns ){ - return _versions[ns]; + const ConfigVersion ShardedConnectionInfo::getVersion( const string& ns ) const { + NSVersionMap::const_iterator it = _versions.find( ns ); + if ( it != _versions.end() ) { + return it->second; + } + else { + return 0; + } } - - void ShardedConnectionInfo::setVersion( const string& ns , const ConfigVersion& version ){ + + void ShardedConnectionInfo::setVersion( const string& ns , const ConfigVersion& version ) { _versions[ns] = version; } - void ShardedConnectionInfo::setID( const OID& id ){ + void ShardedConnectionInfo::setID( const OID& id ) { _id = id; } // -----ShardedConnectionInfo END ---- - unsigned long long extractVersion( BSONElement e , string& errmsg ){ - if ( e.eoo() ){ + unsigned long long extractVersion( BSONElement e , string& errmsg ) { + if ( e.eoo() ) { errmsg = "no version"; return 0; } - + if ( e.isNumber() ) return (unsigned long long)e.number(); - + if ( e.type() == Date || e.type() == Timestamp ) return e._numberLong(); - + errmsg = "version is not a numeric type"; return 0; } class MongodShardCommand : public Command { public: - MongodShardCommand( const char * n ) : Command( n ){ + MongodShardCommand( const char * n ) : Command( n ) { } virtual bool slaveOk() const { return false; @@ -295,12 +348,12 @@ namespace mongo { return true; } }; - - - bool haveLocalShardingInfo( const string& ns ){ + + + bool haveLocalShardingInfo( const string& ns ) { if ( ! shardingState.enabled() ) return false; - + if ( ! 
shardingState.hasVersion( ns ) ) return false; @@ -309,266 +362,332 @@ namespace mongo { class UnsetShardingCommand : public MongodShardCommand { public: - UnsetShardingCommand() : MongodShardCommand("unsetSharding"){} + UnsetShardingCommand() : MongodShardCommand("unsetSharding") {} virtual void help( stringstream& help ) const { help << " example: { unsetSharding : 1 } "; } - - virtual LockType locktype() const { return NONE; } - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + + virtual LockType locktype() const { return NONE; } + + virtual bool slaveOk() const { return true; } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { ShardedConnectionInfo::reset(); return true; - } - + } + } unsetShardingCommand; - class SetShardVersion : public MongodShardCommand { public: - SetShardVersion() : MongodShardCommand("setShardVersion"){} + SetShardVersion() : MongodShardCommand("setShardVersion") {} virtual void help( stringstream& help ) const { help << " example: { setShardVersion : 'alleyinsider.foo' , version : 1 , configdb : '' } "; } - - virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock - - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ - lastError.disableForCommand(); - ShardedConnectionInfo* info = ShardedConnectionInfo::get( true ); - - bool authoritative = cmdObj.getBoolField( "authoritative" ); - string configdb = cmdObj["configdb"].valuestrsafe(); - { // configdb checking - if ( configdb.size() == 0 ){ - errmsg = "no configdb"; - return false; - } + virtual LockType locktype() const { return NONE; } + + bool checkConfigOrInit( const string& configdb , bool authoritative , string& errmsg , BSONObjBuilder& result , bool locked=false ) const { + if ( configdb.size() == 0 ) { + errmsg = "no configdb"; + return false; + } + + if ( shardingState.enabled() ) { + if ( configdb == shardingState.getConfigServer() ) + return true; - if ( shardingState.enabled() ){ - if ( configdb != shardingState.getConfigServer() ){ - errmsg = "specified a different configdb!"; - return false; - } - } - else { - if ( ! authoritative ){ - result.appendBool( "need_authoritative" , true ); - errmsg = "first setShardVersion"; - return false; - } - shardingState.enable( configdb ); - configServer.init( configdb ); - } + result.append( "configdb" , BSON( "stored" << shardingState.getConfigServer() << + "given" << configdb ) ); + errmsg = "specified a different configdb!"; + return false; } - if ( cmdObj["shard"].type() == String ){ - shardingState.gotShardName( cmdObj["shard"].String() ); - shardingState.gotShardHost( cmdObj["shardHost"].String() ); + if ( ! authoritative ) { + result.appendBool( "need_authoritative" , true ); + errmsg = "first setShardVersion"; + return false; + } + + if ( locked ) { + shardingState.enable( configdb ); + configServer.init( configdb ); + return true; } - { // setting up ids - if ( cmdObj["serverID"].type() != jstOID ){ - // TODO: fix this - //errmsg = "need serverID to be an OID"; - //return 0; - } - else { - OID clientId = cmdObj["serverID"].__oid(); - if ( ! 
info->hasID() ){ - info->setID( clientId ); - } - else if ( clientId != info->getID() ){ - errmsg = "server id has changed!"; - return 0; - } - } + dblock lk; + return checkConfigOrInit( configdb , authoritative , errmsg , result , true ); + } + + bool checkMongosID( ShardedConnectionInfo* info, const BSONElement& id, string errmsg ) { + if ( id.type() != jstOID ) { + // TODO: fix this + //errmsg = "need serverID to be an OID"; + //return 0; + return true; + } + + OID clientId = id.__oid(); + if ( ! info->hasID() ) { + info->setID( clientId ); + return true; } - unsigned long long version = extractVersion( cmdObj["version"] , errmsg ); + if ( clientId != info->getID() ) { + errmsg = "server id has changed!"; + return false; + } + + return true; + } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + + // Steps + // 1. check basic config + // 2. extract params from command + // 3. fast check + // 4. slow check (LOCKS) + + // step 1 - if ( errmsg.size() ){ + lastError.disableForCommand(); + ShardedConnectionInfo* info = ShardedConnectionInfo::get( true ); + + bool authoritative = cmdObj.getBoolField( "authoritative" ); + + // check config server is ok or enable sharding + if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) ) return false; + + // check shard name/hosts are correct + if ( cmdObj["shard"].type() == String ) { + shardingState.gotShardName( cmdObj["shard"].String() ); + shardingState.gotShardHost( cmdObj["shardHost"].String() ); } + // make sure we have the mongos id for writebacks + if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) ) + return false; + + // step 2 + string ns = cmdObj["setShardVersion"].valuestrsafe(); - if ( ns.size() == 0 ){ - errmsg = "need to speciy fully namespace"; + if ( ns.size() == 0 ) { + errmsg = "need to speciy namespace"; return false; } + + const ConfigVersion version = extractVersion( cmdObj["version"] , errmsg ); + if ( errmsg.size() ) + return false; + + // step 3 + + const ConfigVersion oldVersion = info->getVersion(ns); + const ConfigVersion globalVersion = shardingState.getVersion(ns); + + result.appendTimestamp( "oldVersion" , oldVersion ); - ConfigVersion& oldVersion = info->getVersion(ns); - ConfigVersion& globalVersion = shardingState.getVersion(ns); + if ( globalVersion > 0 && version > 0 ) { + // this means there is no reset going on an either side + // so its safe to make some assuptions + + if ( version == globalVersion ) { + // mongos and mongod agree! + if ( oldVersion != version ) { + assert( oldVersion < globalVersion ); + info->setVersion( ns , version ); + } + return true; + } + + } + + // step 4 + dblock setShardVersionLock; // TODO: can we get rid of this?? - if ( oldVersion > 0 && globalVersion == 0 ){ + if ( oldVersion > 0 && globalVersion == 0 ) { // this had been reset - oldVersion = 0; + info->setVersion( ns , 0 ); } - if ( version == 0 && globalVersion == 0 ){ + if ( version == 0 && globalVersion == 0 ) { // this connection is cleaning itself - oldVersion = 0; - return 1; + info->setVersion( ns , 0 ); + return true; } - if ( version == 0 && globalVersion > 0 ){ - if ( ! authoritative ){ + if ( version == 0 && globalVersion > 0 ) { + if ( ! 
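checkConfigOrInit above is first invoked without the database lock; only when sharding state still has to be enabled does it take the dblock and call itself again with locked=true, so the common already-initialized case never touches the heavy lock. A small sketch of that check-then-lock-then-recheck shape under assumed names (LazyConfig, with _globalLock standing in for the mongod dblock):

    #include <atomic>
    #include <mutex>
    #include <string>

    class LazyConfig {
    public:
        bool checkOrInit(const std::string& configdb, std::string& errmsg, bool locked = false) {
            if (configdb.empty()) {
                errmsg = "no configdb";
                return false;
            }
            if (_enabled.load()) {                       // fast path: already initialized
                std::lock_guard<std::mutex> lk(_mutex);  // brief lock just to read the stored value
                if (configdb == _configServer)
                    return true;
                errmsg = "specified a different configdb!";
                return false;
            }
            if (!locked) {                               // escalate: take the big lock, then re-check
                std::lock_guard<std::mutex> lk(_globalLock);
                return checkOrInit(configdb, errmsg, /*locked=*/true);
            }
            std::lock_guard<std::mutex> lk(_mutex);      // one-time initialization
            _configServer = configdb;
            _enabled.store(true);
            return true;
        }
    private:
        std::mutex _globalLock;        // stands in for the mongod dblock
        std::mutex _mutex;             // protects _configServer
        std::atomic<bool> _enabled{false};
        std::string _configServer;
    };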
authoritative ) { result.appendBool( "need_authoritative" , true ); + result.append( "ns" , ns ); result.appendTimestamp( "globalVersion" , globalVersion ); - result.appendTimestamp( "oldVersion" , oldVersion ); errmsg = "dropping needs to be authoritative"; - return 0; + return false; } log() << "wiping data for: " << ns << endl; result.appendTimestamp( "beforeDrop" , globalVersion ); // only setting global version on purpose // need clients to re-find meta-data - globalVersion = 0; - oldVersion = 0; - return 1; + shardingState.resetVersion( ns ); + info->setVersion( ns , 0 ); + return true; } - if ( version < oldVersion ){ - errmsg = "you already have a newer version"; - result.appendTimestamp( "oldVersion" , oldVersion ); + if ( version < oldVersion ) { + errmsg = "you already have a newer version of collection '" + ns + "'"; + result.append( "ns" , ns ); result.appendTimestamp( "newVersion" , version ); result.appendTimestamp( "globalVersion" , globalVersion ); return false; } - - if ( version < globalVersion ){ - while ( shardingState.inCriticalMigrateSection() ){ + + if ( version < globalVersion ) { + while ( shardingState.inCriticalMigrateSection() ) { dbtemprelease r; sleepmillis(2); - log() << "waiting till out of critical section" << endl; + OCCASIONALLY log() << "waiting till out of critical section" << endl; } - errmsg = "going to older version for global"; + errmsg = "going to older version for global for collection '" + ns + "'"; + result.append( "ns" , ns ); result.appendTimestamp( "version" , version ); result.appendTimestamp( "globalVersion" , globalVersion ); return false; } - - if ( globalVersion == 0 && ! cmdObj.getBoolField( "authoritative" ) ){ + + if ( globalVersion == 0 && ! authoritative ) { // need authoritative for first look - result.appendBool( "need_authoritative" , true ); result.append( "ns" , ns ); - errmsg = "first time for this ns"; + result.appendBool( "need_authoritative" , true ); + errmsg = "first time for collection '" + ns + "'"; return false; } + Timer relockTime; { dbtemprelease unlock; - shardingState.getChunkMatcher( ns ); + + ShardChunkVersion currVersion = version; + if ( ! 
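When the requested version lags the shard's global version, the code above waits for the shard to leave its critical migration section, temporarily giving up the database lock (dbtemprelease), sleeping two milliseconds per iteration and logging only occasionally. A compact sketch of that wait-with-lock-released loop on standard primitives; the unique_lock and atomic flag stand in for the mongod lock machinery:

    #include <atomic>
    #include <chrono>
    #include <iostream>
    #include <mutex>
    #include <thread>

    // Wait until 'inCriticalSection' clears, giving up 'dbLock' (held on entry) while
    // sleeping so the migration commit we are waiting for can actually make progress.
    inline void waitOutCriticalSection(std::unique_lock<std::mutex>& dbLock,
                                       const std::atomic<bool>& inCriticalSection) {
        int iterations = 0;
        while (inCriticalSection.load()) {
            dbLock.unlock();                              // analogous to dbtemprelease
            std::this_thread::sleep_for(std::chrono::milliseconds(2));
            if (++iterations % 100 == 0)                  // throttled, like OCCASIONALLY
                std::cout << "waiting till out of critical section" << std::endl;
            dbLock.lock();
        }
    }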
shardingState.trySetVersion( ns , currVersion ) ) { + errmsg = str::stream() << "client version differs from config's for colleciton '" << ns << "'"; + result.append( "ns" , ns ); + result.appendTimestamp( "version" , version ); + result.appendTimestamp( "globalVersion" , currVersion ); + return false; + } } + if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) { + log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl; + } + + info->setVersion( ns , version ); + return true; + } - result.appendTimestamp( "oldVersion" , oldVersion ); - oldVersion = version; - globalVersion = version; + } setShardVersionCmd; - result.append( "ok" , 1 ); - return 1; - } - - } setShardVersion; - class GetShardVersion : public MongodShardCommand { public: - GetShardVersion() : MongodShardCommand("getShardVersion"){} + GetShardVersion() : MongodShardCommand("getShardVersion") {} virtual void help( stringstream& help ) const { help << " example: { getShardVersion : 'alleyinsider.foo' } "; } - - virtual LockType locktype() const { return NONE; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + virtual LockType locktype() const { return NONE; } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { string ns = cmdObj["getShardVersion"].valuestrsafe(); - if ( ns.size() == 0 ){ + if ( ns.size() == 0 ) { errmsg = "need to speciy fully namespace"; return false; } - + result.append( "configServer" , shardingState.getConfigServer() ); result.appendTimestamp( "global" , shardingState.getVersion(ns) ); - + ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); if ( info ) result.appendTimestamp( "mine" , info->getVersion(ns) ); - else + else result.appendTimestamp( "mine" , 0 ); - + return true; } - + } getShardVersion; class ShardingStateCmd : public MongodShardCommand { public: - ShardingStateCmd() : MongodShardCommand( "shardingState" ){} + ShardingStateCmd() : MongodShardCommand( "shardingState" ) {} virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { shardingState.appendInfo( result ); return true; } - + } shardingStateCmd; /** * @ return true if not in sharded mode or if version for this client is ok */ - bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ){ + bool shardVersionOk( const string& ns , bool isWriteOp , string& errmsg ) { if ( ! shardingState.enabled() ) return true; ShardedConnectionInfo* info = ShardedConnectionInfo::get( false ); - if ( ! info ){ + if ( ! info ) { // this means the client has nothing sharded // so this allows direct connections to do whatever they want // which i think is the correct behavior return true; } - - if ( info->inForceMode() ){ + + if ( info->inForceVersionOkMode() ) { return true; } - ConfigVersion version; - if ( ! shardingState.hasVersion( ns , version ) ){ + // TODO + // all collections at some point, be sharded or not, will have a version (and a ShardChunkManager) + // for now, we remove the sharding state of dropped collection + // so delayed request may come in. This has to be fixed. + ConfigVersion clientVersion = info->getVersion(ns); + ConfigVersion version; + if ( ! 
shardingState.hasVersion( ns , version ) && clientVersion == 0 ) { return true; } - ConfigVersion clientVersion = info->getVersion(ns); - if ( version == 0 && clientVersion > 0 ){ + if ( version == 0 && clientVersion > 0 ) { stringstream ss; ss << "collection was dropped or this shard no longer valied version: " << version << " clientVersion: " << clientVersion; errmsg = ss.str(); return false; } - + if ( clientVersion >= version ) return true; - - if ( clientVersion == 0 ){ + + if ( clientVersion == 0 ) { stringstream ss; ss << "client in sharded mode, but doesn't have version set for this collection: " << ns << " myVersion: " << version; errmsg = ss.str(); return false; } - if ( isWriteOp && version.majorVersion() == clientVersion.majorVersion() ){ - // this means there was just a split + if ( version.majorVersion() == clientVersion.majorVersion() ) { + // this means there was just a split // since on a split w/o a migrate this server is ok - // going to accept write + // going to accept return true; } @@ -578,51 +697,4 @@ namespace mongo { return false; } - // --- ChunkMatcher --- - - ChunkMatcher::ChunkMatcher( ConfigVersion version ) - : _version( version ){ - - } - - void ChunkMatcher::gotRange( const BSONObj& min , const BSONObj& max ){ - if (_key.isEmpty()){ - BSONObjBuilder b; - - BSONForEach(e, min) { - b.append(e.fieldName(), 1); - } - - _key = b.obj(); - } - - //TODO debug mode only? - assert(min.nFields() == _key.nFields()); - assert(max.nFields() == _key.nFields()); - - _map[min] = make_pair(min,max); - } - - bool ChunkMatcher::belongsToMe( const BSONObj& key , const DiskLoc& loc ) const { - if ( _map.size() == 0 ) - return false; - - BSONObj x = loc.obj().extractFields(_key); - - MyMap::const_iterator a = _map.upper_bound( x ); - if ( a != _map.begin() ) - a--; - - bool good = x.woCompare( a->second.first ) >= 0 && x.woCompare( a->second.second ) < 0; -#if 0 - if ( ! good ){ - cout << "bad: " << x << "\t" << a->second.first << "\t" << x.woCompare( a->second.first ) << "\t" << x.woCompare( a->second.second ) << endl; - for ( MyMap::const_iterator i=_map.begin(); i!=_map.end(); ++i ){ - cout << "\t" << i->first << "\t" << i->second.first << "\t" << i->second.second << endl; - } - } -#endif - return good; - } - } diff --git a/s/d_util.cpp b/s/d_util.cpp deleted file mode 100644 index a750fbc..0000000 --- a/s/d_util.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// util.cpp - -/** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see . 
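The final acceptance rule in shardVersionOk leans on the structure of a chunk version: per the comment above, an equal major component means no chunk migrated away since the client last refreshed, only splits happened, so the request can still be served here. A hedged sketch of that comparison, assuming the usual description of these timestamp-style versions as a 64-bit value with the major part in the high 32 bits; the packing and helper names are assumptions for illustration:

    #include <cstdint>

    // Chunk-style version assumed packed as (major << 32) | minor.
    using ChunkVersion = std::uint64_t;

    constexpr std::uint32_t majorOf(ChunkVersion v) { return static_cast<std::uint32_t>(v >> 32); }

    // true if a request carrying 'clientVersion' may run against a shard at 'shardVersion'
    inline bool versionOk(ChunkVersion shardVersion, ChunkVersion clientVersion) {
        if (clientVersion >= shardVersion)      // client is at least as new as the shard
            return true;
        if (clientVersion == 0)                 // client never set a version for this collection
            return false;
        // same major version: only splits happened since the client last looked,
        // so every document it targets is still owned by this shard
        return majorOf(shardVersion) == majorOf(clientVersion);
    }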
-*/ - - -/** - these are commands that live in mongod - mostly around shard management and checking - */ - -#include "pch.h" -#include "util.h" - -using namespace std; - -namespace mongo { - - bool checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative , int tryNumber ){ - // no-op in mongod - return false; - } - - void resetShardVersion( DBClientBase * conn ){ - // no-op in mongod - } - -} diff --git a/s/d_writeback.cpp b/s/d_writeback.cpp index a18e5d5..401e0aa 100644 --- a/s/d_writeback.cpp +++ b/s/d_writeback.cpp @@ -19,62 +19,105 @@ #include "pch.h" #include "../db/commands.h" -#include "../db/jsobj.h" -#include "../db/dbmessage.h" -#include "../db/query.h" - -#include "../client/connpool.h" - #include "../util/queue.h" -#include "shard.h" +#include "d_writeback.h" using namespace std; namespace mongo { - map< string , BlockingQueue* > writebackQueue; - mongo::mutex writebackQueueLock("sharding:writebackQueueLock"); + // ---------- WriteBackManager class ---------- + + // TODO init at mongod startup + WriteBackManager writeBackManager; + + WriteBackManager::WriteBackManager() : _writebackQueueLock("sharding:writebackQueueLock") { + } + + WriteBackManager::~WriteBackManager() { + } + + void WriteBackManager::queueWriteBack( const string& remote , const BSONObj& o ) { + getWritebackQueue( remote )->push( o ); + } - BlockingQueue* getWritebackQueue( const string& remote ){ - scoped_lock lk (writebackQueueLock ); - BlockingQueue*& q = writebackQueue[remote]; + BlockingQueue* WriteBackManager::getWritebackQueue( const string& remote ) { + scoped_lock lk ( _writebackQueueLock ); + BlockingQueue*& q = _writebackQueues[remote]; if ( ! q ) q = new BlockingQueue(); return q; } - - void queueWriteBack( const string& remote , const BSONObj& o ){ - getWritebackQueue( remote )->push( o ); + + bool WriteBackManager::queuesEmpty() const { + scoped_lock lk( _writebackQueueLock ); + for ( WriteBackQueuesMap::const_iterator it = _writebackQueues.begin(); it != _writebackQueues.end(); ++it ) { + const BlockingQueue* queue = it->second; + if (! 
queue->empty() ) { + return false; + } + } + return true; } + // ---------- admin commands ---------- + // Note, this command will block until there is something to WriteBack class WriteBackCommand : public Command { public: - virtual LockType locktype() const { return NONE; } + virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } - - WriteBackCommand() : Command( "writebacklisten" ){} + + WriteBackCommand() : Command( "writebacklisten" ) {} void help(stringstream& h) const { h<<"internal"; } - bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){ + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { BSONElement e = cmdObj.firstElement(); - if ( e.type() != jstOID ){ + if ( e.type() != jstOID ) { errmsg = "need oid as first value"; return 0; } - + + // get the command issuer's (a mongos) serverID const OID id = e.__oid(); - BSONObj z = getWritebackQueue(id.str())->blockingPop(); - log(1) << "WriteBackCommand got : " << z << endl; - - result.append( "data" , z ); - + + // the command issuer is blocked awaiting a response + // we want to do return at least at every 5 minutes so sockets don't timeout + BSONObj z; + if ( writeBackManager.getWritebackQueue(id.str())->blockingPop( z, 5 * 60 /* 5 minutes */ ) ) { + log(1) << "WriteBackCommand got : " << z << endl; + result.append( "data" , z ); + } + else { + result.appendBool( "noop" , true ); + } + return true; } } writeBackCommand; -} + class WriteBacksQueuedCommand : public Command { + public: + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + + WriteBacksQueuedCommand() : Command( "writeBacksQueued" ) {} + + void help(stringstream& help) const { + help << "Returns whether there are operations in the writeback queue at the time the command was called. " + << "This is an internal comand"; + } + + bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + result.appendBool( "hasOpsQueued" , ! writeBackManager.queuesEmpty() ); + return true; + } + + } writeBacksQueuedCommand; + +} // namespace mongo diff --git a/s/d_writeback.h b/s/d_writeback.h new file mode 100644 index 0000000..32f5b1c --- /dev/null +++ b/s/d_writeback.h @@ -0,0 +1,75 @@ +// @file d_writeback.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../pch.h" + +#include "../util/queue.h" + +namespace mongo { + + /* + * The WriteBackManager keeps one queue of pending operations per mongos. The operations get here + * if they were directed to a chunk that is no longer in this mongod server. The operations are + * "written back" to the mongos server per its request (command 'writebacklisten'). + * + * The class is thread safe. 
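The writebacklisten change above turns the listener into a bounded long poll: the mongos still blocks on its per-server queue, but blockingPop now takes a five-minute timeout so the command returns a noop before idle sockets time out. A self-contained sketch of a timed blocking pop built on std::condition_variable; this BlockingQueue is a simplified stand-in for the util/queue.h class used here:

    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <queue>

    template <typename T>
    class BlockingQueue {
    public:
        void push(T v) {
            {
                std::lock_guard<std::mutex> lk(_m);
                _q.push(std::move(v));
            }
            _cv.notify_one();
        }

        // Wait up to 'maxSeconds' for an element. Returns false on timeout so the
        // caller can send a "noop" reply and let the client re-issue the listen.
        bool blockingPop(T& out, int maxSeconds) {
            std::unique_lock<std::mutex> lk(_m);
            if (!_cv.wait_for(lk, std::chrono::seconds(maxSeconds),
                              [this] { return !_q.empty(); }))
                return false;                 // timed out, nothing queued
            out = std::move(_q.front());
            _q.pop();
            return true;
        }

        bool empty() const {
            std::lock_guard<std::mutex> lk(_m);
            return _q.empty();
        }
    private:
        mutable std::mutex _m;
        std::condition_variable _cv;
        std::queue<T> _q;
    };

A caller that gets false back simply replies with a noop marker and lets the client issue the next writebacklisten.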
+ */ + class WriteBackManager { + public: + WriteBackManager(); + ~WriteBackManager(); + + /* + * @param remote server ID this operation came from + * @param op the operation itself + * + * Enqueues opeartion 'op' in server 'remote's queue. The operation will be written back to + * remote at a later stager. + */ + void queueWriteBack( const string& remote , const BSONObj& op ); + + /* + * @param remote server ID + * @return the queue for operations that came from 'remote' + * + * Gets access to server 'remote's queue, which is synchronized. + */ + BlockingQueue* getWritebackQueue( const string& remote ); + + /* + * @return true if there is no operation queued for write back + */ + bool queuesEmpty() const; + + private: + // a map from mongos's serverIDs to queues of "rejected" operations + // an operation is rejected if it targets data that does not live on this shard anymore + typedef map< string , BlockingQueue* > WriteBackQueuesMap; + + // '_writebackQueueLock' protects only the map itself, since each queue is syncrhonized. + mutable mongo::mutex _writebackQueueLock; + WriteBackQueuesMap _writebackQueues; + + }; + + // TODO collect global state in a central place and init during startup + extern WriteBackManager writeBackManager; + +} // namespace mongo diff --git a/s/dbgrid.vcproj b/s/dbgrid.vcproj deleted file mode 100644 index 745d84e..0000000 --- a/s/dbgrid.vcproj +++ /dev/null @@ -1,1048 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/s/dbgrid.vcxproj b/s/dbgrid.vcxproj index 83fbf68..61a8458 100644 --- a/s/dbgrid.vcxproj +++ b/s/dbgrid.vcxproj @@ -85,6 +85,10 @@ + ..;$(IncludePath) + ..;$(IncludePath) + ..;$(IncludePath) + ..;$(IncludePath) @@ -183,20 +187,31 @@ + + + + + + NotUsing + + + + + @@ -212,6 +227,7 @@ + @@ -512,9 +528,11 @@ + + diff --git a/s/dbgrid.vcxproj.filters b/s/dbgrid.vcxproj.filters index bce75b4..b87a1f2 100755 --- a/s/dbgrid.vcxproj.filters +++ b/s/dbgrid.vcxproj.filters @@ -287,6 +287,39 @@ Shared Source Files + + Source Files + + + Shared Source Files + + + Shared Source Files + + + Source Files + + + Source Files + + + Source Files + + + Shared Source Files + + + Shared Source Files + + + client + + + Source Files + + + Shared Source Files + @@ -355,6 +388,9 @@ Header Files + + Shared Source Files + diff --git a/s/grid.cpp b/s/grid.cpp index 443cd9a..0045754 100644 --- a/s/grid.cpp +++ b/s/grid.cpp @@ -19,46 +19,47 @@ #include "pch.h" #include - #include "../client/connpool.h" #include "../util/stringutils.h" +#include "../util/unittest.h" #include "grid.h" #include "shard.h" namespace mongo { - - DBConfigPtr Grid::getDBConfig( string database , bool create , const string& shardNameHint ){ + + DBConfigPtr Grid::getDBConfig( string database , 
bool create , const string& shardNameHint ) { { string::size_type i = database.find( "." ); if ( i != string::npos ) database = database.substr( 0 , i ); } - + if ( database == "config" ) return configServerPtr; scoped_lock l( _lock ); DBConfigPtr& cc = _databases[database]; - if ( !cc ){ + if ( !cc ) { cc.reset(new DBConfig( database )); - if ( ! cc->load() ){ - if ( create ){ + if ( ! cc->load() ) { + if ( create ) { // note here that cc->primary == 0. log() << "couldn't find database [" << database << "] in config db" << endl; - - { // lets check case + + { + // lets check case ScopedDbConnection conn( configServer.modelServer() ); BSONObjBuilder b; b.appendRegex( "_id" , (string)"^" + database + "$" , "i" ); BSONObj d = conn->findOne( ShardNS::database , b.obj() ); conn.done(); - if ( ! d.isEmpty() ){ + if ( ! d.isEmpty() ) { cc.reset(); stringstream ss; - ss << "can't have 2 databases that just differ on case " + ss << "can't have 2 databases that just differ on case " << " have: " << d["_id"].String() << " want to add: " << database; @@ -67,20 +68,22 @@ namespace mongo { } Shard primary; - if ( database == "admin" ){ + if ( database == "admin" ) { primary = configServer.getPrimary(); - } else if ( shardNameHint.empty() ){ + } + else if ( shardNameHint.empty() ) { primary = Shard::pick(); - } else { + } + else { // use the shard name if provided Shard shard; shard.reset( shardNameHint ); primary = shard; } - if ( primary.ok() ){ + if ( primary.ok() ) { cc->setPrimary( primary.getName() ); // saves 'cc' to configDB log() << "\t put [" << database << "] on: " << primary << endl; } @@ -94,53 +97,63 @@ namespace mongo { cc.reset(); } } - + } - + return cc; } - void Grid::removeDB( string database ){ + void Grid::removeDB( string database ) { uassert( 10186 , "removeDB expects db name" , database.find( '.' ) == string::npos ); scoped_lock l( _lock ); _databases.erase( database ); - + } bool Grid::allowLocalHost() const { return _allowLocalShard; } - void Grid::setAllowLocalHost( bool allow ){ + void Grid::setAllowLocalHost( bool allow ) { _allowLocalShard = allow; } - bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ){ + bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) { // name can be NULL, so privide a dummy one here to avoid testing it elsewhere string nameInternal; if ( ! name ) { name = &nameInternal; } - // Check whether the host (or set) exists and run several sanity checks on this request. + // Check whether the host (or set) exists and run several sanity checks on this request. // There are two set of sanity checks: making sure adding this particular shard is consistent - // with the replica set state (if it exists) and making sure this shards databases can be + // with the replica set state (if it exists) and making sure this shards databases can be // brought into the grid without conflict. vector dbNames; try { ScopedDbConnection newShardConn( servers ); newShardConn->getLastError(); - - if ( newShardConn->type() == ConnectionString::SYNC ){ + + if ( newShardConn->type() == ConnectionString::SYNC ) { newShardConn.done(); errMsg = "can't use sync cluster as a shard. 
for replica set, have to use /,,..."; return false; } + BSONObj resIsMongos; + bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos ); + + // should return ok=0, cmd not found if it's a normal mongod + if ( ok ) { + errMsg = "can't add a mongos process as a shard"; + newShardConn.done(); + return false; + } + BSONObj resIsMaster; - bool ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster ); - if ( !ok ){ + ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster ); + if ( !ok ) { ostringstream ss; ss << "failed running isMaster: " << resIsMaster; errMsg = ss.str(); @@ -151,7 +164,7 @@ namespace mongo { // if the shard has only one host, make sure it is not part of a replica set string setName = resIsMaster["setName"].str(); string commandSetName = servers.getSetName(); - if ( commandSetName.empty() && ! setName.empty() ){ + if ( commandSetName.empty() && ! setName.empty() ) { ostringstream ss; ss << "host is part of set: " << setName << " use replica set url format /,,...."; errMsg = ss.str(); @@ -160,7 +173,7 @@ namespace mongo { } // if the shard is part of replica set, make sure it is the right one - if ( ! commandSetName.empty() && ( commandSetName != setName ) ){ + if ( ! commandSetName.empty() && ( commandSetName != setName ) ) { ostringstream ss; ss << "host is part of a different set: " << setName; errMsg = ss.str(); @@ -168,30 +181,39 @@ namespace mongo { return false; } - // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of + // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of // the set. It is fine if not all members of the set are present in 'servers'. bool foundAll = true; string offendingHost; - if ( ! commandSetName.empty() ){ + if ( ! commandSetName.empty() ) { set hostSet; BSONObjIterator iter( resIsMaster["hosts"].Obj() ); - while ( iter.more() ){ + while ( iter.more() ) { hostSet.insert( iter.next().String() ); // host:port } + if ( resIsMaster["passives"].isABSONObj() ) { + BSONObjIterator piter( resIsMaster["passives"].Obj() ); + while ( piter.more() ) { + hostSet.insert( piter.next().String() ); // host:port + } + } vector hosts = servers.getServers(); - for ( size_t i = 0 ; i < hosts.size() ; i++ ){ + for ( size_t i = 0 ; i < hosts.size() ; i++ ) { + if (!hosts[i].hasPort()) { + hosts[i].setPort(CmdLine::DefaultDBPort); + } string host = hosts[i].toString(); // host:port - if ( hostSet.find( host ) == hostSet.end() ){ + if ( hostSet.find( host ) == hostSet.end() ) { offendingHost = host; foundAll = false; break; } } } - if ( ! foundAll ){ + if ( ! foundAll ) { ostringstream ss; - ss << "host " << offendingHost << " does not belong to replica set " << setName;; + ss << "host " << offendingHost << " does not belong to replica set as a non-passive member" << setName;; errMsg = ss.str(); newShardConn.done(); return false; @@ -199,15 +221,15 @@ namespace mongo { // shard name defaults to the name of the replica set if ( name->empty() && ! setName.empty() ) - *name = setName; + *name = setName; - // In order to be accepted as a new shard, that mongod must not have any database name that exists already - // in any other shards. If that test passes, the new shard's databases are going to be entered as + // In order to be accepted as a new shard, that mongod must not have any database name that exists already + // in any other shards. 
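Before a replica set is accepted as a shard, the checks above gather the set's reported members, the hosts array plus passives, default the port on any user-supplied address that lacks one, and require every address from the command line to appear in that member set. A simplified sketch of the membership test; withDefaultPort and allHostsInSet are illustrative helpers, not the real isMaster handling:

    #include <set>
    #include <string>
    #include <vector>

    // Append the default port if the user-supplied host string does not carry one.
    inline std::string withDefaultPort(const std::string& host, int defaultPort = 27017) {
        if (host.find(':') != std::string::npos)
            return host;
        return host + ":" + std::to_string(defaultPort);
    }

    // 'members' would come from the isMaster reply: the "hosts" array plus "passives".
    // Returns true if every requested host is a known member; otherwise reports the offender.
    inline bool allHostsInSet(const std::vector<std::string>& requested,
                              const std::set<std::string>& members,
                              std::string& offendingHost) {
        for (const std::string& h : requested) {
            std::string hp = withDefaultPort(h);
            if (members.find(hp) == members.end()) {
                offendingHost = hp;
                return false;
            }
        }
        return true;
    }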
If that test passes, the new shard's databases are going to be entered as // non-sharded db's whose primary is the newly added shard. BSONObj resListDB; ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB ); - if ( !ok ){ + if ( !ok ) { ostringstream ss; ss << "failed listing " << servers.toString() << "'s databases:" << resListDB; errMsg = ss.str(); @@ -216,20 +238,21 @@ namespace mongo { } BSONObjIterator i( resListDB["databases"].Obj() ); - while ( i.more() ){ + while ( i.more() ) { BSONObj dbEntry = i.next().Obj(); const string& dbName = dbEntry["name"].String(); - if ( _isSpecialLocalDB( dbName ) ){ + if ( _isSpecialLocalDB( dbName ) ) { // 'local', 'admin', and 'config' are system DBs and should be excluded here continue; - } else { + } + else { dbNames.push_back( dbName ); } } newShardConn.done(); } - catch ( DBException& e ){ + catch ( DBException& e ) { ostringstream ss; ss << "couldn't connect to new shard "; ss << e.what(); @@ -238,9 +261,9 @@ namespace mongo { } // check that none of the existing shard candidate's db's exist elsewhere - for ( vector::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ){ + for ( vector::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) { DBConfigPtr config = getDBConfig( *it , false ); - if ( config.get() != NULL ){ + if ( config.get() != NULL ) { ostringstream ss; ss << "can't add shard " << servers.toString() << " because a local database '" << *it; ss << "' exists in another " << config->getPrimary().toString(); @@ -250,26 +273,26 @@ namespace mongo { } // if a name for a shard wasn't provided, pick one. - if ( name->empty() && ! _getNewShardName( name ) ){ + if ( name->empty() && ! _getNewShardName( name ) ) { errMsg = "error generating new shard name"; return false; } - + // build the ConfigDB shard document BSONObjBuilder b; b.append( "_id" , *name ); b.append( "host" , servers.toString() ); - if ( maxSize > 0 ){ + if ( maxSize > 0 ) { b.append( ShardFields::maxSize.name() , maxSize ); } BSONObj shardDoc = b.obj(); { ScopedDbConnection conn( configServer.getPrimary() ); - + // check whether the set of hosts (or single host) is not an already a known shard BSONObj old = conn->findOne( ShardNS::shard , BSON( "host" << servers.toString() ) ); - if ( ! old.isEmpty() ){ + if ( ! old.isEmpty() ) { errMsg = "host already used"; conn.done(); return false; @@ -279,7 +302,7 @@ namespace mongo { conn->insert( ShardNS::shard , shardDoc ); errMsg = conn->getLastError(); - if ( ! errMsg.empty() ){ + if ( ! errMsg.empty() ) { log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl; conn.done(); return false; @@ -291,37 +314,37 @@ namespace mongo { Shard::reloadShardInfo(); // add all databases of the new shard - for ( vector::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ){ + for ( vector::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) { DBConfigPtr config = getDBConfig( *it , true , *name ); - if ( ! config ){ - log() << "adding shard " << servers << " even though could not add database " << *it << endl; + if ( ! config ) { + log() << "adding shard " << servers << " even though could not add database " << *it << endl; } } return true; } - - bool Grid::knowAboutShard( const string& name ) const{ + + bool Grid::knowAboutShard( const string& name ) const { ShardConnection conn( configServer.getPrimary() , "" ); BSONObj shard = conn->findOne( ShardNS::shard , BSON( "host" << name ) ); conn.done(); return ! 
shard.isEmpty(); } - bool Grid::_getNewShardName( string* name ) const{ + bool Grid::_getNewShardName( string* name ) const { DEV assert( name ); bool ok = false; - int count = 0; + int count = 0; ShardConnection conn( configServer.getPrimary() , "" ); - BSONObj o = conn->findOne( ShardNS::shard , Query( fromjson ( "{_id: /^shard/}" ) ).sort( BSON( "_id" << -1 ) ) ); + BSONObj o = conn->findOne( ShardNS::shard , Query( fromjson ( "{_id: /^shard/}" ) ).sort( BSON( "_id" << -1 ) ) ); if ( ! o.isEmpty() ) { string last = o["_id"].String(); istringstream is( last.substr( 5 ) ); is >> count; count++; - } + } if (count < 9999) { stringstream ss; ss << "shard" << setfill('0') << setw(4) << count; @@ -337,14 +360,75 @@ namespace mongo { ShardConnection conn( configServer.getPrimary() , "" ); // look for the stop balancer marker - BSONObj stopMarker = conn->findOne( ShardNS::settings, BSON( "_id" << "balancer" << "stopped" << true ) ); + BSONObj balancerDoc = conn->findOne( ShardNS::settings, BSON( "_id" << "balancer" ) ); conn.done(); - return stopMarker.isEmpty(); + + boost::posix_time::ptime now = boost::posix_time::second_clock::local_time(); + if ( _balancerStopped( balancerDoc ) || ! _inBalancingWindow( balancerDoc , now ) ) { + return false; + } + + return true; + } + + bool Grid::_balancerStopped( const BSONObj& balancerDoc ) { + // check the 'stopped' marker maker + // if present, it is a simple bool + BSONElement stoppedElem = balancerDoc["stopped"]; + if ( ! stoppedElem.eoo() && stoppedElem.isBoolean() ) { + return stoppedElem.boolean(); + } + return false; + } + + bool Grid::_inBalancingWindow( const BSONObj& balancerDoc , const boost::posix_time::ptime& now ) { + // check the 'activeWindow' marker + // if present, it is an interval during the day when the balancer should be active + // { start: "08:00" , stop: "19:30" }, strftime format is %H:%M + BSONElement windowElem = balancerDoc["activeWindow"]; + if ( windowElem.eoo() ) { + return true; + } + + // check if both 'start' and 'stop' are present + if ( ! windowElem.isABSONObj() ) { + log(1) << "'activeWindow' format is { start: \"hh:mm\" , stop: ... }" << balancerDoc << endl; + return true; + } + BSONObj intervalDoc = windowElem.Obj(); + const string start = intervalDoc["start"].str(); + const string stop = intervalDoc["stop"].str(); + if ( start.empty() || stop.empty() ) { + log(1) << "must specify both start and end of balancing window: " << intervalDoc << endl; + return true; + } + + // check that both 'start' and 'stop' are valid time-of-day + boost::posix_time::ptime startTime, stopTime; + if ( ! toPointInTime( start , &startTime ) || ! 
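_getNewShardName above derives the next automatic shard name by taking the highest existing "shardNNNN" id, parsing the numeric suffix after the "shard" prefix, incrementing it and re-padding to four digits, refusing once the counter reaches 9999. A small standalone sketch of that step, with the config lookup replaced by a plain string argument:

    #include <iomanip>
    #include <sstream>
    #include <string>

    // 'last' is the highest existing id, e.g. "shard0007" (assumed to carry the
    // "shard" prefix), or empty if no shard has been auto-named yet.
    // Returns the next name, or an empty string once the 4-digit space runs out.
    inline std::string nextShardName(const std::string& last) {
        int count = 0;
        if (!last.empty()) {
            std::istringstream is(last.substr(5));   // skip the "shard" prefix
            is >> count;
            ++count;
        }
        if (count >= 9999)                           // mirrors the original count < 9999 guard
            return "";
        std::ostringstream ss;
        ss << "shard" << std::setfill('0') << std::setw(4) << count;
        return ss.str();                             // e.g. "shard0008"
    }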
toPointInTime( stop , &stopTime ) ) { + log(1) << "cannot parse active window (use hh:mm 24hs format): " << intervalDoc << endl; + return true; + } + + // allow balancing if during the activeWindow + // note that a window may be open during the night + if ( stopTime > startTime ) { + if ( ( now >= startTime ) && ( now <= stopTime ) ) { + return true; + } + } + else if ( startTime > stopTime ) { + if ( ( now >=startTime ) || ( now <= stopTime ) ) { + return true; + } + } + + return false; } unsigned long long Grid::getNextOpTime() const { ScopedDbConnection conn( configServer.getPrimary() ); - + BSONObj result; massert( 10421 , "getoptime failed" , conn->simpleCommand( "admin" , &result , "getoptime" ) ); conn.done(); @@ -352,10 +436,51 @@ namespace mongo { return result["optime"]._numberLong(); } - bool Grid::_isSpecialLocalDB( const string& dbName ){ + bool Grid::_isSpecialLocalDB( const string& dbName ) { return ( dbName == "local" ) || ( dbName == "admin" ) || ( dbName == "config" ); } Grid grid; -} + // unit tests + + class BalancingWindowUnitTest : public UnitTest { + public: + void run() { + // T0 < T1 < now < T2 < T3 and Error + const string T0 = "9:00"; + const string T1 = "11:00"; + boost::posix_time::ptime now( currentDate(), boost::posix_time::hours( 13 ) + boost::posix_time::minutes( 48 ) ); + const string T2 = "17:00"; + const string T3 = "21:30"; + const string E = "28:35"; + + BSONObj w1 = BSON( "activeWindow" << BSON( "start" << T0 << "stop" << T1 ) ); // closed in the past + BSONObj w2 = BSON( "activeWindow" << BSON( "start" << T2 << "stop" << T3 ) ); // not opened until the future + BSONObj w3 = BSON( "activeWindow" << BSON( "start" << T1 << "stop" << T2 ) ); // open now + BSONObj w4 = BSON( "activeWindow" << BSON( "start" << T3 << "stop" << T2 ) ); // open since last day + + assert( ! Grid::_inBalancingWindow( w1 , now ) ); + assert( ! Grid::_inBalancingWindow( w2 , now ) ); + assert( Grid::_inBalancingWindow( w3 , now ) ); + assert( Grid::_inBalancingWindow( w4 , now ) ); + + // bad input should not stop the balancer + + BSONObj w5; // empty window + BSONObj w6 = BSON( "activeWindow" << BSON( "start" << 1 ) ); // missing stop + BSONObj w7 = BSON( "activeWindow" << BSON( "stop" << 1 ) ); // missing start + BSONObj w8 = BSON( "wrongMarker" << 1 << "start" << 1 << "stop" << 1 ); // active window marker missing + BSONObj w9 = BSON( "activeWindow" << BSON( "start" << T3 << "stop" << E ) ); // garbage in window + + assert( Grid::_inBalancingWindow( w5 , now ) ); + assert( Grid::_inBalancingWindow( w6 , now ) ); + assert( Grid::_inBalancingWindow( w7 , now ) ); + assert( Grid::_inBalancingWindow( w8 , now ) ); + assert( Grid::_inBalancingWindow( w9 , now ) ); + + log(1) << "BalancingWidowObjTest passed" << endl; + } + } BalancingWindowObjTest; + +} diff --git a/s/grid.h b/s/grid.h index 4f3c2ac..5692a82 100644 --- a/s/grid.h +++ b/s/grid.h @@ -18,6 +18,9 @@ #pragma once +#include + +#include "../util/time_support.h" #include "../util/concurrency/mutex.h" #include "config.h" // DBConfigPtr @@ -37,7 +40,7 @@ namespace mongo { * will return an empty DBConfig if not in db already */ DBConfigPtr getDBConfig( string ns , bool create=true , const string& shardNameHint="" ); - + /** * removes db entry. * on next getDBConfig call will fetch from db @@ -57,14 +60,14 @@ namespace mongo { /** * * addShard will create a new shard in the grid. It expects a mongod process to be runing - * on the provided address. Adding a shard that is a replica set is supported. + * on the provided address. 
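The activeWindow check above has to cope with a window that wraps past midnight: when stop is later than start the balancer runs inside [start, stop], and when start is later than stop it runs overnight, outside [stop, start]; unparsable input deliberately leaves the balancer running. A minimal sketch of that comparison using minutes since midnight instead of boost ptime; parseHHMM is an illustrative helper, not the real toPointInTime:

    #include <cstdio>
    #include <string>

    // Parse "hh:mm" (24h clock) into minutes since midnight; returns -1 on bad input.
    inline int parseHHMM(const std::string& s) {
        int h = -1, m = -1;
        if (std::sscanf(s.c_str(), "%d:%d", &h, &m) != 2)
            return -1;
        if (h < 0 || h > 23 || m < 0 || m > 59)
            return -1;
        return h * 60 + m;
    }

    // true if 'nowMin' (minutes since midnight) falls inside the window [start, stop],
    // where the window may wrap around midnight (e.g. start 23:00, stop 06:00).
    inline bool inWindow(int nowMin, const std::string& start, const std::string& stop) {
        int s = parseHHMM(start), e = parseHHMM(stop);
        if (s < 0 || e < 0)
            return true;                          // unparsable window must not stop the balancer
        if (e > s)
            return nowMin >= s && nowMin <= e;    // plain daytime window
        if (s > e)
            return nowMin >= s || nowMin <= e;    // window open overnight
        return true;                              // start == stop: treat as always open
    }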
Adding a shard that is a replica set is supported. * * @param name is an optional string with the name of the shard. if ommited, grid will - * generate one and update the parameter. + * generate one and update the parameter. * @param servers is the connection string of the shard being added * @param maxSize is the optional space quota in bytes. Zeros means there's no limitation to - * space usage - * @param errMsg is the error description in case the operation failed. + * space usage + * @param errMsg is the error description in case the operation failed. * @return true if shard was successfully added. */ bool addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ); @@ -73,7 +76,7 @@ namespace mongo { * @return true if the config database knows about a host 'name' */ bool knowAboutShard( const string& name ) const; - + /** * @return true if the chunk balancing functionality is enabled */ @@ -81,6 +84,15 @@ namespace mongo { unsigned long long getNextOpTime() const; + // exposed methods below are for testing only + + /** + * @param balancerDoc bson that may contain a window of time for the balancer to work + * format { ... , activeWindow: { start: "8:30" , stop: "19:00" } , ... } + * @return true if there is no window of time specified for the balancer or it we're currently in it + */ + static bool _inBalancingWindow( const BSONObj& balancerDoc , const boost::posix_time::ptime& now ); + private: mongo::mutex _lock; // protects _databases; TODO: change to r/w lock ?? map _databases; // maps ns to DBConfig's @@ -89,7 +101,7 @@ namespace mongo { /** * @param name is the chose name for the shard. Parameter is mandatory. * @return true if it managed to generate a shard name. May return false if (currently) - * 10000 shard + * 10000 shard */ bool _getNewShardName( string* name ) const; @@ -98,6 +110,13 @@ namespace mongo { */ static bool _isSpecialLocalDB( const string& dbName ); + /** + * @param balancerDoc bson that may contain a marker to stop the balancer + * format { ... , stopped: [ "true" | "false" ] , ... } + * @return true if the marker is present and is set to true + */ + static bool _balancerStopped( const BSONObj& balancerDoc ); + }; extern Grid grid; diff --git a/s/request.cpp b/s/request.cpp index ec245d7..52f2e54 100644 --- a/s/request.cpp +++ b/s/request.cpp @@ -1,7 +1,4 @@ -/* dbgrid/request.cpp - - Top level handling of requests (operations such as query, insert, ...) -*/ +// s/request.cpp /** * Copyright (C) 2008 10gen Inc. @@ -34,53 +31,56 @@ #include "stats.h" #include "cursors.h" #include "grid.h" +#include "client.h" namespace mongo { - Request::Request( Message& m, AbstractMessagingPort* p ) : - _m(m) , _d( m ) , _p(p) , _didInit(false){ - + Request::Request( Message& m, AbstractMessagingPort* p ) : + _m(m) , _d( m ) , _p(p) , _didInit(false) { + assert( _d.getns() ); _id = _m.header()->id; - + _clientId = p ? p->getClientId() : 0; _clientInfo = ClientInfo::get( _clientId ); _clientInfo->newRequest( p ); - + } - - void Request::init(){ + + void Request::init() { if ( _didInit ) return; _didInit = true; reset(); } - - void Request::reset( bool reload ){ - if ( _m.operation() == dbKillCursors ){ + + void Request::reset( bool reload ) { + if ( _m.operation() == dbKillCursors ) { return; } - + + uassert( 13644 , "can't use 'local' database through mongos" , ! str::startsWith( getns() , "local." ) ); + _config = grid.getDBConfig( getns() ); if ( reload ) uassert( 10192 , "db config reload failed!" 
, _config->reload() ); - if ( _config->isSharded( getns() ) ){ + if ( _config->isSharded( getns() ) ) { _chunkManager = _config->getChunkManager( getns() , reload ); uassert( 10193 , (string)"no shard info for: " + getns() , _chunkManager ); } else { _chunkManager.reset(); - } + } _m.header()->id = _id; - + } - + Shard Request::primaryShard() const { assert( _didInit ); - - if ( _chunkManager ){ + + if ( _chunkManager ) { if ( _chunkManager->numChunks() > 1 ) throw UserException( 8060 , "can't call primaryShard on a sharded collection" ); return _chunkManager->findChunk( _chunkManager->getShardKey().globalMin() )->getShard(); @@ -89,26 +89,26 @@ namespace mongo { uassert( 10194 , "can't call primaryShard on a sharded collection!" , s.ok() ); return s; } - - void Request::process( int attempt ){ + + void Request::process( int attempt ) { init(); int op = _m.operation(); assert( op > dbMsg ); - - if ( op == dbKillCursors ){ + + if ( op == dbKillCursors ) { cursorCache.gotKillCursors( _m ); return; } - + log(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.header()->id) << " attempt: " << attempt << endl; - + Strategy * s = SINGLE; _counter = &opsNonSharded; - + _d.markSet(); - - if ( _chunkManager ){ + + if ( _chunkManager ) { s = SHARDED; _counter = &opsSharded; } @@ -119,7 +119,7 @@ namespace mongo { try { s->queryOp( *this ); } - catch ( StaleConfigException& staleConfig ){ + catch ( StaleConfigException& staleConfig ) { log() << staleConfig.what() << " attempt: " << attempt << endl; uassert( 10195 , "too many attempts to update config, failing" , attempt < 5 ); ShardConnection::checkMyConnectionVersions( getns() ); @@ -141,115 +141,31 @@ namespace mongo { globalOpCounters.gotOp( op , iscmd ); _counter->gotOp( op , iscmd ); } - + bool Request::isCommand() const { int x = _d.getQueryNToReturn(); return ( x == 1 || x == -1 ) && strstr( getns() , ".$cmd" ); } - void Request::gotInsert(){ + void Request::gotInsert() { globalOpCounters.gotInsert(); _counter->gotInsert(); } - void Request::reply( Message & response , const string& fromServer ){ + void Request::reply( Message & response , const string& fromServer ) { assert( _didInit ); long long cursor =response.header()->getCursor(); - if ( cursor ){ - cursorCache.storeRef( fromServer , cursor ); - } - _p->reply( _m , response , _id ); - } - - ClientInfo::ClientInfo( int clientId ) : _id( clientId ){ - _cur = &_a; - _prev = &_b; - newRequest(); - } - - ClientInfo::~ClientInfo(){ - if ( _lastAccess ){ - scoped_lock lk( _clientsLock ); - ClientCache::iterator i = _clients.find( _id ); - if ( i != _clients.end() ){ - _clients.erase( i ); + if ( cursor ) { + if ( fromServer.size() ) { + cursorCache.storeRef( fromServer , cursor ); } - } - } - - void ClientInfo::addShard( const string& shard ){ - _cur->insert( shard ); - _sinceLastGetError.insert( shard ); - } - - void ClientInfo::newRequest( AbstractMessagingPort* p ){ - - if ( p ){ - string r = p->remote().toString(); - if ( _remote == "" ) - _remote = r; - else if ( _remote != r ){ - stringstream ss; - ss << "remotes don't match old [" << _remote << "] new [" << r << "]"; - throw UserException( 13134 , ss.str() ); + else { + // probably a getMore + // make sure we have a ref for this + assert( cursorCache.getRef( cursor ).size() ); } } - - _lastAccess = (int) time(0); - - set * temp = _cur; - _cur = _prev; - _prev = temp; - _cur->clear(); - } - - void ClientInfo::disconnect(){ - _lastAccess = 0; - } - - ClientInfo * ClientInfo::get( int clientId , bool create ){ - - if ( ! 
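Request::process above carries an attempt counter: a StaleConfigException coming back from a shard triggers a refresh of the connection's chunk-version view (ShardConnection::checkMyConnectionVersions) so the request can be re-run against current metadata, and the uassert gives up after five attempts. A generic sketch of that bounded retry-on-stale-metadata loop; StaleConfigError, refreshMetadata and runOnce are illustrative stand-ins:

    #include <stdexcept>

    struct StaleConfigError : std::runtime_error {
        using std::runtime_error::runtime_error;
    };

    // Run 'runOnce'; if the shard reports stale config metadata, refresh and try again,
    // but never more than 'maxAttempts' times in total.
    template <typename Op, typename Refresh>
    void runWithStaleRetry(Op runOnce, Refresh refreshMetadata, int maxAttempts = 5) {
        for (int attempt = 0; ; ++attempt) {
            try {
                runOnce();
                return;
            }
            catch (const StaleConfigError&) {
                if (attempt + 1 >= maxAttempts)
                    throw;                 // "too many attempts to update config, failing"
                refreshMetadata();         // analogous to checkMyConnectionVersions
            }
        }
    }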
clientId ) - clientId = getClientId(); - - if ( ! clientId ){ - ClientInfo * info = _tlInfo.get(); - if ( ! info ){ - info = new ClientInfo( 0 ); - _tlInfo.reset( info ); - } - info->newRequest(); - return info; - } - - scoped_lock lk( _clientsLock ); - ClientCache::iterator i = _clients.find( clientId ); - if ( i != _clients.end() ) - return i->second; - if ( ! create ) - return 0; - ClientInfo * info = new ClientInfo( clientId ); - _clients[clientId] = info; - return info; - } - - void ClientInfo::disconnect( int clientId ){ - if ( ! clientId ) - return; - - scoped_lock lk( _clientsLock ); - ClientCache::iterator i = _clients.find( clientId ); - if ( i == _clients.end() ) - return; - - ClientInfo* ci = i->second; - ci->disconnect(); - delete ci; - _clients.erase( i ); + _p->reply( _m , response , _id ); } - ClientCache& ClientInfo::_clients = *(new ClientCache()); - mongo::mutex ClientInfo::_clientsLock("_clientsLock"); - boost::thread_specific_ptr ClientInfo::_tlInfo; - } // namespace mongo diff --git a/s/request.h b/s/request.h index f063d0c..5b4c228 100644 --- a/s/request.h +++ b/s/request.h @@ -26,16 +26,16 @@ namespace mongo { - + class OpCounters; class ClientInfo; - + class Request : boost::noncopyable { public: Request( Message& m, AbstractMessagingPort* p ); // ---- message info ----- - + const char * getns() const { return _d.getns(); @@ -60,12 +60,12 @@ namespace mongo { assert( _didInit ); return _config->isShardingEnabled(); } - + ChunkManagerPtr getChunkManager() const { assert( _didInit ); return _chunkManager; } - + int getClientId() const { return _clientId; } @@ -74,14 +74,14 @@ namespace mongo { } // ---- remote location info ----- - - + + Shard primaryShard() const ; - + // ---- low level access ---- void reply( Message & response , const string& fromServer ); - + Message& m() { return _m; } DbMessage& d() { return _d; } AbstractMessagingPort* p() const { return _p; } @@ -93,16 +93,16 @@ namespace mongo { void init(); void reset( bool reload=false ); - + private: Message& _m; DbMessage _d; AbstractMessagingPort* _p; - + MSGID _id; DBConfigPtr _config; ChunkManagerPtr _chunkManager; - + int _clientId; ClientInfo * _clientInfo; @@ -111,45 +111,6 @@ namespace mongo { bool _didInit; }; - typedef map ClientCache; - - class ClientInfo { - public: - ClientInfo( int clientId ); - ~ClientInfo(); - - string getRemote() const { return _remote; } - - void addShard( const string& shard ); - set * getPrev() const { return _prev; }; - - void newRequest( AbstractMessagingPort* p = 0 ); - void disconnect(); - - static ClientInfo * get( int clientId = 0 , bool create = true ); - static void disconnect( int clientId ); - - const set& sinceLastGetError() const { return _sinceLastGetError; } - void clearSinceLastGetError(){ - _sinceLastGetError.clear(); - } - - private: - int _id; - string _remote; - - set _a; - set _b; - set * _cur; - set * _prev; - int _lastAccess; - - set _sinceLastGetError; - - static mongo::mutex _clientsLock; - static ClientCache& _clients; - static boost::thread_specific_ptr _tlInfo; - }; } #include "strategy.h" diff --git a/s/s_only.cpp b/s/s_only.cpp index 1f66e70..83bceac 100644 --- a/s/s_only.cpp +++ b/s/s_only.cpp @@ -16,6 +16,8 @@ */ #include "pch.h" +#include "request.h" +#include "client.h" #include "../client/dbclient.h" #include "../db/dbhelpers.h" #include "../db/matcher.h" @@ -27,53 +29,54 @@ */ namespace mongo { - auto_ptr Helpers::find( const char *ns , BSONObj query , bool requireIndex ){ - uassert( 10196 , "Helpers::find can't be used in mongos" , 0 
); - auto_ptr i; - return i; - } - boost::thread_specific_ptr currentClient; - Client::Client(const char *desc , MessagingPort *p) : - _context(0), - _shutdown(false), - _desc(desc), - _god(0), - _lastOp(0), - _mp(p) - { + Client::Client(const char *desc , MessagingPort *p) : + _context(0), + _shutdown(false), + _desc(desc), + _god(0), + _lastOp(0), + _mp(p) { } - Client::~Client(){} - bool Client::shutdown(){ return true; } + Client::~Client() {} + bool Client::shutdown() { return true; } - bool webHaveAdminUsers(){ - return false; + Client& Client::initThread(const char *desc, MessagingPort *mp) { + setThreadName(desc); + assert( currentClient.get() == 0 ); + Client *c = new Client(desc, mp); + currentClient.reset(c); + mongo::lastError.initThread(); + return *c; } - BSONObj webGetAdminUser( const string& username ){ - return BSONObj(); + string Client::clientAddress(bool includePort) const { + ClientInfo * ci = ClientInfo::get(); + if ( ci ) + return ci->getRemote(); + return ""; } - + bool execCommand( Command * c , - Client& client , int queryOptions , - const char *ns, BSONObj& cmdObj , - BSONObjBuilder& result, - bool fromRepl ){ + Client& client , int queryOptions , + const char *ns, BSONObj& cmdObj , + BSONObjBuilder& result, + bool fromRepl ) { assert(c); - + string dbname = nsToDatabase( ns ); - - if ( cmdObj["help"].trueValue() ){ + + if ( cmdObj["help"].trueValue() ) { stringstream ss; ss << "help for: " << c->name << " "; c->help( ss ); result.append( "help" , ss.str() ); result.append( "lockType" , c->locktype() ); return true; - } + } - if ( c->adminOnly() ){ + if ( c->adminOnly() ) { if ( dbname != "admin" ) { result.append( "errmsg" , "access denied- use admin db" ); log() << "command denied: " << cmdObj.toString() << endl; diff --git a/s/server.cpp b/s/server.cpp index c3dc24c..9bdeede 100644 --- a/s/server.cpp +++ b/s/server.cpp @@ -23,37 +23,41 @@ #include "../util/message_server.h" #include "../util/stringutils.h" #include "../util/version.h" +#include "../util/signal_handlers.h" +#include "../util/admin_access.h" #include "../db/dbwebserver.h" #include "server.h" #include "request.h" +#include "client.h" #include "config.h" #include "chunk.h" #include "balance.h" #include "grid.h" #include "cursors.h" +#include "shard_version.h" namespace mongo { - - CmdLine cmdLine; + + CmdLine cmdLine; Database *database = 0; string mongosCommand; bool dbexitCalled = false; - bool inShutdown(){ + bool inShutdown() { return dbexitCalled; } - + string getDbContext() { return "?"; } - bool haveLocalShardingInfo( const string& ns ){ + bool haveLocalShardingInfo( const string& ns ) { assert( 0 ); return false; } - - void usage( char * argv[] ){ + + void usage( char * argv[] ) { out() << argv[0] << " usage:\n\n"; out() << " -v+ verbose 1: general 2: more 3: per request 4: more\n"; out() << " --port \n"; @@ -64,23 +68,23 @@ namespace mongo { class ShardingConnectionHook : public DBConnectionHook { public: - virtual void onHandedOut( DBClientBase * conn ){ + virtual void onHandedOut( DBClientBase * conn ) { ClientInfo::get()->addShard( conn->getServerAddress() ); } } shardingConnectionHook; - + class ShardedMessageHandler : public MessageHandler { public: - virtual ~ShardedMessageHandler(){} + virtual ~ShardedMessageHandler() {} - virtual void process( Message& m , AbstractMessagingPort* p ){ + virtual void process( Message& m , AbstractMessagingPort* p ) { assert( p ); Request r( m , p ); LastError * le = lastError.startRequest( m , r.getClientId() ); assert( le ); - - if ( logLevel > 5 
){ + + if ( logLevel > 5 ) { log(5) << "client id: " << hex << r.getClientId() << "\t" << r.getns() << "\t" << dec << r.op() << endl; } try { @@ -88,43 +92,67 @@ namespace mongo { setClientId( r.getClientId() ); r.process(); } - catch ( DBException& e ){ + catch ( AssertionException & e ) { + log( e.isUserAssertion() ? 1 : 0 ) << "AssertionException in process: " << e.what() << endl; + + le->raiseError( e.getCode() , e.what() ); + + m.header()->id = r.id(); + + if ( r.expectResponse() ) { + BSONObj err = BSON( "$err" << e.what() << "code" << e.getCode() ); + replyToQuery( ResultFlag_ErrSet, p , m , err ); + } + } + catch ( DBException& e ) { log() << "DBException in process: " << e.what() << endl; - + le->raiseError( e.getCode() , e.what() ); - + m.header()->id = r.id(); - - if ( r.expectResponse() ){ + + if ( r.expectResponse() ) { BSONObj err = BSON( "$err" << e.what() << "code" << e.getCode() ); replyToQuery( ResultFlag_ErrSet, p , m , err ); } } } - virtual void disconnected( AbstractMessagingPort* p ){ + virtual void disconnected( AbstractMessagingPort* p ) { ClientInfo::disconnect( p->getClientId() ); lastError.disconnect( p->getClientId() ); } }; - void sighandler(int sig){ + void sighandler(int sig) { dbexit(EXIT_CLEAN, (string("received signal ") + BSONObjBuilder::numStr(sig)).c_str()); } - - void setupSignals(){ + + void setupSignals( bool inFork ) { signal(SIGTERM, sighandler); signal(SIGINT, sighandler); + +#if defined(SIGQUIT) + signal( SIGQUIT , printStackAndExit ); +#endif + signal( SIGSEGV , printStackAndExit ); + signal( SIGABRT , printStackAndExit ); + signal( SIGFPE , printStackAndExit ); +#if defined(SIGBUS) + signal( SIGBUS , printStackAndExit ); +#endif } - void init(){ + void init() { serverID.init(); setupSIGTRAPforGDB(); setupCoreSignals(); - setupSignals(); + setupSignals( false ); } - void start( const MessageServer::Options& opts ){ + void start( const MessageServer::Options& opts ) { + setThreadName( "mongosMain" ); + installChunkShardVersioning(); balancer.go(); cursorCache.startTimeoutThread(); @@ -137,12 +165,12 @@ namespace mongo { server->run(); } - DBClientBase *createDirectClient(){ + DBClientBase *createDirectClient() { uassert( 10197 , "createDirectClient not implemented for sharding yet" , 0 ); return 0; } - void printShardingVersionInfo(){ + void printShardingVersionInfo() { log() << mongosCommand << " " << mongodVersion() << " starting (--help for usage)" << endl; printGitVersion(); printSysInfo(); @@ -156,91 +184,108 @@ using namespace mongo; namespace po = boost::program_options; -int main(int argc, char* argv[], char *envp[] ) { +int _main(int argc, char* argv[]) { static StaticObserver staticObserver; mongosCommand = argv[0]; - po::options_description options("Sharding options"); + po::options_description options("General options"); + po::options_description sharding_options("Sharding options"); po::options_description hidden("Hidden options"); po::positional_options_description positional; - + CmdLine::addGlobalOptions( options , hidden ); - - options.add_options() - ( "configdb" , po::value() , "1 or 3 comma separated config servers" ) - ( "test" , "just run unit tests" ) - ( "upgrade" , "upgrade meta data version" ) - ( "chunkSize" , po::value(), "maximum amount of data per chunk" ) - ( "ipv6", "enable IPv6 support (disabled by default)" ) - ; - + sharding_options.add_options() + ( "configdb" , po::value() , "1 or 3 comma separated config servers" ) + ( "test" , "just run unit tests" ) + ( "upgrade" , "upgrade meta data version" ) + ( 
"chunkSize" , po::value(), "maximum amount of data per chunk" ) + ( "ipv6", "enable IPv6 support (disabled by default)" ) + ( "jsonp","allow JSONP access via http (has security implications)" ) + ; + + options.add(sharding_options); // parse options po::variables_map params; if ( ! CmdLine::store( argc , argv , options , hidden , positional , params ) ) return 0; - - if ( params.count( "help" ) ){ + + // The default value may vary depending on compile options, but for mongos + // we want durability to be disabled. + cmdLine.dur = false; + + if ( params.count( "help" ) ) { cout << options << endl; return 0; } - if ( params.count( "version" ) ){ + if ( params.count( "version" ) ) { printShardingVersionInfo(); return 0; } - if ( params.count( "chunkSize" ) ){ + if ( params.count( "chunkSize" ) ) { Chunk::MaxChunkSize = params["chunkSize"].as() * 1024 * 1024; } - if ( params.count( "ipv6" ) ){ + if ( params.count( "ipv6" ) ) { enableIPv6(); } - if ( params.count( "test" ) ){ + if ( params.count( "jsonp" ) ) { + cmdLine.jsonp = true; + } + + if ( params.count( "test" ) ) { logLevel = 5; UnitTest::runTests(); cout << "tests passed" << endl; return 0; } - - if ( ! params.count( "configdb" ) ){ + + if ( ! params.count( "configdb" ) ) { out() << "error: no args for --configdb" << endl; return 4; } vector configdbs; splitStringDelim( params["configdb"].as() , &configdbs , ',' ); - if ( configdbs.size() != 1 && configdbs.size() != 3 ){ + if ( configdbs.size() != 1 && configdbs.size() != 3 ) { out() << "need either 1 or 3 configdbs" << endl; return 5; } // we either have a seeting were all process are in localhost or none is - for ( vector::const_iterator it = configdbs.begin() ; it != configdbs.end() ; ++it ){ + for ( vector::const_iterator it = configdbs.begin() ; it != configdbs.end() ; ++it ) { try { HostAndPort configAddr( *it ); // will throw if address format is invalid - if ( it == configdbs.begin() ){ + if ( it == configdbs.begin() ) { grid.setAllowLocalHost( configAddr.isLocalHost() ); } - if ( configAddr.isLocalHost() != grid.allowLocalHost() ){ + if ( configAddr.isLocalHost() != grid.allowLocalHost() ) { out() << "cannot mix localhost and ip addresses in configdbs" << endl; return 10; } - } + } catch ( DBException& e) { out() << "configdb: " << e.what() << endl; return 9; } } + // set some global state + pool.addHook( &shardingConnectionHook ); + pool.setName( "mongos connectionpool" ); + + DBClientConnection::setLazyKillCursor( false ); + ReplicaSetMonitor::setConfigChangeHook( boost::bind( &ConfigServer::replicaSetChange , &configServer , _1 ) ); + if ( argc <= 1 ) { usage( argv ); return 3; @@ -252,22 +297,22 @@ int main(int argc, char* argv[], char *envp[] ) { usage( argv ); return 1; } - + printShardingVersionInfo(); - - if ( ! configServer.init( configdbs ) ){ + + if ( ! configServer.init( configdbs ) ) { cout << "couldn't resolve config db address" << endl; return 7; } - - if ( ! configServer.ok( true ) ){ + + if ( ! configServer.ok( true ) ) { cout << "configServer startup check failed" << endl; return 8; } - + int configError = configServer.checkConfigVersion( params.count( "upgrade" ) ); - if ( configError ){ - if ( configError > 0 ){ + if ( configError ) { + if ( configError > 0 ) { cout << "upgrade success!" 
<< endl; } else { @@ -279,8 +324,8 @@ int main(int argc, char* argv[], char *envp[] ) { init(); - boost::thread web( webServerThread ); - + boost::thread web( boost::bind(&webServerThread, new NoAdminAccess() /* takes ownership */) ); + MessageServer::Options opts; opts.port = cmdLine.port; opts.ipList = cmdLine.bind_ip; @@ -289,10 +334,30 @@ int main(int argc, char* argv[], char *envp[] ) { dbexit( EXIT_CLEAN ); return 0; } +int main(int argc, char* argv[]) { + try { + return _main(argc, argv); + } + catch(DBException& e) { + cout << "uncaught exception in mongos main:" << endl; + cout << e.toString() << endl; + } + catch(std::exception& e) { + cout << "uncaught exception in mongos main:" << endl; + cout << e.what() << endl; + } + catch(...) { + cout << "uncaught exception in mongos main" << endl; + } + return 20; +} #undef exit -void mongo::dbexit( ExitCode rc, const char *why) { +void mongo::dbexit( ExitCode rc, const char *why, bool tryToGetLock ) { dbexitCalled = true; - log() << "dbexit: " << why << " rc:" << rc << endl; + log() << "dbexit: " << why + << " rc:" << rc + << " " << ( why ? why : "" ) + << endl; ::exit(rc); } diff --git a/s/server.h b/s/server.h index c45d77d..1a5c9ea 100644 --- a/s/server.h +++ b/s/server.h @@ -21,9 +21,9 @@ #include "../db/jsobj.h" namespace mongo { - + extern OID serverID; - + // from request.cpp void processRequest(Message& m, MessagingPort& p); } diff --git a/s/shard.cpp b/s/shard.cpp index 4d73a66..dbfd8f9 100644 --- a/s/shard.cpp +++ b/s/shard.cpp @@ -1,147 +1,155 @@ // shard.cpp /** -* Copyright (C) 2008 10gen Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see . -*/ + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ #include "pch.h" #include "shard.h" #include "config.h" #include "request.h" +#include "../db/commands.h" #include namespace mongo { - + class StaticShardInfo { public: StaticShardInfo() : _mutex("StaticShardInfo") { } - void reload(){ + void reload() { list all; { ScopedDbConnection conn( configServer.getPrimary() ); auto_ptr c = conn->query( ShardNS::shard , Query() ); - assert( c.get() ); - while ( c->more() ){ + massert( 13632 , "couldn't get updated shard list from config server" , c.get() ); + while ( c->more() ) { all.push_back( c->next().getOwned() ); } conn.done(); } - + scoped_lock lk( _mutex ); - + // We use the _lookup table for all shards and for the primary config DB. The config DB info, // however, does not come from the ShardNS::shard. 
So when cleaning the _lookup table we leave // the config state intact. The rationale is that this way we could drop shards that // were removed without reinitializing the config DB information. map::iterator i = _lookup.find( "config" ); - if ( i != _lookup.end() ){ + if ( i != _lookup.end() ) { Shard config = i->second; _lookup.clear(); _lookup[ "config" ] = config; - } else { + } + else { _lookup.clear(); } - for ( list::iterator i=all.begin(); i!=all.end(); ++i ){ + for ( list::iterator i=all.begin(); i!=all.end(); ++i ) { BSONObj o = *i; string name = o["_id"].String(); string host = o["host"].String(); long long maxSize = 0; BSONElement maxSizeElem = o[ ShardFields::maxSize.name() ]; - if ( ! maxSizeElem.eoo() ){ + if ( ! maxSizeElem.eoo() ) { maxSize = maxSizeElem.numberLong(); } bool isDraining = false; BSONElement isDrainingElem = o[ ShardFields::draining.name() ]; - if ( ! isDrainingElem.eoo() ){ + if ( ! isDrainingElem.eoo() ) { isDraining = isDrainingElem.Bool(); } Shard s( name , host , maxSize , isDraining ); _lookup[name] = s; - _lookup[host] = s; - - // add rs name to lookup (if it exists) - size_t pos; - if ((pos = host.find('/', 0)) != string::npos) { - _lookup[host.substr(0, pos)] = s; - } + _installHost( host , s ); } } - bool isMember( const string& addr ){ - scoped_lock lk( _mutex ); - map::iterator i = _lookup.find( addr ); - return i != _lookup.end(); - } + const Shard& find( const string& ident ) { + string mykey = ident; - const Shard& find( const string& ident ){ { - scoped_lock lk( _mutex ); - map::iterator i = _lookup.find( ident ); + // if its a replica set, just use set name + size_t pos = mykey.find( '/' ); + if ( pos != string::npos ) + mykey = mykey.substr(0,pos); + } - // if normal find didn't find anything, try to find by rs name - size_t pos; - if ( i == _lookup.end() && (pos = ident.find('/', 0)) != string::npos) { - i = _lookup.find( ident.substr(0, pos) ); - } + { + scoped_lock lk( _mutex ); + map::iterator i = _lookup.find( mykey ); if ( i != _lookup.end() ) return i->second; } - + // not in our maps, re-load all reload(); scoped_lock lk( _mutex ); - map::iterator i = _lookup.find( ident ); - uassert( 13129 , (string)"can't find shard for: " + ident , i != _lookup.end() ); - return i->second; + map::iterator i = _lookup.find( mykey ); + massert( 13129 , (string)"can't find shard for: " + mykey , i != _lookup.end() ); + return i->second; } - - void set( const string& name , const string& addr , bool setName = true , bool setAddr = true ){ - Shard s(name,addr); + + void set( const string& name , const Shard& s , bool setName = true , bool setAddr = true ) { scoped_lock lk( _mutex ); if ( setName ) _lookup[name] = s; if ( setAddr ) - _lookup[addr] = s; + _installHost( s.getConnString() , s ); + } + + void _installHost( const string& host , const Shard& s ) { + _lookup[host] = s; + + const ConnectionString& cs = s.getAddress(); + if ( cs.type() == ConnectionString::SET ) { + if ( cs.getSetName().size() ) + _lookup[ cs.getSetName() ] = s; + + vector servers = cs.getServers(); + for ( unsigned i=0; i::iterator i = _lookup.begin(); i!=_lookup.end(); ){ + for ( map::iterator i = _lookup.begin(); i!=_lookup.end(); ) { Shard s = i->second; - if ( s.getName() == name ){ + if ( s.getName() == name ) { _lookup.erase(i++); - } else { + } + else { ++i; } } } - - void getAllShards( vector& all ){ + + void getAllShards( vector& all ) const { scoped_lock lk( _mutex ); std::set seen; - for ( map::iterator i = _lookup.begin(); i!=_lookup.end(); ++i ){ - Shard s = 
i->second; + for ( map::const_iterator i = _lookup.begin(); i!=_lookup.end(); ++i ) { + const Shard& s = i->second; if ( s.getName() == "config" ) continue; if ( seen.count( s.getName() ) ) @@ -150,49 +158,131 @@ namespace mongo { all.push_back( s ); } } + + bool isAShardNode( const string& addr ) const { + scoped_lock lk( _mutex ); + + // check direct nods or set names + map::const_iterator i = _lookup.find( addr ); + if ( i != _lookup.end() ) + return true; + + // check for set nodes + for ( map::const_iterator i = _lookup.begin(); i!=_lookup.end(); ++i ) { + if ( i->first == "config" ) + continue; + + const Shard& s = i->second; + if ( s.containsNode( addr ) ) + return true; + } + + return false; + } + + bool getShardMap( BSONObjBuilder& result , string& errmsg ) const { + scoped_lock lk( _mutex ); + + BSONObjBuilder b( _lookup.size() + 50 ); + + for ( map::const_iterator i = _lookup.begin(); i!=_lookup.end(); ++i ) { + b.append( i->first , i->second.getConnString() ); + } + + result.append( "map" , b.obj() ); + + return true; + } private: map _lookup; - mongo::mutex _mutex; + mutable mongo::mutex _mutex; } staticShardInfo; + - void Shard::setAddress( const string& addr , bool authoritative ){ - assert( _name.size() ); + class CmdGetShardMap : public Command { + public: + CmdGetShardMap() : Command( "getShardMap" ){} + virtual void help( stringstream &help ) const { help<<"internal"; } + virtual LockType locktype() const { return NONE; } + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return true; } + + virtual bool run(const string&, mongo::BSONObj&, std::string& errmsg , mongo::BSONObjBuilder& result, bool) { + return staticShardInfo.getShardMap( result , errmsg ); + } + } cmdGetShardMap; + + + void Shard::_setAddr( const string& addr ) { _addr = addr; - if ( authoritative ) - staticShardInfo.set( _name , _addr , true , false ); + if ( _addr.size() ) { + _cs = ConnectionString( addr , ConnectionString::SET ); + _rsInit(); + } } - - void Shard::reset( const string& ident ){ + + void Shard::_rsInit() { + if ( _cs.type() == ConnectionString::SET ) { + string x = _cs.getSetName(); + if ( x.size() == 0 ) { + warning() << "no set name for shard: " << _name << " " << _cs.toString() << endl; + } + assert( x.size() ); + _rs = ReplicaSetMonitor::get( x , _cs.getServers() ); + } + } + + void Shard::setAddress( const ConnectionString& cs) { + assert( _name.size() ); + _addr = cs.toString(); + _cs = cs; + _rsInit(); + staticShardInfo.set( _name , *this , true , false ); + } + + void Shard::reset( const string& ident ) { const Shard& s = staticShardInfo.find( ident ); - uassert( 13128 , (string)"can't find shard for: " + ident , s.ok() ); + massert( 13128 , (string)"can't find shard for: " + ident , s.ok() ); _name = s._name; _addr = s._addr; + _cs = s._cs; + _rsInit(); _maxSize = s._maxSize; _isDraining = s._isDraining; } - - void Shard::getAllShards( vector& all ){ + + bool Shard::containsNode( const string& node ) const { + if ( _addr == node ) + return true; + + if ( _rs && _rs->contains( node ) ) + return true; + + return false; + } + + void Shard::getAllShards( vector& all ) { staticShardInfo.getAllShards( all ); } - bool Shard::isAShard( const string& ident ){ - return staticShardInfo.isMember( ident ); + bool Shard::isAShardNode( const string& ident ) { + return staticShardInfo.isAShardNode( ident ); } - void Shard::printShardInfo( ostream& out ){ + void Shard::printShardInfo( ostream& out ) { vector all; getAllShards( all ); for ( unsigned i=0; 
irunCommand( db , cmd , res ); - if ( ! ok ){ + if ( ! ok ) { stringstream ss; ss << "runCommand (" << cmd << ") on shard (" << _name << ") failed : " << res; throw UserException( 13136 , ss.str() ); @@ -201,49 +291,50 @@ namespace mongo { conn.done(); return res; } - + ShardStatus Shard::getStatus() const { return ShardStatus( *this , runCommand( "admin" , BSON( "serverStatus" << 1 ) ) ); } - - void Shard::reloadShardInfo(){ + + void Shard::reloadShardInfo() { staticShardInfo.reload(); } - bool Shard::isMember( const string& addr ){ - return staticShardInfo.isMember( addr ); - } - - void Shard::removeShard( const string& name ){ + void Shard::removeShard( const string& name ) { staticShardInfo.remove( name ); } - Shard Shard::pick(){ + Shard Shard::pick( const Shard& current ) { vector all; staticShardInfo.getAllShards( all ); - if ( all.size() == 0 ){ + if ( all.size() == 0 ) { staticShardInfo.reload(); staticShardInfo.getAllShards( all ); if ( all.size() == 0 ) return EMPTY; } - + + // if current shard was provided, pick a different shard only if it is a better choice ShardStatus best = all[0].getStatus(); - - for ( size_t i=1; i_name ) , _addr( other->_addr ), _maxSize( other->_maxSize ) , _isDraining( other->_isDraining ){ + : _name( other->_name ) , _addr( other->_addr ), _cs( other->_cs ) , + _maxSize( other->_maxSize ) , _isDraining( other->_isDraining ) , _rs( other->_rs ) { } - - static Shard make( const string& ident ){ + + static Shard make( const string& ident ) { Shard s; s.reset( ident ); return s; } - static bool isAShard( const string& ident ); - /** * @param ident either name or address */ void reset( const string& ident ); + + void setAddress( const ConnectionString& cs ); - void setAddress( const string& addr , bool authoritative = false ); + ConnectionString getAddress() const { return _cs; } string getName() const { assert( _name.size() ); return _name; } - + string getConnString() const { assert( _addr.size() ); return _addr; @@ -92,7 +100,7 @@ namespace mongo { bool operator==( const Shard& s ) const { bool n = _name == s._name; bool a = _addr == s._addr; - + assert( n == a ); // names and address are 1 to 1 return n; } @@ -107,7 +115,7 @@ namespace mongo { bool operator==( const string& s ) const { return _name == s || _addr == s; } - + bool operator!=( const string& s ) const { return _name != s && _addr != s; } @@ -115,44 +123,58 @@ namespace mongo { bool operator<(const Shard& o) const { return _name < o._name; } - + bool ok() const { return _addr.size() > 0 && _addr.size() > 0; } - + BSONObj runCommand( const string& db , const string& simple ) const { return runCommand( db , BSON( simple << 1 ) ); } BSONObj runCommand( const string& db , const BSONObj& cmd ) const ; - + ShardStatus getStatus() const ; + /** + * mostly for replica set + * retursn true if node is the shard + * of if the replica set contains node + */ + bool containsNode( const string& node ) const; + static void getAllShards( vector& all ); static void printShardInfo( ostream& out ); - + /** - * picks a Shard for more load + * @parm current - shard where the chunk/database currently lives in + * @return the currently emptiest shard, if best then current, or EMPTY */ - static Shard pick(); - + static Shard pick( const Shard& current = EMPTY ); + static void reloadShardInfo(); static void removeShard( const string& name ); - static bool isMember( const string& addr ); + static bool isAShardNode( const string& ident ); static Shard EMPTY; - + private: + + void _rsInit(); + void _setAddr( const 
string& addr ); + string _name; string _addr; - long long _maxSize; // in MBytes, 0 is unlimited + ConnectionString _cs; + long long _maxSize; // in MBytes, 0 is unlimited bool _isDraining; // shard is currently being removed + ReplicaSetMonitorPtr _rs; }; class ShardStatus { public: - + ShardStatus( const Shard& shard , const BSONObj& obj ); friend ostream& operator << (ostream& out, const ShardStatus& s) { @@ -162,14 +184,14 @@ namespace mongo { string toString() const { stringstream ss; - ss << "shard: " << _shard << " mapped: " << _mapped << " writeLock: " << _writeLock; + ss << "shard: " << _shard << " mapped: " << _mapped << " writeLock: " << _writeLock; return ss.str(); } - bool operator<( const ShardStatus& other ) const{ + bool operator<( const ShardStatus& other ) const { return _mapped < other._mapped; } - + Shard shard() const { return _shard; } @@ -178,9 +200,14 @@ namespace mongo { return _mapped; } + bool hasOpsQueued() const { + return _hasOpsQueued; + } + private: Shard _shard; long long _mapped; + bool _hasOpsQueued; // true if 'writebacks' are pending double _writeLock; }; @@ -195,19 +222,19 @@ namespace mongo { void done(); void kill(); - DBClientBase& conn(){ + DBClientBase& conn() { _finishInit(); assert( _conn ); return *_conn; } - - DBClientBase* operator->(){ + + DBClientBase* operator->() { _finishInit(); assert( _conn ); return _conn; } - DBClientBase* get(){ + DBClientBase* get() { _finishInit(); assert( _conn ); return _conn; @@ -224,7 +251,7 @@ namespace mongo { static void sync(); - void donotCheckVersion(){ + void donotCheckVersion() { _setVersion = false; _finishedInit = true; } @@ -236,11 +263,11 @@ namespace mongo { /** checks all of my thread local connections for the version of this ns */ static void checkMyConnectionVersions( const string & ns ); - + private: void _init(); void _finishInit(); - + bool _finishedInit; string _addr; diff --git a/s/shard_version.cpp b/s/shard_version.cpp new file mode 100644 index 0000000..0f3e80f --- /dev/null +++ b/s/shard_version.cpp @@ -0,0 +1,151 @@ +// @file shard_version.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" + +#include "chunk.h" +#include "config.h" +#include "grid.h" +#include "util.h" +#include "shard.h" +#include "writeback_listener.h" + +#include "shard_version.h" + +namespace mongo { + + // when running in sharded mode, use chunk shard version control + + static bool checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative = false , int tryNumber = 1 ); + static void resetShardVersion( DBClientBase * conn ); + + void installChunkShardVersioning() { + // + // Overriding no-op behavior in shardconnection.cpp + // + // TODO: Better encapsulate this mechanism. 
+ // + checkShardVersionCB = checkShardVersion; + resetShardVersionCB = resetShardVersion; + } + + struct ConnectionShardStatus { + + typedef unsigned long long S; + + ConnectionShardStatus() + : _mutex( "ConnectionShardStatus" ) { + } + + S getSequence( DBClientBase * conn , const string& ns ) { + scoped_lock lk( _mutex ); + return _map[conn][ns]; + } + + void setSequence( DBClientBase * conn , const string& ns , const S& s ) { + scoped_lock lk( _mutex ); + _map[conn][ns] = s; + } + + void reset( DBClientBase * conn ) { + scoped_lock lk( _mutex ); + _map.erase( conn ); + } + + // protects _map + mongo::mutex _mutex; + + // a map from a connection into ChunkManager's sequence number for each namespace + map > _map; + + } connectionShardStatus; + + void resetShardVersion( DBClientBase * conn ) { + connectionShardStatus.reset( conn ); + } + + /** + * @return true if had to do something + */ + bool checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative , int tryNumber ) { + // TODO: cache, optimize, etc... + + WriteBackListener::init( conn ); + + DBConfigPtr conf = grid.getDBConfig( ns ); + if ( ! conf ) + return false; + + unsigned long long officialSequenceNumber = 0; + + ChunkManagerPtr manager; + const bool isSharded = conf->isSharded( ns ); + if ( isSharded ) { + manager = conf->getChunkManager( ns , authoritative ); + officialSequenceNumber = manager->getSequenceNumber(); + } + + // has the ChunkManager been reloaded since the last time we updated the connection-level version? + // (ie, last time we issued the setShardVersions below) + unsigned long long sequenceNumber = connectionShardStatus.getSequence(&conn,ns); + if ( sequenceNumber == officialSequenceNumber ) { + return false; + } + + + ShardChunkVersion version = 0; + if ( isSharded ) { + version = manager->getVersion( Shard::make( conn.getServerAddress() ) ); + } + + log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns + << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber + << " version: " << version << " manager: " << manager.get() + << endl; + + BSONObj result; + if ( setShardVersion( conn , ns , version , authoritative , result ) ) { + // success! + log(1) << " setShardVersion success!" << endl; + connectionShardStatus.setSequence( &conn , ns , officialSequenceNumber ); + return true; + } + + log(1) << " setShardVersion failed!\n" << result << endl; + + if ( result.getBoolField( "need_authoritative" ) ) + massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); + + if ( ! authoritative ) { + checkShardVersion( conn , ns , 1 , tryNumber + 1 ); + return true; + } + + if ( tryNumber < 4 ) { + log(1) << "going to retry checkShardVersion" << endl; + sleepmillis( 10 ); + checkShardVersion( conn , ns , 1 , tryNumber + 1 ); + return true; + } + + log() << " setShardVersion failed: " << result << endl; + massert( 10429 , (string)"setShardVersion failed! " + result.jsonString() , 0 ); + return true; + } + +} // namespace mongo diff --git a/s/shard_version.h b/s/shard_version.h new file mode 100644 index 0000000..023b7fc --- /dev/null +++ b/s/shard_version.h @@ -0,0 +1,31 @@ +// @file shard_version.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +namespace mongo { + + /* + * Install chunk shard vesion callbaks in shardconnection code. This activates + * the chunk shard version control that mongos needs. + * + * MUST be called before accepting any connections. + */ + void installChunkShardVersioning(); + +} // namespace mongo diff --git a/s/shardconnection.cpp b/s/shardconnection.cpp index 694693b..d05f5b1 100644 --- a/s/shardconnection.cpp +++ b/s/shardconnection.cpp @@ -23,7 +23,24 @@ #include namespace mongo { - + + // The code in shardconnection may run not only in mongos context. When elsewhere, chunk shard versioning + // is disabled. To enable chunk shard versioning, provide the check/resetShardVerionCB's below + // + // TODO: better encapsulate this mechanism. + + bool defaultCheckShardVersion( DBClientBase & conn , const string& ns , bool authoritative , int tryNumber ) { + // no-op in mongod + return false; + } + + void defaultResetShardVersion( DBClientBase * conn ) { + // no-op in mongod + } + + boost::function4 checkShardVersionCB = defaultCheckShardVersion; + boost::function1 resetShardVersionCB = defaultResetShardVersion; + /** * holds all the actual db connections for a client to various servers * 1 pre thread, so don't have to worry about thread safety @@ -31,39 +48,22 @@ namespace mongo { class ClientConnections : boost::noncopyable { public: struct Status : boost::noncopyable { - Status() : created(0), avail(0){} + Status() : created(0), avail(0) {} - long long created; + long long created; DBClientBase* avail; }; - Nullstream& debug( Status * s = 0 , const string& addr = "" ){ - static int ll = 9; + ClientConnections() {} - if ( logLevel < ll ) - return nullstream; - Nullstream& l = log(ll); - - l << "ClientConnections DEBUG " << this << " "; - if ( s ){ - l << "s: " << s << " addr: " << addr << " "; - } - return l; - } - - ClientConnections() : _mutex("ClientConnections") { - debug() << " NEW " << endl; - } - - ~ClientConnections(){ - debug() << " KILLING " << endl; - for ( map::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){ + ~ClientConnections() { + for ( HostMap::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ) { string addr = i->first; Status* ss = i->second; assert( ss ); - if ( ss->avail ){ - /* if we're shutting down, don't want to initiate release mechanism as it is slow, + if ( ss->avail ) { + /* if we're shutting down, don't want to initiate release mechanism as it is slow, and isn't needed since all connections will be closed anyway */ if ( inShutdown() ) delete ss->avail; @@ -75,49 +75,41 @@ namespace mongo { } _hosts.clear(); } - - DBClientBase * get( const string& addr , const string& ns ){ + + DBClientBase * get( const string& addr , const string& ns ) { _check( ns ); - scoped_lock lk( _mutex ); + Status* &s = _hosts[addr]; if ( ! 
s ) s = new Status(); - - debug( s , addr ) << "WANT ONE pool avail: " << s->avail << endl; - - if ( s->avail ){ + + if ( s->avail ) { DBClientBase* c = s->avail; s->avail = 0; - debug( s , addr ) << "GOT " << c << endl; pool.onHandedOut( c ); return c; } - debug() << "CREATING NEW CONNECTION" << endl; s->created++; return pool.get( addr ); } - - void done( const string& addr , DBClientBase* conn ){ - scoped_lock lk( _mutex ); + + void done( const string& addr , DBClientBase* conn ) { Status* s = _hosts[addr]; assert( s ); - if ( s->avail ){ - debug( s , addr ) << "DONE WITH TEMP" << endl; + if ( s->avail ) { release( addr , conn ); return; } s->avail = conn; - debug( s , addr ) << "PUSHING: " << conn << endl; } - - void sync(){ - scoped_lock lk( _mutex ); - for ( map::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){ + + void sync() { + for ( HostMap::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ) { string addr = i->first; Status* ss = i->second; - if ( ss->avail ){ + if ( ss->avail ) { ss->avail->getLastError(); release( addr , ss->avail ); ss->avail = 0; @@ -127,63 +119,67 @@ namespace mongo { _hosts.clear(); } - void checkVersions( const string& ns ){ + void checkVersions( const string& ns ) { vector all; Shard::getAllShards( all ); - scoped_lock lk( _mutex ); - for ( unsigned i=0; i::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ){ - if ( ! Shard::isAShard( i->first ) ) + for ( HostMap::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ) { + if ( ! Shard::isAShardNode( i->first ) ) continue; Status* ss = i->second; assert( ss ); if ( ! ss->avail ) ss->avail = pool.get( i->first ); - checkShardVersion( *ss->avail , ns ); + checkShardVersionCB( *ss->avail , ns , false , 1 ); } } - void release( const string& addr , DBClientBase * conn ){ - resetShardVersion( conn ); + void release( const string& addr , DBClientBase * conn ) { + resetShardVersionCB( conn ); BSONObj res; - + try { - if ( conn->simpleCommand( "admin" , &res , "unsetSharding" ) ){ + if ( conn->simpleCommand( "admin" , &res , "unsetSharding" ) ) { pool.release( addr , conn ); } else { - log(LL_ERROR) << " couldn't unset sharding :( " << res << endl; + error() << "unset sharding failed : " << res << endl; delete conn; } } - catch ( std::exception& e ){ - log(LL_ERROR) << "couldn't unsert sharding : " << e.what() << endl; + catch ( SocketException& e ) { + // server down or something + LOG(1) << "socket exception trying to unset sharding: " << e.toString() << endl; + delete conn; + } + catch ( std::exception& e ) { + error() << "couldn't unset sharding : " << e.what() << endl; delete conn; } } - - void _check( const string& ns ){ + + void _check( const string& ns ) { if ( ns.size() == 0 || _seenNS.count( ns ) ) return; _seenNS.insert( ns ); checkVersions( ns ); } - - map _hosts; - mongo::mutex _mutex; + + typedef map HostMap; + HostMap _hosts; set _seenNS; // ----- - + static thread_specific_ptr _perThread; - static ClientConnections* get(){ + static ClientConnections* threadInstance() { ClientConnections* cc = _perThread.get(); - if ( ! cc ){ + if ( ! 
cc ) { cc = new ClientConnections(); _perThread.reset( cc ); } @@ -202,57 +198,58 @@ namespace mongo { : _addr( s.getConnString() ) , _ns( ns ) { _init(); } - + ShardConnection::ShardConnection( const string& addr , const string& ns ) : _addr( addr ) , _ns( ns ) { _init(); } - - void ShardConnection::_init(){ + + void ShardConnection::_init() { assert( _addr.size() ); - _conn = ClientConnections::get()->get( _addr , _ns ); + _conn = ClientConnections::threadInstance()->get( _addr , _ns ); _finishedInit = false; } - void ShardConnection::_finishInit(){ + void ShardConnection::_finishInit() { if ( _finishedInit ) return; _finishedInit = true; - - if ( _ns.size() ){ - _setVersion = checkShardVersion( *_conn , _ns ); + + if ( _ns.size() ) { + _setVersion = checkShardVersionCB( *_conn , _ns , false , 1 ); } else { _setVersion = false; } - + } - void ShardConnection::done(){ - if ( _conn ){ - ClientConnections::get()->done( _addr , _conn ); + void ShardConnection::done() { + if ( _conn ) { + ClientConnections::threadInstance()->done( _addr , _conn ); _conn = 0; _finishedInit = true; } } - void ShardConnection::kill(){ - if ( _conn ){ + void ShardConnection::kill() { + if ( _conn ) { + resetShardVersionCB( _conn ); delete _conn; _conn = 0; _finishedInit = true; } } - void ShardConnection::sync(){ - ClientConnections::get()->sync(); + void ShardConnection::sync() { + ClientConnections::threadInstance()->sync(); } - bool ShardConnection::runCommand( const string& db , const BSONObj& cmd , BSONObj& res ){ + bool ShardConnection::runCommand( const string& db , const BSONObj& cmd , BSONObj& res ) { assert( _conn ); bool ok = _conn->runCommand( db , cmd , res ); - if ( ! ok ){ - if ( res["code"].numberInt() == StaleConfigInContextCode ){ + if ( ! ok ) { + if ( res["code"].numberInt() == StaleConfigInContextCode ) { string big = res["errmsg"].String(); string ns,raw; massert( 13409 , (string)"can't parse ns from: " + big , StaleConfigException::parse( big , ns , raw ) ); @@ -263,12 +260,12 @@ namespace mongo { return ok; } - void ShardConnection::checkMyConnectionVersions( const string & ns ){ - ClientConnections::get()->checkVersions( ns ); + void ShardConnection::checkMyConnectionVersions( const string & ns ) { + ClientConnections::threadInstance()->checkVersions( ns ); } ShardConnection::~ShardConnection() { - if ( _conn ){ + if ( _conn ) { if ( ! _conn->isFailed() ) { /* see done() comments above for why we log this line */ log() << "~ScopedDBConnection: _conn != null" << endl; diff --git a/s/shardkey.cpp b/s/shardkey.cpp index e4deeec..84cdb4b 100644 --- a/s/shardkey.cpp +++ b/s/shardkey.cpp @@ -20,6 +20,7 @@ #include "chunk.h" #include "../db/jsobj.h" #include "../util/unittest.h" +#include "../util/timer.h" namespace mongo { @@ -30,12 +31,12 @@ namespace mongo { BSONObjBuilder max; BSONObjIterator it(p); - while (it.more()){ + while (it.more()) { BSONElement e (it.next()); min.appendMinKey(e.fieldName()); max.appendMaxKey(e.fieldName()); } - + gMin = min.obj(); gMax = max.obj(); } @@ -49,11 +50,11 @@ namespace mongo { } bool ShardKeyPattern::hasShardKey( const BSONObj& obj ) const { - /* this is written s.t. if obj has lots of fields, if the shard key fields are early, + /* this is written s.t. if obj has lots of fields, if the shard key fields are early, it is fast. so a bit more work to try to be semi-fast. 
*/ - for(set::const_iterator it = patternfields.begin(); it != patternfields.end(); ++it){ + for(set::const_iterator it = patternfields.begin(); it != patternfields.end(); ++it) { if(obj.getFieldDotted(it->c_str()).eoo()) return false; } @@ -63,28 +64,90 @@ namespace mongo { bool ShardKeyPattern::isPrefixOf( const BSONObj& otherPattern ) const { BSONObjIterator a( pattern ); BSONObjIterator b( otherPattern ); - - while ( a.more() && b.more() ){ + + while ( a.more() && b.more() ) { BSONElement x = a.next(); BSONElement y = b.next(); if ( strcmp( x.fieldName() , y.fieldName() ) ) return false; } - + return ! a.more(); } - + string ShardKeyPattern::toString() const { return pattern.toString(); } - - /* things to test for compound : + + BSONObj ShardKeyPattern::moveToFront(const BSONObj& obj) const { + vector keysToMove; + keysToMove.push_back("_id"); + BSONForEach(e, pattern) { + if (strchr(e.fieldName(), '.') == NULL) + keysToMove.push_back(e.fieldName()); + } + + if (keysToMove.size() == 1) { + return obj; + + } + else { + BufBuilder buf (obj.objsize()); + buf.appendNum(obj.objsize()); + + vector > copies; + pair toCopy ((const char*)NULL, 0); // C++ NULL isn't a pointer type yet + + BSONForEach(e, obj) { + bool moveToFront = false; + for (vector::const_iterator it(keysToMove.begin()), end(keysToMove.end()); it!=end; ++it) { + if (strcmp(e.fieldName(), *it) == 0) { + moveToFront = true; + break; + } + } + + if (moveToFront) { + buf.appendBuf(e.fieldName()-1, e.size()); + if (toCopy.first) { + copies.push_back(toCopy); + toCopy.first = NULL; + } + } + else { + if (!toCopy.first) { + toCopy.first = e.fieldName()-1; + toCopy.second = e.size(); + } + else { + toCopy.second += e.size(); + } + } + } + + for (vector >::const_iterator it(copies.begin()), end(copies.end()); it!=end; ++it) { + buf.appendBuf(it->first, it->second); + } + + if (toCopy.first) { + buf.appendBuf(toCopy.first, toCopy.second); + } + + buf.appendChar('\0'); + + BSONObj out (buf.buf(), true); + buf.decouple(); + return out; + } + } + + /* things to test for compound : \ middle (deprecating?) */ class ShardKeyUnitTest : public UnitTest { public: - - void testIsPrefixOf(){ + + void testIsPrefixOf() { { ShardKeyPattern k( BSON( "x" << 1 ) ); assert( ! k.isPrefixOf( BSON( "a" << 1 ) ) ); @@ -92,7 +155,7 @@ namespace mongo { assert( k.isPrefixOf( BSON( "x" << 1 << "a" << 1 ) ) ); assert( ! k.isPrefixOf( BSON( "a" << 1 << "x" << 1 ) ) ); } - { + { ShardKeyPattern k( BSON( "x" << 1 << "y" << 1 ) ); assert( ! k.isPrefixOf( BSON( "x" << 1 ) ) ); assert( ! 
k.isPrefixOf( BSON( "x" << 1 << "z" << 1 ) ) ); @@ -100,8 +163,8 @@ namespace mongo { assert( k.isPrefixOf( BSON( "x" << 1 << "y" << 1 << "z" << 1 ) ) ); } } - - void hasshardkeytest() { + + void hasshardkeytest() { BSONObj x = fromjson("{ zid : \"abcdefg\", num: 1.0, name: \"eliot\" }"); ShardKeyPattern k( BSON( "num" << 1 ) ); assert( k.hasShardKey(x) ); @@ -117,31 +180,68 @@ namespace mongo { } - void extractkeytest() { + void extractkeytest() { ShardKeyPattern k( fromjson("{a:1,'sub.b':-1,'sub.c':1}") ); BSONObj x = fromjson("{a:1,'sub.b':2,'sub.c':3}"); assert( k.extractKey( fromjson("{a:1,sub:{b:2,c:3}}") ).woEqual(x) ); assert( k.extractKey( fromjson("{sub:{b:2,c:3},a:1}") ).woEqual(x) ); } - void run(){ + void moveToFrontTest() { + ShardKeyPattern sk (BSON("a" << 1 << "b" << 1)); + + BSONObj ret; + + ret = sk.moveToFront(BSON("z" << 1 << "_id" << 1 << "y" << 1 << "a" << 1 << "x" << 1 << "b" << 1 << "w" << 1)); + assert(ret.woEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1))); + + ret = sk.moveToFront(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)); + assert(ret.woEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1))); + + ret = sk.moveToFront(BSON("z" << 1 << "y" << 1 << "a" << 1 << "b" << 1 << "Z" << 1 << "Y" << 1)); + assert(ret.woEqual(BSON("a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "Z" << 1 << "Y" << 1))); + + } + + void moveToFrontBenchmark(int numFields) { + BSONObjBuilder bb; + bb.append("_id", 1); + for (int i=0; i < numFields; i++) + bb.append(BSONObjBuilder::numStr(i), 1); + bb.append("key", 1); + BSONObj o = bb.obj(); + + ShardKeyPattern sk (BSON("key" << 1)); + + Timer t; + const int iterations = 100*1000; + for (int i=0; i< iterations; i++) { + sk.moveToFront(o); + } + + const double secs = t.micros() / 1000000.0; + const double ops_per_sec = iterations / secs; + + cout << "moveToFront (" << numFields << " fields) secs: " << secs << " ops_per_sec: " << ops_per_sec << endl; + } + void run() { extractkeytest(); ShardKeyPattern k( BSON( "key" << 1 ) ); - + BSONObj min = k.globalMin(); // cout << min.jsonString(TenGen) << endl; BSONObj max = k.globalMax(); - + BSONObj k1 = BSON( "key" << 5 ); assert( k.compare( min , max ) < 0 ); assert( k.compare( min , k1 ) < 0 ); assert( k.compare( max , min ) > 0 ); assert( k.compare( min , min ) == 0 ); - + hasshardkeytest(); assert( k.hasShardKey( k1 ) ); assert( ! k.hasShardKey( BSON( "key2" << 1 ) ) ); @@ -150,12 +250,20 @@ namespace mongo { BSONObj b = BSON( "key" << 999 ); assert( k.compare(a,b) < 0 ); - + testIsPrefixOf(); // add middle multitype tests + moveToFrontTest(); + + if (0) { // toggle to run benchmark + moveToFrontBenchmark(0); + moveToFrontBenchmark(10); + moveToFrontBenchmark(100); + } + log(1) << "shardKeyTest passed" << endl; } } shardKeyTest; - + } // namespace mongo diff --git a/s/shardkey.h b/s/shardkey.h index 976bbef..96301ff 100644 --- a/s/shardkey.h +++ b/s/shardkey.h @@ -21,7 +21,7 @@ #include "../client/dbclient.h" namespace mongo { - + class Chunk; /* A ShardKeyPattern is a pattern indicating what data to extract from the object to make the shard key from. @@ -30,10 +30,10 @@ namespace mongo { class ShardKeyPattern { public: ShardKeyPattern( BSONObj p = BSONObj() ); - + /** global min is the lowest possible value for this key - e.g. { num : MinKey } + e.g. 
{ num : MinKey } */ BSONObj globalMin() const { return gMin; } @@ -42,15 +42,15 @@ namespace mongo { */ BSONObj globalMax() const { return gMax; } - bool isGlobalMin( const BSONObj& k ) const{ + bool isGlobalMin( const BSONObj& k ) const { return k.woCompare( globalMin() ) == 0; } - bool isGlobalMax( const BSONObj& k ) const{ + bool isGlobalMax( const BSONObj& k ) const { return k.woCompare( globalMax() ) == 0; } - - bool isGlobal( const BSONObj& k ) const{ + + bool isGlobal( const BSONObj& k ) const { return isGlobalMin( k ) || isGlobalMax( k ); } @@ -60,22 +60,25 @@ namespace mongo { l > r positive */ int compare( const BSONObj& l , const BSONObj& r ) const; - + /** @return whether or not obj has all fields in this shard key pattern - e.g. - ShardKey({num:1}).hasShardKey({ name:"joe", num:3 }) is true + e.g. + ShardKey({num:1}).hasShardKey({ name:"joe", num:3 }) is true */ bool hasShardKey( const BSONObj& obj ) const; - + BSONObj key() const { return pattern; } string toString() const; BSONObj extractKey(const BSONObj& from) const; - + + bool partOfShardKey(const char* key ) const { + return pattern.hasField(key); + } bool partOfShardKey(const string& key ) const { - return patternfields.count( key ) > 0; + return pattern.hasField(key.c_str()); } /** @@ -83,7 +86,12 @@ namespace mongo { * true if 'this' is a prefix (not necessarily contained) of 'otherPattern'. */ bool isPrefixOf( const BSONObj& otherPattern ) const; - + + /** + * @return BSONObj with _id and shardkey at front. May return original object. + */ + BSONObj moveToFront(const BSONObj& obj) const; + private: BSONObj pattern; BSONObj gMin; @@ -93,10 +101,10 @@ namespace mongo { set patternfields; }; - inline BSONObj ShardKeyPattern::extractKey(const BSONObj& from) const { + inline BSONObj ShardKeyPattern::extractKey(const BSONObj& from) const { BSONObj k = from.extractFields(pattern); uassert(13334, "Shard Key must be less than 512 bytes", k.objsize() < 512); return k; } -} +} diff --git a/s/stats.cpp b/s/stats.cpp index bb7a975..460ada3 100644 --- a/s/stats.cpp +++ b/s/stats.cpp @@ -20,7 +20,7 @@ #include "stats.h" namespace mongo { - + OpCounters opsNonSharded; OpCounters opsSharded; diff --git a/s/stats.h b/s/stats.h index cbabf25..a7cc784 100644 --- a/s/stats.h +++ b/s/stats.h @@ -22,7 +22,7 @@ #include "../db/stats/counters.h" namespace mongo { - + extern OpCounters opsNonSharded; extern OpCounters opsSharded; diff --git a/s/strategy.cpp b/s/strategy.cpp index b3c8f5b..7c1fb0b 100644 --- a/s/strategy.cpp +++ b/s/strategy.cpp @@ -1,3 +1,5 @@ +// @file strategy.cpp + /* * Copyright (C) 2010 10gen Inc. * @@ -14,312 +16,64 @@ * along with this program. If not, see . */ -// stragegy.cpp - #include "pch.h" -#include "request.h" -#include "../util/background.h" + #include "../client/connpool.h" #include "../db/commands.h" -#include "server.h" #include "grid.h" +#include "request.h" +#include "server.h" +#include "writeback_listener.h" + +#include "strategy.h" namespace mongo { // ----- Strategy ------ - void Strategy::doWrite( int op , Request& r , const Shard& shard , bool checkVersion ){ + void Strategy::doWrite( int op , Request& r , const Shard& shard , bool checkVersion ) { ShardConnection conn( shard , r.getns() ); if ( ! 
checkVersion ) conn.donotCheckVersion(); - else if ( conn.setVersion() ){ + else if ( conn.setVersion() ) { conn.done(); throw StaleConfigException( r.getns() , "doWRite" , true ); } conn->say( r.m() ); conn.done(); } - - void Strategy::doQuery( Request& r , const Shard& shard ){ - try{ - ShardConnection dbcon( shard , r.getns() ); - DBClientBase &c = dbcon.conn(); - - Message response; - bool ok = c.call( r.m(), response); - { - QueryResult *qr = (QueryResult *) response.singleData(); - if ( qr->resultFlags() & ResultFlag_ShardConfigStale ){ - dbcon.done(); - throw StaleConfigException( r.getns() , "Strategy::doQuery" ); - } - } + void Strategy::doQuery( Request& r , const Shard& shard ) { - uassert( 10200 , "mongos: error calling db", ok); - r.reply( response , c.getServerAddress() ); - dbcon.done(); - } - catch ( AssertionException& e ) { - BSONObjBuilder err; - e.getInfo().append( err ); - BSONObj errObj = err.done(); - replyToQuery(ResultFlag_ErrSet, r.p() , r.m() , errObj); - } - } - - void Strategy::insert( const Shard& shard , const char * ns , const BSONObj& obj ){ - ShardConnection dbcon( shard , ns ); - if ( dbcon.setVersion() ){ - dbcon.done(); - throw StaleConfigException( ns , "for insert" ); - } - dbcon->insert( ns , obj ); - dbcon.done(); - } - - class WriteBackListener : public BackgroundJob { - protected: - string name() { return "WriteBackListener"; } - WriteBackListener( const string& addr ) : _addr( addr ){ - log() << "creating WriteBackListener for: " << addr << endl; - } - - void run(){ - OID lastID; - lastID.clear(); - int secsToSleep = 0; - while ( Shard::isMember( _addr ) ){ - - if ( lastID.isSet() ){ - scoped_lock lk( _seenWritebacksLock ); - _seenWritebacks.insert( lastID ); - lastID.clear(); - } - - try { - ScopedDbConnection conn( _addr ); - - BSONObj result; - - { - BSONObjBuilder cmd; - cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data - if ( ! conn->runCommand( "admin" , cmd.obj() , result ) ){ - log() << "writebacklisten command failed! 
" << result << endl; - conn.done(); - continue; - } - - } - - log(1) << "writebacklisten result: " << result << endl; - - BSONObj data = result.getObjectField( "data" ); - if ( data.getBoolField( "writeBack" ) ){ - string ns = data["ns"].valuestrsafe(); - { - BSONElement e = data["id"]; - if ( e.type() == jstOID ) - lastID = e.OID(); - } - int len; + ShardConnection dbcon( shard , r.getns() ); + DBClientBase &c = dbcon.conn(); - Message m( (void*)data["msg"].binData( len ) , false ); - massert( 10427 , "invalid writeback message" , m.header()->valid() ); + string actualServer; - DBConfigPtr db = grid.getDBConfig( ns ); - ShardChunkVersion needVersion( data["version"] ); - - log(1) << "writeback id: " << lastID << " needVersion : " << needVersion.toString() - << " mine : " << db->getChunkManager( ns )->getVersion().toString() << endl;// TODO change to log(3) - - if ( logLevel ) log(1) << debugString( m ) << endl; + Message response; + bool ok = c.call( r.m(), response, true , &actualServer ); + uassert( 10200 , "mongos: error calling db", ok ); - if ( needVersion.isSet() && needVersion <= db->getChunkManager( ns )->getVersion() ){ - // this means when the write went originally, the version was old - // if we're here, it means we've already updated the config, so don't need to do again - //db->getChunkManager( ns , true ); // SERVER-1349 - } - else { - db->getChunkManager( ns , true ); - } - - Request r( m , 0 ); - r.init(); - r.process(); - } - else { - log() << "unknown writeBack result: " << result << endl; - } - - conn.done(); - secsToSleep = 0; - continue; - } - catch ( std::exception e ){ - log() << "WriteBackListener exception : " << e.what() << endl; - - // It's possible this shard was removed - Shard::reloadShardInfo(); - } - catch ( ... ){ - log() << "WriteBackListener uncaught exception!" 
<< endl; - } - secsToSleep++; - sleepsecs(secsToSleep); - if ( secsToSleep > 10 ) - secsToSleep = 0; + { + QueryResult *qr = (QueryResult *) response.singleData(); + if ( qr->resultFlags() & ResultFlag_ShardConfigStale ) { + dbcon.done(); + throw StaleConfigException( r.getns() , "Strategy::doQuery" ); } - - log() << "WriteBackListener exiting : address no longer in cluster " << _addr; - } - - private: - string _addr; - static map _cache; - static mongo::mutex _cacheLock; - - static set _seenWritebacks; - static mongo::mutex _seenWritebacksLock; - - public: - static void init( DBClientBase& conn ){ - scoped_lock lk( _cacheLock ); - WriteBackListener*& l = _cache[conn.getServerAddress()]; - if ( l ) - return; - l = new WriteBackListener( conn.getServerAddress() ); - l->go(); - } - - - static void waitFor( const OID& oid ){ - Timer t; - for ( int i=0; i<5000; i++ ){ - { - scoped_lock lk( _seenWritebacksLock ); - if ( _seenWritebacks.count( oid ) ) - return; - } - sleepmillis( 10 ); - } - stringstream ss; - ss << "didn't get writeback for: " << oid << " after: " << t.millis() << " ms"; - uasserted( 13403 , ss.str() ); - } - }; - - void waitForWriteback( const OID& oid ){ - WriteBackListener::waitFor( oid ); - } - - map WriteBackListener::_cache; - mongo::mutex WriteBackListener::_cacheLock("WriteBackListener"); - - set WriteBackListener::_seenWritebacks; - mongo::mutex WriteBackListener::_seenWritebacksLock( "WriteBackListener::seen" ); - - struct ConnectionShardStatus { - - typedef unsigned long long S; - - ConnectionShardStatus() - : _mutex( "ConnectionShardStatus" ){ - } - - S getSequence( DBClientBase * conn , const string& ns ){ - scoped_lock lk( _mutex ); - return _map[conn][ns]; - } - - void setSequence( DBClientBase * conn , const string& ns , const S& s ){ - scoped_lock lk( _mutex ); - _map[conn][ns] = s; - } - - void reset( DBClientBase * conn ){ - scoped_lock lk( _mutex ); - _map.erase( conn ); - } - - map > _map; - mongo::mutex _mutex; - } connectionShardStatus; - - void resetShardVersion( DBClientBase * conn ){ - connectionShardStatus.reset( conn ); + r.reply( response , actualServer.size() ? actualServer : c.getServerAddress() ); + dbcon.done(); } - - /** - * @return true if had to do something - */ - bool checkShardVersion( DBClientBase& conn , const string& ns , bool authoritative , int tryNumber ){ - // TODO: cache, optimize, etc... - - WriteBackListener::init( conn ); - DBConfigPtr conf = grid.getDBConfig( ns ); - if ( ! conf ) - return false; - - unsigned long long officialSequenceNumber = 0; - - ChunkManagerPtr manager; - const bool isSharded = conf->isSharded( ns ); - if ( isSharded ){ - manager = conf->getChunkManager( ns , authoritative ); - officialSequenceNumber = manager->getSequenceNumber(); - } - - unsigned long long sequenceNumber = connectionShardStatus.getSequence(&conn,ns); - if ( sequenceNumber == officialSequenceNumber ){ - return false; - } - - - ShardChunkVersion version = 0; - if ( isSharded ){ - version = manager->getVersion( Shard::make( conn.getServerAddress() ) ); - } - - log(2) << " have to set shard version for conn: " << &conn << " ns:" << ns - << " my last seq: " << sequenceNumber << " current: " << officialSequenceNumber - << " version: " << version << " manager: " << manager.get() - << endl; - - BSONObj result; - if ( setShardVersion( conn , ns , version , authoritative , result ) ){ - // success! - log(1) << " setShardVersion success!" 
<< endl; - connectionShardStatus.setSequence( &conn , ns , officialSequenceNumber ); - return true; - } - - log(1) << " setShardVersion failed!\n" << result << endl; - - if ( result.getBoolField( "need_authoritative" ) ) - massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); - - if ( ! authoritative ){ - checkShardVersion( conn , ns , 1 , tryNumber + 1 ); - return true; - } - - if ( tryNumber < 4 ){ - log(1) << "going to retry checkShardVersion" << endl; - sleepmillis( 10 ); - checkShardVersion( conn , ns , 1 , tryNumber + 1 ); - return true; + void Strategy::insert( const Shard& shard , const char * ns , const BSONObj& obj ) { + ShardConnection dbcon( shard , ns ); + if ( dbcon.setVersion() ) { + dbcon.done(); + throw StaleConfigException( ns , "for insert" ); } - - log() << " setShardVersion failed: " << result << endl; - massert( 10429 , (string)"setShardVersion failed! " + result.jsonString() , 0 ); - return true; + dbcon->insert( ns , obj ); + dbcon.done(); } - - } diff --git a/s/strategy.h b/s/strategy.h index 2aa4434..10a5a3f 100644 --- a/s/strategy.h +++ b/s/strategy.h @@ -23,28 +23,25 @@ #include "request.h" namespace mongo { - + class Strategy { public: - Strategy(){} + Strategy() {} virtual ~Strategy() {} virtual void queryOp( Request& r ) = 0; virtual void getMore( Request& r ) = 0; virtual void writeOp( int op , Request& r ) = 0; - + protected: void doWrite( int op , Request& r , const Shard& shard , bool checkVersion = true ); void doQuery( Request& r , const Shard& shard ); - + void insert( const Shard& shard , const char * ns , const BSONObj& obj ); - + }; extern Strategy * SINGLE; extern Strategy * SHARDED; - bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version , bool authoritative , BSONObj& result ); - - void waitForWriteback( const OID& oid ); } diff --git a/s/strategy_shard.cpp b/s/strategy_shard.cpp index 144bf79..2eca0c6 100644 --- a/s/strategy_shard.cpp +++ b/s/strategy_shard.cpp @@ -21,6 +21,7 @@ #include "chunk.h" #include "cursors.h" #include "stats.h" +#include "client.h" #include "../client/connpool.h" #include "../db/commands.h" @@ -28,45 +29,45 @@ // error codes 8010-8040 namespace mongo { - + class ShardStrategy : public Strategy { - virtual void queryOp( Request& r ){ + virtual void queryOp( Request& r ) { QueryMessage q( r.d() ); log(3) << "shard query: " << q.ns << " " << q.query << endl; - + if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") ) throw UserException( 8010 , "something is wrong, shouldn't see a command here" ); ChunkManagerPtr info = r.getChunkManager(); assert( info ); - + Query query( q.query ); set shards; info->getShardsForQuery( shards , query.getFilter() ); - + set servers; - for ( set::iterator i = shards.begin(); i != shards.end(); i++ ){ - servers.insert( ServerAndQuery( i->getConnString() , BSONObj() ) ); + for ( set::iterator i = shards.begin(); i != shards.end(); i++ ) { + servers.insert( ServerAndQuery( i->getConnString() , BSONObj() ) ); } - - if ( logLevel > 4 ){ + + if ( logLevel > 4 ) { StringBuilder ss; ss << " shard query servers: " << servers.size() << '\n'; - for ( set::iterator i = servers.begin(); i!=servers.end(); i++ ){ + for ( set::iterator i = servers.begin(); i!=servers.end(); i++ ) { const ServerAndQuery& s = *i; ss << " " << s.toString() << '\n'; } - log() << ss.str(); + log() << ss.str() << endl; } ClusteredCursor * cursor = 0; - + BSONObj sort = query.getSort(); - - if ( sort.isEmpty() ){ + + if ( sort.isEmpty() ) { cursor = new 
SerialServerClusteredCursor( servers , q ); } else { @@ -80,85 +81,90 @@ namespace mongo { log(5) << " cursor type: " << cursor->type() << endl; shardedCursorTypes.hit( cursor->type() ); - - if ( query.isExplain() ){ + + if ( query.isExplain() ) { BSONObj explain = cursor->explain(); replyToQuery( 0 , r.p() , r.m() , explain ); delete( cursor ); return; } - } catch(...) { + } + catch(...) { delete cursor; throw; } ShardedClientCursorPtr cc (new ShardedClientCursor( q , cursor )); - if ( ! cc->sendNextBatch( r ) ){ + if ( ! cc->sendNextBatch( r ) ) { return; } log(6) << "storing cursor : " << cc->getId() << endl; cursorCache.store( cc ); } - - virtual void getMore( Request& r ){ + + virtual void getMore( Request& r ) { int ntoreturn = r.d().pullInt(); long long id = r.d().pullInt64(); log(6) << "want cursor : " << id << endl; ShardedClientCursorPtr cursor = cursorCache.get( id ); - if ( ! cursor ){ + if ( ! cursor ) { log(6) << "\t invalid cursor :(" << endl; replyToQuery( ResultFlag_CursorNotFound , r.p() , r.m() , 0 , 0 , 0 ); return; } - - if ( cursor->sendNextBatch( r , ntoreturn ) ){ + + if ( cursor->sendNextBatch( r , ntoreturn ) ) { // still more data cursor->accessed(); return; } - + // we've exhausted the cursor cursorCache.remove( id ); } - - void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ){ - - while ( d.moreJSObjs() ){ + + void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) { + + while ( d.moreJSObjs() ) { BSONObj o = d.nextJsObj(); - if ( ! manager->hasShardKey( o ) ){ + if ( ! manager->hasShardKey( o ) ) { bool bad = true; - if ( manager->getShardKey().partOfShardKey( "_id" ) ){ + if ( manager->getShardKey().partOfShardKey( "_id" ) ) { BSONObjBuilder b; b.appendOID( "_id" , 0 , true ); b.appendElements( o ); o = b.obj(); bad = ! manager->hasShardKey( o ); } - - if ( bad ){ + + if ( bad ) { log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl; throw UserException( 8011 , "tried to insert object without shard key" ); } - + } - + + // Many operations benefit from having the shard key early in the object + o = manager->getShardKey().moveToFront(o); + bool gotThrough = false; - for ( int i=0; i<10; i++ ){ + for ( int i=0; i<10; i++ ) { try { ChunkPtr c = manager->findChunk( o ); log(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , r.getns() , o ); - + r.gotInsert(); - c->splitIfShould( o.objsize() ); + if ( r.getClientInfo()->autoSplitOk() ) + c->splitIfShould( o.objsize() ); gotThrough = true; break; } - catch ( StaleConfigException& ){ + catch ( StaleConfigException& ) { log(1) << "retrying insert because of StaleConfigException: " << o << endl; r.reset(); manager = r.getChunkManager(); @@ -168,34 +174,38 @@ namespace mongo { assert( gotThrough ); - } + } } - void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ){ + void _update( Request& r , DbMessage& d, ChunkManagerPtr manager ) { int flags = d.pullInt(); - + BSONObj query = d.nextJsObj(); + uassert( 13506 , "$atomic not supported sharded" , query["$atomic"].eoo() ); uassert( 10201 , "invalid update" , d.moreJSObjs() ); BSONObj toupdate = d.nextJsObj(); BSONObj chunkFinder = query; - + bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; - uassert( 10202 , "can't mix multi and upsert and sharding" , ! 
( upsert && multi ) ); + if (upsert) { + uassert(8012, "can't upsert something without shard key", + (manager->hasShardKey(toupdate) || + (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))); - if ( upsert && !(manager->hasShardKey(toupdate) || - (toupdate.firstElement().fieldName()[0] == '$' && manager->hasShardKey(query)))) - { - throw UserException( 8012 , "can't upsert something without shard key" ); + BSONObj key = manager->getShardKey().extractKey(query); + BSONForEach(e, key) { + uassert(13465, "shard key in upsert query must be an exact match", getGtLtOp(e) == BSONObj::Equality); + } } bool save = false; - if ( ! manager->hasShardKey( query ) ){ - if ( multi ){ + if ( ! manager->hasShardKey( query ) ) { + if ( multi ) { } - else if ( strcmp( query.firstElement().fieldName() , "_id" ) || query.nFields() != 1 ){ + else if ( strcmp( query.firstElement().fieldName() , "_id" ) || query.nFields() != 1 ) { throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" ); } else { @@ -204,50 +214,59 @@ namespace mongo { } } - - if ( ! save ){ - if ( toupdate.firstElement().fieldName()[0] == '$' ){ + + if ( ! save ) { + if ( toupdate.firstElement().fieldName()[0] == '$' ) { BSONObjIterator ops(toupdate); - while(ops.more()){ + while(ops.more()) { BSONElement op(ops.next()); if (op.type() != Object) continue; BSONObjIterator fields(op.embeddedObject()); - while(fields.more()){ + while(fields.more()) { const string field = fields.next().fieldName(); - uassert(13123, "Can't modify shard key's value", ! manager->getShardKey().partOfShardKey(field)); + uassert(13123, + str::stream() << "Can't modify shard key's value field" << field + << " for collection: " << manager->getns(), + ! manager->getShardKey().partOfShardKey(field)); } } - } else if ( manager->hasShardKey( toupdate ) ){ - uassert( 8014, "change would move shards!", manager->getShardKey().compare( query , toupdate ) == 0 ); - } else { - uasserted(12376, "shard key must be in update object"); + } + else if ( manager->hasShardKey( toupdate ) ) { + uassert( 8014, + str::stream() << "cannot modify shard key for collection: " << manager->getns(), + manager->getShardKey().compare( query , toupdate ) == 0 ); + } + else { + uasserted(12376, + str::stream() << "shard key must be in update object for collection: " << manager->getns() ); } } - - if ( multi ){ + + if ( multi ) { set shards; manager->getShardsForQuery( shards , chunkFinder ); int * x = (int*)(r.d().afterNS()); x[0] |= UpdateOption_Broadcast; - for ( set::iterator i=shards.begin(); i!=shards.end(); i++){ + for ( set::iterator i=shards.begin(); i!=shards.end(); i++) { doWrite( dbUpdate , r , *i , false ); } } else { int left = 5; - while ( true ){ + while ( true ) { try { ChunkPtr c = manager->findChunk( chunkFinder ); doWrite( dbUpdate , r , c->getShard() ); - c->splitIfShould( d.msg().header()->dataLen() ); + if ( r.getClientInfo()->autoSplitOk() ) + c->splitIfShould( d.msg().header()->dataLen() ); break; } - catch ( StaleConfigException& e ){ + catch ( StaleConfigException& e ) { if ( left <= 0 ) throw e; left--; - log() << "update failed b/c of StaleConfigException, retrying " + log() << "update failed b/c of StaleConfigException, retrying " << " left:" << left << " ns: " << r.getns() << " query: " << query << endl; r.reset( false ); manager = r.getChunkManager(); @@ -256,74 +275,75 @@ namespace mongo { } } - - void _delete( Request& r , DbMessage& d, ChunkManagerPtr manager ){ + + void _delete( Request& r , 
DbMessage& d, ChunkManagerPtr manager ) { int flags = d.pullInt(); bool justOne = flags & 1; - + uassert( 10203 , "bad delete message" , d.moreJSObjs() ); BSONObj pattern = d.nextJsObj(); + uassert( 13505 , "$atomic not supported sharded" , pattern["$atomic"].eoo() ); set shards; int left = 5; - - while ( true ){ + + while ( true ) { try { manager->getShardsForQuery( shards , pattern ); log(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl; - if ( shards.size() == 1 ){ + if ( shards.size() == 1 ) { doWrite( dbDelete , r , *shards.begin() ); return; } break; } - catch ( StaleConfigException& e ){ + catch ( StaleConfigException& e ) { if ( left <= 0 ) throw e; left--; - log() << "delete failed b/c of StaleConfigException, retrying " + log() << "delete failed b/c of StaleConfigException, retrying " << " left:" << left << " ns: " << r.getns() << " patt: " << pattern << endl; r.reset( false ); shards.clear(); manager = r.getChunkManager(); } } - + if ( justOne && ! pattern.hasField( "_id" ) ) throw UserException( 8015 , "can only delete with a non-shard key pattern if can delete as many as we find" ); - - for ( set::iterator i=shards.begin(); i!=shards.end(); i++){ + + for ( set::iterator i=shards.begin(); i!=shards.end(); i++) { int * x = (int*)(r.d().afterNS()); x[0] |= RemoveOption_Broadcast; doWrite( dbDelete , r , *i , false ); } } - - virtual void writeOp( int op , Request& r ){ + + virtual void writeOp( int op , Request& r ) { const char *ns = r.getns(); log(3) << "write: " << ns << endl; - + DbMessage& d = r.d(); ChunkManagerPtr info = r.getChunkManager(); assert( info ); - - if ( op == dbInsert ){ + + if ( op == dbInsert ) { _insert( r , d , info ); } - else if ( op == dbUpdate ){ - _update( r , d , info ); + else if ( op == dbUpdate ) { + _update( r , d , info ); } - else if ( op == dbDelete ){ + else if ( op == dbDelete ) { _delete( r , d , info ); } else { log() << "sharding can't do write op: " << op << endl; throw UserException( 8016 , "can't do this write op on sharded collection" ); } - + } }; - + Strategy * SHARDED = new ShardStrategy(); } diff --git a/s/strategy_single.cpp b/s/strategy_single.cpp index b840c9b..b3b5502 100644 --- a/s/strategy_single.cpp +++ b/s/strategy_single.cpp @@ -18,117 +18,102 @@ #include "pch.h" #include "request.h" +#include "cursors.h" #include "../client/connpool.h" #include "../db/commands.h" namespace mongo { class SingleStrategy : public Strategy { - + public: - SingleStrategy(){ + SingleStrategy() { _commandsSafeToPass.insert( "$eval" ); _commandsSafeToPass.insert( "create" ); } private: - virtual void queryOp( Request& r ){ + virtual void queryOp( Request& r ) { QueryMessage q( r.d() ); - - bool lateAssert = false; - + log(3) << "single query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << endl; - - try { - if ( r.isCommand() ){ - - if ( handleSpecialNamespaces( r , q ) ) - return; - - int loops = 5; - while ( true ){ - BSONObjBuilder builder; - try { - bool ok = Command::runAgainstRegistered(q.ns, q.query, builder); - if ( ok ) { - BSONObj x = builder.done(); - replyToQuery(0, r.p(), r.m(), x); - return; - } - break; - } - catch ( StaleConfigException& e ){ - if ( loops <= 0 ) - throw e; - - loops--; - log() << "retrying command: " << q.query << endl; - ShardConnection::checkMyConnectionVersions( e.getns() ); - } - catch ( AssertionException& e ){ - e.getInfo().append( builder , "assertion" , "assertionCode" ); - builder.append( "errmsg" , "db assertion failure" ); - builder.append( "ok" 
, 0 ); + + if ( r.isCommand() ) { + + if ( handleSpecialNamespaces( r , q ) ) + return; + + int loops = 5; + while ( true ) { + BSONObjBuilder builder; + try { + bool ok = Command::runAgainstRegistered(q.ns, q.query, builder); + if ( ok ) { BSONObj x = builder.done(); replyToQuery(0, r.p(), r.m(), x); return; } + break; } - - string commandName = q.query.firstElement().fieldName(); + catch ( StaleConfigException& e ) { + if ( loops <= 0 ) + throw e; - uassert(13390, "unrecognized command: " + commandName, _commandsSafeToPass.count(commandName) != 0); - } - - lateAssert = true; - doQuery( r , r.primaryShard() ); - } - catch ( AssertionException& e ) { - if ( lateAssert ){ - log() << "lateAssert: " << e.getInfo() << endl; - assert( !lateAssert ); + loops--; + log() << "retrying command: " << q.query << endl; + ShardConnection::checkMyConnectionVersions( e.getns() ); + } + catch ( AssertionException& e ) { + e.getInfo().append( builder , "assertion" , "assertionCode" ); + builder.append( "errmsg" , "db assertion failure" ); + builder.append( "ok" , 0 ); + BSONObj x = builder.done(); + replyToQuery(0, r.p(), r.m(), x); + return; + } } - BSONObjBuilder err; - e.getInfo().append( err ); - BSONObj errObj = err.done(); - replyToQuery(ResultFlag_ErrSet, r.p() , r.m() , errObj); - return; + string commandName = q.query.firstElement().fieldName(); + + uassert(13390, "unrecognized command: " + commandName, _commandsSafeToPass.count(commandName) != 0); } + doQuery( r , r.primaryShard() ); } - - virtual void getMore( Request& r ){ + + virtual void getMore( Request& r ) { const char *ns = r.getns(); - - log(3) << "single getmore: " << ns << endl; - ShardConnection conn( r.primaryShard() , ns ); + LOG(3) << "single getmore: " << ns << endl; + + long long id = r.d().getInt64( 4 ); + + ShardConnection conn( cursorCache.getRef( id ) , ns ); Message response; bool ok = conn->callRead( r.m() , response); uassert( 10204 , "dbgrid: getmore: error calling db", ok); - r.reply( response , conn->getServerAddress() ); - + r.reply( response , "" /*conn->getServerAddress() */ ); + conn.done(); } - - void handleIndexWrite( int op , Request& r ){ - + + void handleIndexWrite( int op , Request& r ) { + DbMessage& d = r.d(); - if ( op == dbInsert ){ - while( d.moreJSObjs() ){ + if ( op == dbInsert ) { + while( d.moreJSObjs() ) { BSONObj o = d.nextJsObj(); const char * ns = o["ns"].valuestr(); - if ( r.getConfig()->isSharded( ns ) ){ + if ( r.getConfig()->isSharded( ns ) ) { BSONObj newIndexKey = o["key"].embeddedObjectUserCheck(); - - uassert( 10205 , (string)"can't use unique indexes with sharding ns:" + ns + - " key: " + o["key"].embeddedObjectUserCheck().toString() , + + uassert( 10205 , (string)"can't use unique indexes with sharding ns:" + ns + + " key: " + o["key"].embeddedObjectUserCheck().toString() , IndexDetails::isIdIndexPattern( newIndexKey ) || - ! o["unique"].trueValue() || + ! 
o["unique"].trueValue() || r.getConfig()->getChunkManager( ns )->getShardKey().isPrefixOf( newIndexKey ) ); ChunkManagerPtr cm = r.getConfig()->getChunkManager( ns ); @@ -145,10 +130,10 @@ namespace mongo { r.gotInsert(); } } - else if ( op == dbUpdate ){ + else if ( op == dbUpdate ) { throw UserException( 8050 , "can't update system.indexes" ); } - else if ( op == dbDelete ){ + else if ( op == dbDelete ) { // TODO throw UserException( 8051 , "can't delete indexes on sharded collection yet" ); } @@ -156,26 +141,26 @@ namespace mongo { log() << "handleIndexWrite invalid write op: " << op << endl; throw UserException( 8052 , "handleIndexWrite invalid write op" ); } - + } - virtual void writeOp( int op , Request& r ){ + virtual void writeOp( int op , Request& r ) { const char *ns = r.getns(); - - if ( r.isShardingEnabled() && - strstr( ns , ".system.indexes" ) == strchr( ns , '.' ) && - strchr( ns , '.' ) ) { + + if ( r.isShardingEnabled() && + strstr( ns , ".system.indexes" ) == strchr( ns , '.' ) && + strchr( ns , '.' ) ) { log(1) << " .system.indexes write for: " << ns << endl; handleIndexWrite( op , r ); return; } - + log(3) << "single write: " << ns << endl; doWrite( op , r , r.primaryShard() ); r.gotInsert(); // Won't handle mulit-insert correctly. Not worth parsing the request. } - bool handleSpecialNamespaces( Request& r , QueryMessage& q ){ + bool handleSpecialNamespaces( Request& r , QueryMessage& q ) { const char * ns = r.getns(); ns = strstr( r.getns() , ".$cmd.sys." ); if ( ! ns ) @@ -184,29 +169,32 @@ namespace mongo { BSONObjBuilder b; vector shards; - - if ( strcmp( ns , "inprog" ) == 0 ){ + + if ( strcmp( ns , "inprog" ) == 0 ) { Shard::getAllShards( shards ); - + BSONArrayBuilder arr( b.subarrayStart( "inprog" ) ); - for ( unsigned i=0; ifindOne( r.getns() , BSONObj() ); - if ( temp["inprog"].isABSONObj() ){ + if ( temp["inprog"].isABSONObj() ) { BSONObjIterator i( temp["inprog"].Obj() ); - while ( i.more() ){ + while ( i.more() ) { BSONObjBuilder x; - + BSONObjIterator j( i.next().Obj() ); - while( j.more() ){ + while( j.more() ) { BSONElement e = j.next(); - if ( strcmp( e.fieldName() , "opid" ) == 0 ){ + if ( str::equals( e.fieldName() , "opid" ) ) { stringstream ss; ss << shard.getName() << ':' << e.numberInt(); x.append( "opid" , ss.str() ); } + else if ( str::equals( e.fieldName() , "client" ) ) { + x.appendAs( e , "client_s" ); + } else { x.append( e ); } @@ -216,15 +204,15 @@ namespace mongo { } conn.done(); } - + arr.done(); } - else if ( strcmp( ns , "killop" ) == 0 ){ + else if ( strcmp( ns , "killop" ) == 0 ) { BSONElement e = q.query["op"]; - if ( strstr( r.getns() , "admin." ) != 0 ){ + if ( strstr( r.getns() , "admin." 
) != 0 ) { b.append( "err" , "unauthorized" ); } - else if ( e.type() != String ){ + else if ( e.type() != String ) { b.append( "err" , "bad op" ); b.append( e ); } @@ -232,7 +220,7 @@ namespace mongo { b.append( e ); string s = e.String(); string::size_type i = s.find( ':' ); - if ( i == string::npos ){ + if ( i == string::npos ) { b.append( "err" , "bad opid" ); } else { @@ -243,14 +231,14 @@ namespace mongo { log() << "want to kill op: " << e << endl; Shard s(shard); - + ScopedDbConnection conn( s ); conn->findOne( r.getns() , BSON( "op" << opid ) ); conn.done(); } } } - else if ( strcmp( ns , "unlock" ) == 0 ){ + else if ( strcmp( ns , "unlock" ) == 0 ) { b.append( "err" , "can't do unlock through mongos" ); } else { @@ -265,6 +253,6 @@ namespace mongo { set _commandsSafeToPass; }; - + Strategy * SINGLE = new SingleStrategy(); } diff --git a/s/util.h b/s/util.h index 7695eda..b3f63d8 100644 --- a/s/util.h +++ b/s/util.h @@ -36,29 +36,30 @@ namespace mongo { }; unsigned long long _combined; }; - + ShardChunkVersion( int major=0, int minor=0 ) - : _minor(minor),_major(major){ + : _minor(minor),_major(major) { } - + ShardChunkVersion( unsigned long long ll ) - : _combined( ll ){ + : _combined( ll ) { } - - ShardChunkVersion( const BSONElement& e ){ - if ( e.type() == Date || e.type() == Timestamp ){ + + ShardChunkVersion( const BSONElement& e ) { + if ( e.type() == Date || e.type() == Timestamp ) { _combined = e._numberLong(); } - else if ( e.eoo() ){ + else if ( e.eoo() ) { _combined = 0; } else { + _combined = 0; log() << "ShardChunkVersion can't handle type (" << (int)(e.type()) << ") " << e << endl; assert(0); } } - void inc( bool major ){ + void inc( bool major ) { if ( major ) incMajor(); else @@ -69,7 +70,7 @@ namespace mongo { _major++; _minor = 0; } - + void incMinor() { _minor++; } @@ -82,19 +83,19 @@ namespace mongo { return _combined > 0; } - string toString() const { - stringstream ss; - ss << _major << "|" << _minor; - return ss.str(); + string toString() const { + stringstream ss; + ss << _major << "|" << _minor; + return ss.str(); } int majorVersion() const { return _major; } int minorVersion() const { return _minor; } - + operator unsigned long long() const { return _combined; } - - ShardChunkVersion& operator=( const BSONElement& elem ){ - switch ( elem.type() ){ + + ShardChunkVersion& operator=( const BSONElement& elem ) { + switch ( elem.type() ) { case Timestamp: case NumberLong: case Date: @@ -109,39 +110,39 @@ namespace mongo { return *this; } }; - - inline ostream& operator<<( ostream &s , const ShardChunkVersion& v){ + + inline ostream& operator<<( ostream &s , const ShardChunkVersion& v) { s << v._major << "|" << v._minor; return s; } - /** - * your config info for a given shard/chunk is out of date + /** + * your config info for a given shard/chunk is out of date */ class StaleConfigException : public AssertionException { public: StaleConfigException( const string& ns , const string& raw , bool justConnection = false ) - : AssertionException( (string)"ns: " + ns + " " + raw , 9996 ) , + : AssertionException( (string)"ns: " + ns + " " + raw , 9996 ) , _justConnection(justConnection) , - _ns(ns){ + _ns(ns) { } - - virtual ~StaleConfigException() throw(){} - + + virtual ~StaleConfigException() throw() {} + virtual void appendPrefix( stringstream& ss ) const { ss << "StaleConfigException: "; } - + bool justConnection() const { return _justConnection; } - + string getns() const { return _ns; } - static bool parse( const string& big , string& ns , string& raw ){ + 
static bool parse( const string& big , string& ns , string& raw ) { string::size_type start = big.find( '[' ); if ( start == string::npos ) return false; string::size_type end = big.find( ']' ,start ); if ( end == string::npos ) return false; - + ns = big.substr( start + 1 , ( end - start ) - 1 ); raw = big.substr( end + 1 ); return true; @@ -151,6 +152,7 @@ namespace mongo { string _ns; }; - bool checkShardVersion( DBClientBase & conn , const string& ns , bool authoritative = false , int tryNumber = 1 ); - void resetShardVersion( DBClientBase * conn ); + extern boost::function4 checkShardVersionCB; + extern boost::function1 resetShardVersionCB; + } diff --git a/s/writeback_listener.cpp b/s/writeback_listener.cpp new file mode 100644 index 0000000..21d59d0 --- /dev/null +++ b/s/writeback_listener.cpp @@ -0,0 +1,254 @@ +// @file writeback_listener.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include "pch.h" + +#include "../util/timer.h" + +#include "config.h" +#include "grid.h" +#include "request.h" +#include "server.h" +#include "shard.h" +#include "util.h" +#include "client.h" + +#include "writeback_listener.h" + +namespace mongo { + + map WriteBackListener::_cache; + set WriteBackListener::_seenSets; + mongo::mutex WriteBackListener::_cacheLock("WriteBackListener"); + + map WriteBackListener::_seenWritebacks; + mongo::mutex WriteBackListener::_seenWritebacksLock("WriteBackListener::seen"); + + WriteBackListener::WriteBackListener( const string& addr ) : _addr( addr ) { + log() << "creating WriteBackListener for: " << addr << endl; + } + + /* static */ + void WriteBackListener::init( DBClientBase& conn ) { + + if ( conn.type() == ConnectionString::SYNC ) { + // don't want write back listeners for config servers + return; + } + + if ( conn.type() != ConnectionString::SET ) { + init( conn.getServerAddress() ); + return; + } + + + { + scoped_lock lk( _cacheLock ); + if ( _seenSets.count( conn.getServerAddress() ) ) + return; + } + + // we want to do writebacks on all rs nodes + string errmsg; + ConnectionString cs = ConnectionString::parse( conn.getServerAddress() , errmsg ); + uassert( 13641 , str::stream() << "can't parse host [" << conn.getServerAddress() << "]" , cs.isValid() ); + + vector hosts = cs.getServers(); + + for ( unsigned i=0; igo(); + } + + /* static */ + BSONObj WriteBackListener::waitFor( ConnectionId connectionId, const OID& oid ) { + Timer t; + for ( int i=0; i<5000; i++ ) { + { + scoped_lock lk( _seenWritebacksLock ); + WBStatus s = _seenWritebacks[connectionId]; + if ( oid < s.id ) { + // this means we're waiting for a GLE that already passed. 
+ // it should be impossible becauseonce we call GLE, no other + // writebacks should happen with that connection id + msgasserted( 13633 , str::stream() << "got writeback waitfor for older id " << + " oid: " << oid << " s.id: " << s.id << " connectionId: " << connectionId ); + } + else if ( oid == s.id ) { + return s.gle; + } + + } + sleepmillis( 10 ); + } + uasserted( 13403 , str::stream() << "didn't get writeback for: " << oid << " after: " << t.millis() << " ms" ); + throw 1; // never gets here + } + + void WriteBackListener::run() { + int secsToSleep = 0; + while ( ! inShutdown() ) { + + if ( ! Shard::isAShardNode( _addr ) ) { + log(1) << _addr << " is not a shard node" << endl; + sleepsecs( 60 ); + continue; + } + + try { + ScopedDbConnection conn( _addr ); + + BSONObj result; + + { + BSONObjBuilder cmd; + cmd.appendOID( "writebacklisten" , &serverID ); // Command will block for data + if ( ! conn->runCommand( "admin" , cmd.obj() , result ) ) { + log() << "writebacklisten command failed! " << result << endl; + conn.done(); + continue; + } + + } + + log(1) << "writebacklisten result: " << result << endl; + + BSONObj data = result.getObjectField( "data" ); + if ( data.getBoolField( "writeBack" ) ) { + string ns = data["ns"].valuestrsafe(); + + ConnectionId cid = 0; + OID wid; + if ( data["connectionId"].isNumber() && data["id"].type() == jstOID ) { + cid = data["connectionId"].numberLong(); + wid = data["id"].OID(); + } + else { + warning() << "mongos/mongod version mismatch (1.7.5 is the split)" << endl; + } + + int len; // not used, but needed for next call + Message m( (void*)data["msg"].binData( len ) , false ); + massert( 10427 , "invalid writeback message" , m.header()->valid() ); + + DBConfigPtr db = grid.getDBConfig( ns ); + ShardChunkVersion needVersion( data["version"] ); + + log(1) << "connectionId: " << cid << " writebackId: " << wid << " needVersion : " << needVersion.toString() + << " mine : " << db->getChunkManager( ns )->getVersion().toString() << endl;// TODO change to log(3) + + if ( logLevel ) log(1) << debugString( m ) << endl; + + if ( needVersion.isSet() && needVersion <= db->getChunkManager( ns )->getVersion() ) { + // this means when the write went originally, the version was old + // if we're here, it means we've already updated the config, so don't need to do again + //db->getChunkManager( ns , true ); // SERVER-1349 + } + else { + // we received a writeback object that was sent to a previous version of a shard + // the actual shard may not have the object the writeback operation is for + // we need to reload the chunk manager and get the new shard versions + db->getChunkManager( ns , true ); + } + + // do request and then call getLastError + // we have to call getLastError so we can return the right fields to the user if they decide to call getLastError + + BSONObj gle; + try { + + Request r( m , 0 ); + r.init(); + + ClientInfo * ci = r.getClientInfo(); + ci->noAutoSplit(); + + r.process(); + + ci->newRequest(); // this so we flip prev and cur shards + + BSONObjBuilder b; + if ( ! 
ci->getLastError( BSON( "getLastError" << 1 ) , b , true ) ) { + b.appendBool( "commandFailed" , true ); + } + gle = b.obj(); + + ci->clearSinceLastGetError(); + } + catch ( DBException& e ) { + error() << "error processing writeback: " << e << endl; + BSONObjBuilder b; + b.append( "err" , e.toString() ); + e.getInfo().append( b ); + gle = b.obj(); + } + + { + scoped_lock lk( _seenWritebacksLock ); + WBStatus& s = _seenWritebacks[cid]; + s.id = wid; + s.gle = gle; + } + } + else if ( result["noop"].trueValue() ) { + // no-op + } + else { + log() << "unknown writeBack result: " << result << endl; + } + + conn.done(); + secsToSleep = 0; + continue; + } + catch ( std::exception e ) { + + if ( inShutdown() ) { + // we're shutting down, so just clean up + return; + } + + log() << "WriteBackListener exception : " << e.what() << endl; + + // It's possible this shard was removed + Shard::reloadShardInfo(); + } + catch ( ... ) { + log() << "WriteBackListener uncaught exception!" << endl; + } + secsToSleep++; + sleepsecs(secsToSleep); + if ( secsToSleep > 10 ) + secsToSleep = 0; + } + + log() << "WriteBackListener exiting : address no longer in cluster " << _addr; + + } + +} // namespace mongo diff --git a/s/writeback_listener.h b/s/writeback_listener.h new file mode 100644 index 0000000..7335999 --- /dev/null +++ b/s/writeback_listener.h @@ -0,0 +1,67 @@ +// @file writeback_listener.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#include "../pch.h" + +#include "../client/connpool.h" +#include "../util/background.h" +#include "../db/client.h" + +namespace mongo { + + /* + * The writeback listener takes back write attempts that were made against a wrong shard. + * (Wrong here in the sense that the target chunk moved before this mongos had a chance to + * learn so.) It is responsible for reapplying these writes to the correct shard. + * + * Currently, there is one listener per shard. 
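[Editorial aside, not part of the upstream patch] The doc comment above summarizes the listener's contract; the calling side is easiest to see as a small sketch. The snippet below is hypothetical glue code, assuming only the init()/waitFor() signatures declared in this header and the per-connection WBStatus bookkeeping shown in writeback_listener.cpp earlier in this diff; the connection object, connection id, and writeback id names are illustrative.

    // Hypothetical caller-side sketch (editor's illustration, not upstream code).
    // A mongos request handler could ensure a listener exists for the shard it
    // just wrote to, then block until the replayed write's getLastError result
    // is available for the client's connection:
    void exampleAwaitWriteback( DBClientBase& shardConn,
                                ConnectionId clientConnectionId,
                                const OID& writebackId ) {
        WriteBackListener::init( shardConn );   // no-op if a listener already exists for this address
        // waitFor() polls the listener's per-connection status map and either
        // returns the stored GLE document or asserts once its timeout elapses.
        BSONObj gle = WriteBackListener::waitFor( clientConnectionId, writebackId );
        log() << "writeback applied, gle: " << gle << endl;
    }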
+ */ + class WriteBackListener : public BackgroundJob { + public: + static void init( DBClientBase& conn ); + static void init( const string& host ); + + static BSONObj waitFor( ConnectionId connectionId, const OID& oid ); + + protected: + WriteBackListener( const string& addr ); + + string name() const { return "WriteBackListener"; } + void run(); + + private: + string _addr; + + static mongo::mutex _cacheLock; // protects _cache + static map _cache; // server to listener + static set _seenSets; // cache of set urls we've seen - note this is ever expanding for order, case, changes + + struct WBStatus { + OID id; + BSONObj gle; + }; + + static mongo::mutex _seenWritebacksLock; // protects _seenWritbacks + static map _seenWritebacks; // connectionId -> last write back GLE + }; + + void waitForWriteback( const OID& oid ); + +} // namespace mongo diff --git a/scripting/bench.cpp b/scripting/bench.cpp new file mode 100644 index 0000000..2723985 --- /dev/null +++ b/scripting/bench.cpp @@ -0,0 +1,173 @@ +/** @file bench.cpp */ + +/* + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "pch.h" +#include "engine.h" +#include "../util/md5.hpp" +#include "../util/version.h" +#include "../client/dbclient.h" +#include "../client/connpool.h" +// --------------------------------- +// ---- benchmarking system -------- +// --------------------------------- + + +namespace mongo { + + + /** + * benchQuery( "foo" , { _id : 1 } ) + */ + BSONObj benchQuery( const BSONObj& args ) { + return BSONObj(); + } + + struct BenchRunConfig { + BenchRunConfig() { + host = "localhost"; + db = "test"; + + parallel = 1; + seconds = 1; + + active = true; + threadsReady = 0; + error = false; + } + + string host; + string db; + + unsigned parallel; + int seconds; + + BSONObj ops; + + bool active; // true at starts, gets set to false when should stop + AtomicUInt threadsReady; + + bool error; + }; + + static void benchThread( BenchRunConfig * config ) { + ScopedDbConnection conn( config->host ); + config->threadsReady++; + + while ( config->active ) { + BSONObjIterator i( config->ops ); + while ( i.more() ) { + BSONElement e = i.next(); + string ns = e["ns"].String(); + string op = e["op"].String(); + + if ( op == "findOne" ) { + conn->findOne( ns , e["query"].Obj() ); + } + else { + log() << "don't understand op: " << op << endl; + config->error = true; + return; + } + + } + } + + conn.done(); + } + + /** + * benchRun( { ops : [] , host : XXX , db : XXXX , parallel : 5 , seconds : 5 } + */ + BSONObj benchRun( const BSONObj& argsFake ) { + assert( argsFake.firstElement().isABSONObj() ); + BSONObj args = argsFake.firstElement().Obj(); + + // setup + + BenchRunConfig config; + + if ( args["host"].type() == String ) + config.host = args["host"].String(); + if ( args["db"].type() == String ) + config.db = args["db"].String(); + + if ( args["parallel"].isNumber() ) + config.parallel = args["parallel"].numberInt(); + if ( args["seconds"].isNumber() ) + 
config.seconds = args["seconds"].numberInt(); + + + config.ops = args["ops"].Obj(); + + // execute + + ScopedDbConnection conn( config.host ); + + // start threads + vector all; + for ( unsigned i=0; isimpleCommand( "admin" , &before , "serverStatus" ); + + sleepsecs( config.seconds ); + + BSONObj after; + conn->simpleCommand( "admin" , &after , "serverStatus" ); + + conn.done(); + + config.active = false; + + for ( unsigned i=0; ijoin(); + + if ( config.error ) + return BSON( "err" << 1 ); + + // compute actual ops/sec + + before = before["opcounters"].Obj(); + after = after["opcounters"].Obj(); + + BSONObjBuilder buf; + buf.append( "note" , "values per second" ); + + { + BSONObjIterator i( after ); + while ( i.more() ) { + BSONElement e = i.next(); + double x = e.number(); + x = x - before[e.fieldName()].number(); + buf.append( e.fieldName() , x / config.seconds ); + } + } + BSONObj zoo = buf.obj(); + return BSON( "" << zoo ); + } + + void installBenchmarkSystem( Scope& scope ) { + scope.injectNative( "benchRun" , benchRun ); + } + +} diff --git a/scripting/engine.cpp b/scripting/engine.cpp index da108c6..60e56ae 100644 --- a/scripting/engine.cpp +++ b/scripting/engine.cpp @@ -23,27 +23,27 @@ namespace mongo { long long Scope::_lastVersion = 1; - + int Scope::_numScopes = 0; - Scope::Scope() : _localDBName("") , _loadedVersion(0){ + Scope::Scope() : _localDBName("") , _loadedVersion(0) { _numScopes++; } - Scope::~Scope(){ + Scope::~Scope() { _numScopes--; } ScriptEngine::ScriptEngine() : _scopeInitCallback() { } - ScriptEngine::~ScriptEngine(){ + ScriptEngine::~ScriptEngine() { } - void Scope::append( BSONObjBuilder & builder , const char * fieldName , const char * scopeName ){ + void Scope::append( BSONObjBuilder & builder , const char * fieldName , const char * scopeName ) { int t = type( scopeName ); - - switch ( t ){ + + switch ( t ) { case Object: builder.append( fieldName , getObject( scopeName ) ); break; @@ -74,7 +74,7 @@ namespace mongo { builder.appendDate( fieldName , Date_t((unsigned long long)getNumber( scopeName )) ); break; case Code: - builder.appendCode( fieldName , getString( scopeName ).c_str() ); + builder.appendCode( fieldName , getString( scopeName ) ); break; default: stringstream temp; @@ -82,20 +82,20 @@ namespace mongo { temp << t; uassert( 10206 , temp.str() , 0 ); } - + } - int Scope::invoke( const char* code , const BSONObj& args, int timeoutMs ){ + int Scope::invoke( const char* code , const BSONObj& args, int timeoutMs ) { ScriptingFunction func = createFunction( code ); uassert( 10207 , "compile failed" , func ); return invoke( func , args, timeoutMs ); } - - bool Scope::execFile( const string& filename , bool printResult , bool reportError , bool assertOnError, int timeoutMs ){ - + + bool Scope::execFile( const string& filename , bool printResult , bool reportError , bool assertOnError, int timeoutMs ) { + path p( filename ); - if ( ! exists( p ) ){ + if ( ! 
exists( p ) ) { log() << "file [" << filename << "] doesn't exist" << endl; if ( assertOnError ) assert( 0 ); @@ -103,10 +103,10 @@ namespace mongo { } // iterate directories and recurse using all *.js files in the directory - if ( is_directory( p ) ){ + if ( is_directory( p ) ) { directory_iterator end; bool empty = true; - for (directory_iterator it (p); it != end; it++){ + for (directory_iterator it (p); it != end; it++) { empty = false; path sub (*it); if (!endsWith(sub.string().c_str(), ".js")) @@ -115,7 +115,7 @@ namespace mongo { return false; } - if (empty){ + if (empty) { log() << "directory [" << filename << "] doesn't have any *.js files" << endl; if ( assertOnError ) assert( 0 ); @@ -124,83 +124,97 @@ namespace mongo { return true; } - + File f; f.open( filename.c_str() , true ); - fileofs L = f.len(); - assert( L <= 0x7ffffffe ); - char * data = (char*)malloc( (size_t) L+1 ); + unsigned L; + { + fileofs fo = f.len(); + assert( fo <= 0x7ffffffe ); + L = (unsigned) fo; + } + boost::scoped_array data (new char[L+1]); data[L] = 0; - f.read( 0 , data , (size_t) L ); - - return exec( data , filename , printResult , reportError , assertOnError, timeoutMs ); + f.read( 0 , data.get() , L ); + + int offset = 0; + if (data[0] == '#' && data[1] == '!') { + const char* newline = strchr(data.get(), '\n'); + if (! newline) + return true; // file of just shebang treated same as empty file + offset = newline - data.get(); + } + + StringData code (data.get() + offset, L - offset); + + return exec( code , filename , printResult , reportError , assertOnError, timeoutMs ); } - void Scope::storedFuncMod(){ + void Scope::storedFuncMod() { _lastVersion++; } - + void Scope::validateObjectIdString( const string &str ) { massert( 10448 , "invalid object id: length", str.size() == 24 ); - for ( string::size_type i=0; i= '0' && c <= '9' ) || - ( c >= 'a' && c <= 'f' ) || - ( c >= 'A' && c <= 'F' ) ){ + ( c >= 'a' && c <= 'f' ) || + ( c >= 'A' && c <= 'F' ) ) { continue; } massert( 10430 , "invalid object id: not hex", false ); - } + } } - void Scope::loadStored( bool ignoreNotConnected ){ - if ( _localDBName.size() == 0 ){ + void Scope::loadStored( bool ignoreNotConnected ) { + if ( _localDBName.size() == 0 ) { if ( ignoreNotConnected ) return; uassert( 10208 , "need to have locallyConnected already" , _localDBName.size() ); } if ( _loadedVersion == _lastVersion ) return; - + _loadedVersion = _lastVersion; string coll = _localDBName + ".system.js"; - + static DBClientBase * db = createDirectClient(); - auto_ptr c = db->query( coll , Query() ); + auto_ptr c = db->query( coll , Query(), 0, 0, NULL, QueryOption_SlaveOk, 0 ); assert( c.get() ); - + set thisTime; - - while ( c->more() ){ + + while ( c->more() ) { BSONObj o = c->next(); BSONElement n = o["_id"]; BSONElement v = o["value"]; - + uassert( 10209 , "name has to be a string" , n.type() == String ); uassert( 10210 , "value has to be set" , v.type() != EOO ); - + setElement( n.valuestr() , v ); thisTime.insert( n.valuestr() ); _storedNames.insert( n.valuestr() ); - + } // --- remove things from scope that were removed list toremove; - for ( set::iterator i=_storedNames.begin(); i!=_storedNames.end(); i++ ){ + for ( set::iterator i=_storedNames.begin(); i!=_storedNames.end(); i++ ) { string n = *i; if ( thisTime.count( n ) == 0 ) toremove.push_back( n ); } - - for ( list::iterator i=toremove.begin(); i!=toremove.end(); i++ ){ + + for ( list::iterator i=toremove.begin(); i!=toremove.end(); i++ ) { string n = *i; _storedNames.erase( n ); execSetup( 
(string)"delete " + n , "clean up scope" ); @@ -208,11 +222,11 @@ namespace mongo { } - ScriptingFunction Scope::createFunction( const char * code ){ - if ( code[0] == '/' && code [1] == '*' ){ + ScriptingFunction Scope::createFunction( const char * code ) { + if ( code[0] == '/' && code [1] == '*' ) { code += 2; - while ( code[0] && code[1] ){ - if ( code[0] == '*' && code[1] == '/' ){ + while ( code[0] && code[1] ) { + if ( code[0] == '*' && code[1] == '/' ) { code += 2; break; } @@ -226,7 +240,7 @@ namespace mongo { _cachedFunctions[code] = f; return f; } - + typedef map< string , list > PoolToScopes; class ScopeCache { @@ -235,21 +249,21 @@ namespace mongo { ScopeCache() : _mutex("ScopeCache") { _magic = 17; } - - ~ScopeCache(){ + + ~ScopeCache() { assert( _magic == 17 ); _magic = 1; if ( inShutdown() ) return; - + clear(); } - void done( const string& pool , Scope * s ){ + void done( const string& pool , Scope * s ) { scoped_lock lk( _mutex ); list & l = _pools[pool]; - if ( l.size() > 10 ){ + if ( l.size() > 10 ) { delete s; } else { @@ -257,31 +271,31 @@ namespace mongo { s->reset(); } } - - Scope * get( const string& pool ){ + + Scope * get( const string& pool ) { scoped_lock lk( _mutex ); list & l = _pools[pool]; if ( l.size() == 0 ) return 0; - + Scope * s = l.back(); l.pop_back(); s->reset(); return s; } - - void clear(){ + + void clear() { set seen; - - for ( PoolToScopes::iterator i=_pools.begin() ; i != _pools.end(); i++ ){ - for ( list::iterator j=i->second.begin(); j != i->second.end(); j++ ){ + + for ( PoolToScopes::iterator i=_pools.begin() ; i != _pools.end(); i++ ) { + for ( list::iterator j=i->second.begin(); j != i->second.end(); j++ ) { Scope * s = *j; assert( ! seen.count( s ) ); delete s; seen.insert( s ); } } - + _pools.clear(); } @@ -295,12 +309,12 @@ namespace mongo { class PooledScope : public Scope { public: - PooledScope( const string pool , Scope * real ) : _pool( pool ) , _real( real ){ + PooledScope( const string pool , Scope * real ) : _pool( pool ) , _real( real ) { _real->loadStored( true ); }; - virtual ~PooledScope(){ + virtual ~PooledScope() { ScopeCache * sc = scopeCache.get(); - if ( sc ){ + if ( sc ) { sc->done( _pool , _real ); _real = 0; } @@ -312,88 +326,92 @@ namespace mongo { _real = 0; } } - - void reset(){ + + void reset() { _real->reset(); } - void init( BSONObj * data ){ + void init( const BSONObj * data ) { _real->init( data ); } - - void localConnect( const char * dbName ){ + + void localConnect( const char * dbName ) { _real->localConnect( dbName ); } - void externalSetup(){ + void externalSetup() { _real->externalSetup(); } - - double getNumber( const char *field ){ + + double getNumber( const char *field ) { return _real->getNumber( field ); } - string getString( const char *field ){ + string getString( const char *field ) { return _real->getString( field ); } - bool getBoolean( const char *field ){ + bool getBoolean( const char *field ) { return _real->getBoolean( field ); } - BSONObj getObject( const char *field ){ + BSONObj getObject( const char *field ) { return _real->getObject( field ); } - int type( const char *field ){ + int type( const char *field ) { return _real->type( field ); } - void setElement( const char *field , const BSONElement& val ){ + void setElement( const char *field , const BSONElement& val ) { _real->setElement( field , val ); } - void setNumber( const char *field , double val ){ + void setNumber( const char *field , double val ) { _real->setNumber( field , val ); } - void setString( const char *field , 
const char * val ){ + void setString( const char *field , const char * val ) { _real->setString( field , val ); } - void setObject( const char *field , const BSONObj& obj , bool readOnly=true ){ + void setObject( const char *field , const BSONObj& obj , bool readOnly=true ) { _real->setObject( field , obj , readOnly ); } - void setBoolean( const char *field , bool val ){ + void setBoolean( const char *field , bool val ) { _real->setBoolean( field , val ); } - void setThis( const BSONObj * obj ){ + void setThis( const BSONObj * obj ) { _real->setThis( obj ); } - - ScriptingFunction createFunction( const char * code ){ + + ScriptingFunction createFunction( const char * code ) { return _real->createFunction( code ); } - ScriptingFunction _createFunction( const char * code ){ + ScriptingFunction _createFunction( const char * code ) { return _real->createFunction( code ); } + void rename( const char * from , const char * to ) { + _real->rename( from , to ); + } + /** * @return 0 on success */ - int invoke( ScriptingFunction func , const BSONObj& args, int timeoutMs , bool ignoreReturn ){ + int invoke( ScriptingFunction func , const BSONObj& args, int timeoutMs , bool ignoreReturn ) { return _real->invoke( func , args , timeoutMs , ignoreReturn ); } - string getError(){ + string getError() { return _real->getError(); } - - bool exec( const string& code , const string& name , bool printResult , bool reportError , bool assertOnError, int timeoutMs = 0 ){ + + bool exec( const StringData& code , const string& name , bool printResult , bool reportError , bool assertOnError, int timeoutMs = 0 ) { return _real->exec( code , name , printResult , reportError , assertOnError , timeoutMs ); } - bool execFile( const string& filename , bool printResult , bool reportError , bool assertOnError, int timeoutMs = 0 ){ + bool execFile( const string& filename , bool printResult , bool reportError , bool assertOnError, int timeoutMs = 0 ) { return _real->execFile( filename , printResult , reportError , assertOnError , timeoutMs ); } - - void injectNative( const char *field, NativeFunction func ){ + + void injectNative( const char *field, NativeFunction func ) { _real->injectNative( field , func ); } - - void gc(){ + + void gc() { _real->gc(); } @@ -402,40 +420,57 @@ namespace mongo { Scope * _real; }; - auto_ptr ScriptEngine::getPooledScope( const string& pool ){ - if ( ! scopeCache.get() ){ + auto_ptr ScriptEngine::getPooledScope( const string& pool ) { + if ( ! scopeCache.get() ) { scopeCache.reset( new ScopeCache() ); } Scope * s = scopeCache->get( pool ); - if ( ! s ){ + if ( ! s ) { s = newScope(); } - + auto_ptr p; p.reset( new PooledScope( pool , s ) ); return p; } - - void ScriptEngine::threadDone(){ + + void ScriptEngine::threadDone() { ScopeCache * sc = scopeCache.get(); - if ( sc ){ + if ( sc ) { sc->clear(); } } - + void ( *ScriptEngine::_connectCallback )( DBClientWithCommands & ) = 0; - - ScriptEngine * globalScriptEngine; + const char * ( *ScriptEngine::_checkInterruptCallback )() = 0; + unsigned ( *ScriptEngine::_getInterruptSpecCallback )() = 0; + + ScriptEngine * globalScriptEngine = 0; - bool hasJSReturn( const string& code ){ + bool hasJSReturn( const string& code ) { size_t x = code.find( "return" ); if ( x == string::npos ) return false; - return + return ( x == 0 || ! isalpha( code[x-1] ) ) && ! 
isalpha( code[x+6] ); } + + const char * jsSkipWhiteSpace( const char * raw ) { + while ( raw[0] ) { + while (isspace(*raw)) { + raw++; + } + + if ( raw[0] != '/' || raw[1] != '/' ) + break; + + while ( raw[0] && raw[0] != '\n' ) + raw++; + } + return raw; + } } - + diff --git a/scripting/engine.h b/scripting/engine.h index e097401..62afd77 100644 --- a/scripting/engine.h +++ b/scripting/engine.h @@ -20,10 +20,23 @@ #include "../pch.h" #include "../db/jsobj.h" -extern const char * jsconcatcode; // TODO: change name to mongoJSCode - namespace mongo { + struct JSFile { + const char* name; + const StringData& source; + }; + + namespace JSFiles { + extern const JSFile collection; + extern const JSFile db; + extern const JSFile mongo; + extern const JSFile mr; + extern const JSFile query; + extern const JSFile servers; + extern const JSFile utils; + } + typedef unsigned long long ScriptingFunction; typedef BSONObj (*NativeFunction) ( const BSONObj &args ); @@ -31,20 +44,35 @@ namespace mongo { public: Scope(); virtual ~Scope(); - + virtual void reset() = 0; - virtual void init( BSONObj * data ) = 0; - void init( const char * data ){ + virtual void init( const BSONObj * data ) = 0; + void init( const char * data ) { BSONObj o( data , 0 ); init( &o ); } - + virtual void localConnect( const char * dbName ) = 0; virtual void externalSetup() = 0; - + + class NoDBAccess { + Scope * _s; + public: + NoDBAccess( Scope * s ) { + _s = s; + } + ~NoDBAccess() { + _s->rename( "____db____" , "db" ); + } + }; + NoDBAccess disableDBAccess( const char * why ) { + rename( "db" , "____db____" ); + return NoDBAccess( this ); + } + virtual double getNumber( const char *field ) = 0; - virtual int getNumberInt( const char *field ){ return (int)getNumber( field ); } - virtual long long getNumberLongLong( const char *field ){ return (long long)getNumber( field ); } + virtual int getNumberInt( const char *field ) { return (int)getNumber( field ); } + virtual long long getNumberLongLong( const char *field ) { return (long long)getNumber( field ); } virtual string getString( const char *field ) = 0; virtual bool getBoolean( const char *field ) = 0; virtual BSONObj getObject( const char *field ) = 0; @@ -59,52 +87,68 @@ namespace mongo { virtual void setObject( const char *field , const BSONObj& obj , bool readOnly=true ) = 0; virtual void setBoolean( const char *field , bool val ) = 0; virtual void setThis( const BSONObj * obj ) = 0; - + virtual ScriptingFunction createFunction( const char * code ); - + + virtual void rename( const char * from , const char * to ) = 0; /** * @return 0 on success */ virtual int invoke( ScriptingFunction func , const BSONObj& args, int timeoutMs = 0 , bool ignoreReturn = false ) = 0; - void invokeSafe( ScriptingFunction func , const BSONObj& args, int timeoutMs = 0 ){ + void invokeSafe( ScriptingFunction func , const BSONObj& args, int timeoutMs = 0 ) { int res = invoke( func , args , timeoutMs ); if ( res == 0 ) return; throw UserException( 9004 , (string)"invoke failed: " + getError() ); } virtual string getError() = 0; - + int invoke( const char* code , const BSONObj& args, int timeoutMs = 0 ); - void invokeSafe( const char* code , const BSONObj& args, int timeoutMs = 0 ){ + void invokeSafe( const char* code , const BSONObj& args, int timeoutMs = 0 ) { if ( invoke( code , args , timeoutMs ) == 0 ) return; throw UserException( 9005 , (string)"invoke failed: " + getError() ); } - virtual bool exec( const string& code , const string& name , bool printResult , bool reportError , bool 
assertOnError, int timeoutMs = 0 ) = 0; - virtual void execSetup( const string& code , const string& name = "setup" ){ + virtual bool exec( const StringData& code , const string& name , bool printResult , bool reportError , bool assertOnError, int timeoutMs = 0 ) = 0; + virtual void execSetup( const StringData& code , const string& name = "setup" ) { exec( code , name , false , true , true , 0 ); } + + void execSetup( const JSFile& file) { + execSetup(file.source, file.name); + } + + void execCoreFiles() { + // keeping same order as in SConstruct + execSetup(JSFiles::utils); + execSetup(JSFiles::db); + execSetup(JSFiles::mongo); + execSetup(JSFiles::mr); + execSetup(JSFiles::query); + execSetup(JSFiles::collection); + } + virtual bool execFile( const string& filename , bool printResult , bool reportError , bool assertOnError, int timeoutMs = 0 ); - + virtual void injectNative( const char *field, NativeFunction func ) = 0; virtual void gc() = 0; void loadStored( bool ignoreNotConnected = false ); - + /** if any changes are made to .system.js, call this right now its just global - slightly inefficient, but a lot simpler */ static void storedFuncMod(); - - static int getNumScopes(){ + + static int getNumScopes() { return _numScopes; } - + static void validateObjectIdString( const string &str ); - + protected: virtual ScriptingFunction _createFunction( const char * code ) = 0; @@ -117,16 +161,16 @@ namespace mongo { static int _numScopes; }; - + void installGlobalUtils( Scope& scope ); class DBClientWithCommands; - + class ScriptEngine : boost::noncopyable { public: ScriptEngine(); virtual ~ScriptEngine(); - + virtual Scope * newScope() { Scope *s = createScope(); if ( s && _scopeInitCallback ) @@ -134,35 +178,63 @@ namespace mongo { installGlobalUtils( *s ); return s; } - + virtual void runTest() = 0; - + virtual bool utf8Ok() const = 0; static void setup(); auto_ptr getPooledScope( const string& pool ); void threadDone(); - + struct Unlocker { virtual ~Unlocker() {} }; virtual auto_ptr newThreadUnlocker() { return auto_ptr< Unlocker >( new Unlocker ); } - + void setScopeInitCallback( void ( *func )( Scope & ) ) { _scopeInitCallback = func; } static void setConnectCallback( void ( *func )( DBClientWithCommands& ) ) { _connectCallback = func; } static void runConnectCallback( DBClientWithCommands &c ) { if ( _connectCallback ) _connectCallback( c ); } - + + // engine implementation may either respond to interrupt events or + // poll for interrupts + + // the interrupt functions must not wait indefinitely on a lock + virtual void interrupt( unsigned opSpec ) {} + virtual void interruptAll() {} + + static void setGetInterruptSpecCallback( unsigned ( *func )() ) { _getInterruptSpecCallback = func; } + static bool haveGetInterruptSpecCallback() { return _getInterruptSpecCallback; } + static unsigned getInterruptSpec() { + massert( 13474, "no _getInterruptSpecCallback", _getInterruptSpecCallback ); + return _getInterruptSpecCallback(); + } + + static void setCheckInterruptCallback( const char * ( *func )() ) { _checkInterruptCallback = func; } + static bool haveCheckInterruptCallback() { return _checkInterruptCallback; } + static const char * checkInterrupt() { + return _checkInterruptCallback ? 
_checkInterruptCallback() : ""; + } + static bool interrupted() { + const char *r = checkInterrupt(); + return r && r[ 0 ]; + } + protected: virtual Scope * createScope() = 0; - + private: void ( *_scopeInitCallback )( Scope & ); static void ( *_connectCallback )( DBClientWithCommands & ); + static const char * ( *_checkInterruptCallback )(); + static unsigned ( *_getInterruptSpecCallback )(); }; bool hasJSReturn( const string& s ); + const char * jsSkipWhiteSpace( const char * raw ); + extern ScriptEngine * globalScriptEngine; } diff --git a/scripting/engine_java.cpp b/scripting/engine_java.cpp index dacf532..fc8945f 100644 --- a/scripting/engine_java.cpp +++ b/scripting/engine_java.cpp @@ -55,19 +55,19 @@ namespace mongo { no tss cleanup on windows for boost lib? we don't care for now esp on windows only - the boost source says: - - This function's sole purpose is to cause a link error in cases where - automatic tss cleanup is not implemented by Boost.Threads as a - reminder that user code is responsible for calling the necessary - functions at the appropriate times (and for implementing an a - tss_cleanup_implemented() function to eliminate the linker's - missing symbol error). - - If Boost.Threads later implements automatic tss cleanup in cases - where it currently doesn't (which is the plan), the duplicate - symbol error will warn the user that their custom solution is no - longer needed and can be removed. + the boost source says: + + This function's sole purpose is to cause a link error in cases where + automatic tss cleanup is not implemented by Boost.Threads as a + reminder that user code is responsible for calling the necessary + functions at the appropriate times (and for implementing an a + tss_cleanup_implemented() function to eliminate the linker's + missing symbol error). + + If Boost.Threads later implements automatic tss cleanup in cases + where it currently doesn't (which is the plan), the duplicate + symbol error will warn the user that their custom solution is no + longer needed and can be removed. */ extern "C" void tss_cleanup_implemented(void) { //out() << "tss_cleanup_implemented called" << endl; @@ -185,10 +185,10 @@ namespace mongo { if ( res ) { log() << "using classpath: " << q << endl; log() - << " res : " << (unsigned) res << " " - << "_jvm : " << _jvm << " " - << "_env : " << _mainEnv << " " - << endl; + << " res : " << (unsigned) res << " " + << "_jvm : " << _jvm << " " + << "_env : " << _mainEnv << " " + << endl; problem() << "Couldn't create JVM res:" << (int) res << " terminating" << endl; log() << "(try --nojni if you do not require that functionality)" << endl; exit(22); @@ -397,12 +397,11 @@ namespace mongo { return retStr; } - BSONObj JavaJSImpl::scopeGetObject( jlong id , const char * field ) - { + BSONObj JavaJSImpl::scopeGetObject( jlong id , const char * field ) { jstring s1 = _getEnv()->NewStringUTF( field ); int guess = _getEnv()->CallStaticIntMethod( _dbhook , _scopeGuessObjectSize , id , _getEnv()->NewStringUTF( field ) ); _getEnv()->DeleteLocalRef( s1 ); - + if ( guess == 0 ) return BSONObj(); @@ -471,12 +470,12 @@ namespace mongo { return env; } - Scope * JavaJSImpl::createScope(){ + Scope * JavaJSImpl::createScope() { return new JavaScope(); } - void ScriptEngine::setup(){ - if ( ! JavaJS ){ + void ScriptEngine::setup() { + if ( ! JavaJS ) { JavaJS = new JavaJSImpl(); globalScriptEngine = JavaJS; } @@ -564,40 +563,40 @@ namespace mongo { if ( ! 
possible.size() ) { possible.push_back( "./" ); possible.push_back( "../" ); - + log(2) << "dbExecCommand: " << dbExecCommand << endl; - + string dbDir = dbExecCommand; #ifdef WIN32 - if ( dbDir.find( "\\" ) != string::npos ){ + if ( dbDir.find( "\\" ) != string::npos ) { dbDir = dbDir.substr( 0 , dbDir.find_last_of( "\\" ) ); } else { dbDir = "."; } #else - if ( dbDir.find( "/" ) != string::npos ){ + if ( dbDir.find( "/" ) != string::npos ) { dbDir = dbDir.substr( 0 , dbDir.find_last_of( "/" ) ); } else { bool found = false; - - if ( getenv( "PATH" ) ){ + + if ( getenv( "PATH" ) ) { string s = getenv( "PATH" ); s += ":"; pcrecpp::StringPiece input( s ); string dir; pcrecpp::RE re("(.*?):"); - while ( re.Consume( &input, &dir ) ){ + while ( re.Consume( &input, &dir ) ) { string test = dir + "/" + dbExecCommand; - if ( boost::filesystem::exists( test ) ){ - while ( boost::filesystem::symbolic_link_exists( test ) ){ + if ( boost::filesystem::exists( test ) ) { + while ( boost::filesystem::symbolic_link_exists( test ) ) { char tmp[2048]; int len = readlink( test.c_str() , tmp , 2048 ); tmp[len] = 0; log(5) << " symlink " << test << " -->> " << tmp << endl; test = tmp; - + dir = test.substr( 0 , test.rfind( "/" ) ); } dbDir = dir; @@ -606,12 +605,12 @@ namespace mongo { } } } - + if ( ! found ) dbDir = "."; } #endif - + log(2) << "dbDir [" << dbDir << "]" << endl; possible.push_back( ( dbDir + "/../lib/mongo/" )); possible.push_back( ( dbDir + "/../lib64/mongo/" )); @@ -624,7 +623,7 @@ namespace mongo { for ( list::iterator i = possible.begin() ; i != possible.end(); i++ ) { const string temp = *i; const string jarDir = ((string)temp) + "jars/"; - + log(5) << "possible jarDir [" << jarDir << "]" << endl; path p(jarDir ); @@ -641,7 +640,7 @@ namespace mongo { }; - + // --- JNIEXPORT void JNICALL java_native_say(JNIEnv * env , jclass, jobject outBuffer ) { @@ -692,7 +691,7 @@ namespace mongo { jlong func1 = JavaJS.functionCreate( "foo = 5.6; bar = \"eliot\"; abc = { foo : 517 }; " ); - jassert( ! JavaJS.invoke( scope , func1 ) ); + jassert( ! 
JavaJS.invoke( scope , func1 ) ); if ( debug ) out() << "func3 start" << endl; @@ -757,7 +756,7 @@ namespace mongo { assert( 12 == JavaJS.scopeGetNumber( scope , "return" ) ); } - + #endif } // namespace mongo diff --git a/scripting/engine_java.h b/scripting/engine_java.h index 5c6bc3b..b8245ba 100644 --- a/scripting/engine_java.h +++ b/scripting/engine_java.h @@ -163,10 +163,10 @@ namespace mongo { JavaJS->scopeInit( s , o ); } - void localConnect( const char * dbName ){ + void localConnect( const char * dbName ) { setString("$client", dbName ); } - + double getNumber(const char *field) { return JavaJS->scopeGetNumber(s,field); } @@ -183,7 +183,7 @@ namespace mongo { return JavaJS->scopeGetType(s,field); } - void setThis( const BSONObj * obj ){ + void setThis( const BSONObj * obj ) { JavaJS->scopeSetThis( s , obj ); } @@ -200,17 +200,17 @@ namespace mongo { void setBoolean(const char *field, bool val ) { JavaJS->scopeSetBoolean(s,field,val); } - - ScriptingFunction createFunction( const char * code ){ + + ScriptingFunction createFunction( const char * code ) { return JavaJS->functionCreate( code ); } - int invoke( ScriptingFunction function , const BSONObj& args ){ + int invoke( ScriptingFunction function , const BSONObj& args ) { setObject( "args" , args , true ); return JavaJS->invoke(s,function); } - - string getError(){ + + string getError() { return getString( "error" ); } diff --git a/scripting/engine_none.cpp b/scripting/engine_none.cpp index 2320d0e..d13dbec 100644 --- a/scripting/engine_none.cpp +++ b/scripting/engine_none.cpp @@ -18,7 +18,7 @@ #include "engine.h" namespace mongo { - void ScriptEngine::setup(){ + void ScriptEngine::setup() { // noop } } diff --git a/scripting/engine_spidermonkey.cpp b/scripting/engine_spidermonkey.cpp index c8f2eca..73ebfaa 100644 --- a/scripting/engine_spidermonkey.cpp +++ b/scripting/engine_spidermonkey.cpp @@ -26,31 +26,34 @@ #endif #define smuassert( cx , msg , val ) \ - if ( ! ( val ) ){ \ - JS_ReportError( cx , msg ); \ - return JS_FALSE; \ - } + if ( ! ( val ) ){ \ + JS_ReportError( cx , msg ); \ + return JS_FALSE; \ + } #define CHECKNEWOBJECT(xx,ctx,w) \ if ( ! 
xx ){ \ massert(13072,(string)"JS_NewObject failed: " + w ,xx); \ } +#define CHECKJSALLOC( newthing ) \ + massert( 13615 , "JS allocation failed, either memory leak or using too much memory" , newthing ) + namespace mongo { - + class InvalidUTF8Exception : public UserException { public: - InvalidUTF8Exception() : UserException( 9006 , "invalid utf8" ){ + InvalidUTF8Exception() : UserException( 9006 , "invalid utf8" ) { } }; - string trim( string s ){ + string trim( string s ) { while ( s.size() && isspace( s[0] ) ) s = s.substr( 1 ); - + while ( s.size() && isspace( s[s.size()-1] ) ) s = s.substr( 0 , s.size() - 1 ); - + return s; } @@ -65,18 +68,18 @@ namespace mongo { class BSONHolder { public: - BSONHolder( BSONObj obj ){ + BSONHolder( BSONObj obj ) { _obj = obj.getOwned(); _inResolve = false; _modified = false; _magic = 17; } - - ~BSONHolder(){ + + ~BSONHolder() { _magic = 18; } - void check(){ + void check() { uassert( 10212 , "holder magic value is wrong" , _magic == 17 && _obj.isValid() ); } @@ -89,24 +92,24 @@ namespace mongo { set _removed; bool _modified; }; - + class BSONFieldIterator { public: - BSONFieldIterator( BSONHolder * holder ){ + BSONFieldIterator( BSONHolder * holder ) { set added; BSONObjIterator it( holder->_obj ); - while ( it.more() ){ + while ( it.more() ) { BSONElement e = it.next(); if ( holder->_removed.count( e.fieldName() ) ) continue; _names.push_back( e.fieldName() ); added.insert( e.fieldName() ); } - - for ( list::iterator i = holder->_extra.begin(); i != holder->_extra.end(); i++ ){ + + for ( list::iterator i = holder->_extra.begin(); i != holder->_extra.end(); i++ ) { if ( ! added.count( *i ) ) _names.push_back( *i ); } @@ -114,11 +117,11 @@ namespace mongo { _it = _names.begin(); } - bool more(){ + bool more() { return _it != _names.end(); } - string next(){ + string next() { string s = *_it; _it++; return s; @@ -129,24 +132,24 @@ namespace mongo { list::iterator _it; }; - BSONFieldIterator * BSONHolder::it(){ + BSONFieldIterator * BSONHolder::it() { return new BSONFieldIterator( this ); } class TraverseStack { public: - TraverseStack(){ + TraverseStack() { _o = 0; _parent = 0; } - TraverseStack( JSObject * o , const TraverseStack * parent ){ + TraverseStack( JSObject * o , const TraverseStack * parent ) { _o = o; _parent = parent; } TraverseStack dive( JSObject * o ) const { - if ( o ){ + if ( o ) { uassert( 13076 , (string)"recursive toObject" , ! has( o ) ); } return TraverseStack( o , this ); @@ -155,7 +158,7 @@ namespace mongo { int depth() const { int d = 0; const TraverseStack * s = _parent; - while ( s ){ + while ( s ) { s = s->_parent; d++; } @@ -165,12 +168,12 @@ namespace mongo { bool isTop() const { return _parent == 0; } - + bool has( JSObject * o ) const { if ( ! o ) return false; const TraverseStack * s = this; - while ( s ){ + while ( s ) { if ( s->_o == o ) return true; s = s->_parent; @@ -184,11 +187,11 @@ namespace mongo { class Convertor : boost::noncopyable { public: - Convertor( JSContext * cx ){ + Convertor( JSContext * cx ) { _context = cx; } - string toString( JSString * so ){ + string toString( JSString * so ) { jschar * s = JS_GetStringChars( so ); size_t srclen = JS_GetStringLength( so ); if( srclen == 0 ) @@ -202,7 +205,16 @@ namespace mongo { // units, but experiments suggest 8bit units expected. We allocate // enough memory that either will work. 
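            // Descriptive note on the error path that follows: when JS_EncodeCharacters
            // fails, the raw 16-bit code units are collected into a StringBuilder and
            // reported via uassert 13498 ("Not proper UTF-16"), so a bad string surfaces
            // as a catchable error carrying the offending values instead of a hard assert.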
- assert( JS_EncodeCharacters( _context , s , srclen , dst , &len) ); + if ( !JS_EncodeCharacters( _context , s , srclen , dst , &len) ) { + StringBuilder temp; + temp << "Not proper UTF-16: "; + for ( size_t i=0; i 0 ) + temp << ","; + temp << s[i]; + } + uasserted( 13498 , temp.str() ); + } string ss( dst , len ); free( dst ); @@ -212,7 +224,7 @@ namespace mongo { return ss; } - string toString( jsval v ){ + string toString( jsval v ) { return toString( JS_ValueToString( _context , v ) ); } @@ -221,27 +233,28 @@ namespace mongo { boost::uint64_t val; if ( hasProperty( o, "top" ) ) { val = - ( (boost::uint64_t)(boost::uint32_t)getNumber( o , "top" ) << 32 ) + - ( boost::uint32_t)( getNumber( o , "bottom" ) ); - } else { + ( (boost::uint64_t)(boost::uint32_t)getNumber( o , "top" ) << 32 ) + + ( boost::uint32_t)( getNumber( o , "bottom" ) ); + } + else { val = (boost::uint64_t)(boost::int64_t) getNumber( o, "floatApprox" ); } return val; } - - double toNumber( jsval v ){ + + double toNumber( jsval v ) { double d; uassert( 10214 , "not a number" , JS_ValueToNumber( _context , v , &d ) ); return d; } - bool toBoolean( jsval v ){ + bool toBoolean( jsval v ) { JSBool b; assert( JS_ValueToBoolean( _context, v , &b ) ); return b; } - OID toOID( jsval v ){ + OID toOID( jsval v ) { JSContext * cx = _context; assert( JSVAL_IS_OID( v ) ); @@ -251,21 +264,21 @@ namespace mongo { return oid; } - BSONObj toObject( JSObject * o , const TraverseStack& stack=TraverseStack() ){ + BSONObj toObject( JSObject * o , const TraverseStack& stack=TraverseStack() ) { if ( ! o ) return BSONObj(); - if ( JS_InstanceOf( _context , o , &bson_ro_class , 0 ) ){ + if ( JS_InstanceOf( _context , o , &bson_ro_class , 0 ) ) { BSONHolder * holder = GETHOLDER( _context , o ); assert( holder ); return holder->_obj.getOwned(); } BSONObj orig; - if ( JS_InstanceOf( _context , o , &bson_class , 0 ) ){ + if ( JS_InstanceOf( _context , o , &bson_class , 0 ) ) { BSONHolder * holder = GETHOLDER(_context,o); assert( holder ); - if ( ! holder->_modified ){ + if ( ! holder->_modified ) { return holder->_obj; } orig = holder->_obj; @@ -273,26 +286,26 @@ namespace mongo { BSONObjBuilder b; - if ( ! appendSpecialDBObject( this , b , "value" , OBJECT_TO_JSVAL( o ) , o ) ){ + if ( ! appendSpecialDBObject( this , b , "value" , OBJECT_TO_JSVAL( o ) , o ) ) { - if ( stack.isTop() ){ + if ( stack.isTop() ) { jsval theid = getProperty( o , "_id" ); - if ( ! JSVAL_IS_VOID( theid ) ){ + if ( ! 
JSVAL_IS_VOID( theid ) ) { append( b , "_id" , theid , EOO , stack.dive( o ) ); } } - + JSIdArray * properties = JS_Enumerate( _context , o ); assert( properties ); - - for ( jsint i=0; ilength; i++ ){ + + for ( jsint i=0; ilength; i++ ) { jsid id = properties->vector[i]; jsval nameval; assert( JS_IdToValue( _context ,id , &nameval ) ); string name = toString( nameval ); if ( stack.isTop() && name == "_id" ) continue; - + append( b , name , getProperty( o , name.c_str() ) , orig[name].type() , stack.dive( o ) ); } @@ -302,34 +315,34 @@ namespace mongo { return b.obj(); } - BSONObj toObject( jsval v ){ + BSONObj toObject( jsval v ) { if ( JSVAL_IS_NULL( v ) || - JSVAL_IS_VOID( v ) ) + JSVAL_IS_VOID( v ) ) return BSONObj(); uassert( 10215 , "not an object" , JSVAL_IS_OBJECT( v ) ); return toObject( JSVAL_TO_OBJECT( v ) ); } - string getFunctionCode( JSFunction * func ){ + string getFunctionCode( JSFunction * func ) { return toString( JS_DecompileFunction( _context , func , 0 ) ); } - string getFunctionCode( jsval v ){ + string getFunctionCode( jsval v ) { uassert( 10216 , "not a function" , JS_TypeOfValue( _context , v ) == JSTYPE_FUNCTION ); return getFunctionCode( JS_ValueToFunction( _context , v ) ); } - - void appendRegex( BSONObjBuilder& b , const string& name , string s ){ + + void appendRegex( BSONObjBuilder& b , const string& name , string s ) { assert( s[0] == '/' ); s = s.substr(1); string::size_type end = s.rfind( '/' ); - b.appendRegex( name , s.substr( 0 , end ).c_str() , s.substr( end + 1 ).c_str() ); + b.appendRegex( name , s.substr( 0 , end ) , s.substr( end + 1 ) ); } - void append( BSONObjBuilder& b , string name , jsval val , BSONType oldType = EOO , const TraverseStack& stack=TraverseStack() ){ + void append( BSONObjBuilder& b , string name , jsval val , BSONType oldType = EOO , const TraverseStack& stack=TraverseStack() ) { //cout << "name: " << name << "\t" << typeString( val ) << " oldType: " << oldType << endl; - switch ( JS_TypeOfValue( _context , val ) ){ + switch ( JS_TypeOfValue( _context , val ) ) { case JSTYPE_VOID: b.appendUndefined( name ); break; case JSTYPE_NULL: b.appendNull( name ); break; @@ -347,12 +360,12 @@ namespace mongo { case JSTYPE_OBJECT: { JSObject * o = JSVAL_TO_OBJECT( val ); - if ( ! o || o == JSVAL_NULL ){ + if ( ! o || o == JSVAL_NULL ) { b.appendNull( name ); } - else if ( ! appendSpecialDBObject( this , b , name , val , o ) ){ + else if ( ! 
appendSpecialDBObject( this , b , name , val , o ) ) { BSONObj sub = toObject( o , stack ); - if ( JS_IsArrayObject( _context , o ) ){ + if ( JS_IsArrayObject( _context , o ) ) { b.appendArray( name , sub ); } else { @@ -364,11 +377,11 @@ namespace mongo { case JSTYPE_FUNCTION: { string s = toString(val); - if ( s[0] == '/' ){ + if ( s[0] == '/' ) { appendRegex( b , name , s ); } else { - b.appendCode( name , getFunctionCode( val ).c_str() ); + b.appendCode( name , getFunctionCode( val ) ); } break; } @@ -379,25 +392,28 @@ namespace mongo { // ---------- to spider monkey --------- - bool hasFunctionIdentifier( const string& code ){ + bool hasFunctionIdentifier( const string& code ) { if ( code.size() < 9 || code.find( "function" ) != 0 ) return false; return code[8] == ' ' || code[8] == '('; } - bool isSimpleStatement( const string& code ){ + bool isSimpleStatement( const string& code ) { if ( hasJSReturn( code ) ) return false; - if ( code.find( ";" ) != string::npos && - code.find( ";" ) != code.rfind( ";" ) ) + if ( code.find( ';' ) != string::npos && + code.find( ';' ) != code.rfind( ';' ) ) + return false; + + if ( code.find( '\n') != string::npos ) return false; if ( code.find( "for(" ) != string::npos || - code.find( "for (" ) != string::npos || - code.find( "while (" ) != string::npos || - code.find( "while(" ) != string::npos ) + code.find( "for (" ) != string::npos || + code.find( "while (" ) != string::npos || + code.find( "while(" ) != string::npos ) return false; return true; @@ -405,20 +421,20 @@ namespace mongo { void addRoot( JSFunction * f , const char * name ); - JSFunction * compileFunction( const char * code, JSObject * assoc = 0 ){ + JSFunction * compileFunction( const char * code, JSObject * assoc = 0 ) { const char * gcName = "unknown"; JSFunction * f = _compileFunction( code , assoc , gcName ); //addRoot( f , gcName ); return f; } - JSFunction * _compileFunction( const char * raw , JSObject * assoc , const char *& gcName ){ + JSFunction * _compileFunction( const char * raw , JSObject * assoc , const char *& gcName ) { if ( ! assoc ) assoc = JS_GetGlobalObject( _context ); - while (isspace(*raw)) { - raw++; - } + raw = jsSkipWhiteSpace( raw ); + + //cout << "RAW\n---\n" << raw << "\n---" << endl; stringstream fname; fname << "cf_"; @@ -426,34 +442,34 @@ namespace mongo { fname << "_" << fnum++ << "_"; - if ( ! hasFunctionIdentifier( raw ) ){ + if ( ! 
hasFunctionIdentifier( raw ) ) { string s = raw; - if ( isSimpleStatement( s ) ){ + if ( isSimpleStatement( s ) ) { s = "return " + s; } gcName = "cf anon"; fname << "anon"; - return JS_CompileFunction( _context , assoc , fname.str().c_str() , 0 , 0 , s.c_str() , strlen( s.c_str() ) , "nofile_a" , 0 ); + return JS_CompileFunction( _context , assoc , fname.str().c_str() , 0 , 0 , s.c_str() , s.size() , "nofile_a" , 0 ); } string code = raw; - + size_t start = code.find( '(' ); assert( start != string::npos ); - + fname << "_f_" << trim( code.substr( 9 , start - 9 ) ); code = code.substr( start + 1 ); size_t end = code.find( ')' ); assert( end != string::npos ); - + string paramString = trim( code.substr( 0 , end ) ); code = code.substr( end + 1 ); - + vector params; - while ( paramString.size() ){ + while ( paramString.size() ) { size_t c = paramString.find( ',' ); - if ( c == string::npos ){ + if ( c == string::npos ) { params.push_back( paramString ); break; } @@ -461,14 +477,14 @@ namespace mongo { paramString = trim( paramString.substr( c + 1 ) ); paramString = trim( paramString ); } - + boost::scoped_array paramArray (new const char*[params.size()]); for ( size_t i=0; ifirstElement().fieldName() ){ + if ( ref == obj->firstElement().fieldName() ) { JSObject * o = JS_NewObject( _context , &dbref_class , NULL, NULL); CHECKNEWOBJECT(o,_context,"toJSObject1"); assert( JS_SetPrivate( _context , o , (void*)(new BSONHolder( obj->getOwned() ) ) ) ); @@ -527,7 +543,7 @@ namespace mongo { return o; } - jsval toval( const BSONObj* obj , bool readOnly=false ){ + jsval toval( const BSONObj* obj , bool readOnly=false ) { JSObject * o = toJSObject( obj , readOnly ); return OBJECT_TO_JSVAL( o ); } @@ -535,7 +551,7 @@ namespace mongo { void makeLongObj( long long n, JSObject * o ) { boost::uint64_t val = (boost::uint64_t)n; CHECKNEWOBJECT(o,_context,"NumberLong1"); - setProperty( o , "floatApprox" , toval( (double)(boost::int64_t)( val ) ) ); + setProperty( o , "floatApprox" , toval( (double)(boost::int64_t)( val ) ) ); if ( (boost::int64_t)val != (boost::int64_t)(double)(boost::int64_t)( val ) ) { // using 2 doubles here instead of a single double because certain double // bit patterns represent undefined values and sm might trash them @@ -543,16 +559,16 @@ namespace mongo { setProperty( o , "bottom" , toval( (double)(boost::uint32_t)( val & 0x00000000ffffffff ) ) ); } } - + jsval toval( long long n ) { JSObject * o = JS_NewObject( _context , &numberlong_class , 0 , 0 ); makeLongObj( n, o ); return OBJECT_TO_JSVAL( o ); } - - jsval toval( const BSONElement& e ){ - switch( e.type() ){ + jsval toval( const BSONElement& e ) { + + switch( e.type() ) { case EOO: case jstNULL: case Undefined: @@ -565,50 +581,50 @@ namespace mongo { return toval( e.valuestr() ); case Bool: return e.boolean() ? 
JSVAL_TRUE : JSVAL_FALSE; - case Object:{ + case Object: { BSONObj embed = e.embeddedObject().getOwned(); return toval( &embed ); } - case Array:{ + case Array: { BSONObj embed = e.embeddedObject().getOwned(); - if ( embed.isEmpty() ){ + if ( embed.isEmpty() ) { return OBJECT_TO_JSVAL( JS_NewArrayObject( _context , 0 , 0 ) ); } - - int n = embed.nFields(); - - JSObject * array = JS_NewArrayObject( _context , n , 0 ); - assert( array ); + + JSObject * array = JS_NewArrayObject( _context , 1 , 0 ); + CHECKJSALLOC( array ); jsval myarray = OBJECT_TO_JSVAL( array ); - for ( int i=0; iit(); *statep = PRIVATE_TO_JSVAL( it ); } else { - *statep = 0; + *statep = 0; } if ( idp ) *idp = JSVAL_ZERO; @@ -774,13 +790,13 @@ namespace mongo { } BSONFieldIterator * it = (BSONFieldIterator*)JSVAL_TO_PRIVATE( *statep ); - if ( ! it ){ + if ( ! it ) { *statep = 0; return JS_TRUE; } - if ( enum_op == JSENUMERATE_NEXT ){ - if ( it->more() ){ + if ( enum_op == JSENUMERATE_NEXT ) { + if ( it->more() ) { string name = it->next(); Convertor c(cx); assert( JS_ValueToId( cx , c.toval( name.c_str() ) , idp ) ); @@ -792,7 +808,7 @@ namespace mongo { return JS_TRUE; } - if ( enum_op == JSENUMERATE_DESTROY ){ + if ( enum_op == JSENUMERATE_DESTROY ) { if ( it ) delete it; return JS_TRUE; @@ -802,9 +818,9 @@ namespace mongo { return JS_FALSE; } - JSBool noaccess( JSContext *cx, JSObject *obj, jsval idval, jsval *vp){ + JSBool noaccess( JSContext *cx, JSObject *obj, jsval idval, jsval *vp) { BSONHolder * holder = GETHOLDER( cx , obj ); - if ( ! holder ){ + if ( ! holder ) { // in init code still return JS_TRUE; } @@ -821,7 +837,7 @@ namespace mongo { JSCLASS_NO_OPTIONAL_MEMBERS }; - JSBool bson_cons( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ){ + JSBool bson_cons( JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval ) { cerr << "bson_cons : shouldn't be here!" << endl; JS_ReportError( cx , "can't construct bson object" ); return JS_FALSE; @@ -830,26 +846,26 @@ namespace mongo { JSFunctionSpec bson_functions[] = { { 0 } }; - - JSBool bson_add_prop( JSContext *cx, JSObject *obj, jsval idval, jsval *vp){ + + JSBool bson_add_prop( JSContext *cx, JSObject *obj, jsval idval, jsval *vp) { BSONHolder * holder = GETHOLDER( cx , obj ); - if ( ! holder ){ + if ( ! holder ) { // static init return JS_TRUE; } - if ( ! holder->_inResolve ){ + if ( ! 
holder->_inResolve ) { Convertor c(cx); string name = c.toString( idval ); - if ( holder->_obj[name].eoo() ){ + if ( holder->_obj[name].eoo() ) { holder->_extra.push_back( name ); } holder->_modified = true; } return JS_TRUE; } - - JSBool mark_modified( JSContext *cx, JSObject *obj, jsval idval, jsval *vp){ + + JSBool mark_modified( JSContext *cx, JSObject *obj, jsval idval, jsval *vp) { Convertor c(cx); BSONHolder * holder = GETHOLDER( cx , obj ); if ( !holder ) // needed when we're messing with DBRef.prototype @@ -860,8 +876,8 @@ namespace mongo { holder->_removed.erase( c.toString( idval ) ); return JS_TRUE; } - - JSBool mark_modified_remove( JSContext *cx, JSObject *obj, jsval idval, jsval *vp){ + + JSBool mark_modified_remove( JSContext *cx, JSObject *obj, jsval idval, jsval *vp) { Convertor c(cx); BSONHolder * holder = GETHOLDER( cx , obj ); if ( holder->_inResolve ) @@ -887,10 +903,10 @@ namespace mongo { // --- global helpers --- - JSBool native_print( JSContext * cx , JSObject * obj , uintN argc, jsval *argv, jsval *rval ){ + JSBool native_print( JSContext * cx , JSObject * obj , uintN argc, jsval *argv, jsval *rval ) { stringstream ss; Convertor c( cx ); - for ( uintN i=0; i 0 ) ss << " "; ss << c.toString( argv[i] ); @@ -900,32 +916,32 @@ namespace mongo { return JS_TRUE; } - JSBool native_helper( JSContext *cx , JSObject *obj , uintN argc, jsval *argv , jsval *rval ){ + JSBool native_helper( JSContext *cx , JSObject *obj , uintN argc, jsval *argv , jsval *rval ) { Convertor c(cx); - + NativeFunction func = (NativeFunction)((long long)c.getNumber( obj , "x" ) ); assert( func ); - + BSONObj a; - if ( argc > 0 ){ + if ( argc > 0 ) { BSONObjBuilder args; - for ( uintN i=0; i_obj.objsize(); } } @@ -976,36 +998,36 @@ namespace mongo { BSONObj temp = c.toObject( o ); size = temp.objsize(); } - + *rval = c.toval( size ); - return JS_TRUE; + return JS_TRUE; } - + JSFunctionSpec objectHelpers[] = { - { "bsonsize" , &bson_get_size , 1 , 0 , 0 } , - { 0 , 0 , 0 , 0 , 0 } + { "bsonsize" , &bson_get_size , 1 , 0 , 0 } , + { 0 , 0 , 0 , 0 , 0 } }; - + // end Object helpers - JSBool resolveBSONField( JSContext *cx, JSObject *obj, jsval id, uintN flags, JSObject **objp ){ + JSBool resolveBSONField( JSContext *cx, JSObject *obj, jsval id, uintN flags, JSObject **objp ) { assert( JS_EnterLocalRootScope( cx ) ); Convertor c( cx ); BSONHolder * holder = GETHOLDER( cx , obj ); - if ( ! holder ){ + if ( ! 
holder ) { // static init *objp = 0; JS_LeaveLocalRootScope( cx ); return JS_TRUE; } holder->check(); - + string s = c.toString( id ); BSONElement e = holder->_obj[ s.c_str() ]; - - if ( e.type() == EOO || holder->_removed.count( s ) ){ + + if ( e.type() == EOO || holder->_removed.count( s ) ) { *objp = 0; JS_LeaveLocalRootScope( cx ); return JS_TRUE; @@ -1025,12 +1047,12 @@ namespace mongo { holder->_inResolve = true; assert( JS_SetProperty( cx , obj , s.c_str() , &val ) ); holder->_inResolve = false; - - if ( val != JSVAL_NULL && val != JSVAL_VOID && JSVAL_IS_OBJECT( val ) ){ + + if ( val != JSVAL_NULL && val != JSVAL_VOID && JSVAL_IS_OBJECT( val ) ) { // TODO: this is a hack to get around sub objects being modified JSObject * oo = JSVAL_TO_OBJECT( val ); - if ( JS_InstanceOf( cx , oo , &bson_class , 0 ) || - JS_IsArrayObject( cx , oo ) ){ + if ( JS_InstanceOf( cx , oo , &bson_class , 0 ) || + JS_IsArrayObject( cx , oo ) ) { holder->_modified = true; } } @@ -1046,15 +1068,15 @@ namespace mongo { class SMEngine : public ScriptEngine { public: - SMEngine(){ + SMEngine() { #ifdef SM18 JS_SetCStringsAreUTF8(); #endif _runtime = JS_NewRuntime(8L * 1024L * 1024L); uassert( 10221 , "JS_NewRuntime failed" , _runtime ); - - if ( ! utf8Ok() ){ + + if ( ! utf8Ok() ) { log() << "*** warning: spider monkey build without utf8 support. consider rebuilding with utf8 support" << endl; } @@ -1063,7 +1085,7 @@ namespace mongo { uassert( 10222 , "assert not being executed" , x == 1 ); } - ~SMEngine(){ + ~SMEngine() { JS_DestroyRuntime( _runtime ); JS_ShutDown(); } @@ -1088,7 +1110,7 @@ namespace mongo { SMEngine * globalSMEngine; - void ScriptEngine::setup(){ + void ScriptEngine::setup() { globalSMEngine = new SMEngine(); globalScriptEngine = globalSMEngine; } @@ -1097,11 +1119,11 @@ namespace mongo { // ------ scope ------ - JSBool no_gc(JSContext *cx, JSGCStatus status){ + JSBool no_gc(JSContext *cx, JSGCStatus status) { return JS_FALSE; } - JSBool yes_gc(JSContext *cx, JSGCStatus status){ + JSBool yes_gc(JSContext *cx, JSGCStatus status) { return JS_TRUE; } @@ -1125,64 +1147,65 @@ namespace mongo { JS_SetOptions( _context , JS_GetOptions( _context ) | JSOPTION_VAROBJFIX ); JS_DefineFunctions( _context , _global , globalHelpers ); - + JS_DefineFunctions( _context , _convertor->getGlobalObject( "Object" ), objectHelpers ); //JS_SetGCCallback( _context , no_gc ); // this is useful for seeing if something is a gc problem _postCreateHacks(); } - - ~SMScope(){ + + ~SMScope() { smlock; uassert( 10223 , "deleted SMScope twice?" , _convertor ); - for ( list::iterator i=_roots.begin(); i != _roots.end(); i++ ){ + for ( list::iterator i=_roots.begin(); i != _roots.end(); i++ ) { JS_RemoveRoot( _context , *i ); } _roots.clear(); - - if ( _this ){ + + if ( _this ) { JS_RemoveRoot( _context , &_this ); _this = 0; } - if ( _convertor ){ + if ( _convertor ) { delete _convertor; _convertor = 0; } - - if ( _context ){ + + if ( _context ) { + // This is expected to reclaim _global as well. JS_DestroyContext( _context ); _context = 0; } } - - void reset(){ + + void reset() { smlock; assert( _convertor ); return; - if ( _this ){ + if ( _this ) { JS_RemoveRoot( _context , &_this ); _this = 0; } currentScope.reset( this ); _error = ""; } - - void addRoot( void * root , const char * name ){ + + void addRoot( void * root , const char * name ) { JS_AddNamedRoot( _context , root , name ); _roots.push_back( root ); } - void init( BSONObj * data ){ + void init( const BSONObj * data ) { smlock; if ( ! 
data ) return; BSONObjIterator i( *data ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); _convertor->setProperty( _global , e.fieldName() , _convertor->toval( e ) ); _initFieldNames.insert( e.fieldName() ); @@ -1190,7 +1213,7 @@ namespace mongo { } - void externalSetup(){ + void externalSetup() { smlock; uassert( 10224 , "already local connected" , ! _localConnect ); if ( _externalSetup ) @@ -1199,20 +1222,20 @@ namespace mongo { _externalSetup = true; } - void localConnect( const char * dbName ){ + void localConnect( const char * dbName ) { { smlock; uassert( 10225 , "already setup for external db" , ! _externalSetup ); - if ( _localConnect ){ + if ( _localConnect ) { uassert( 10226 , "connected to different db" , _localDBName == dbName ); return; } - + initMongoJS( this , _context , _global , true ); - + exec( "_mongo = new Mongo();" ); exec( ((string)"db = _mongo.getDB( \"" + dbName + "\" ); ").c_str() ); - + _localConnect = true; _localDBName = dbName; } @@ -1220,14 +1243,14 @@ namespace mongo { } // ----- getters ------ - double getNumber( const char *field ){ + double getNumber( const char *field ) { smlock; jsval val; assert( JS_GetProperty( _context , _global , field , &val ) ); return _convertor->toNumber( val ); } - string getString( const char *field ){ + string getString( const char *field ) { smlock; jsval val; assert( JS_GetProperty( _context , _global , field , &val ) ); @@ -1235,27 +1258,27 @@ namespace mongo { return _convertor->toString( s ); } - bool getBoolean( const char *field ){ + bool getBoolean( const char *field ) { smlock; return _convertor->getBoolean( _global , field ); } - BSONObj getObject( const char *field ){ + BSONObj getObject( const char *field ) { smlock; return _convertor->toObject( _convertor->getProperty( _global , field ) ); } - JSObject * getJSObject( const char * field ){ + JSObject * getJSObject( const char * field ) { smlock; return _convertor->getJSObject( _global , field ); } - int type( const char *field ){ + int type( const char *field ) { smlock; jsval val; assert( JS_GetProperty( _context , _global , field , &val ) ); - switch ( JS_TypeOfValue( _context , val ) ){ + switch ( JS_TypeOfValue( _context , val ) ) { case JSTYPE_VOID: return Undefined; case JSTYPE_NULL: return jstNULL; case JSTYPE_OBJECT: { @@ -1280,52 +1303,61 @@ namespace mongo { // ----- setters ------ - void setElement( const char *field , const BSONElement& val ){ + void setElement( const char *field , const BSONElement& val ) { smlock; jsval v = _convertor->toval( val ); assert( JS_SetProperty( _context , _global , field , &v ) ); } - void setNumber( const char *field , double val ){ + void setNumber( const char *field , double val ) { smlock; jsval v = _convertor->toval( val ); assert( JS_SetProperty( _context , _global , field , &v ) ); } - void setString( const char *field , const char * val ){ + void setString( const char *field , const char * val ) { smlock; jsval v = _convertor->toval( val ); assert( JS_SetProperty( _context , _global , field , &v ) ); } - void setObject( const char *field , const BSONObj& obj , bool readOnly ){ + void setObject( const char *field , const BSONObj& obj , bool readOnly ) { smlock; jsval v = _convertor->toval( &obj , readOnly ); JS_SetProperty( _context , _global , field , &v ); } - void setBoolean( const char *field , bool val ){ + void setBoolean( const char *field , bool val ) { smlock; jsval v = BOOLEAN_TO_JSVAL( val ); assert( JS_SetProperty( _context , _global , field , &v ) ); } - void setThis( const 
BSONObj * obj ){ + void setThis( const BSONObj * obj ) { smlock; - if ( _this ){ + if ( _this ) { JS_RemoveRoot( _context , &_this ); _this = 0; } - - if ( obj ){ + + if ( obj ) { _this = _convertor->toJSObject( obj ); JS_AddNamedRoot( _context , &_this , "scope this" ); } } + void rename( const char * from , const char * to ) { + smlock; + jsval v; + assert( JS_GetProperty( _context , _global , from , &v ) ); + assert( JS_SetProperty( _context , _global , to , &v ) ); + v = JSVAL_VOID; + assert( JS_SetProperty( _context , _global , from , &v ) ); + } + // ---- functions ----- - ScriptingFunction _createFunction( const char * code ){ + ScriptingFunction _createFunction( const char * code ) { smlock; precall(); return (ScriptingFunction)_convertor->compileFunction( code ); @@ -1337,40 +1369,49 @@ namespace mongo { int count; }; - static JSBool _checkTimeout( JSContext *cx ){ + // should not generate exceptions, as those can be caught in + // javascript code; returning false without an exception exits + // immediately + static JSBool _interrupt( JSContext *cx ) { TimeoutSpec &spec = *(TimeoutSpec *)( JS_GetContextPrivate( cx ) ); if ( ++spec.count % 1000 != 0 ) return JS_TRUE; + const char * interrupt = ScriptEngine::checkInterrupt(); + if ( interrupt && interrupt[ 0 ] ) { + return JS_FALSE; + } + if ( spec.timeout.ticks() == 0 ) { + return JS_TRUE; + } boost::posix_time::time_duration elapsed = ( boost::posix_time::microsec_clock::local_time() - spec.start ); if ( elapsed < spec.timeout ) { return JS_TRUE; } - JS_ReportError( cx, "Timeout exceeded" ); return JS_FALSE; } - static JSBool checkTimeout( JSContext *cx, JSScript *script ){ - return _checkTimeout( cx ); - } + static JSBool interrupt( JSContext *cx, JSScript *script ) { + return _interrupt( cx ); + } - void installCheckTimeout( int timeoutMs ) { - if ( timeoutMs > 0 ) { + void installInterrupt( int timeoutMs ) { + if ( timeoutMs != 0 || ScriptEngine::haveCheckInterruptCallback() ) { TimeoutSpec *spec = new TimeoutSpec; spec->timeout = boost::posix_time::millisec( timeoutMs ); spec->start = boost::posix_time::microsec_clock::local_time(); spec->count = 0; JS_SetContextPrivate( _context, (void*)spec ); #if defined(SM181) && !defined(XULRUNNER190) - JS_SetOperationCallback( _context, _checkTimeout ); + JS_SetOperationCallback( _context, _interrupt ); #else - JS_SetBranchCallback( _context, checkTimeout ); + JS_SetBranchCallback( _context, interrupt ); #endif } } - void uninstallCheckTimeout( int timeoutMs ) { - if ( timeoutMs > 0 ) { + void uninstallInterrupt( int timeoutMs ) { + if ( timeoutMs != 0 || ScriptEngine::haveCheckInterruptCallback() ) { #if defined(SM181) && !defined(XULRUNNER190) JS_SetOperationCallback( _context , 0 ); #else @@ -1381,34 +1422,33 @@ namespace mongo { } } - void precall(){ + void precall() { _error = ""; currentScope.reset( this ); } - bool exec( const string& code , const string& name = "(anon)" , bool printResult = false , bool reportError = true , bool assertOnError = true, int timeoutMs = 0 ){ + bool exec( const StringData& code , const string& name = "(anon)" , bool printResult = false , bool reportError = true , bool assertOnError = true, int timeoutMs = 0 ) { smlock; precall(); jsval ret = JSVAL_VOID; - installCheckTimeout( timeoutMs ); - JSBool worked = JS_EvaluateScript( _context , _global , code.c_str() , strlen( code.c_str() ) , name.c_str() , 0 , &ret ); - uninstallCheckTimeout( timeoutMs ); + installInterrupt( timeoutMs ); + JSBool worked = JS_EvaluateScript( _context , _global , code.data() , 
code.size() , name.c_str() , 1 , &ret ); + uninstallInterrupt( timeoutMs ); - if ( ! worked && _error.size() == 0 ){ + if ( ! worked && _error.size() == 0 ) { jsval v; - if ( JS_GetPendingException( _context , &v ) ){ + if ( JS_GetPendingException( _context , &v ) ) { _error = _convertor->toString( v ); if ( reportError ) cout << _error << endl; } } - if ( assertOnError ) - uassert( 10228 , name + " exec failed" , worked ); + uassert( 10228 , str::stream() << name + " exec failed: " << _error , worked || ! assertOnError ); - if ( reportError && ! _error.empty() ){ + if ( reportError && ! _error.empty() ) { // cout << "exec error: " << _error << endl; // already printed in reportError, so... TODO } @@ -1421,23 +1461,23 @@ namespace mongo { return worked; } - - int invoke( JSFunction * func , const BSONObj& args, int timeoutMs , bool ignoreReturn ){ + + int invoke( JSFunction * func , const BSONObj& args, int timeoutMs , bool ignoreReturn ) { smlock; precall(); assert( JS_EnterLocalRootScope( _context ) ); - + int nargs = args.nFields(); scoped_array smargsPtr( new jsval[nargs] ); - if ( nargs ){ + if ( nargs ) { BSONObjIterator it( args ); - for ( int i=0; itoval( it.next() ); } } - if ( args.isEmpty() ){ + if ( args.isEmpty() ) { _convertor->setProperty( _global , "args" , JSVAL_NULL ); } else { @@ -1446,35 +1486,35 @@ namespace mongo { JS_LeaveLocalRootScope( _context ); - installCheckTimeout( timeoutMs ); + installInterrupt( timeoutMs ); jsval rval; JSBool ret = JS_CallFunction( _context , _this ? _this : _global , func , nargs , smargsPtr.get() , &rval ); - uninstallCheckTimeout( timeoutMs ); + uninstallInterrupt( timeoutMs ); if ( !ret ) { return -3; } - - if ( ! ignoreReturn ){ + + if ( ! ignoreReturn ) { assert( JS_SetProperty( _context , _global , "return" , &rval ) ); } return 0; } - int invoke( ScriptingFunction funcAddr , const BSONObj& args, int timeoutMs = 0 , bool ignoreReturn = 0 ){ + int invoke( ScriptingFunction funcAddr , const BSONObj& args, int timeoutMs = 0 , bool ignoreReturn = 0 ) { return invoke( (JSFunction*)funcAddr , args , timeoutMs , ignoreReturn ); } - void gotError( string s ){ + void gotError( string s ) { _error = s; } - string getError(){ + string getError() { return _error; } - void injectNative( const char *field, NativeFunction func ){ + void injectNative( const char *field, NativeFunction func ) { smlock; string name = field; _convertor->setProperty( _global , (name + "_").c_str() , _convertor->toval( (double)(long long)func ) ); @@ -1482,19 +1522,19 @@ namespace mongo { stringstream code; code << field << "_" << " = { x : " << field << "_ }; "; code << field << " = function(){ return nativeHelper.apply( " << field << "_ , arguments ); }"; - exec( code.str().c_str() ); + exec( code.str() ); } - virtual void gc(){ + virtual void gc() { smlock; JS_GC( _context ); } JSContext *SavedContext() const { return _context; } - + private: - void _postCreateHacks(){ + void _postCreateHacks() { #ifdef XULRUNNER exec( "__x__ = new Date(1);" ); globalSMEngine->_dateClass = _convertor->getClass( _global , "__x__" ); @@ -1502,7 +1542,7 @@ namespace mongo { globalSMEngine->_regexClass = _convertor->getClass( _global , "__x__" ); #endif } - + JSContext * _context; Convertor * _convertor; @@ -1514,41 +1554,41 @@ namespace mongo { bool _externalSetup; bool _localConnect; - + set _initFieldNames; - + }; /* used to make the logging not overly chatty in the mongo shell. 
*/ extern bool isShell; - void errorReporter( JSContext *cx, const char *message, JSErrorReport *report ){ + void errorReporter( JSContext *cx, const char *message, JSErrorReport *report ) { stringstream ss; - if( !isShell ) + if( !isShell ) ss << "JS Error: "; ss << message; - if ( report && report->filename ){ + if ( report && report->filename ) { ss << " " << report->filename << ":" << report->lineno; } tlog() << ss.str() << endl; - if ( currentScope.get() ){ + if ( currentScope.get() ) { currentScope->gotError( ss.str() ); } } - JSBool native_load( JSContext *cx , JSObject *obj , uintN argc, jsval *argv , jsval *rval ){ + JSBool native_load( JSContext *cx , JSObject *obj , uintN argc, jsval *argv , jsval *rval ) { Convertor c(cx); Scope * s = currentScope.get(); - for ( uintN i=0; iexecFile( filename , false , true , false ) ){ + if ( ! s->execFile( filename , false , true , false ) ) { JS_ReportError( cx , ((string)"error loading js file: " + filename ).c_str() ); return JS_FALSE; } @@ -1559,7 +1599,7 @@ namespace mongo { - void SMEngine::runTest(){ + void SMEngine::runTest() { SMScope s; s.localConnect( "foo" ); @@ -1589,17 +1629,17 @@ namespace mongo { } - Scope * SMEngine::createScope(){ + Scope * SMEngine::createScope() { return new SMScope(); } - void Convertor::addRoot( JSFunction * f , const char * name ){ + void Convertor::addRoot( JSFunction * f , const char * name ) { if ( ! f ) return; SMScope * scope = currentScope.get(); uassert( 10229 , "need a scope" , scope ); - + JSObject * o = JS_GetFunctionObject( f ); assert( o ); scope->addRoot( &o , name ); diff --git a/scripting/engine_spidermonkey.h b/scripting/engine_spidermonkey.h index 4617b5d..3ee7495 100644 --- a/scripting/engine_spidermonkey.h +++ b/scripting/engine_spidermonkey.h @@ -37,7 +37,7 @@ #include "jstypes.h" #undef JS_PUBLIC_API #undef JS_PUBLIC_DATA -#define JS_PUBLIC_API(t) t __cdecl +#define JS_PUBLIC_API(t) t __cdecl #define JS_PUBLIC_DATA(t) t #endif @@ -64,7 +64,7 @@ #define JSCLASS_GLOBAL_FLAGS 0 -JSBool JS_CStringsAreUTF8(){ +JSBool JS_CStringsAreUTF8() { return false; } @@ -85,7 +85,7 @@ namespace mongo { class SMScope; class Convertor; - + extern JSClass bson_class; extern JSClass bson_ro_class; @@ -99,10 +99,10 @@ namespace mongo { extern JSClass maxkey_class; // internal things - void dontDeleteScope( SMScope * s ){} + void dontDeleteScope( SMScope * s ) {} void errorReporter( JSContext *cx, const char *message, JSErrorReport *report ); extern boost::thread_specific_ptr currentScope; - + // bson JSBool resolveBSONField( JSContext *cx, JSObject *obj, jsval id, uintN flags, JSObject **objp ); @@ -112,14 +112,14 @@ namespace mongo { bool appendSpecialDBObject( Convertor * c , BSONObjBuilder& b , const string& name , jsval val , JSObject * o ); #define JSVAL_IS_OID(v) ( JSVAL_IS_OBJECT( v ) && JS_InstanceOf( cx , JSVAL_TO_OBJECT( v ) , &object_id_class , 0 ) ) - + bool isDate( JSContext * cx , JSObject * o ); // JS private data must be 2byte aligned, so we use a holder to refer to an unaligned pointer. 
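    // When copyLen is supplied, the holder below takes its own malloc'd copy of the
    // bytes and records (via iFree_) that it owns them; with the default of -1 it only
    // aliases the caller's buffer. A minimal usage sketch, assuming a BSONElement `e`
    // holding BinData (the surrounding conversion code is not shown here):
    //
    //     int len;
    //     const char * data = e.binData( len );
    //     BinDataHolder * holder = new BinDataHolder( data, len ); // owning copy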
struct BinDataHolder { BinDataHolder( const char *c, int copyLen = -1 ) : - c_( const_cast< char * >( c ) ), - iFree_( copyLen != -1 ) { + c_( const_cast< char * >( c ) ), + iFree_( copyLen != -1 ) { if ( copyLen != -1 ) { c_ = (char*)malloc( copyLen ); memcpy( c_, c, copyLen ); diff --git a/scripting/engine_v8.cpp b/scripting/engine_v8.cpp index 08826b1..cd186b4 100644 --- a/scripting/engine_v8.cpp +++ b/scripting/engine_v8.cpp @@ -1,4 +1,4 @@ -//engine_v8.cpp +//engine_v8.cpp /* Copyright 2009 10gen Inc. * @@ -21,54 +21,74 @@ #include "v8_utils.h" #include "v8_db.h" -#define V8_SIMPLE_HEADER Locker l; HandleScope handle_scope; Context::Scope context_scope( _context ); +#define V8_SIMPLE_HEADER V8Lock l; HandleScope handle_scope; Context::Scope context_scope( _context ); namespace mongo { + // guarded by v8 mutex + map< unsigned, int > __interruptSpecToThreadId; + // --- engine --- V8ScriptEngine::V8ScriptEngine() {} - - V8ScriptEngine::~V8ScriptEngine(){ + + V8ScriptEngine::~V8ScriptEngine() { } - void ScriptEngine::setup(){ - if ( !globalScriptEngine ){ + void ScriptEngine::setup() { + if ( !globalScriptEngine ) { globalScriptEngine = new V8ScriptEngine(); } } + void V8ScriptEngine::interrupt( unsigned opSpec ) { + v8::Locker l; + if ( __interruptSpecToThreadId.count( opSpec ) ) { + V8::TerminateExecution( __interruptSpecToThreadId[ opSpec ] ); + } + } + void V8ScriptEngine::interruptAll() { + v8::Locker l; + vector< int > toKill; // v8 mutex could potentially be yielded during the termination call + for( map< unsigned, int >::const_iterator i = __interruptSpecToThreadId.begin(); i != __interruptSpecToThreadId.end(); ++i ) { + toKill.push_back( i->second ); + } + for( vector< int >::const_iterator i = toKill.begin(); i != toKill.end(); ++i ) { + V8::TerminateExecution( *i ); + } + } + // --- scope --- - - V8Scope::V8Scope( V8ScriptEngine * engine ) - : _engine( engine ) , - _connectState( NOT ){ - Locker l; - HandleScope handleScope; + V8Scope::V8Scope( V8ScriptEngine * engine ) + : _engine( engine ) , + _connectState( NOT ) { + + V8Lock l; + HandleScope handleScope; _context = Context::New(); Context::Scope context_scope( _context ); _global = Persistent< v8::Object >::New( _context->Global() ); _this = Persistent< v8::Object >::New( v8::Object::New() ); - _global->Set(v8::String::New("print"), v8::FunctionTemplate::New(Print)->GetFunction() ); - _global->Set(v8::String::New("version"), v8::FunctionTemplate::New(Version)->GetFunction() ); + _global->Set(v8::String::New("print"), newV8Function< Print >()->GetFunction() ); + _global->Set(v8::String::New("version"), newV8Function< Version >()->GetFunction() ); _global->Set(v8::String::New("load"), - v8::FunctionTemplate::New(loadCallback, v8::External::New(this))->GetFunction() ); - + v8::FunctionTemplate::New( v8Callback< loadCallback >, v8::External::New(this))->GetFunction() ); + _wrapper = Persistent< v8::Function >::New( getObjectWrapperTemplate()->GetFunction() ); - - _global->Set(v8::String::New("gc"), v8::FunctionTemplate::New(GCV8)->GetFunction() ); + + _global->Set(v8::String::New("gc"), newV8Function< GCV8 >()->GetFunction() ); installDBTypes( _global ); } - V8Scope::~V8Scope(){ - Locker l; - Context::Scope context_scope( _context ); + V8Scope::~V8Scope() { + V8Lock l; + Context::Scope context_scope( _context ); _wrapper.Dispose(); _this.Dispose(); for( unsigned i = 0; i < _funcs.size(); ++i ) @@ -79,7 +99,7 @@ namespace mongo { } Handle< Value > V8Scope::nativeCallback( const Arguments &args ) { - Locker l; + V8Lock l; 
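        // The code below retrieves the NativeFunction pointer stored on the callee
        // (property "_native_function"), marshals the JS arguments into a BSONObj,
        // invokes the native function, and converts any C++ exception it throws into
        // a JS exception via v8::ThrowException.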
HandleScope handle_scope; Local< External > f = External::Cast( *args.Callee()->Get( v8::String::New( "_native_function" ) ) ); NativeFunction function = (NativeFunction)(f->Value()); @@ -93,16 +113,18 @@ namespace mongo { BSONObj ret; try { ret = function( nativeArgs ); - } catch( const std::exception &e ) { + } + catch( const std::exception &e ) { return v8::ThrowException(v8::String::New(e.what())); - } catch( ... ) { - return v8::ThrowException(v8::String::New("unknown exception")); + } + catch( ... ) { + return v8::ThrowException(v8::String::New("unknown exception")); } return handle_scope.Close( mongoToV8Element( ret.firstElement() ) ); } Handle< Value > V8Scope::loadCallback( const Arguments &args ) { - Locker l; + V8Lock l; HandleScope handle_scope; Handle field = Handle::Cast(args.Data()); void* ptr = field->Value(); @@ -120,46 +142,46 @@ namespace mongo { // ---- global stuff ---- - void V8Scope::init( BSONObj * data ){ - Locker l; + void V8Scope::init( const BSONObj * data ) { + V8Lock l; if ( ! data ) return; - + BSONObjIterator i( *data ); - while ( i.more() ){ + while ( i.more() ) { BSONElement e = i.next(); setElement( e.fieldName() , e ); } } - - void V8Scope::setNumber( const char * field , double val ){ + + void V8Scope::setNumber( const char * field , double val ) { V8_SIMPLE_HEADER _global->Set( v8::String::New( field ) , v8::Number::New( val ) ); } - void V8Scope::setString( const char * field , const char * val ){ + void V8Scope::setString( const char * field , const char * val ) { V8_SIMPLE_HEADER _global->Set( v8::String::New( field ) , v8::String::New( val ) ); } - void V8Scope::setBoolean( const char * field , bool val ){ + void V8Scope::setBoolean( const char * field , bool val ) { V8_SIMPLE_HEADER _global->Set( v8::String::New( field ) , v8::Boolean::New( val ) ); } - void V8Scope::setElement( const char *field , const BSONElement& e ){ + void V8Scope::setElement( const char *field , const BSONElement& e ) { V8_SIMPLE_HEADER _global->Set( v8::String::New( field ) , mongoToV8Element( e ) ); } - void V8Scope::setObject( const char *field , const BSONObj& obj , bool readOnly){ + void V8Scope::setObject( const char *field , const BSONObj& obj , bool readOnly) { V8_SIMPLE_HEADER // Set() accepts a ReadOnly parameter, but this just prevents the field itself // from being overwritten and doesn't protect the object stored in 'field'. 
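        // Consequently readOnly is forwarded to mongoToV8() below, which is presumably
        // where the converted object's own properties receive their read-only protection.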
_global->Set( v8::String::New( field ) , mongoToV8( obj, false, readOnly) ); } - int V8Scope::type( const char *field ){ + int V8Scope::type( const char *field ) { V8_SIMPLE_HEADER Handle v = get( field ); if ( v->IsNull() ) @@ -178,7 +200,7 @@ namespace mongo { return NumberInt; if ( v->IsNumber() ) return NumberDouble; - if ( v->IsExternal() ){ + if ( v->IsExternal() ) { uassert( 10230 , "can't handle external yet" , 0 ); return -1; } @@ -190,36 +212,36 @@ namespace mongo { throw UserException( 12509, (string)"don't know what this is: " + field ); } - v8::Handle V8Scope::get( const char * field ){ + v8::Handle V8Scope::get( const char * field ) { return _global->Get( v8::String::New( field ) ); } - double V8Scope::getNumber( const char *field ){ + double V8Scope::getNumber( const char *field ) { V8_SIMPLE_HEADER return get( field )->ToNumber()->Value(); } - int V8Scope::getNumberInt( const char *field ){ + int V8Scope::getNumberInt( const char *field ) { V8_SIMPLE_HEADER return get( field )->ToInt32()->Value(); } - long long V8Scope::getNumberLongLong( const char *field ){ + long long V8Scope::getNumberLongLong( const char *field ) { V8_SIMPLE_HEADER return get( field )->ToInteger()->Value(); } - string V8Scope::getString( const char *field ){ + string V8Scope::getString( const char *field ) { V8_SIMPLE_HEADER return toSTLString( get( field ) ); } - bool V8Scope::getBoolean( const char *field ){ + bool V8Scope::getBoolean( const char *field ) { V8_SIMPLE_HEADER return get( field )->ToBoolean()->Value(); } - - BSONObj V8Scope::getObject( const char * field ){ + + BSONObj V8Scope::getObject( const char * field ) { V8_SIMPLE_HEADER Handle v = get( field ); if ( v->IsNull() || v->IsUndefined() ) @@ -227,21 +249,28 @@ namespace mongo { uassert( 10231 , "not an object" , v->IsObject() ); return v8ToMongo( v->ToObject() ); } - + // --- functions ----- - Local< v8::Function > V8Scope::__createFunction( const char * raw ){ - for(; isspace( *raw ); ++raw ); // skip whitespace + bool hasFunctionIdentifier( const string& code ) { + if ( code.size() < 9 || code.find( "function" ) != 0 ) + return false; + + return code[8] == ' ' || code[8] == '('; + } + + Local< v8::Function > V8Scope::__createFunction( const char * raw ) { + raw = jsSkipWhiteSpace( raw ); string code = raw; - if ( code.find( "function" ) == string::npos ){ - if ( code.find( "\n" ) == string::npos && - ! hasJSReturn( code ) && - ( code.find( ";" ) == string::npos || code.find( ";" ) == code.size() - 1 ) ){ + if ( !hasFunctionIdentifier( code ) ) { + if ( code.find( "\n" ) == string::npos && + ! hasJSReturn( code ) && + ( code.find( ";" ) == string::npos || code.find( ";" ) == code.size() - 1 ) ) { code = "return " + code; } code = "function(){ " + code + "}"; } - + int num = _funcs.size() + 1; string fn; @@ -250,29 +279,30 @@ namespace mongo { ss << "_funcs" << num; fn = ss.str(); } - + code = fn + " = " + code; TryCatch try_catch; - Handle