diff options
author | Antonin Kral <a.kral@bobek.cz> | 2011-03-17 00:05:43 +0100 |
---|---|---|
committer | Antonin Kral <a.kral@bobek.cz> | 2011-03-17 00:05:43 +0100 |
commit | 582fc32574a3b158c81e49cb00e6ae59205e66ba (patch) | |
tree | ac64a3243e0d2121709f685695247052858115c8 /db/mongommf.cpp | |
parent | 2761bffa96595ac1698d86bbc2e95ebb0d4d6e93 (diff) | |
download | mongodb-582fc32574a3b158c81e49cb00e6ae59205e66ba.tar.gz |
Imported Upstream version 1.8.0
Diffstat (limited to 'db/mongommf.cpp')
-rw-r--r-- | db/mongommf.cpp | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/db/mongommf.cpp b/db/mongommf.cpp new file mode 100644 index 0000000..5ae573d --- /dev/null +++ b/db/mongommf.cpp @@ -0,0 +1,391 @@ +// @file mongommf.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* this module adds some of our layers atop memory mapped files - specifically our handling of private views & such + if you don't care about journaling/durability (temp sort files & such) use MemoryMappedFile class, not this. +*/ + +#include "pch.h" +#include "cmdline.h" +#include "mongommf.h" +#include "dur.h" +#include "dur_journalformat.h" +#include "../util/mongoutils/str.h" + +using namespace mongoutils; + +namespace mongo { + +#if defined(_WIN32) + extern mutex mapViewMutex; + + __declspec(noinline) void makeChunkWritable(size_t chunkno) { + scoped_lock lk(mapViewMutex); + + if( writable.get(chunkno) ) // double check lock + return; + + // remap all maps in this chunk. common case is a single map, but could have more than one with smallfiles or .ns files + size_t chunkStart = chunkno * MemoryMappedFile::ChunkSize; + size_t chunkNext = chunkStart + MemoryMappedFile::ChunkSize; + + scoped_lock lk2(privateViews._mutex()); + map<void*,MongoMMF*>::iterator i = privateViews.finditer_inlock((void*) (chunkNext-1)); + while( 1 ) { + const pair<void*,MongoMMF*> x = *(--i); + MongoMMF *mmf = x.second; + if( mmf == 0 ) + break; + + size_t viewStart = (size_t) x.first; + size_t viewEnd = viewStart + mmf->length(); + if( viewEnd <= chunkStart ) + break; + + size_t protectStart = max(viewStart, chunkStart); + dassert(protectStart<chunkNext); + + size_t protectEnd = min(viewEnd, chunkNext); + size_t protectSize = protectEnd - protectStart; + dassert(protectSize>0&&protectSize<=MemoryMappedFile::ChunkSize); + + DWORD old; + bool ok = VirtualProtect((void*)protectStart, protectSize, PAGE_WRITECOPY, &old); + if( !ok ) { + DWORD e = GetLastError(); + log() << "VirtualProtect failed " << chunkno << hex << protectStart << ' ' << protectSize << ' ' << errnoWithDescription(e) << endl; + assert(false); + } + } + + writable.set(chunkno); + } + + __declspec(noinline) void makeChunkWritableOld(size_t chunkno) { + scoped_lock lk(mapViewMutex); + + if( writable.get(chunkno) ) + return; + + size_t loc = chunkno * MemoryMappedFile::ChunkSize; + void *Loc = (void*) loc; + size_t ofs; + MongoMMF *mmf = privateViews.find( (void *) (loc), ofs ); + MemoryMappedFile *f = (MemoryMappedFile*) mmf; + assert(f); + + size_t len = MemoryMappedFile::ChunkSize; + assert( mmf->getView() <= Loc ); + if( ofs + len > f->length() ) { + // at the very end of the map + len = f->length() - ofs; + } + else { + ; + } + + // todo: check this goes away on remap + DWORD old; + bool ok = VirtualProtect(Loc, len, PAGE_WRITECOPY, &old); + if( !ok ) { + DWORD e = GetLastError(); + log() << "VirtualProtect failed " << Loc << ' ' << len << ' ' << errnoWithDescription(e) << endl; + assert(false); + } + + writable.set(chunkno); + } + + // align so that there is only one map per chunksize so our bitset works right + void* mapaligned(HANDLE h, unsigned long long _len) { + void *loc = 0; + int n = 0; + while( 1 ) { + n++; + void *m = MapViewOfFileEx(h, FILE_MAP_READ, 0, 0, 0, loc); + if( m == 0 ) { + DWORD e = GetLastError(); + if( n == 0 ) { + // if first fails, it isn't going to work + log() << "mapaligned errno: " << e << endl; + break; + } + if( debug && n == 1 ) { + log() << "mapaligned info e:" << e << " at n=1" << endl; + } + if( n > 98 ) { + log() << "couldn't align mapped view of file len:" << _len/1024.0/1024.0 << "MB errno:" << e << endl; + break; + } + loc = (void*) (((size_t)loc)+MemoryMappedFile::ChunkSize); + continue; + } + + size_t x = (size_t) m; + if( x % MemoryMappedFile::ChunkSize == 0 ) { + void *end = (void*) (x+_len); + DEV log() << "mapaligned " << m << '-' << end << " len:" << _len << endl; + return m; + } + + UnmapViewOfFile(m); + x = ((x+MemoryMappedFile::ChunkSize-1) / MemoryMappedFile::ChunkSize) * MemoryMappedFile::ChunkSize; + loc = (void*) x; + if( n % 20 == 0 ) { + log() << "warning mapaligned n=20" << endl; + } + if( n > 100 ) { + log() << "couldn't align mapped view of file len:" << _len/1024.0/1024.0 << "MB" << endl; + break; + } + } + return 0; + } + + void* MemoryMappedFile::createPrivateMap() { + assert( maphandle ); + scoped_lock lk(mapViewMutex); + //void *p = mapaligned(maphandle, len); + void *p = MapViewOfFile(maphandle, FILE_MAP_READ, 0, 0, 0); + if ( p == 0 ) { + DWORD e = GetLastError(); + log() << "createPrivateMap failed " << filename() << " " << errnoWithDescription(e) << endl; + } + else { + clearWritableBits(p); + views.push_back(p); + } + return p; + } + + void* MemoryMappedFile::remapPrivateView(void *oldPrivateAddr) { + dbMutex.assertWriteLocked(); // short window where we are unmapped so must be exclusive + + // the mapViewMutex is to assure we get the same address on the remap + scoped_lock lk(mapViewMutex); + + clearWritableBits(oldPrivateAddr); + + if( !UnmapViewOfFile(oldPrivateAddr) ) { + DWORD e = GetLastError(); + log() << "UnMapViewOfFile failed " << filename() << ' ' << errnoWithDescription(e) << endl; + assert(false); + } + + // we want the new address to be the same as the old address in case things keep pointers around (as namespaceindex does). + void *p = MapViewOfFileEx(maphandle, FILE_MAP_READ, 0, 0, + /*dwNumberOfBytesToMap 0 means to eof*/0 /*len*/, + oldPrivateAddr); + + if ( p == 0 ) { + DWORD e = GetLastError(); + log() << "MapViewOfFileEx failed " << filename() << " " << errnoWithDescription(e) << endl; + assert(p); + } + assert(p == oldPrivateAddr); + return p; + } +#endif + + void MongoMMF::remapThePrivateView() { + assert( cmdLine.dur ); + + // todo 1.9 : it turns out we require that we always remap to the same address. + // so the remove / add isn't necessary and can be removed + privateViews.remove(_view_private); + _view_private = remapPrivateView(_view_private); + privateViews.add(_view_private, this); + } + + /** register view. threadsafe */ + void PointerToMMF::add(void *view, MongoMMF *f) { + assert(view); + assert(f); + mutex::scoped_lock lk(_m); + _views.insert( pair<void*,MongoMMF*>(view,f) ); + } + + /** de-register view. threadsafe */ + void PointerToMMF::remove(void *view) { + if( view ) { + mutex::scoped_lock lk(_m); + _views.erase(view); + } + } + + PointerToMMF::PointerToMMF() : _m("PointerToMMF") { +#if defined(SIZE_MAX) + size_t max = SIZE_MAX; +#else + size_t max = ~((size_t)0); +#endif + assert( max > (size_t) this ); // just checking that no one redef'd SIZE_MAX and that it is sane + + // this way we don't need any boundary checking in _find() + _views.insert( pair<void*,MongoMMF*>((void*)0,(MongoMMF*)0) ); + _views.insert( pair<void*,MongoMMF*>((void*)max,(MongoMMF*)0) ); + } + + /** underscore version of find is for when you are already locked + @param ofs out return our offset in the view + @return the MongoMMF to which this pointer belongs + */ + MongoMMF* PointerToMMF::find_inlock(void *p, /*out*/ size_t& ofs) { + // + // .................memory.......................... + // v1 p v2 + // [--------------------] [-------] + // + // e.g., _find(p) == v1 + // + const pair<void*,MongoMMF*> x = *(--_views.upper_bound(p)); + MongoMMF *mmf = x.second; + if( mmf ) { + size_t o = ((char *)p) - ((char*)x.first); + if( o < mmf->length() ) { + ofs = o; + return mmf; + } + } + return 0; + } + + /** find associated MMF object for a given pointer. + threadsafe + @param ofs out returns offset into the view of the pointer, if found. + @return the MongoMMF to which this pointer belongs. null if not found. + */ + MongoMMF* PointerToMMF::find(void *p, /*out*/ size_t& ofs) { + mutex::scoped_lock lk(_m); + return find_inlock(p, ofs); + } + + PointerToMMF privateViews; + + /* void* MongoMMF::switchToPrivateView(void *readonly_ptr) { + assert( cmdLine.dur ); + assert( testIntent ); + + void *p = readonly_ptr; + + { + size_t ofs=0; + MongoMMF *mmf = ourReadViews.find(p, ofs); + if( mmf ) { + void *res = ((char *)mmf->_view_private) + ofs; + return res; + } + } + + { + size_t ofs=0; + MongoMMF *mmf = privateViews.find(p, ofs); + if( mmf ) { + log() << "dur: perf warning p=" << p << " is already in the writable view of " << mmf->filename() << endl; + return p; + } + } + + // did you call writing() with a pointer that isn't into a datafile? + log() << "dur error switchToPrivateView " << p << endl; + return p; + }*/ + + /* switch to _view_write. normally, this is a bad idea since your changes will not + show up in _view_private if there have been changes there; thus the leading underscore + as a tad of a "warning". but useful when done with some care, such as during + initialization. + */ + void* MongoMMF::_switchToWritableView(void *p) { + size_t ofs; + MongoMMF *f = privateViews.find(p, ofs); + assert( f ); + return (((char *)f->_view_write)+ofs); + } + + extern string dbpath; + + // here so that it is precomputed... + void MongoMMF::setPath(string f) { + string suffix; + string prefix; + bool ok = str::rSplitOn(f, '.', prefix, suffix); + uassert(13520, str::stream() << "MongoMMF only supports filenames in a certain format " << f, ok); + if( suffix == "ns" ) + _fileSuffixNo = dur::JEntry::DotNsSuffix; + else + _fileSuffixNo = (int) str::toUnsigned(suffix); + + _p = RelativePath::fromFullPath(prefix); + } + + bool MongoMMF::open(string fname, bool sequentialHint) { + setPath(fname); + _view_write = mapWithOptions(fname.c_str(), sequentialHint ? SEQUENTIAL : 0); + return finishOpening(); + } + + bool MongoMMF::create(string fname, unsigned long long& len, bool sequentialHint) { + setPath(fname); + _view_write = map(fname.c_str(), len, sequentialHint ? SEQUENTIAL : 0); + return finishOpening(); + } + + bool MongoMMF::finishOpening() { + if( _view_write ) { + if( cmdLine.dur ) { + _view_private = createPrivateMap(); + if( _view_private == 0 ) { + massert( 13636 , "createPrivateMap failed (look in log for error)" , false ); + } + privateViews.add(_view_private, this); // note that testIntent builds use this, even though it points to view_write then... + } + else { + _view_private = _view_write; + } + return true; + } + return false; + } + + MongoMMF::MongoMMF() : _willNeedRemap(false) { + _view_write = _view_private = 0; + } + + MongoMMF::~MongoMMF() { + close(); + } + + namespace dur { + void closingFileNotification(); + } + + /*virtual*/ void MongoMMF::close() { + { + if( cmdLine.dur && _view_write/*actually was opened*/ ) { + if( debug ) + log() << "closingFileNotication:" << filename() << endl; + dur::closingFileNotification(); + } + privateViews.remove(_view_private); + } + _view_write = _view_private = 0; + MemoryMappedFile::close(); + } + +} |