Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/pkgdefs/SUNWhea/prototype_com    |    1
-rw-r--r--  usr/src/uts/common/Makefile.files        |    1
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs3_vnops.c   |   85
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_client.c  |   36
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_vnops.c   |   84
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_client.c   |   35
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_vnops.c    |   86
-rw-r--r--  usr/src/uts/common/fs/specfs/specvnops.c |   75
-rw-r--r--  usr/src/uts/common/fs/tmpfs/tmp_vnops.c  |   99
-rw-r--r--  usr/src/uts/common/fs/ufs/ufs_vnops.c    |   85
-rw-r--r--  usr/src/uts/common/vm/Makefile           |   11
-rw-r--r--  usr/src/uts/common/vm/page.h             |   11
-rw-r--r--  usr/src/uts/common/vm/seg_map.c          |   11
-rw-r--r--  usr/src/uts/common/vm/seg_map.h          |    8
-rw-r--r--  usr/src/uts/common/vm/vpm.c              | 1141
-rw-r--r--  usr/src/uts/common/vm/vpm.h              |  286
-rw-r--r--  usr/src/uts/i86pc/os/startup.c           |    3
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c           |   14
18 files changed, 1888 insertions, 184 deletions
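The bulk of this change is mechanical: each cached read and write path in NFS (v2/v3, v4), specfs, tmpfs, and UFS gains a vpm_enable branch that replaces the segmap window (segmap_getmapflt/uiomove/segmap_release) with the new vnode page mapping calls (vpm_data_copy/vpm_sync_pages). Below is a condensed sketch of the read-side idiom that repeats throughout the hunks that follow; the cached_read_chunk wrapper and its parameters are hypothetical, and the per-filesystem locking and SM_* flag selection of the real vnops are omitted.

#include <sys/vnode.h>
#include <sys/uio.h>
#include <vm/seg_map.h>
#include <vm/vpm.h>

/*
 * Illustrative sketch of the repeated read-path dispatch.
 * 'off' is the MAXBSIZE-aligned base, 'on' the offset within it.
 */
static int
cached_read_chunk(vnode_t *vp, struct uio *uiop, u_offset_t off,
    uint_t on, size_t n, uint_t flags)
{
	caddr_t base;
	int error;

	if (vpm_enable) {
		/* vpm copies through kpm mappings; no segmap window needed */
		error = vpm_data_copy(vp, off + on, n, uiop,
		    1, NULL, 0, S_READ);
	} else {
		base = segmap_getmapflt(segkmap, vp, off + on, n,
		    1, S_READ);
		error = uiomove(base + on, n, UIO_READ, uiop);
	}

	if (!error) {
		if (vpm_enable)
			error = vpm_sync_pages(vp, off, n, flags);
		else
			error = segmap_release(segkmap, base, flags);
	} else {
		if (vpm_enable)
			(void) vpm_sync_pages(vp, off, n, 0);
		else
			(void) segmap_release(segkmap, base, 0);
	}
	return (error);
}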
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com index f265268418..4fec41a28f 100644 --- a/usr/src/pkgdefs/SUNWhea/prototype_com +++ b/usr/src/pkgdefs/SUNWhea/prototype_com @@ -1230,6 +1230,7 @@ f none usr/include/vm/seg_spt.h 644 root bin f none usr/include/vm/seg_vn.h 644 root bin f none usr/include/vm/seg_kpm.h 644 root bin f none usr/include/vm/vpage.h 644 root bin +f none usr/include/vm/vpm.h 644 root bin f none usr/include/volmgt.h 644 root bin f none usr/include/wait.h 644 root bin f none usr/include/wchar.h 644 root bin diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index b025f1d7c6..2504b4664c 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -135,6 +135,7 @@ GENUNIX_OBJS += \ flock.o \ fm.o \ fork.o \ + vpm.o \ fsat.o \ fs_subr.o \ fsflush.o \ diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c index a53d2270b5..3b54de5ea7 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -567,9 +566,18 @@ nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, if (diff < n) n = (size_t)diff; - base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ); + if (vpm_enable) { + /* + * Copy data. + */ + error = vpm_data_copy(vp, off + on, n, uiop, + 1, NULL, 0, S_READ); + } else { + base = segmap_getmapflt(segkmap, vp, off + on, n, 1, + S_READ); - error = uiomove(base + on, n, UIO_READ, uiop); + error = uiomove(base + on, n, UIO_READ, uiop); + } if (!error) { /* @@ -583,9 +591,18 @@ nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, else flags = 0; mutex_exit(&rp->r_statelock); - error = segmap_release(segkmap, base, flags); - } else - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } + } else { + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } + } } while (!error && uiop->uio_resid > 0); return (error); @@ -749,25 +766,35 @@ nfs3_fwrite: cv_wait(&rp->r_cv, &rp->r_statelock); mutex_exit(&rp->r_statelock); - if (segmap_kpm) { - int pon = uiop->uio_loffset & PAGEOFFSET; - size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid); - int pagecreate; + if (vpm_enable) { + /* + * It will use kpm mappings, so no need to + * pass an address. 
+ */ + error = writerp(rp, NULL, n, uiop, 0); + } else { + if (segmap_kpm) { + int pon = uiop->uio_loffset & PAGEOFFSET; + size_t pn = MIN(PAGESIZE - pon, + uiop->uio_resid); + int pagecreate; - mutex_enter(&rp->r_statelock); - pagecreate = (pon == 0) && (pn == PAGESIZE || - uiop->uio_loffset + pn >= rp->r_size); - mutex_exit(&rp->r_statelock); + mutex_enter(&rp->r_statelock); + pagecreate = (pon == 0) && (pn == PAGESIZE || + uiop->uio_loffset + pn >= rp->r_size); + mutex_exit(&rp->r_statelock); - base = segmap_getmapflt(segkmap, vp, off + on, + base = segmap_getmapflt(segkmap, vp, off + on, pn, !pagecreate, S_WRITE); - error = writerp(rp, base + pon, n, uiop, pagecreate); + error = writerp(rp, base + pon, n, uiop, + pagecreate); - } else { - base = segmap_getmapflt(segkmap, vp, off + on, - n, 0, S_READ); - error = writerp(rp, base + on, n, uiop, 0); + } else { + base = segmap_getmapflt(segkmap, vp, off + on, + n, 0, S_READ); + error = writerp(rp, base + on, n, uiop, 0); + } } if (!error) { @@ -790,9 +817,17 @@ nfs3_fwrite: flags &= ~SM_ASYNC; flags |= SM_WRITE; } - error = segmap_release(segkmap, base, flags); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } } else { - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } /* * In the event that we got an access error while * faulting in a page for a write-only file just diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c index c95a0cd347..81e01a543b 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client.c @@ -2185,10 +2185,13 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) u_offset_t offset; int error; int sm_error; + vnode_t *vp = RTOV(rp); ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid); - ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE); ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER)); + if (!vpm_enable) { + ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE); + } /* * Move bytes in at most PAGESIZE chunks. We must avoid @@ -2206,8 +2209,7 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * n is the number of bytes required to satisfy the request * or the number of bytes to fill out the page. */ - n = (int)MIN((PAGESIZE - ((uintptr_t)base & PAGEOFFSET)), - tcount); + n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount); /* * Check to see if we can skip reading in the page @@ -2226,12 +2228,12 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * created and mapped at base. */ pagecreate = pgcreated || - (((uintptr_t)base & PAGEOFFSET) == 0 && + ((offset & PAGEOFFSET) == 0 && (n == PAGESIZE || ((offset + n) >= rp->r_size))); mutex_exit(&rp->r_statelock); - if (pagecreate) { + if (!vpm_enable && pagecreate) { /* * The last argument tells segmap_pagecreate() to * always lock the page, as opposed to sometimes @@ -2267,7 +2269,17 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) rp->r_modaddr = (offset & MAXBMASK); mutex_exit(&rp->r_statelock); - error = uiomove(base, n, UIO_WRITE, uio); + if (vpm_enable) { + /* + * Copy data. If new pages are created, part of + * the page that is not written will be initizliazed + * with zeros. 
+ */ + error = vpm_data_copy(vp, offset, n, uio, + !pagecreate, NULL, 0, S_WRITE); + } else { + error = uiomove(base, n, UIO_WRITE, uio); + } /* * r_size is the maximum number of @@ -2284,7 +2296,11 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) /* n = # of bytes written */ n = (int)(uio->uio_loffset - offset); - base += n; + + if (!vpm_enable) { + base += n; + } + tcount -= n; /* * If we created pages w/o initializing them completely, @@ -2292,7 +2308,7 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * This happens on a most EOF write cases and if * we had some sort of error during the uiomove. */ - if (pagecreate) { + if (!vpm_enable && pagecreate) { if ((uio->uio_loffset & PAGEOFFSET) || n == 0) (void) kzero(base, PAGESIZE - n); @@ -2310,8 +2326,8 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * segmap_pagecreate(). */ sm_error = segmap_fault(kas.a_hat, segkmap, - saved_base, saved_n, - F_SOFTUNLOCK, S_WRITE); + saved_base, saved_n, + F_SOFTUNLOCK, S_WRITE); if (error == 0) error = sm_error; } diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c index faf08573ba..97fc46809c 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -2617,9 +2616,19 @@ nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, if (diff < n) n = (uint_t)diff; - base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ); + if (vpm_enable) { + /* + * Copy data. + */ + error = vpm_data_copy(vp, off + on, n, uiop, + 1, NULL, 0, S_READ); - error = uiomove(base + on, n, UIO_READ, uiop); + } else { + base = segmap_getmapflt(segkmap, vp, off + on, n, 1, + S_READ); + + error = uiomove(base + on, n, UIO_READ, uiop); + } if (!error) { /* @@ -2633,9 +2642,18 @@ nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, else flags = 0; mutex_exit(&rp->r_statelock); - error = segmap_release(segkmap, base, flags); - } else - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } + } else { + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } + } } while (!error && uiop->uio_resid > 0); return (error); @@ -2826,25 +2844,35 @@ nfs4_fwrite: cv_wait(&rp->r_cv, &rp->r_statelock); mutex_exit(&rp->r_statelock); - if (segmap_kpm) { - int pon = uiop->uio_loffset & PAGEOFFSET; - size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid); - int pagecreate; + if (vpm_enable) { + /* + * It will use kpm mappings, so no need to + * pass an address. 
+ */ + error = writerp4(rp, NULL, n, uiop, 0); + } else { + if (segmap_kpm) { + int pon = uiop->uio_loffset & PAGEOFFSET; + size_t pn = MIN(PAGESIZE - pon, + uiop->uio_resid); + int pagecreate; - mutex_enter(&rp->r_statelock); - pagecreate = (pon == 0) && (pn == PAGESIZE || - uiop->uio_loffset + pn >= rp->r_size); - mutex_exit(&rp->r_statelock); + mutex_enter(&rp->r_statelock); + pagecreate = (pon == 0) && (pn == PAGESIZE || + uiop->uio_loffset + pn >= rp->r_size); + mutex_exit(&rp->r_statelock); - base = segmap_getmapflt(segkmap, vp, off + on, + base = segmap_getmapflt(segkmap, vp, off + on, pn, !pagecreate, S_WRITE); - error = writerp4(rp, base + pon, n, uiop, pagecreate); + error = writerp4(rp, base + pon, n, uiop, + pagecreate); - } else { - base = segmap_getmapflt(segkmap, vp, off + on, - n, 0, S_READ); - error = writerp4(rp, base + on, n, uiop, 0); + } else { + base = segmap_getmapflt(segkmap, vp, off + on, + n, 0, S_READ); + error = writerp4(rp, base + on, n, uiop, 0); + } } if (!error) { @@ -2867,9 +2895,17 @@ nfs4_fwrite: flags &= ~SM_ASYNC; flags |= SM_WRITE; } - error = segmap_release(segkmap, base, flags); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } } else { - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } /* * In the event that we got an access error while * faulting in a page for a write-only file just diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c index d6c0a25d7a..40c886fc85 100644 --- a/usr/src/uts/common/fs/nfs/nfs_client.c +++ b/usr/src/uts/common/fs/nfs/nfs_client.c @@ -2104,10 +2104,13 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) u_offset_t offset; int error; int sm_error; + vnode_t *vp = RTOV(rp); ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid); - ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE); ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER)); + if (!vpm_enable) { + ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE); + } /* * Move bytes in at most PAGESIZE chunks. We must avoid @@ -2125,8 +2128,7 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * n is the number of bytes required to satisfy the request * or the number of bytes to fill out the page. */ - n = (int)MIN((PAGESIZE - ((uintptr_t)base & PAGEOFFSET)), - tcount); + n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount); /* * Check to see if we can skip reading in the page @@ -2145,11 +2147,11 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * created and mapped at base. */ pagecreate = pgcreated || - (((uintptr_t)base & PAGEOFFSET) == 0 && + ((offset & PAGEOFFSET) == 0 && (n == PAGESIZE || ((offset + n) >= rp->r_size))); mutex_exit(&rp->r_statelock); - if (pagecreate) { + if (!vpm_enable && pagecreate) { /* * The last argument tells segmap_pagecreate() to * always lock the page, as opposed to sometimes @@ -2185,7 +2187,17 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) rp->r_modaddr = (offset & MAXBMASK); mutex_exit(&rp->r_statelock); - error = uiomove(base, n, UIO_WRITE, uio); + if (vpm_enable) { + /* + * Copy data. If new pages are created, part of + * the page that is not written will be initizliazed + * with zeros. 
+ */ + error = vpm_data_copy(vp, offset, n, uio, + !pagecreate, NULL, 0, S_WRITE); + } else { + error = uiomove(base, n, UIO_WRITE, uio); + } /* * r_size is the maximum number of @@ -2202,7 +2214,10 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) /* n = # of bytes written */ n = (int)(uio->uio_loffset - offset); - base += n; + + if (!vpm_enable) { + base += n; + } tcount -= n; /* * If we created pages w/o initializing them completely, @@ -2210,7 +2225,7 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * This happens on a most EOF write cases and if * we had some sort of error during the uiomove. */ - if (pagecreate) { + if (!vpm_enable && pagecreate) { if ((uio->uio_loffset & PAGEOFFSET) || n == 0) (void) kzero(base, PAGESIZE - n); @@ -2228,8 +2243,8 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated) * segmap_pagecreate(). */ sm_error = segmap_fault(kas.a_hat, segkmap, - saved_base, saved_n, - F_SOFTUNLOCK, S_WRITE); + saved_base, saved_n, + F_SOFTUNLOCK, S_WRITE); if (error == 0) error = sm_error; } diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c index b98b3d280e..583ce42473 100644 --- a/usr/src/uts/common/fs/nfs/nfs_vnops.c +++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. @@ -472,9 +471,17 @@ nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, if (diff < n) n = (size_t)diff; - base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ); - - error = uiomove(base + on, n, UIO_READ, uiop); + if (vpm_enable) { + /* + * Copy data. + */ + error = vpm_data_copy(vp, off + on, n, uiop, + 1, NULL, 0, S_READ); + } else { + base = segmap_getmapflt(segkmap, vp, off + on, n, + 1, S_READ); + error = uiomove(base + on, n, UIO_READ, uiop); + } if (!error) { /* @@ -488,9 +495,18 @@ nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, else flags = 0; mutex_exit(&rp->r_statelock); - error = segmap_release(segkmap, base, flags); - } else - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } + } else { + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } + } } while (!error && uiop->uio_resid > 0); return (error); @@ -651,25 +667,35 @@ nfs_fwrite: cv_wait(&rp->r_cv, &rp->r_statelock); mutex_exit(&rp->r_statelock); - if (segmap_kpm) { - int pon = uiop->uio_loffset & PAGEOFFSET; - size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid); - int pagecreate; + if (vpm_enable) { + /* + * It will use kpm mappings, so no need to + * pass an address. 
+ */ + error = writerp(rp, NULL, n, uiop, 0); + } else { + if (segmap_kpm) { + int pon = uiop->uio_loffset & PAGEOFFSET; + size_t pn = MIN(PAGESIZE - pon, + uiop->uio_resid); + int pagecreate; - mutex_enter(&rp->r_statelock); - pagecreate = (pon == 0) && (pn == PAGESIZE || - uiop->uio_loffset + pn >= rp->r_size); - mutex_exit(&rp->r_statelock); + mutex_enter(&rp->r_statelock); + pagecreate = (pon == 0) && (pn == PAGESIZE || + uiop->uio_loffset + pn >= rp->r_size); + mutex_exit(&rp->r_statelock); - base = segmap_getmapflt(segkmap, vp, off + on, + base = segmap_getmapflt(segkmap, vp, off + on, pn, !pagecreate, S_WRITE); - error = writerp(rp, base + pon, n, uiop, pagecreate); + error = writerp(rp, base + pon, n, uiop, + pagecreate); - } else { - base = segmap_getmapflt(segkmap, vp, off + on, - n, 0, S_READ); - error = writerp(rp, base + on, n, uiop, 0); + } else { + base = segmap_getmapflt(segkmap, vp, off + on, + n, 0, S_READ); + error = writerp(rp, base + on, n, uiop, 0); + } } if (!error) { @@ -691,9 +717,17 @@ nfs_fwrite: flags &= ~SM_ASYNC; flags |= SM_WRITE; } - error = segmap_release(segkmap, base, flags); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } } else { - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } /* * In the event that we got an access error while * faulting in a page for a write-only file just diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c index d4ee630b92..6a2d6f73d0 100644 --- a/usr/src/uts/common/fs/specfs/specvnops.c +++ b/usr/src/uts/common/fs/specfs/specvnops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
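The NFS write paths above follow the same dispatch, with one twist: under vpm_enable the writerp()/writerp4() helpers are passed a NULL base address, because vpm_data_copy() obtains kpm mappings itself, while the fallback keeps the existing segmap_kpm page-create optimization. A condensed sketch of that dispatch, with variables as in the NFS hunks above:

	if (vpm_enable) {
		/* vpm resolves kpm mappings itself; no address needed */
		error = writerp(rp, NULL, n, uiop, 0);
	} else if (segmap_kpm) {
		int pon = uiop->uio_loffset & PAGEOFFSET;
		size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
		int pagecreate;

		mutex_enter(&rp->r_statelock);
		pagecreate = (pon == 0) && (pn == PAGESIZE ||
		    uiop->uio_loffset + pn >= rp->r_size);
		mutex_exit(&rp->r_statelock);

		base = segmap_getmapflt(segkmap, vp, off + on, pn,
		    !pagecreate, S_WRITE);
		error = writerp(rp, base + pon, n, uiop, pagecreate);
	} else {
		base = segmap_getmapflt(segkmap, vp, off + on, n,
		    0, S_READ);
		error = writerp(rp, base + on, n, uiop, 0);
	}

Inside writerp(), the vpm branch then calls vpm_data_copy(vp, offset, n, uio, !pagecreate, NULL, 0, S_WRITE), which zero-fills whatever part of a newly created page the copy does not cover.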
@@ -880,10 +879,16 @@ spec_read( if (diff < n) n = (size_t)diff; - base = segmap_getmapflt(segkmap, blkvp, - (u_offset_t)(off + on), n, 1, S_READ); + if (vpm_enable) { + error = vpm_data_copy(blkvp, (u_offset_t)(off + on), + n, uiop, 1, NULL, 0, S_READ); + } else { + base = segmap_getmapflt(segkmap, blkvp, + (u_offset_t)(off + on), n, 1, S_READ); - if ((error = uiomove(base + on, n, UIO_READ, uiop)) == 0) { + error = uiomove(base + on, n, UIO_READ, uiop); + } + if (!error) { int flags = 0; /* * If we read a whole block, we won't need this @@ -891,9 +896,17 @@ spec_read( */ if (n + on == MAXBSIZE) flags = SM_DONTNEED | SM_FREE; - error = segmap_release(segkmap, base, flags); + if (vpm_enable) { + error = vpm_sync_pages(blkvp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } } else { - (void) segmap_release(segkmap, base, 0); + if (vpm_enable) { + (void) vpm_sync_pages(blkvp, off, n, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } if (bdevsize == UNKNOWN_SIZE) { error = 0; break; @@ -984,22 +997,27 @@ spec_write( if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize)) pagecreate = 1; - base = segmap_getmapflt(segkmap, blkvp, - (u_offset_t)(off + on), n, !pagecreate, S_WRITE); - - /* - * segmap_pagecreate() returns 1 if it calls - * page_create_va() to allocate any pages. - */ newpage = 0; + if (vpm_enable) { + error = vpm_data_copy(blkvp, (u_offset_t)(off + on), + n, uiop, !pagecreate, NULL, 0, S_WRITE); + } else { + base = segmap_getmapflt(segkmap, blkvp, + (u_offset_t)(off + on), n, !pagecreate, S_WRITE); - if (pagecreate) - newpage = segmap_pagecreate(segkmap, base + on, - n, 0); + /* + * segmap_pagecreate() returns 1 if it calls + * page_create_va() to allocate any pages. + */ + + if (pagecreate) + newpage = segmap_pagecreate(segkmap, base + on, + n, 0); - error = uiomove(base + on, n, UIO_WRITE, uiop); + error = uiomove(base + on, n, UIO_WRITE, uiop); + } - if (pagecreate && + if (!vpm_enable && pagecreate && uiop->uio_loffset < P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) { /* @@ -1029,7 +1047,7 @@ spec_write( * Unlock the pages which have been allocated by * page_create_va() in segmap_pagecreate(). */ - if (newpage) + if (!vpm_enable && newpage) segmap_pageunlock(segkmap, base + on, (size_t)n, S_WRITE); @@ -1053,9 +1071,18 @@ spec_write( flags = SM_WRITE | SM_ASYNC | SM_DONTNEED; } smark(sp, SUPD|SCHG); - error = segmap_release(segkmap, base, flags); - } else - (void) segmap_release(segkmap, base, SM_INVAL); + if (vpm_enable) { + error = vpm_sync_pages(blkvp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } + } else { + if (vpm_enable) { + (void) vpm_sync_pages(blkvp, off, n, SM_INVAL); + } else { + (void) segmap_release(segkmap, base, SM_INVAL); + } + } } while (error == 0 && uiop->uio_resid > 0 && n != 0); diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c index 490cbfc61c..d623dce3f7 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -261,14 +260,32 @@ wrtmp( if (!pagecreate) rw_exit(&tp->tn_contents); - /* Get offset within the segmap mapping */ - segmap_offset = (offset & PAGEMASK) & MAXBOFFSET; - base = segmap_getmapflt(segkmap, vp, (offset & MAXBMASK), - PAGESIZE, !pagecreate, S_WRITE); - newpage = 0; + if (vpm_enable) { + /* + * XXX Why do we need to hold the contents lock? + * The kpm mappings will not cause a fault. + * + * Copy data. If new pages are created, part of + * the page that is not written will be initizliazed + * with zeros. + */ + error = vpm_data_copy(vp, offset, bytes, uio, + !pagecreate, &newpage, 1, S_WRITE); + + if (pagecreate) { + rw_exit(&tp->tn_contents); + } + } else { + /* Get offset within the segmap mapping */ + segmap_offset = (offset & PAGEMASK) & MAXBOFFSET; + base = segmap_getmapflt(segkmap, vp, + (offset & MAXBMASK), + PAGESIZE, !pagecreate, S_WRITE); + } - if (pagecreate) { + + if (!vpm_enable && pagecreate) { rw_downgrade(&tp->tn_contents); /* @@ -287,10 +304,12 @@ wrtmp( (size_t)pageoffset); } - error = uiomove(base + segmap_offset + pageoffset, + if (!vpm_enable) { + error = uiomove(base + segmap_offset + pageoffset, (long)bytes, UIO_WRITE, uio); + } - if (pagecreate && + if (!vpm_enable && pagecreate && uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) { long zoffset; /* zero from offset into page */ /* @@ -310,16 +329,17 @@ wrtmp( */ if ((zoffset = pageoffset + nmoved) < PAGESIZE) (void) kzero(base + segmap_offset + zoffset, - (size_t)PAGESIZE - zoffset); + (size_t)PAGESIZE - zoffset); } /* * Unlock the pages which have been allocated by * page_create_va() in segmap_pagecreate() */ - if (newpage) + if (!vpm_enable && newpage) { segmap_pageunlock(segkmap, base + segmap_offset, (size_t)PAGESIZE, S_WRITE); + } if (error) { /* @@ -327,9 +347,19 @@ wrtmp( * be sure to invalidate any pages that may have * been allocated. */ - (void) segmap_release(segkmap, base, SM_INVAL); + if (vpm_enable) { + (void) vpm_sync_pages(vp, offset, + PAGESIZE, SM_INVAL); + } else { + (void) segmap_release(segkmap, base, SM_INVAL); + } } else { - error = segmap_release(segkmap, base, 0); + if (vpm_enable) { + error = vpm_sync_pages(vp, offset, + PAGESIZE, 0); + } else { + error = segmap_release(segkmap, base, 0); + } } /* @@ -468,17 +498,36 @@ rdtmp( */ rw_exit(&tp->tn_contents); - segmap_offset = (offset & PAGEMASK) & MAXBOFFSET; - base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, - bytes, 1, S_READ); + if (vpm_enable) { + /* + * Copy data. 
+ */ + error = vpm_data_copy(vp, offset, bytes, uio, + 1, NULL, 0, S_READ); + } else { + segmap_offset = (offset & PAGEMASK) & MAXBOFFSET; + base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, + bytes, 1, S_READ); - error = uiomove(base + segmap_offset + pageoffset, - (long)bytes, UIO_READ, uio); + error = uiomove(base + segmap_offset + pageoffset, + (long)bytes, UIO_READ, uio); + } - if (error) - (void) segmap_release(segkmap, base, 0); - else - error = segmap_release(segkmap, base, 0); + if (error) { + if (vpm_enable) { + (void) vpm_sync_pages(vp, offset, + PAGESIZE, 0); + } else { + (void) segmap_release(segkmap, base, 0); + } + } else { + if (vpm_enable) { + error = vpm_sync_pages(vp, offset, + PAGESIZE, 0); + } else { + error = segmap_release(segkmap, base, 0); + } + } /* * Re-acquire contents lock. diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c index 5dd9495aa2..db8a0fc09b 100644 --- a/usr/src/uts/common/fs/ufs/ufs_vnops.c +++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c @@ -999,21 +999,32 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr) rw_exit(&ufsvfsp->vfs_dqrwlock); } - base = segmap_getmapflt(segkmap, vp, (off + mapon), + newpage = 0; + premove_resid = uio->uio_resid; + if (vpm_enable) { + /* + * Copy data. If new pages are created, part of + * the page that is not written will be initizliazed + * with zeros. + */ + error = vpm_data_copy(vp, (off + mapon), (uint_t)n, + uio, !pagecreate, &newpage, 0, S_WRITE); + } else { + + base = segmap_getmapflt(segkmap, vp, (off + mapon), (uint_t)n, !pagecreate, S_WRITE); - /* - * segmap_pagecreate() returns 1 if it calls - * page_create_va() to allocate any pages. - */ - newpage = 0; + /* + * segmap_pagecreate() returns 1 if it calls + * page_create_va() to allocate any pages. + */ - if (pagecreate) - newpage = segmap_pagecreate(segkmap, base, - (size_t)n, 0); + if (pagecreate) + newpage = segmap_pagecreate(segkmap, base, + (size_t)n, 0); - premove_resid = uio->uio_resid; - error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); + error = uiomove(base + mapon, (long)n, UIO_WRITE, uio); + } /* * If "newpage" is set, then a new page was created and it @@ -1028,7 +1039,7 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr) * If uiomove fails because of an error, the old valid data * is kept instead of filling the rest of the page with zero's. */ - if (newpage && + if (!vpm_enable && newpage && uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) { /* * We created pages w/o initializing them completely, @@ -1049,7 +1060,7 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr) * Unlock the pages allocated by page_create_va() * in segmap_pagecreate() */ - if (newpage) + if (!vpm_enable && newpage) segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE); /* @@ -1130,7 +1141,15 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr) */ flags = SM_INVAL; } - (void) segmap_release(segkmap, base, flags); + + if (vpm_enable) { + /* + * Flush pages. + */ + (void) vpm_sync_pages(vp, off, n, flags); + } else { + (void) segmap_release(segkmap, base, flags); + } } else { flags = 0; /* @@ -1163,7 +1182,14 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr) */ flags = SM_WRITE | SM_ASYNC | SM_DONTNEED; } - error = segmap_release(segkmap, base, flags); + if (vpm_enable) { + /* + * Flush pages. 
+ */ + (void) vpm_sync_pages(vp, off, n, flags); + } else { + (void) segmap_release(segkmap, base, flags); + } /* * If the operation failed and is synchronous, * then we need to unwind what uiomove() last @@ -1429,10 +1455,18 @@ rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr) */ if (rwtype == RW_READER) rw_exit(&ip->i_contents); - base = segmap_getmapflt(segkmap, vp, (off + mapon), - (uint_t)n, 1, S_READ); - error = uiomove(base + mapon, (long)n, UIO_READ, uio); + if (vpm_enable) { + /* + * Copy data. + */ + error = vpm_data_copy(vp, (off + mapon), (uint_t)n, + uio, 1, NULL, 0, S_READ); + } else { + base = segmap_getmapflt(segkmap, vp, (off + mapon), + (uint_t)n, 1, S_READ); + error = uiomove(base + mapon, (long)n, UIO_READ, uio); + } flags = 0; if (!error) { @@ -1460,9 +1494,18 @@ rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr) flags &= ~SM_ASYNC; flags |= SM_WRITE; } - error = segmap_release(segkmap, base, flags); - } else - (void) segmap_release(segkmap, base, flags); + if (vpm_enable) { + error = vpm_sync_pages(vp, off, n, flags); + } else { + error = segmap_release(segkmap, base, flags); + } + } else { + if (vpm_enable) { + (void) vpm_sync_pages(vp, off, n, flags); + } else { + (void) segmap_release(segkmap, base, flags); + } + } if (rwtype == RW_READER) rw_enter(&ip->i_contents, rwtype); diff --git a/usr/src/uts/common/vm/Makefile b/usr/src/uts/common/vm/Makefile index fcd6582985..642c393f8f 100644 --- a/usr/src/uts/common/vm/Makefile +++ b/usr/src/uts/common/vm/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -29,8 +28,8 @@ # include global definitions include ../../../Makefile.master -HDRS= anon.h as.h faultcode.h hat.h kpm.h page.h pvn.h rm.h seg.h vpage.h \ - seg_dev.h seg_enum.h seg_kmem.h seg_kp.h seg_kpm.h seg_map.h \ +HDRS= anon.h as.h faultcode.h vpm.h hat.h kpm.h page.h pvn.h rm.h seg.h \ + vpage.h seg_dev.h seg_enum.h seg_kmem.h seg_kp.h seg_kpm.h seg_map.h \ seg_vn.h seg_spt.h ROOTDIRS= $(ROOT)/usr/include/vm diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index e3317b9fea..a1aa25bbbc 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
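The page.h hunk below is the structural hook for the cache: on LP64 the selock pad becomes p_vpmref, an index naming the page's cached vpmap_t slot, and p_msresv_2 becomes p_ilock, the mutex that protects it. A sketch of the back-reference bookkeeping, using the macros the new vpm.c defines further down (PPMTX, VPMID, VPMP):

	kmutex_t *pmtx = PPMTX(pp);		/* &pp->p_ilock */

	mutex_enter(pmtx);
	if (pp->p_vpmref == 0)
		pp->p_vpmref = VPMID(vpm);	/* slot now names this page */
	mutex_exit(pmtx);

	/* later, with the page locked, find its cached slot */
	if ((refid = pp->p_vpmref) != 0)
		vpm = VPMP(refid);		/* &vpmd_vpmap[refid - 1] */

On x64 this reference is treated as a mapping on the page, so page_unload() clears it if the page is stolen.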
@@ -468,7 +467,7 @@ typedef struct page { struct vnode *p_vnode; /* vnode that this page is named by */ selock_t p_selock; /* shared/exclusive lock on the page */ #if defined(_LP64) - int p_selockpad; /* pad for growing selock */ + uint_t p_vpmref; /* vpm ref - index of the vpmap_t */ #endif struct page *p_hash; /* hash by [vnode, offset] */ struct page *p_vpnext; /* next page in vnode list */ @@ -506,7 +505,11 @@ typedef struct page { /* index of entry in p_map when p_embed is set */ uint_t p_mlentry; #endif +#if defined(_LP64) + kmutex_t p_ilock; /* protects p_vpmref */ +#else uint64_t p_msresv_2; /* page allocation debugging */ +#endif } page_t; diff --git a/usr/src/uts/common/vm/seg_map.c b/usr/src/uts/common/vm/seg_map.c index 9fd8d37e5a..de27f6e2ff 100644 --- a/usr/src/uts/common/vm/seg_map.c +++ b/usr/src/uts/common/vm/seg_map.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -466,6 +465,10 @@ segmap_create(struct seg *seg, void *argsp) scpu->scpu.scpu_last_smap = smd_smap; } + if (vpm_enable) { + vpm_init(); + } + #ifdef DEBUG /* * Keep track of which colors are used more often. diff --git a/usr/src/uts/common/vm/seg_map.h b/usr/src/uts/common/vm/seg_map.h index 339dabe674..0e3cd9bf9b 100644 --- a/usr/src/uts/common/vm/seg_map.h +++ b/usr/src/uts/common/vm/seg_map.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,6 +64,7 @@ struct segmap_crargs { }; #include <vm/kpm.h> +#include <vm/vpm.h> /* * Each smap struct represents a MAXBSIZE sized mapping to the diff --git a/usr/src/uts/common/vm/vpm.c b/usr/src/uts/common/vm/vpm.c new file mode 100644 index 0000000000..1f4f2fdf58 --- /dev/null +++ b/usr/src/uts/common/vm/vpm.c @@ -0,0 +1,1141 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * VM - generic vnode page mapping interfaces. + * + * Mechanism to provide temporary mappings to vnode pages. + * The typical use would be to copy/access file data. + */ + +#include <sys/types.h> +#include <sys/t_lock.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/buf.h> +#include <sys/systm.h> +#include <sys/vnode.h> +#include <sys/mman.h> +#include <sys/errno.h> +#include <sys/cred.h> +#include <sys/kmem.h> +#include <sys/vtrace.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/thread.h> +#include <sys/dumphdr.h> +#include <sys/bitmap.h> +#include <sys/lgrp.h> + +#include <vm/seg_kmem.h> +#include <vm/hat.h> +#include <vm/as.h> +#include <vm/seg.h> +#include <vm/seg_kpm.h> +#include <vm/seg_map.h> +#include <vm/page.h> +#include <vm/pvn.h> +#include <vm/rm.h> +#include <vm/vpm.h> + +/* + * Needs to be enabled by each platform. + */ +int vpm_enable = 0; + +#ifdef SEGKPM_SUPPORT + + +int vpm_cache_enable = 1; +long vpm_cache_percent = 12; +long vpm_cache_size; +int vpm_nfreelist = 0; +int vpmd_freemsk = 0; + +#define VPM_S_PAD 64 +union vpm_cpu { + struct { + int vcpu_free_ndx; + ulong_t vcpu_hits; + ulong_t vcpu_misses; + } vcpu; + char vpm_pad[VPM_S_PAD]; +}; +static union vpm_cpu *vpmd_cpu; + +#define vfree_ndx vcpu.vcpu_free_ndx + +int vpm_cachemode = VPMCACHE_LRU; + +#define PPMTX(pp) (&(pp)->p_ilock) + +static struct vpmap *vpmd_vpmap; /* list of vpmap structs preallocated */ +static struct vpmfree *vpmd_free; +#define VPMAPMTX(vpm) (&vpm->vpm_mtx) +#define VPMAP2VMF(vpm) (&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk]) +#define VPMAP2VMF_NDX(vpm) (ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk) +#define VPMP(id) (&vpmd_vpmap[id - 1]) +#define VPMID(vpm) (uint_t)((vpm - vpmd_vpmap) + 1) + + +#ifdef DEBUG + +struct vpm_debug { + int vpmd_steals; + int vpmd_contend; + int vpmd_prevpagelocked; + int vpmd_getpagefailed; + int vpmd_zerostart; + int vpmd_emptyfreelist; + int vpmd_nofreevpms; +} vpm_debug; + +#define VPM_DEBUG(x) ((vpm_debug.x)++) + +int steals; +int steals_mtbf = 7; +int contend; +int contend_mtbf = 127; + +#define VPM_MTBF(v, f) (((++(v)) & (f)) != (f)) + +#else /* DEBUG */ + +#define VPM_MTBF(v, f) (1) +#define VPM_DEBUG(x) /* nothing */ + +#endif + +/* + * The vpm cache. + * + * The main purpose of having a cache here is to speed up page_lookup() + * operations and also provide an LRU(default) behaviour of file pages. The + * page_lookup() operation tends to be expensive if a page has to be + * reclaimed from the system page cache("cachelist"). Once we speed up the + * page_lookup()->page_reclaim() path then there there should be no need for + * this cache. The system page cache(cachelist) should effectively serve the + * purpose of caching file pages. + * + * This cache is very similar to segmap's smap cache. Each page in the + * cache is tracked by the structure vpmap_t. But unlike segmap, there is no + * hash table. The page_t has a reference to the vpmap_t when cached. 
For a + * given vnode, offset the page is found by means of a page_lookup() operation. + * Any page which has a mapping(i.e when cached) will not be in the + * system 'cachelist'. Hence the page_lookup() will not have to do a + * page_reclaim(). That is how the cache serves to speed up page_lookup() + * operations. + * + * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system. + */ + +void +vpm_init() +{ + long npages; + struct vpmap *vpm; + struct vpmfree *vpmflp; + int i, ndx; + extern void prefetch_smap_w(void *); + + if (!vpm_cache_enable) { + return; + } + + /* + * Set the size of the cache. + */ + vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100); + if (vpm_cache_size < VPMAP_MINCACHE) { + vpm_cache_size = VPMAP_MINCACHE; + } + + /* + * Number of freelists. + */ + if (vpm_nfreelist == 0) { + vpm_nfreelist = max_ncpus; + } else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) { + cmn_err(CE_WARN, "vpmap create : number of freelist " + "vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus); + vpm_nfreelist = 2 * max_ncpus; + } + + /* + * Round it up to the next power of 2 + */ + if (vpm_nfreelist & (vpm_nfreelist - 1)) { + vpm_nfreelist = 1 << (highbit(vpm_nfreelist)); + } + vpmd_freemsk = vpm_nfreelist - 1; + + /* + * Use a per cpu rotor index to spread the allocations evenly + * across the available vpm freelists. + */ + vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP); + ndx = 0; + for (i = 0; i < max_ncpus; i++) { + + vpmd_cpu[i].vfree_ndx = ndx; + ndx = (ndx + 1) & vpmd_freemsk; + } + + /* + * Allocate and initialize the freelist. + */ + vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree), + KM_SLEEP); + for (i = 0; i < vpm_nfreelist; i++) { + + vpmflp = &vpmd_free[i]; + /* + * Set up initial queue pointers. They will get flipped + * back and forth. + */ + vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ]; + vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ]; + } + + npages = mmu_btop(vpm_cache_size); + + + /* + * Allocate and initialize the vpmap structs. + */ + vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP); + for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) { + struct vpmfree *vpmflp; + union vpm_freeq *releq; + struct vpmap *vpmapf; + + /* + * Use prefetch as we have to walk thru a large number of + * these data structures. We just use the smap's prefetch + * routine as it does the same. This should work fine + * for x64(this needs to be modifed when enabled on sparc). + */ + prefetch_smap_w((void *)vpm); + + vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm); + + vpmflp = VPMAP2VMF(vpm); + releq = vpmflp->vpm_releq; + + vpmapf = releq->vpmq_free; + if (vpmapf == NULL) { + releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm; + } else { + vpm->vpm_next = vpmapf; + vpm->vpm_prev = vpmapf->vpm_prev; + vpmapf->vpm_prev = vpm; + vpm->vpm_prev->vpm_next = vpm; + releq->vpmq_free = vpm->vpm_next; + } + + /* + * Indicate that the vpmap is on the releq at start + */ + vpm->vpm_ndxflg = VPMRELEQ; + } +} + + +/* + * unhooks vpm from the freelist if it is still on the freelist. 
+ */ +#define VPMAP_RMFREELIST(vpm) \ + { \ + if (vpm->vpm_next != NULL) { \ + union vpm_freeq *freeq; \ + struct vpmfree *vpmflp; \ + vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \ + freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \ + mutex_enter(&freeq->vpmq_mtx); \ + if (freeq->vpmq_free != vpm) { \ + vpm->vpm_prev->vpm_next = vpm->vpm_next; \ + vpm->vpm_next->vpm_prev = vpm->vpm_prev; \ + } else if (vpm == vpm->vpm_next) { \ + freeq->vpmq_free = NULL; \ + } else { \ + freeq->vpmq_free = vpm->vpm_next; \ + vpm->vpm_prev->vpm_next = vpm->vpm_next; \ + vpm->vpm_next->vpm_prev = vpm->vpm_prev; \ + } \ + mutex_exit(&freeq->vpmq_mtx); \ + vpm->vpm_next = vpm->vpm_prev = NULL; \ + } \ + } + +static int +get_freelndx(int mode) +{ + int ndx; + + ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk; + switch (mode) { + + case VPMCACHE_LRU: + default: + vpmd_cpu[CPU->cpu_seqid].vfree_ndx++; + break; + } + return (ndx); +} + + +/* + * Find one vpmap structure from the free lists and use it for the newpage. + * The previous page it cached is dissociated and released. The page_t's + * p_vpmref is cleared only when the vpm it is pointing to is locked(or + * for AMD64 when the page is exclusively locked in page_unload. That is + * because the p_vpmref is treated as mapping). + * + * The page's p_vpmref is set when the page is + * locked(at least SHARED locked). + */ +static struct vpmap * +get_free_vpmap(page_t *newpage) +{ + struct vpmfree *vpmflp; + kmutex_t *vmtx; + struct vpmap *vpm, *first; + union vpm_freeq *allocq, *releq; + page_t *pp = NULL; + int end_ndx, page_locked = 0; + int free_ndx; + + /* + * get the freelist bin index. + */ + free_ndx = get_freelndx(vpm_cachemode); + + end_ndx = free_ndx; + vpmflp = &vpmd_free[free_ndx]; + +retry_queue: + allocq = vpmflp->vpm_allocq; + mutex_enter(&allocq->vpmq_mtx); + + if ((vpm = allocq->vpmq_free) == NULL) { + +skip_queue: + /* + * The alloc list is empty or this queue is being skipped; + * first see if the allocq toggled. + */ + if (vpmflp->vpm_allocq != allocq) { + /* queue changed */ + mutex_exit(&allocq->vpmq_mtx); + goto retry_queue; + } + releq = vpmflp->vpm_releq; + if (!mutex_tryenter(&releq->vpmq_mtx)) { + /* cannot get releq; a free vpmap may be there now */ + mutex_exit(&allocq->vpmq_mtx); + + /* + * This loop could spin forever if this thread has + * higher priority than the thread that is holding + * releq->vpmq_mtx. In order to force the other thread + * to run, we'll lock/unlock the mutex which is safe + * since we just unlocked the allocq mutex. + */ + mutex_enter(&releq->vpmq_mtx); + mutex_exit(&releq->vpmq_mtx); + goto retry_queue; + } + if (releq->vpmq_free == NULL) { + VPM_DEBUG(vpmd_emptyfreelist); + /* + * This freelist is empty. + * This should not happen unless clients + * are failing to release the vpmap after + * accessing the data. Before resorting + * to sleeping, try the next list of the same color. + */ + free_ndx = (free_ndx + 1) & vpmd_freemsk; + if (free_ndx != end_ndx) { + mutex_exit(&releq->vpmq_mtx); + mutex_exit(&allocq->vpmq_mtx); + vpmflp = &vpmd_free[free_ndx]; + goto retry_queue; + } + /* + * Tried all freelists. + * wait on this list and hope something gets freed. 
+ */ + vpmflp->vpm_want++; + mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx); + cv_wait(&vpmflp->vpm_free_cv, + &vpmflp->vpm_freeq[0].vpmq_mtx); + vpmflp->vpm_want--; + mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx); + vpmflp = &vpmd_free[free_ndx]; + VPM_DEBUG(vpmd_nofreevpms); + goto retry_queue; + } else { + /* + * Something on the rele queue; flip the alloc + * and rele queues and retry. + */ + vpmflp->vpm_allocq = releq; + vpmflp->vpm_releq = allocq; + mutex_exit(&allocq->vpmq_mtx); + mutex_exit(&releq->vpmq_mtx); + if (page_locked) { + delay(hz >> 2); + page_locked = 0; + } + goto retry_queue; + } + } else { + int gotnewvpm; + kmutex_t *pmtx; + uint_t vpmref; + + /* + * Fastpath the case we get the vpmap mutex + * on the first try. + */ + first = vpm; +next_vpmap: + vmtx = VPMAPMTX(vpm); + if (!mutex_tryenter(vmtx)) { + /* + * Another thread is trying to reclaim this slot. + * Skip to the next queue or vpmap. + */ + if ((vpm = vpm->vpm_next) == first) { + goto skip_queue; + } else { + goto next_vpmap; + } + } + + /* + * Assign this vpm to the newpage. + */ + pmtx = PPMTX(newpage); + gotnewvpm = 0; + mutex_enter(pmtx); + + /* + * Check if some other thread already assigned a vpm to + * this page. + */ + if ((vpmref = newpage->p_vpmref) == 0) { + newpage->p_vpmref = VPMID(vpm); + gotnewvpm = 1; + } else { + VPM_DEBUG(vpmd_contend); + mutex_exit(vmtx); + } + mutex_exit(pmtx); + + if (gotnewvpm) { + + /* + * At this point, we've selected the vpm. Remove vpm + * from its freelist. If vpm is the first one in + * the freelist, update the head of the freelist. + */ + if (first == vpm) { + ASSERT(first == allocq->vpmq_free); + allocq->vpmq_free = vpm->vpm_next; + } + + /* + * If the head of the freelist still points to vpm, + * then there are no more free vpmaps in that list. + */ + if (allocq->vpmq_free == vpm) + /* + * Took the last one + */ + allocq->vpmq_free = NULL; + else { + vpm->vpm_prev->vpm_next = vpm->vpm_next; + vpm->vpm_next->vpm_prev = vpm->vpm_prev; + } + mutex_exit(&allocq->vpmq_mtx); + vpm->vpm_prev = vpm->vpm_next = NULL; + + /* + * Disassociate the previous page. On x64 systems + * p_vpmref is used as a mapping reference to the page. + */ + if ((pp = vpm->vpm_pp) != NULL && + vpm->vpm_vp == pp->p_vnode && + vpm->vpm_off == pp->p_offset) { + + pmtx = PPMTX(pp); + if (page_trylock(pp, SE_SHARED)) { + /* + * Now verify that it is the correct + * page. If not someone else stole it, + * so just unlock it and leave. + */ + mutex_enter(pmtx); + if (PP_ISFREE(pp) || + vpm->vpm_vp != pp->p_vnode || + vpm->vpm_off != pp->p_offset || + pp->p_vpmref != VPMID(vpm)) { + mutex_exit(pmtx); + + page_unlock(pp); + } else { + /* + * Release the page. + */ + pp->p_vpmref = 0; + mutex_exit(pmtx); + hat_kpm_mapout(pp, 0, + hat_kpm_page2va(pp, 1)); + (void) page_release(pp, 1); + } + } else { + /* + * If the page cannot be locked, just + * clear the p_vpmref and go. + */ + mutex_enter(pmtx); + if (pp->p_vpmref == VPMID(vpm)) { + pp->p_vpmref = 0; + } + mutex_exit(pmtx); + VPM_DEBUG(vpmd_prevpagelocked); + } + } + + /* + * Setup vpm to point to the new page. + */ + vpm->vpm_pp = newpage; + vpm->vpm_vp = newpage->p_vnode; + vpm->vpm_off = newpage->p_offset; + + } else { + int steal = !VPM_MTBF(steals, steals_mtbf); + /* + * Page already has a vpm assigned just use that. + * Grab the vpm mutex and verify that it is still + * the correct one. The pp->p_vpmref should not change + * once we have the vpm mutex and the page lock. 
+ */ + mutex_exit(&allocq->vpmq_mtx); + vpm = VPMP(vpmref); + vmtx = VPMAPMTX(vpm); + mutex_enter(vmtx); + if ((steal && vpm->vpm_refcnt == 0) || + vpm->vpm_pp != newpage) { + /* + * The vpm got stolen, retry. + * clear the p_vpmref. + */ + pmtx = PPMTX(newpage); + mutex_enter(pmtx); + if (newpage->p_vpmref == vpmref) { + newpage->p_vpmref = 0; + } + mutex_exit(pmtx); + + mutex_exit(vmtx); + VPM_DEBUG(vpmd_steals); + goto retry_queue; + } else if (vpm->vpm_refcnt == 0) { + /* + * Remove it from the free list if it + * exists there. + */ + VPMAP_RMFREELIST(vpm); + } + } + return (vpm); + } +} + +static void +free_vpmap(struct vpmap *vpm) +{ + struct vpmfree *vpmflp; + struct vpmap *vpmfreelist; + union vpm_freeq *releq; + + ASSERT(MUTEX_HELD(VPMAPMTX(vpm))); + + if (vpm->vpm_refcnt != 0) { + panic("free_vpmap"); + /*NOTREACHED*/ + } + + vpmflp = &vpmd_free[vpm->vpm_free_ndx]; + /* + * Add to the tail of the release queue + * Note that vpm_releq and vpm_allocq could toggle + * before we get the lock. This does not affect + * correctness as the 2 queues are only maintained + * to reduce lock pressure. + */ + releq = vpmflp->vpm_releq; + if (releq == &vpmflp->vpm_freeq[0]) { + vpm->vpm_ndxflg = 0; + } else { + vpm->vpm_ndxflg = 1; + } + mutex_enter(&releq->vpmq_mtx); + vpmfreelist = releq->vpmq_free; + if (vpmfreelist == 0) { + int want; + + releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm; + /* + * Both queue mutexes are held to set vpm_want; + * snapshot the value before dropping releq mutex. + * If vpm_want appears after the releq mutex is dropped, + * then the vpmap just freed is already gone. + */ + want = vpmflp->vpm_want; + mutex_exit(&releq->vpmq_mtx); + /* + * See if there was a waiter before dropping the releq mutex + * then recheck after obtaining vpm_freeq[0] mutex as + * the another thread may have already signaled. + */ + if (want) { + mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx); + if (vpmflp->vpm_want) + cv_signal(&vpmflp->vpm_free_cv); + mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx); + } + } else { + vpm->vpm_next = vpmfreelist; + vpm->vpm_prev = vpmfreelist->vpm_prev; + vpmfreelist->vpm_prev = vpm; + vpm->vpm_prev->vpm_next = vpm; + mutex_exit(&releq->vpmq_mtx); + } +} + +/* + * Get the vpmap for the page. + * The refcnt of this vpm is incremented. + */ +static struct vpmap * +get_vpmap(page_t *pp) +{ + struct vpmap *vpm = NULL; + kmutex_t *vmtx; + kmutex_t *pmtx; + unsigned int refid; + + ASSERT((pp != NULL) && PAGE_LOCKED(pp)); + + if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) { + vpm = VPMP(refid); + vmtx = VPMAPMTX(vpm); + mutex_enter(vmtx); + /* + * Since we have the page lock and the vpm mutex, the + * pp->p_vpmref cannot change. + */ + if (vpm->vpm_pp != pp) { + pmtx = PPMTX(pp); + + /* + * Clear the p_vpmref as it is incorrect. + * This can happen if the page was stolen. + * On x64 this should not happen as p_vpmref + * is treated as a mapping on the page. So + * if the page is stolen, the mapping would have + * been cleared in page_unload(). + */ + mutex_enter(pmtx); + if (pp->p_vpmref == refid) + pp->p_vpmref = 0; + mutex_exit(pmtx); + + mutex_exit(vmtx); + vpm = NULL; + } else if (vpm->vpm_refcnt == 0) { + /* + * Got the vpm, remove it from the free + * list if it exists there. + */ + VPMAP_RMFREELIST(vpm); + } + } + if (vpm == NULL) { + /* + * get_free_vpmap() returns with the vpmap mutex held. 
+ */ + vpm = get_free_vpmap(pp); + vmtx = VPMAPMTX(vpm); + vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++; + } else { + vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++; + } + + vpm->vpm_refcnt++; + mutex_exit(vmtx); + + return (vpm); +} + +/* END --- vpm cache ---- */ + +/* + * The vnode page mapping(vpm) interface routines. + */ + +/* + * Find or create the pages starting form baseoff for specified + * length 'len'. + */ +static int +vpm_pagecreate( + struct vnode *vp, + u_offset_t baseoff, + size_t len, + vmap_t vml[], + int nseg, + int *newpage) +{ + + page_t *pp = NULL; + caddr_t base; + u_offset_t off = baseoff; + int i; + ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS); + + for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++) { + struct vpmap *vpm; + + + if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) { + + base = segkpm_create_va(off); + + /* + * the seg pointer passed in is just advisor. Just + * pass segkmap for now like segmap does with + * segmap_kpm enabled. + */ + if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT, + segkmap, base)) == NULL) { + panic("segmap_pagecreate_vpm: " + "page_create failed"); + /*NOTREACHED*/ + } + if (newpage != NULL) + *newpage = 1; + + page_io_unlock(pp); + } + + /* + * Get the vpm for this page_t. + */ + if (vpm_cache_enable) { + vpm = get_vpmap(pp); + vml[i].vs_data = (void *)&vpm->vpm_pp; + } else { + vml[i].vs_data = (void *)pp; + pp->p_vpmref = 0; + } + + vml[i].vs_addr = hat_kpm_mapin(pp, 0); + vml[i].vs_len = PAGESIZE; + + off += PAGESIZE; + } + vml[i].vs_data = NULL; + vml[i].vs_addr = (caddr_t)NULL; + return (0); +} + + +/* + * Returns vpm mappings of pages in the range [off, off+len], where + * len is rounded up to the PAGESIZE boundary. The list of pages and + * the page addresses are returned in the SGL vml (vmap_t) array passed in. + * The nseg is the number of vmap_t entries in the array. + * + * Currently max len allowed is MAXBSIZE therefore, it will either + * fetch/create one or two pages depending on what is the PAGESIZE. + * + * The segmap's SM_LOCKPROTO usage is not supported by these interfaces. + * For such cases, use the seg_map interfaces. + */ +int +vpm_map_pages( + struct vnode *vp, + u_offset_t off, + size_t len, + int fetchpage, + vmap_t *vml, + int nseg, + int *newpage, + enum seg_rw rw) +{ + extern struct vnode *common_specvp(); + u_offset_t baseoff; + uint_t prot; + caddr_t base; + page_t *pp, *pplist[MAXVMAPS]; + struct vpmap *vpm; + int i, error = 0; + + ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS); + baseoff = off & (offset_t)PAGEMASK; + vml[0].vs_data = NULL; + vml[0].vs_addr = (caddr_t)NULL; + /* + * For now, lets restrict it to MAXBSIZE. XXX - We can allow + * len longer then MAXBSIZE, but there should be a limit + * which should be determined by how many pages the VOP_GETPAGE() + * can fetch. + */ + if (off + len > baseoff + MAXBSIZE) { + panic("vpm_map_pages bad len"); + /*NOTREACHED*/ + } + + /* + * If this is a block device we have to be sure to use the + * "common" block device vnode for the mapping. + */ + if (vp->v_type == VBLK) + vp = common_specvp(vp); + + + if (!fetchpage) + return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage)); + + for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++, + pplist[i] = NULL) { + + pp = page_lookup(vp, baseoff, SE_SHARED); + + /* + * If we did not find the page or if this page was not + * in our cache, then let VOP_GETPAGE get all the pages. + * We need to call VOP_GETPAGE so that filesytems can do some + * (un)necessary tracking for sequential access. 
+ */
+
+ if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
+ (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
+ != (P_MOD | P_REF))) {
+ if (pp != NULL) {
+ page_unlock(pp);
+ }
+
+ /*
+ * Pass a dummy address as it will be required
+ * by page_create_va(). We pass segkmap as the seg
+ * as some file systems (UFS) check it.
+ */
+ base = segkpm_create_va(baseoff);
+
+ error = VOP_GETPAGE(vp, baseoff, len, &prot, &pplist[i],
+ roundup(len, PAGESIZE), segkmap, base, rw, CRED());
+ if (error) {
+ VPM_DEBUG(vpmd_getpagefailed);
+ pplist[i] = NULL;
+ }
+ break;
+ } else {
+ pplist[i] = pp;
+ baseoff += PAGESIZE;
+ }
+ }
+
+ if (error) {
+ for (i = 0; pplist[i] != NULL; i++) {
+ page_unlock(pplist[i]);
+ pplist[i] = NULL;
+ }
+ vml[0].vs_addr = NULL;
+ vml[0].vs_data = NULL;
+ return (FC_MAKE_ERR(error));
+ }
+
+ /*
+ * Get the vpms for the pages.
+ */
+ for (i = 0; pplist[i] != NULL; i++) {
+ if (vpm_cache_enable) {
+ vpm = get_vpmap(pplist[i]);
+ vml[i].vs_data = (void *)&(vpm->vpm_pp);
+ } else {
+ vml[i].vs_data = (void *)pplist[i];
+ pplist[i]->p_vpmref = 0;
+ }
+
+ vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
+ vml[i].vs_len = PAGESIZE;
+ }
+
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = (caddr_t)NULL;
+
+ return (0);
+}
+
+/*
+ * Release the vpm mappings on the pages and unlock them.
+ */
+void
+vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
+{
+ int i;
+ struct vpmap *vpm;
+ kmutex_t *mtx;
+ page_t *pp;
+
+ for (i = 0; vml[i].vs_data != NULL; i++) {
+ ASSERT(IS_KPM_ADDR(vml[i].vs_addr));
+
+ if (vpm_cache_enable) {
+ pp = *(((page_t **)vml[i].vs_data));
+ } else {
+ pp = (page_t *)vml[i].vs_data;
+ }
+
+ /*
+ * Mark the page as modified or referenced, because vpm
+ * pages would not cause faults where these bits would
+ * normally be set.
+ */
+ if (rw == S_WRITE) {
+ hat_setrefmod(pp);
+ } else {
+ ASSERT(rw == S_READ);
+ hat_setref(pp);
+ }
+
+ if (vpm_cache_enable) {
+ page_unlock(pp);
+ vpm = (struct vpmap *)((char *)vml[i].vs_data
+ - offsetof(struct vpmap, vpm_pp));
+ mtx = VPMAPMTX(vpm);
+ mutex_enter(mtx);
+
+ if (--vpm->vpm_refcnt == 0) {
+ free_vpmap(vpm);
+ }
+ mutex_exit(mtx);
+ } else {
+ hat_kpm_mapout(pp, 0, vml[i].vs_addr);
+ (void) page_release(pp, 1);
+ }
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = NULL;
+ }
+}
+
+/*
+ * Given the vp, off and the uio structure, this routine will do the
+ * copy (uiomove). If the last page created is partially written,
+ * the rest of the page is zeroed out. It also zeros the beginning of
+ * the first page up to the start offset if requested (zerostart).
+ * If pages are to be fetched, it will call the filesystem's getpage
+ * function (VOP_GETPAGE) to get them, otherwise they will be created if
+ * not already present in the page cache.
+ */
+int
+vpm_data_copy(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ struct uio *uio,
+ int fetchpage,
+ int *newpage,
+ int zerostart,
+ enum seg_rw rw)
+{
+ int error;
+ struct vmap vml[MINVMAPS];
+ enum uio_rw uiorw;
+ int npages = 0;
+
+ uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
+ /*
+ * 'off' will be the offset where the I/O starts.
+ * We get the pages starting at the (off & PAGEMASK)
+ * page boundary.
+ */
+ error = vpm_map_pages(vp, off, (uint_t)len,
+ fetchpage, vml, MINVMAPS, &npages, rw);
+
+ if (newpage != NULL)
+ *newpage = npages;
+ if (!error) {
+ int i, pn, slen = len;
+ int pon = off & PAGEOFFSET;
+
+ /*
+ * Clear from the beginning of the page to the start offset
+ * if requested.
+ */
+ if (!fetchpage && zerostart) {
+ (void) kzero(vml[0].vs_addr, (uint_t)pon);
+ VPM_DEBUG(vpmd_zerostart);
+ }
+
+ for (i = 0; !error && slen > 0 &&
+ vml[i].vs_addr != NULL; i++) {
+ pn = (int)MIN(slen, (PAGESIZE - pon));
+ error = uiomove(vml[i].vs_addr + pon,
+ (long)pn, uiorw, uio);
+ slen -= pn;
+ pon = 0;
+ }
+
+ /*
+ * When new pages are created, zero out part of the
+ * page we did not copy to.
+ */
+ if (!fetchpage && npages &&
+ uio->uio_loffset < roundup(off + len, PAGESIZE)) {
+ int nzero;
+
+ pon = (uio->uio_loffset & PAGEOFFSET);
+ nzero = PAGESIZE - pon;
+ i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
+ (void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
+ }
+ vpm_unmap_pages(vml, rw);
+ }
+ return (error);
+}
+
+/*
+ * Called to flush the pages of the given vnode covering
+ * the range [off, off + len).
+ */
+int
+vpm_sync_pages(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ uint_t flags)
+{
+ extern struct vnode *common_specvp();
+ int bflags = 0;
+ int error = 0;
+ size_t psize = roundup(len, PAGESIZE);
+
+ /*
+ * If this is a block device we have to be sure to use the
+ * "common" block device vnode for the mapping.
+ */
+ if (vp->v_type == VBLK)
+ vp = common_specvp(vp);
+
+ if ((flags & ~SM_DONTNEED) != 0) {
+ if (flags & SM_ASYNC)
+ bflags |= B_ASYNC;
+ if (flags & SM_INVAL)
+ bflags |= B_INVAL;
+ if (flags & SM_DESTROY)
+ bflags |= (B_INVAL|B_TRUNC);
+ if (flags & SM_FREE)
+ bflags |= B_FREE;
+ if (flags & SM_DONTNEED)
+ bflags |= B_DONTNEED;
+
+ error = VOP_PUTPAGE(vp, off, psize, bflags, CRED());
+ }
+
+ return (error);
+}
+
+
+#else /* SEGKPM_SUPPORT */
+
+/* vpm stubs */
+void
+vpm_init()
+{
+}
+
+/*ARGSUSED*/
+int
+vpm_pagecreate(
+ struct vnode *vp,
+ u_offset_t baseoff,
+ size_t len,
+ vmap_t vml[],
+ int nseg,
+ int *newpage)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+vpm_map_pages(
+ struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ int fetchpage,
+ vmap_t vml[],
+ int nseg,
+ int *newpage,
+ enum seg_rw rw)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+vpm_data_copy(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ struct uio *uio,
+ int fetchpage,
+ int *newpage,
+ int zerostart,
+ enum seg_rw rw)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
+{
+}
+
+/*ARGSUSED*/
+int
+vpm_sync_pages(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ uint_t flags)
+{
+ return (0);
+}
+#endif /* SEGKPM_SUPPORT */
diff --git a/usr/src/uts/common/vm/vpm.h b/usr/src/uts/common/vm/vpm.h
new file mode 100644
index 0000000000..6d9c53b009
--- /dev/null
+++ b/usr/src/uts/common/vm/vpm.h
@@ -0,0 +1,286 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VM_VPM_H
+#define _VM_VPM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The vnode page mappings (VPM) interfaces.
+ * "Commitment level - Consolidation private". They are subject
+ * to change without notice. Use them at your own risk.
+ *
+ * At this stage these interfaces are provided only to utilize the
+ * segkpm mappings and are enabled for Solaris x64. Therefore these
+ * interfaces have to be used under the 'vpm_enable' check as an
+ * alternative to the segmap interfaces where applicable.
+ *
+ * The VPM interfaces provide temporary mappings to file pages. They
+ * return the mappings in a scatter gather list (SGL).
+ * The SGL elements are the structure 'vmap_t'.
+ *
+ * typedef struct vmap {
+ * caddr_t vs_addr; / public /
+ * size_t vs_len; / public - Currently not used /
+ * void *vs_data; / opaque - private data /
+ * } vmap_t;
+ *
+ * An array of this structure has to be passed to the interface routines
+ * along with the size (# of elements) of the SGL array. Depending on the
+ * requested length and mapped chunk sizes (PAGESIZE here), the number of
+ * valid mappings returned can be less than the actual size of the SGL array.
+ * An element in the SGL will always have 'vs_addr' set to NULL, which
+ * marks the end of the valid entries in the SGL.
+ *
+ * The vmap_t structure members are populated with the mapped address
+ * in 'vs_addr' and the length of the mapping in 'vs_len'. Currently the
+ * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
+ * and the caller should not access or modify it.
+ *
+ * Using a scatter gather list to return the mappings and length makes it
+ * possible to provide mappings of variable length. Currently a mapping
+ * length of only 'PAGESIZE' per vmap_t is possible. Also, similar to the
+ * segmap interfaces, a max length of 'MAXBSIZE' per request is supported
+ * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements
+ * of the SGL depending on the PAGESIZE. The scatter gather list array size
+ * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
+ * The MAXBSIZE restriction exists because the filesystems are not capable
+ * of handling more (disk block allocations at a time) for now.
+ *
+ *
+ * Interfaces:
+ *
+ * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
+ * int fetchpage, vmap_t *vml, int vmlsz,
+ * int *newpagecreated, enum seg_rw rw);
+ *
+ * This function returns mappings to vnode pages.
+ *
+ * It takes a vnode, offset and length and returns mappings to the pages
+ * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
+ * Currently these interfaces are subject to restrictions similar to the segmap
+ * interfaces. The length passed in should satisfy the following criteria:
+ * '(off + len) <= ((off & PAGEMASK) + MAXBSIZE)'
+ * The mapped addresses returned in 'vs_addr' are page aligned.
+ *
+ * The 'vmlsz' is the size (# of elements) of the 'vml' array.
+ *
+ * When the 'fetchpage' flag is set, the vnode (file) pages will be fetched
+ * (calls VOP_GETPAGE) from the backing store (disk) if not found in the
+ * system page cache. If 'fetchpage == 0', the vnode (file) pages for the
+ * given offset will just be created if they are not already present in the
+ * system page cache. The 'newpagecreated' flag is set on return if new
+ * pages are created when 'fetchpage == 0' (i.e., when asked to just create
+ * new pages).
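+ *
+ * For illustration, a hypothetical read-side sketch follows; 'kbuf' is
+ * an assumed kernel destination buffer and 'len' satisfies the length
+ * criteria above. It fetches the existing file pages ('fetchpage' set)
+ * and copies them out (the 'rw' argument used here is described below):
+ *
+ * vmap_t vml[MINVMAPS];
+ * int err, i;
+ * size_t n, pon;
+ *
+ * pon = (off & PAGEOFFSET);
+ *
+ * if (vpm_enable) {
+ * err = vpm_map_pages(vp, off, len, 1, vml, MINVMAPS,
+ * NULL, S_READ);
+ *
+ * if (err)
+ * return;
+ *
+ * for (i = 0; len > 0 && vml[i].vs_addr != NULL; i++) {
+ * n = MIN(len, PAGESIZE - pon);
+ * bcopy(vml[i].vs_addr + pon, kbuf, n);
+ * kbuf += n;
+ * len -= n;
+ * pon = 0;
+ * }
+ *
+ * vpm_unmap_pages(vml, S_READ);
+ * }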
+ *
+ * The 'seg_rw rw' indicates the intended operation on these mappings
+ * (S_WRITE or S_READ).
+ *
+ * Currently these interfaces only return segkpm mappings. Therefore the
+ * vnode pages that are being accessed will be locked (at least SHARED locked)
+ * for the duration these mappings are in use. After use, the unmap
+ * function, vpm_unmap_pages(), has to be called with the same SGL array.
+ *
+ *
+ * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
+ *
+ * This function unmaps the pages that were mapped by vpm_map_pages().
+ * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
+ *
+ *
+ * ex:
+ * To copy data from a kernel buffer 'buf' into the vnode (file) 'vp' at
+ * offset 'off', the following code snippet shows how to use the above two
+ * interfaces. Here the copy length runs to the MAXBSIZE boundary. This
+ * code can be executed repeatedly, in a loop, to copy more than MAXBSIZE
+ * of data.
+ *
+ * vmap_t vml[MINVMAPS];
+ * int err, i, newpage, len;
+ * int pon;
+ *
+ * pon = (off & PAGEOFFSET);
+ * len = MAXBSIZE - pon;
+ *
+ * if (vpm_enable) {
+ * err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
+ * &newpage, S_WRITE);
+ *
+ * if (err)
+ * return;
+ *
+ * for (i = 0; vml[i].vs_addr != NULL; i++) {
+ * bcopy(buf, vml[i].vs_addr + pon,
+ * PAGESIZE - pon);
+ * buf += (PAGESIZE - pon);
+ * pon = 0;
+ * }
+ *
+ * if (newpage) {
+ * pon = (off & PAGEOFFSET);
+ * bzero(vml[0].vs_addr, pon);
+ * }
+ *
+ * vpm_unmap_pages(vml, S_WRITE);
+ * }
+ *
+ *
+ * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
+ * struct uio *uio, int fetchpage, int *newpagecreated,
+ * int zerostart, enum seg_rw rw);
+ *
+ * This function can be called if the need is to just transfer data to/from
+ * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to
+ * do the data transfer. It can be used in the context of read and write
+ * system calls to transfer data between a user buffer, which is specified
+ * in the uio structure, and the vnode pages. If the data needs to be
+ * transferred between a kernel buffer and the pages, like in the above
+ * example, a uio structure can be set up accordingly and passed. The 'rw'
+ * parameter will determine the direction of the data transfer.
+ *
+ * The 'fetchpage' and 'newpagecreated' are the same as explained before.
+ * The 'zerostart' flag, when set, will zero-fill the start of the first
+ * page up to the offset 'off', i.e. from 'off & PAGEMASK' to 'off'.
+ * Here too the MAXBSIZE restriction mentioned above applies to the length
+ * requested.
+ *
+ *
+ * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
+ * size_t len, uint_t flags)
+ *
+ * This function can be called to flush or sync the vnode (file) pages that
+ * have been accessed. It will call VOP_PUTPAGE().
+ *
+ * For the given vnode, off and len the pages covering the range
+ * [off, off + len) are flushed. Currently it uses the same flags that
+ * are used with the segmap_release() interface. Refer to vm/seg_map.h.
+ * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
+ *
+ */
+
+
+/*
+ * vpm cache related definitions.
+ */
+#define VPMAP_MINCACHE (64 * 1024 * 1024)
+
+/*
+ * vpm caching mode
+ */
+#define VPMCACHE_LRU 0
+#define VPMCACHE_RANDOM 1
+
+/*
+ * Data structures to manage the cache of pages referenced by
+ * the vpm interfaces. There is one vpmap struct per page in the cache.
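+ * A vpmap with a zero reference count is kept on one of the free
+ * lists below and may be reclaimed (stolen) for another page.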
+ */
+struct vpmap {
+ kmutex_t vpm_mtx; /* protects non-list fields */
+ struct vnode *vpm_vp; /* pointer to vnode of cached page */
+ struct vpmap *vpm_next; /* free list pointers */
+ struct vpmap *vpm_prev;
+ u_offset_t vpm_off; /* offset of the page */
+ page_t *vpm_pp; /* page pointer */
+ ushort_t vpm_refcnt; /* Number of active references */
+ ushort_t vpm_ndxflg; /* indicates which queue */
+ ushort_t vpm_free_ndx; /* freelist it belongs to */
+};
+
+/*
+ * Multiple vpmap free lists are maintained so that allocations
+ * scale with the cpu count. To further reduce contention between
+ * allocations and deallocations, each list is made up of two queues.
+ */
+#define VPM_FREEQ_PAD 64
+union vpm_freeq {
+ struct {
+ struct vpmap *vpmsq_free;
+ kmutex_t vpmsq_mtx;
+ } vpmfq;
+ char vpmq_pad[VPM_FREEQ_PAD];
+};
+
+#define vpmq_free vpmfq.vpmsq_free
+#define vpmq_mtx vpmfq.vpmsq_mtx
+
+struct vpmfree {
+ union vpm_freeq vpm_freeq[2]; /* alloc and release queues */
+ union vpm_freeq *vpm_allocq; /* current alloc queue */
+ union vpm_freeq *vpm_releq; /* current release queue */
+ kcondvar_t vpm_free_cv;
+ ushort_t vpm_want;
+};
+
+#define VPMALLOCQ 0
+#define VPMRELEQ 1
+
+/*
+ * VPM Interface definitions.
+ */
+
+/*
+ * This structure is the scatter gather list element. The page
+ * mappings will be returned in this structure. A pointer to an
+ * array of this structure is passed to the interface routines.
+ */
+typedef struct vmap {
+ caddr_t vs_addr; /* mapped address */
+ size_t vs_len; /* length, currently fixed at PAGESIZE */
+ void *vs_data; /* opaque - private data */
+} vmap_t;
+
+/*
+ * The minimum and maximum number of array elements in the scatter
+ * gather list.
+ */
+#define MINVMAPS 3 /* (MAXBSIZE/4096 + 1), min # of mappings */
+#define MAXVMAPS 10 /* Max # of scatter gather list entries */
+
+#ifdef _KERNEL
+
+extern int vpm_enable;
+/*
+ * vpm page mapping operations.
+ */
+extern void vpm_init(void);
+extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
+ vmap_t *, int, int *, enum seg_rw);
+
+extern void vpm_unmap_pages(vmap_t *, enum seg_rw);
+extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
+extern int vpm_data_copy(struct vnode *, u_offset_t, size_t,
+ struct uio *, int, int *, int, enum seg_rw);
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VM_VPM_H */
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index b58cad94f8..8ac9c6ffec 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -1528,7 +1528,7 @@ startup_vm(void)
 * between kernelbase and the beginning of segkpm.
 */
 kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE;
- kpm_size = mmu_ptob(physmax);
+ kpm_size = mmu_ptob(physmax + 1);
 PRM_DEBUG(kpm_vbase);
 PRM_DEBUG(kpm_size);
 final_kernelheap =
@@ -1763,6 +1763,7 @@ startup_vm(void)
 if (kpm_desired) {
 kpm_init();
 kpm_enable = 1;
+ vpm_enable = 1;
 }
 
 /*
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 89fc15e20e..4c06279917 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -3124,6 +3124,7 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
 return (hm);
 }
 
+extern int vpm_enable;
 /*
 * Unload all translations to a page. If the page is a subpage of a large
 * page, the large page mappings are also removed.
@@ -3142,6 +3143,14 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
 uint_t entry;
 level_t level;
 
+#if defined(__amd64)
+ /*
+ * Clear the vpm ref.
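+ * The p_vpmref is treated as a mapping on the page, so it
+ * must not survive once all translations to the page are
+ * unloaded.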
+ */
+ if (vpm_enable) {
+ pp->p_vpmref = 0;
+ }
+#endif
 /*
 * The loop with next_size handles pages with multiple pagesize mappings
 */
@@ -3488,6 +3497,11 @@ hat_page_getshare(page_t *pp)
 {
 uint_t cnt;
 cnt = hment_mapcnt(pp);
+#if defined(__amd64)
+ if (vpm_enable && pp->p_vpmref) {
+ cnt += 1;
+ }
+#endif
 return (cnt);
 }