author     praks <none@none>  2006-04-19 18:07:13 -0700
committer  praks <none@none>  2006-04-19 18:07:13 -0700
commit     a5652762e5f7bf683d19f18542e5e39df63bad79 (patch)
tree       113821225c18b190514811f3e27a638333bc2dcd /usr/src
parent     4ab777b1b0f310e59b52a57c79efa0571506942a (diff)
download   illumos-joyent-a5652762e5f7bf683d19f18542e5e39df63bad79.tar.gz
6256083 Need a lightweight file page mapping mechanism to substitute segmap
6387639 segkpm segment set to incorrect size for amd64
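
The same conversion pattern repeats across the nfs, specfs, tmpfs and ufs read and write paths below: when the per-platform vpm_enable flag is set, the segmap slot lookup/copy/release sequence is replaced by the new vpm calls. A condensed sketch of the read-side shape, assembled from the hunks that follow (do_vpm_read() is a hypothetical wrapper, not a function added by this commit):

    /*
     * Read-path shape of the conversion: vpm_data_copy() maps,
     * copies and unmaps in one call; vpm_sync_pages() replaces
     * segmap_release() for the writeback/cleanup step.
     */
    static int
    do_vpm_read(vnode_t *vp, u_offset_t off, size_t on, size_t n,
        struct uio *uiop, int flags)
    {
        caddr_t base = NULL;
        int error;

        if (vpm_enable) {
            error = vpm_data_copy(vp, off + on, n, uiop,
                1, NULL, 0, S_READ);
        } else {
            base = segmap_getmapflt(segkmap, vp, off + on,
                n, 1, S_READ);
            error = uiomove(base + on, n, UIO_READ, uiop);
        }
        if (!error) {
            if (vpm_enable)
                error = vpm_sync_pages(vp, off, n, flags);
            else
                error = segmap_release(segkmap, base, flags);
        } else {
            if (vpm_enable)
                (void) vpm_sync_pages(vp, off, n, 0);
            else
                (void) segmap_release(segkmap, base, 0);
        }
        return (error);
    }
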
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/pkgdefs/SUNWhea/prototype_com        1
-rw-r--r--  usr/src/uts/common/Makefile.files            1
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs3_vnops.c      85
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_client.c     36
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_vnops.c      84
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_client.c      35
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_vnops.c       86
-rw-r--r--  usr/src/uts/common/fs/specfs/specvnops.c    75
-rw-r--r--  usr/src/uts/common/fs/tmpfs/tmp_vnops.c     99
-rw-r--r--  usr/src/uts/common/fs/ufs/ufs_vnops.c       85
-rw-r--r--  usr/src/uts/common/vm/Makefile              11
-rw-r--r--  usr/src/uts/common/vm/page.h                11
-rw-r--r--  usr/src/uts/common/vm/seg_map.c             11
-rw-r--r--  usr/src/uts/common/vm/seg_map.h              8
-rw-r--r--  usr/src/uts/common/vm/vpm.c               1141
-rw-r--r--  usr/src/uts/common/vm/vpm.h                286
-rw-r--r--  usr/src/uts/i86pc/os/startup.c               3
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c              14
18 files changed, 1888 insertions, 184 deletions
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index f265268418..4fec41a28f 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -1230,6 +1230,7 @@ f none usr/include/vm/seg_spt.h 644 root bin
f none usr/include/vm/seg_vn.h 644 root bin
f none usr/include/vm/seg_kpm.h 644 root bin
f none usr/include/vm/vpage.h 644 root bin
+f none usr/include/vm/vpm.h 644 root bin
f none usr/include/volmgt.h 644 root bin
f none usr/include/wait.h 644 root bin
f none usr/include/wchar.h 644 root bin
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index b025f1d7c6..2504b4664c 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -135,6 +135,7 @@ GENUNIX_OBJS += \
flock.o \
fm.o \
fork.o \
+ vpm.o \
fsat.o \
fs_subr.o \
fsflush.o \
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
index a53d2270b5..3b54de5ea7 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -567,9 +566,18 @@ nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
if (diff < n)
n = (size_t)diff;
- base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, off + on, n, uiop,
+ 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
+ S_READ);
- error = uiomove(base + on, n, UIO_READ, uiop);
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
if (!error) {
/*
@@ -583,9 +591,18 @@ nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
else
flags = 0;
mutex_exit(&rp->r_statelock);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ }
} while (!error && uiop->uio_resid > 0);
return (error);
@@ -749,25 +766,35 @@ nfs3_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
- if (segmap_kpm) {
- int pon = uiop->uio_loffset & PAGEOFFSET;
- size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
- int pagecreate;
+ if (vpm_enable) {
+ /*
+ * It will use kpm mappings, so no need to
+ * pass an address.
+ */
+ error = writerp(rp, NULL, n, uiop, 0);
+ } else {
+ if (segmap_kpm) {
+ int pon = uiop->uio_loffset & PAGEOFFSET;
+ size_t pn = MIN(PAGESIZE - pon,
+ uiop->uio_resid);
+ int pagecreate;
- mutex_enter(&rp->r_statelock);
- pagecreate = (pon == 0) && (pn == PAGESIZE ||
- uiop->uio_loffset + pn >= rp->r_size);
- mutex_exit(&rp->r_statelock);
+ mutex_enter(&rp->r_statelock);
+ pagecreate = (pon == 0) && (pn == PAGESIZE ||
+ uiop->uio_loffset + pn >= rp->r_size);
+ mutex_exit(&rp->r_statelock);
- base = segmap_getmapflt(segkmap, vp, off + on,
+ base = segmap_getmapflt(segkmap, vp, off + on,
pn, !pagecreate, S_WRITE);
- error = writerp(rp, base + pon, n, uiop, pagecreate);
+ error = writerp(rp, base + pon, n, uiop,
+ pagecreate);
- } else {
- base = segmap_getmapflt(segkmap, vp, off + on,
- n, 0, S_READ);
- error = writerp(rp, base + on, n, uiop, 0);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on,
+ n, 0, S_READ);
+ error = writerp(rp, base + on, n, uiop, 0);
+ }
}
if (!error) {
@@ -790,9 +817,17 @@ nfs3_fwrite:
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
/*
* In the event that we got an access error while
* faulting in a page for a write-only file just
diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c
index c95a0cd347..81e01a543b 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_client.c
@@ -2185,10 +2185,13 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
u_offset_t offset;
int error;
int sm_error;
+ vnode_t *vp = RTOV(rp);
ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
- ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
+ if (!vpm_enable) {
+ ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
+ }
/*
* Move bytes in at most PAGESIZE chunks. We must avoid
@@ -2206,8 +2209,7 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* n is the number of bytes required to satisfy the request
* or the number of bytes to fill out the page.
*/
- n = (int)MIN((PAGESIZE - ((uintptr_t)base & PAGEOFFSET)),
- tcount);
+ n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
/*
* Check to see if we can skip reading in the page
@@ -2226,12 +2228,12 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* created and mapped at base.
*/
pagecreate = pgcreated ||
- (((uintptr_t)base & PAGEOFFSET) == 0 &&
+ ((offset & PAGEOFFSET) == 0 &&
(n == PAGESIZE || ((offset + n) >= rp->r_size)));
mutex_exit(&rp->r_statelock);
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
/*
* The last argument tells segmap_pagecreate() to
* always lock the page, as opposed to sometimes
@@ -2267,7 +2269,17 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
rp->r_modaddr = (offset & MAXBMASK);
mutex_exit(&rp->r_statelock);
- error = uiomove(base, n, UIO_WRITE, uio);
+ if (vpm_enable) {
+ /*
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initizliazed
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, offset, n, uio,
+ !pagecreate, NULL, 0, S_WRITE);
+ } else {
+ error = uiomove(base, n, UIO_WRITE, uio);
+ }
/*
* r_size is the maximum number of
@@ -2284,7 +2296,11 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
/* n = # of bytes written */
n = (int)(uio->uio_loffset - offset);
- base += n;
+
+ if (!vpm_enable) {
+ base += n;
+ }
+
tcount -= n;
/*
* If we created pages w/o initializing them completely,
@@ -2292,7 +2308,7 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* This happens on most EOF write cases and if
* we had some sort of error during the uiomove.
*/
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
(void) kzero(base, PAGESIZE - n);
@@ -2310,8 +2326,8 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* segmap_pagecreate().
*/
sm_error = segmap_fault(kas.a_hat, segkmap,
- saved_base, saved_n,
- F_SOFTUNLOCK, S_WRITE);
+ saved_base, saved_n,
+ F_SOFTUNLOCK, S_WRITE);
if (error == 0)
error = sm_error;
}
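
Because the vpm path calls writerp4() with base == NULL, the chunking arithmetic above now derives from the file offset rather than the mapped address. A worked instance of the new computation (the numbers are illustrative):

    /*
     * With PAGESIZE = 0x1000, offset = 0x2600 and tcount = 0x2000:
     * n = MIN(0x1000 - 0x600, 0x2000) = 0xa00, so the first chunk
     * fills out the current page and later iterations start
     * page-aligned; pagecreate can only be set on aligned chunks.
     */
    n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
    pagecreate = pgcreated ||
        ((offset & PAGEOFFSET) == 0 &&
        (n == PAGESIZE || ((offset + n) >= rp->r_size)));
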
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
index faf08573ba..97fc46809c 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -2617,9 +2616,19 @@ nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
if (diff < n)
n = (uint_t)diff;
- base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, off + on, n, uiop,
+ 1, NULL, 0, S_READ);
- error = uiomove(base + on, n, UIO_READ, uiop);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
+ S_READ);
+
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
if (!error) {
/*
@@ -2633,9 +2642,18 @@ nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
else
flags = 0;
mutex_exit(&rp->r_statelock);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ }
} while (!error && uiop->uio_resid > 0);
return (error);
@@ -2826,25 +2844,35 @@ nfs4_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
- if (segmap_kpm) {
- int pon = uiop->uio_loffset & PAGEOFFSET;
- size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
- int pagecreate;
+ if (vpm_enable) {
+ /*
+ * It will use kpm mappings, so no need to
+ * pass an address.
+ */
+ error = writerp4(rp, NULL, n, uiop, 0);
+ } else {
+ if (segmap_kpm) {
+ int pon = uiop->uio_loffset & PAGEOFFSET;
+ size_t pn = MIN(PAGESIZE - pon,
+ uiop->uio_resid);
+ int pagecreate;
- mutex_enter(&rp->r_statelock);
- pagecreate = (pon == 0) && (pn == PAGESIZE ||
- uiop->uio_loffset + pn >= rp->r_size);
- mutex_exit(&rp->r_statelock);
+ mutex_enter(&rp->r_statelock);
+ pagecreate = (pon == 0) && (pn == PAGESIZE ||
+ uiop->uio_loffset + pn >= rp->r_size);
+ mutex_exit(&rp->r_statelock);
- base = segmap_getmapflt(segkmap, vp, off + on,
+ base = segmap_getmapflt(segkmap, vp, off + on,
pn, !pagecreate, S_WRITE);
- error = writerp4(rp, base + pon, n, uiop, pagecreate);
+ error = writerp4(rp, base + pon, n, uiop,
+ pagecreate);
- } else {
- base = segmap_getmapflt(segkmap, vp, off + on,
- n, 0, S_READ);
- error = writerp4(rp, base + on, n, uiop, 0);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on,
+ n, 0, S_READ);
+ error = writerp4(rp, base + on, n, uiop, 0);
+ }
}
if (!error) {
@@ -2867,9 +2895,17 @@ nfs4_fwrite:
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
/*
* In the event that we got an access error while
* faulting in a page for a write-only file just
diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c
index d6c0a25d7a..40c886fc85 100644
--- a/usr/src/uts/common/fs/nfs/nfs_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs_client.c
@@ -2104,10 +2104,13 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
u_offset_t offset;
int error;
int sm_error;
+ vnode_t *vp = RTOV(rp);
ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
- ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
+ if (!vpm_enable) {
+ ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
+ }
/*
* Move bytes in at most PAGESIZE chunks. We must avoid
@@ -2125,8 +2128,7 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* n is the number of bytes required to satisfy the request
* or the number of bytes to fill out the page.
*/
- n = (int)MIN((PAGESIZE - ((uintptr_t)base & PAGEOFFSET)),
- tcount);
+ n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
/*
* Check to see if we can skip reading in the page
@@ -2145,11 +2147,11 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* created and mapped at base.
*/
pagecreate = pgcreated ||
- (((uintptr_t)base & PAGEOFFSET) == 0 &&
+ ((offset & PAGEOFFSET) == 0 &&
(n == PAGESIZE || ((offset + n) >= rp->r_size)));
mutex_exit(&rp->r_statelock);
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
/*
* The last argument tells segmap_pagecreate() to
* always lock the page, as opposed to sometimes
@@ -2185,7 +2187,17 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
rp->r_modaddr = (offset & MAXBMASK);
mutex_exit(&rp->r_statelock);
- error = uiomove(base, n, UIO_WRITE, uio);
+ if (vpm_enable) {
+ /*
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initialized
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, offset, n, uio,
+ !pagecreate, NULL, 0, S_WRITE);
+ } else {
+ error = uiomove(base, n, UIO_WRITE, uio);
+ }
/*
* r_size is the maximum number of
@@ -2202,7 +2214,10 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
/* n = # of bytes written */
n = (int)(uio->uio_loffset - offset);
- base += n;
+
+ if (!vpm_enable) {
+ base += n;
+ }
tcount -= n;
/*
* If we created pages w/o initializing them completely,
@@ -2210,7 +2225,7 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* This happens on most EOF write cases and if
* we had some sort of error during the uiomove.
*/
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
(void) kzero(base, PAGESIZE - n);
@@ -2228,8 +2243,8 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* segmap_pagecreate().
*/
sm_error = segmap_fault(kas.a_hat, segkmap,
- saved_base, saved_n,
- F_SOFTUNLOCK, S_WRITE);
+ saved_base, saved_n,
+ F_SOFTUNLOCK, S_WRITE);
if (error == 0)
error = sm_error;
}
diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c
index b98b3d280e..583ce42473 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
@@ -472,9 +471,17 @@ nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
if (diff < n)
n = (size_t)diff;
- base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ);
-
- error = uiomove(base + on, n, UIO_READ, uiop);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, off + on, n, uiop,
+ 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on, n,
+ 1, S_READ);
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
if (!error) {
/*
@@ -488,9 +495,18 @@ nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
else
flags = 0;
mutex_exit(&rp->r_statelock);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ }
} while (!error && uiop->uio_resid > 0);
return (error);
@@ -651,25 +667,35 @@ nfs_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
- if (segmap_kpm) {
- int pon = uiop->uio_loffset & PAGEOFFSET;
- size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
- int pagecreate;
+ if (vpm_enable) {
+ /*
+ * It will use kpm mappings, so no need to
+ * pass an address.
+ */
+ error = writerp(rp, NULL, n, uiop, 0);
+ } else {
+ if (segmap_kpm) {
+ int pon = uiop->uio_loffset & PAGEOFFSET;
+ size_t pn = MIN(PAGESIZE - pon,
+ uiop->uio_resid);
+ int pagecreate;
- mutex_enter(&rp->r_statelock);
- pagecreate = (pon == 0) && (pn == PAGESIZE ||
- uiop->uio_loffset + pn >= rp->r_size);
- mutex_exit(&rp->r_statelock);
+ mutex_enter(&rp->r_statelock);
+ pagecreate = (pon == 0) && (pn == PAGESIZE ||
+ uiop->uio_loffset + pn >= rp->r_size);
+ mutex_exit(&rp->r_statelock);
- base = segmap_getmapflt(segkmap, vp, off + on,
+ base = segmap_getmapflt(segkmap, vp, off + on,
pn, !pagecreate, S_WRITE);
- error = writerp(rp, base + pon, n, uiop, pagecreate);
+ error = writerp(rp, base + pon, n, uiop,
+ pagecreate);
- } else {
- base = segmap_getmapflt(segkmap, vp, off + on,
- n, 0, S_READ);
- error = writerp(rp, base + on, n, uiop, 0);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on,
+ n, 0, S_READ);
+ error = writerp(rp, base + on, n, uiop, 0);
+ }
}
if (!error) {
@@ -691,9 +717,17 @@ nfs_fwrite:
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
/*
* In the event that we got an access error while
* faulting in a page for a write-only file just
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index d4ee630b92..6a2d6f73d0 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -880,10 +879,16 @@ spec_read(
if (diff < n)
n = (size_t)diff;
- base = segmap_getmapflt(segkmap, blkvp,
- (u_offset_t)(off + on), n, 1, S_READ);
+ if (vpm_enable) {
+ error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
+ n, uiop, 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, blkvp,
+ (u_offset_t)(off + on), n, 1, S_READ);
- if ((error = uiomove(base + on, n, UIO_READ, uiop)) == 0) {
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
+ if (!error) {
int flags = 0;
/*
* If we read a whole block, we won't need this
@@ -891,9 +896,17 @@ spec_read(
*/
if (n + on == MAXBSIZE)
flags = SM_DONTNEED | SM_FREE;
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(blkvp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(blkvp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
if (bdevsize == UNKNOWN_SIZE) {
error = 0;
break;
@@ -984,22 +997,27 @@ spec_write(
if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize))
pagecreate = 1;
- base = segmap_getmapflt(segkmap, blkvp,
- (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
-
- /*
- * segmap_pagecreate() returns 1 if it calls
- * page_create_va() to allocate any pages.
- */
newpage = 0;
+ if (vpm_enable) {
+ error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
+ n, uiop, !pagecreate, NULL, 0, S_WRITE);
+ } else {
+ base = segmap_getmapflt(segkmap, blkvp,
+ (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
- if (pagecreate)
- newpage = segmap_pagecreate(segkmap, base + on,
- n, 0);
+ /*
+ * segmap_pagecreate() returns 1 if it calls
+ * page_create_va() to allocate any pages.
+ */
+
+ if (pagecreate)
+ newpage = segmap_pagecreate(segkmap, base + on,
+ n, 0);
- error = uiomove(base + on, n, UIO_WRITE, uiop);
+ error = uiomove(base + on, n, UIO_WRITE, uiop);
+ }
- if (pagecreate &&
+ if (!vpm_enable && pagecreate &&
uiop->uio_loffset <
P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) {
/*
@@ -1029,7 +1047,7 @@ spec_write(
* Unlock the pages which have been allocated by
* page_create_va() in segmap_pagecreate().
*/
- if (newpage)
+ if (!vpm_enable && newpage)
segmap_pageunlock(segkmap, base + on,
(size_t)n, S_WRITE);
@@ -1053,9 +1071,18 @@ spec_write(
flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
}
smark(sp, SUPD|SCHG);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, SM_INVAL);
+ if (vpm_enable) {
+ error = vpm_sync_pages(blkvp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(blkvp, off, n, SM_INVAL);
+ } else {
+ (void) segmap_release(segkmap, base, SM_INVAL);
+ }
+ }
} while (error == 0 && uiop->uio_resid > 0 && n != 0);
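
spec_write() shows the write-side shape of the conversion most clearly: the vpm branch folds page creation into the copy itself, so segmap's pagecreate bookkeeping becomes conditional on !vpm_enable. A condensed sketch of the hunks above (sync and error handling elided):

    newpage = 0;
    if (vpm_enable) {
        /* Page create/fetch and the copy happen inside vpm. */
        error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
            n, uiop, !pagecreate, NULL, 0, S_WRITE);
    } else {
        base = segmap_getmapflt(segkmap, blkvp,
            (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
        if (pagecreate)
            newpage = segmap_pagecreate(segkmap, base + on, n, 0);
        error = uiomove(base + on, n, UIO_WRITE, uiop);
    }
    /* segmap-only fixups are now guarded by !vpm_enable. */
    if (!vpm_enable && newpage)
        segmap_pageunlock(segkmap, base + on, (size_t)n, S_WRITE);
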
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index 490cbfc61c..d623dce3f7 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -261,14 +260,32 @@ wrtmp(
if (!pagecreate)
rw_exit(&tp->tn_contents);
- /* Get offset within the segmap mapping */
- segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
- base = segmap_getmapflt(segkmap, vp, (offset & MAXBMASK),
- PAGESIZE, !pagecreate, S_WRITE);
-
newpage = 0;
+ if (vpm_enable) {
+ /*
+ * XXX Why do we need to hold the contents lock?
+ * The kpm mappings will not cause a fault.
+ *
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initialized
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, offset, bytes, uio,
+ !pagecreate, &newpage, 1, S_WRITE);
+
+ if (pagecreate) {
+ rw_exit(&tp->tn_contents);
+ }
+ } else {
+ /* Get offset within the segmap mapping */
+ segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
+ base = segmap_getmapflt(segkmap, vp,
+ (offset & MAXBMASK),
+ PAGESIZE, !pagecreate, S_WRITE);
+ }
- if (pagecreate) {
+
+ if (!vpm_enable && pagecreate) {
rw_downgrade(&tp->tn_contents);
/*
@@ -287,10 +304,12 @@ wrtmp(
(size_t)pageoffset);
}
- error = uiomove(base + segmap_offset + pageoffset,
+ if (!vpm_enable) {
+ error = uiomove(base + segmap_offset + pageoffset,
(long)bytes, UIO_WRITE, uio);
+ }
- if (pagecreate &&
+ if (!vpm_enable && pagecreate &&
uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) {
long zoffset; /* zero from offset into page */
/*
@@ -310,16 +329,17 @@ wrtmp(
*/
if ((zoffset = pageoffset + nmoved) < PAGESIZE)
(void) kzero(base + segmap_offset + zoffset,
- (size_t)PAGESIZE - zoffset);
+ (size_t)PAGESIZE - zoffset);
}
/*
* Unlock the pages which have been allocated by
* page_create_va() in segmap_pagecreate()
*/
- if (newpage)
+ if (!vpm_enable && newpage) {
segmap_pageunlock(segkmap, base + segmap_offset,
(size_t)PAGESIZE, S_WRITE);
+ }
if (error) {
/*
@@ -327,9 +347,19 @@ wrtmp(
* be sure to invalidate any pages that may have
* been allocated.
*/
- (void) segmap_release(segkmap, base, SM_INVAL);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, offset,
+ PAGESIZE, SM_INVAL);
+ } else {
+ (void) segmap_release(segkmap, base, SM_INVAL);
+ }
} else {
- error = segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, offset,
+ PAGESIZE, 0);
+ } else {
+ error = segmap_release(segkmap, base, 0);
+ }
}
/*
@@ -468,17 +498,36 @@ rdtmp(
*/
rw_exit(&tp->tn_contents);
- segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
- base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
- bytes, 1, S_READ);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, offset, bytes, uio,
+ 1, NULL, 0, S_READ);
+ } else {
+ segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
+ base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
+ bytes, 1, S_READ);
- error = uiomove(base + segmap_offset + pageoffset,
- (long)bytes, UIO_READ, uio);
+ error = uiomove(base + segmap_offset + pageoffset,
+ (long)bytes, UIO_READ, uio);
+ }
- if (error)
- (void) segmap_release(segkmap, base, 0);
- else
- error = segmap_release(segkmap, base, 0);
+ if (error) {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, offset,
+ PAGESIZE, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ } else {
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, offset,
+ PAGESIZE, 0);
+ } else {
+ error = segmap_release(segkmap, base, 0);
+ }
+ }
/*
* Re-acquire contents lock.
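
Of the filesystems converted here, tmpfs is the only caller that hands vpm_data_copy() a newpage pointer and zerostart = 1. The annotated restatement below maps wrtmp()'s arguments onto the parameter roles from the vpm_data_copy() definition later in this commit (a condensed restatement, not a new call):

    error = vpm_data_copy(vp,      /* vnode to map */
        offset,                    /* byte offset of the write */
        bytes,                     /* length to copy */
        uio,                       /* user I/O descriptor */
        !pagecreate,               /* fetchpage: read page in first? */
        &newpage,                  /* out: set if new pages created */
        1,                         /* zerostart: zero the page head */
        S_WRITE);                  /* access type */
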
diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c
index 5dd9495aa2..db8a0fc09b 100644
--- a/usr/src/uts/common/fs/ufs/ufs_vnops.c
+++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c
@@ -999,21 +999,32 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
rw_exit(&ufsvfsp->vfs_dqrwlock);
}
- base = segmap_getmapflt(segkmap, vp, (off + mapon),
+ newpage = 0;
+ premove_resid = uio->uio_resid;
+ if (vpm_enable) {
+ /*
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initialized
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, (off + mapon), (uint_t)n,
+ uio, !pagecreate, &newpage, 0, S_WRITE);
+ } else {
+
+ base = segmap_getmapflt(segkmap, vp, (off + mapon),
(uint_t)n, !pagecreate, S_WRITE);
- /*
- * segmap_pagecreate() returns 1 if it calls
- * page_create_va() to allocate any pages.
- */
- newpage = 0;
+ /*
+ * segmap_pagecreate() returns 1 if it calls
+ * page_create_va() to allocate any pages.
+ */
- if (pagecreate)
- newpage = segmap_pagecreate(segkmap, base,
- (size_t)n, 0);
+ if (pagecreate)
+ newpage = segmap_pagecreate(segkmap, base,
+ (size_t)n, 0);
- premove_resid = uio->uio_resid;
- error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
+ error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
+ }
/*
* If "newpage" is set, then a new page was created and it
@@ -1028,7 +1039,7 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
* If uiomove fails because of an error, the old valid data
* is kept instead of filling the rest of the page with zero's.
*/
- if (newpage &&
+ if (!vpm_enable && newpage &&
uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
/*
* We created pages w/o initializing them completely,
@@ -1049,7 +1060,7 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
* Unlock the pages allocated by page_create_va()
* in segmap_pagecreate()
*/
- if (newpage)
+ if (!vpm_enable && newpage)
segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
/*
@@ -1130,7 +1141,15 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
*/
flags = SM_INVAL;
}
- (void) segmap_release(segkmap, base, flags);
+
+ if (vpm_enable) {
+ /*
+ * Flush pages.
+ */
+ (void) vpm_sync_pages(vp, off, n, flags);
+ } else {
+ (void) segmap_release(segkmap, base, flags);
+ }
} else {
flags = 0;
/*
@@ -1163,7 +1182,14 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
*/
flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ /*
+ * Flush pages.
+ */
+ (void) vpm_sync_pages(vp, off, n, flags);
+ } else {
+ (void) segmap_release(segkmap, base, flags);
+ }
/*
* If the operation failed and is synchronous,
* then we need to unwind what uiomove() last
@@ -1429,10 +1455,18 @@ rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr)
*/
if (rwtype == RW_READER)
rw_exit(&ip->i_contents);
- base = segmap_getmapflt(segkmap, vp, (off + mapon),
- (uint_t)n, 1, S_READ);
- error = uiomove(base + mapon, (long)n, UIO_READ, uio);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, (off + mapon), (uint_t)n,
+ uio, 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, (off + mapon),
+ (uint_t)n, 1, S_READ);
+ error = uiomove(base + mapon, (long)n, UIO_READ, uio);
+ }
flags = 0;
if (!error) {
@@ -1460,9 +1494,18 @@ rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr)
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, flags);
+ } else {
+ (void) segmap_release(segkmap, base, flags);
+ }
+ }
if (rwtype == RW_READER)
rw_enter(&ip->i_contents, rwtype);
diff --git a/usr/src/uts/common/vm/Makefile b/usr/src/uts/common/vm/Makefile
index fcd6582985..642c393f8f 100644
--- a/usr/src/uts/common/vm/Makefile
+++ b/usr/src/uts/common/vm/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -29,8 +28,8 @@
# include global definitions
include ../../../Makefile.master
-HDRS= anon.h as.h faultcode.h hat.h kpm.h page.h pvn.h rm.h seg.h vpage.h \
- seg_dev.h seg_enum.h seg_kmem.h seg_kp.h seg_kpm.h seg_map.h \
+HDRS= anon.h as.h faultcode.h vpm.h hat.h kpm.h page.h pvn.h rm.h seg.h \
+ vpage.h seg_dev.h seg_enum.h seg_kmem.h seg_kp.h seg_kpm.h seg_map.h \
seg_vn.h seg_spt.h
ROOTDIRS= $(ROOT)/usr/include/vm
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index e3317b9fea..a1aa25bbbc 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -468,7 +467,7 @@ typedef struct page {
struct vnode *p_vnode; /* vnode that this page is named by */
selock_t p_selock; /* shared/exclusive lock on the page */
#if defined(_LP64)
- int p_selockpad; /* pad for growing selock */
+ uint_t p_vpmref; /* vpm ref - index of the vpmap_t */
#endif
struct page *p_hash; /* hash by [vnode, offset] */
struct page *p_vpnext; /* next page in vnode list */
@@ -506,7 +505,11 @@ typedef struct page {
/* index of entry in p_map when p_embed is set */
uint_t p_mlentry;
#endif
+#if defined(_LP64)
+ kmutex_t p_ilock; /* protects p_vpmref */
+#else
uint64_t p_msresv_2; /* page allocation debugging */
+#endif
} page_t;
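
On _LP64 kernels the page_t change funds both new fields out of existing space: the int pad p_selockpad becomes the uint_t p_vpmref index, and the 8-byte p_msresv_2 debug word becomes the kmutex_t p_ilock protecting it, so the structure does not grow. A minimal compile-time check in that spirit (a sketch, assuming the illumos CTASSERT macro from <sys/debug.h>; it is not part of the commit):

    #include <sys/types.h>
    #include <sys/t_lock.h>
    #include <sys/debug.h>

    /*
     * Both substitutions are size-for-size: an int pad becomes a
     * uint_t, and a uint64_t debug word becomes a kmutex_t (one
     * 8-byte word on LP64), keeping page_t's layout unchanged.
     */
    CTASSERT(sizeof (uint_t) == sizeof (int));
    CTASSERT(sizeof (kmutex_t) == sizeof (uint64_t));
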
diff --git a/usr/src/uts/common/vm/seg_map.c b/usr/src/uts/common/vm/seg_map.c
index 9fd8d37e5a..de27f6e2ff 100644
--- a/usr/src/uts/common/vm/seg_map.c
+++ b/usr/src/uts/common/vm/seg_map.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -466,6 +465,10 @@ segmap_create(struct seg *seg, void *argsp)
scpu->scpu.scpu_last_smap = smd_smap;
}
+ if (vpm_enable) {
+ vpm_init();
+ }
+
#ifdef DEBUG
/*
* Keep track of which colors are used more often.
diff --git a/usr/src/uts/common/vm/seg_map.h b/usr/src/uts/common/vm/seg_map.h
index 339dabe674..0e3cd9bf9b 100644
--- a/usr/src/uts/common/vm/seg_map.h
+++ b/usr/src/uts/common/vm/seg_map.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -65,6 +64,7 @@ struct segmap_crargs {
};
#include <vm/kpm.h>
+#include <vm/vpm.h>
/*
* Each smap struct represents a MAXBSIZE sized mapping to the
diff --git a/usr/src/uts/common/vm/vpm.c b/usr/src/uts/common/vm/vpm.c
new file mode 100644
index 0000000000..1f4f2fdf58
--- /dev/null
+++ b/usr/src/uts/common/vm/vpm.c
@@ -0,0 +1,1141 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * VM - generic vnode page mapping interfaces.
+ *
+ * Mechanism to provide temporary mappings to vnode pages.
+ * The typical use would be to copy/access file data.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/buf.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/mman.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/vtrace.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/thread.h>
+#include <sys/dumphdr.h>
+#include <sys/bitmap.h>
+#include <sys/lgrp.h>
+
+#include <vm/seg_kmem.h>
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_kpm.h>
+#include <vm/seg_map.h>
+#include <vm/page.h>
+#include <vm/pvn.h>
+#include <vm/rm.h>
+#include <vm/vpm.h>
+
+/*
+ * Needs to be enabled by each platform.
+ */
+int vpm_enable = 0;
+
+#ifdef SEGKPM_SUPPORT
+
+
+int vpm_cache_enable = 1;
+long vpm_cache_percent = 12;
+long vpm_cache_size;
+int vpm_nfreelist = 0;
+int vpmd_freemsk = 0;
+
+#define VPM_S_PAD 64
+union vpm_cpu {
+ struct {
+ int vcpu_free_ndx;
+ ulong_t vcpu_hits;
+ ulong_t vcpu_misses;
+ } vcpu;
+ char vpm_pad[VPM_S_PAD];
+};
+static union vpm_cpu *vpmd_cpu;
+
+#define vfree_ndx vcpu.vcpu_free_ndx
+
+int vpm_cachemode = VPMCACHE_LRU;
+
+#define PPMTX(pp) (&(pp)->p_ilock)
+
+static struct vpmap *vpmd_vpmap; /* list of vpmap structs preallocated */
+static struct vpmfree *vpmd_free;
+#define VPMAPMTX(vpm) (&vpm->vpm_mtx)
+#define VPMAP2VMF(vpm) (&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
+#define VPMAP2VMF_NDX(vpm) (ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
+#define VPMP(id) (&vpmd_vpmap[id - 1])
+#define VPMID(vpm) (uint_t)((vpm - vpmd_vpmap) + 1)
+
+
+#ifdef DEBUG
+
+struct vpm_debug {
+ int vpmd_steals;
+ int vpmd_contend;
+ int vpmd_prevpagelocked;
+ int vpmd_getpagefailed;
+ int vpmd_zerostart;
+ int vpmd_emptyfreelist;
+ int vpmd_nofreevpms;
+} vpm_debug;
+
+#define VPM_DEBUG(x) ((vpm_debug.x)++)
+
+int steals;
+int steals_mtbf = 7;
+int contend;
+int contend_mtbf = 127;
+
+#define VPM_MTBF(v, f) (((++(v)) & (f)) != (f))
+
+#else /* DEBUG */
+
+#define VPM_MTBF(v, f) (1)
+#define VPM_DEBUG(x) /* nothing */
+
+#endif
+
+/*
+ * The vpm cache.
+ *
+ * The main purpose of having a cache here is to speed up page_lookup()
+ * operations and also to provide LRU (the default) behaviour for file
+ * pages. The page_lookup() operation tends to be expensive if a page
+ * has to be reclaimed from the system page cache ("cachelist"). Once we
+ * speed up the page_lookup()->page_reclaim() path, there should be no
+ * need for this cache. The system page cache (cachelist) should
+ * effectively serve the purpose of caching file pages.
+ *
+ * This cache is very similar to segmap's smap cache. Each page in the
+ * cache is tracked by the structure vpmap_t. But unlike segmap, there
+ * is no hash table. The page_t has a reference to the vpmap_t when
+ * cached. For a given vnode and offset, the page is found by means of
+ * a page_lookup() operation. Any page which has a mapping (i.e., when
+ * cached) will not be in the system 'cachelist'. Hence page_lookup()
+ * will not have to do a page_reclaim(). That is how the cache serves
+ * to speed up page_lookup() operations.
+ *
+ * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
+ */
+
+void
+vpm_init()
+{
+ long npages;
+ struct vpmap *vpm;
+ struct vpmfree *vpmflp;
+ int i, ndx;
+ extern void prefetch_smap_w(void *);
+
+ if (!vpm_cache_enable) {
+ return;
+ }
+
+ /*
+ * Set the size of the cache.
+ */
+ vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
+ if (vpm_cache_size < VPMAP_MINCACHE) {
+ vpm_cache_size = VPMAP_MINCACHE;
+ }
+
+ /*
+ * Number of freelists.
+ */
+ if (vpm_nfreelist == 0) {
+ vpm_nfreelist = max_ncpus;
+ } else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
+ cmn_err(CE_WARN, "vpmap create: invalid number of freelists, "
+ "vpm_nfreelist %d, using %d", vpm_nfreelist, 2 * max_ncpus);
+ vpm_nfreelist = 2 * max_ncpus;
+ }
+
+ /*
+ * Round it up to the next power of 2
+ */
+ if (vpm_nfreelist & (vpm_nfreelist - 1)) {
+ vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
+ }
+ vpmd_freemsk = vpm_nfreelist - 1;
+
+ /*
+ * Use a per cpu rotor index to spread the allocations evenly
+ * across the available vpm freelists.
+ */
+ vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
+ ndx = 0;
+ for (i = 0; i < max_ncpus; i++) {
+
+ vpmd_cpu[i].vfree_ndx = ndx;
+ ndx = (ndx + 1) & vpmd_freemsk;
+ }
+
+ /*
+ * Allocate and initialize the freelist.
+ */
+ vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
+ KM_SLEEP);
+ for (i = 0; i < vpm_nfreelist; i++) {
+
+ vpmflp = &vpmd_free[i];
+ /*
+ * Set up initial queue pointers. They will get flipped
+ * back and forth.
+ */
+ vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
+ vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
+ }
+
+ npages = mmu_btop(vpm_cache_size);
+
+
+ /*
+ * Allocate and initialize the vpmap structs.
+ */
+ vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP);
+ for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) {
+ struct vpmfree *vpmflp;
+ union vpm_freeq *releq;
+ struct vpmap *vpmapf;
+
+ /*
+ * Use prefetch as we have to walk thru a large number of
+ * these data structures. We just use the smap's prefetch
+ * routine as it does the same. This should work fine
+ * for x64 (this needs to be modified when enabled on sparc).
+ */
+ prefetch_smap_w((void *)vpm);
+
+ vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);
+
+ vpmflp = VPMAP2VMF(vpm);
+ releq = vpmflp->vpm_releq;
+
+ vpmapf = releq->vpmq_free;
+ if (vpmapf == NULL) {
+ releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
+ } else {
+ vpm->vpm_next = vpmapf;
+ vpm->vpm_prev = vpmapf->vpm_prev;
+ vpmapf->vpm_prev = vpm;
+ vpm->vpm_prev->vpm_next = vpm;
+ releq->vpmq_free = vpm->vpm_next;
+ }
+
+ /*
+ * Indicate that the vpmap is on the releq at start
+ */
+ vpm->vpm_ndxflg = VPMRELEQ;
+ }
+}
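+
+/*
+ * A sizing example with illustrative numbers (not from the commit):
+ * with physmem = 1M 4K pages (4GB of memory) and the default
+ * vpm_cache_percent of 12, vpm_cache_size covers 125829 pages, so
+ * about 125K vpmap structs are preallocated above and spread
+ * round-robin across the max_ncpus freelists.
+ */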
+
+
+/*
+ * unhooks vpm from the freelist if it is still on the freelist.
+ */
+#define VPMAP_RMFREELIST(vpm) \
+ { \
+ if (vpm->vpm_next != NULL) { \
+ union vpm_freeq *freeq; \
+ struct vpmfree *vpmflp; \
+ vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
+ freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
+ mutex_enter(&freeq->vpmq_mtx); \
+ if (freeq->vpmq_free != vpm) { \
+ vpm->vpm_prev->vpm_next = vpm->vpm_next; \
+ vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
+ } else if (vpm == vpm->vpm_next) { \
+ freeq->vpmq_free = NULL; \
+ } else { \
+ freeq->vpmq_free = vpm->vpm_next; \
+ vpm->vpm_prev->vpm_next = vpm->vpm_next; \
+ vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
+ } \
+ mutex_exit(&freeq->vpmq_mtx); \
+ vpm->vpm_next = vpm->vpm_prev = NULL; \
+ } \
+ }
+
+static int
+get_freelndx(int mode)
+{
+ int ndx;
+
+ ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
+ switch (mode) {
+
+ case VPMCACHE_LRU:
+ default:
+ vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
+ break;
+ }
+ return (ndx);
+}
+
+
+/*
+ * Find one vpmap structure from the free lists and use it for the newpage.
+ * The previous page it cached is dissociated and released. The page_t's
+ * p_vpmref is cleared only when the vpm it points to is locked (or,
+ * on AMD64, when the page is exclusively locked in page_unload();
+ * that is because the p_vpmref is treated as a mapping).
+ *
+ * The page's p_vpmref is set when the page is
+ * locked (at least SHARED locked).
+ */
+static struct vpmap *
+get_free_vpmap(page_t *newpage)
+{
+ struct vpmfree *vpmflp;
+ kmutex_t *vmtx;
+ struct vpmap *vpm, *first;
+ union vpm_freeq *allocq, *releq;
+ page_t *pp = NULL;
+ int end_ndx, page_locked = 0;
+ int free_ndx;
+
+ /*
+ * get the freelist bin index.
+ */
+ free_ndx = get_freelndx(vpm_cachemode);
+
+ end_ndx = free_ndx;
+ vpmflp = &vpmd_free[free_ndx];
+
+retry_queue:
+ allocq = vpmflp->vpm_allocq;
+ mutex_enter(&allocq->vpmq_mtx);
+
+ if ((vpm = allocq->vpmq_free) == NULL) {
+
+skip_queue:
+ /*
+ * The alloc list is empty or this queue is being skipped;
+ * first see if the allocq toggled.
+ */
+ if (vpmflp->vpm_allocq != allocq) {
+ /* queue changed */
+ mutex_exit(&allocq->vpmq_mtx);
+ goto retry_queue;
+ }
+ releq = vpmflp->vpm_releq;
+ if (!mutex_tryenter(&releq->vpmq_mtx)) {
+ /* cannot get releq; a free vpmap may be there now */
+ mutex_exit(&allocq->vpmq_mtx);
+
+ /*
+ * This loop could spin forever if this thread has
+ * higher priority than the thread that is holding
+ * releq->vpmq_mtx. In order to force the other thread
+ * to run, we'll lock/unlock the mutex which is safe
+ * since we just unlocked the allocq mutex.
+ */
+ mutex_enter(&releq->vpmq_mtx);
+ mutex_exit(&releq->vpmq_mtx);
+ goto retry_queue;
+ }
+ if (releq->vpmq_free == NULL) {
+ VPM_DEBUG(vpmd_emptyfreelist);
+ /*
+ * This freelist is empty.
+ * This should not happen unless clients
+ * are failing to release the vpmap after
+ * accessing the data. Before resorting
+ * to sleeping, try the next list of the same color.
+ */
+ free_ndx = (free_ndx + 1) & vpmd_freemsk;
+ if (free_ndx != end_ndx) {
+ mutex_exit(&releq->vpmq_mtx);
+ mutex_exit(&allocq->vpmq_mtx);
+ vpmflp = &vpmd_free[free_ndx];
+ goto retry_queue;
+ }
+ /*
+ * Tried all freelists.
+ * wait on this list and hope something gets freed.
+ */
+ vpmflp->vpm_want++;
+ mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
+ cv_wait(&vpmflp->vpm_free_cv,
+ &vpmflp->vpm_freeq[0].vpmq_mtx);
+ vpmflp->vpm_want--;
+ mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
+ vpmflp = &vpmd_free[free_ndx];
+ VPM_DEBUG(vpmd_nofreevpms);
+ goto retry_queue;
+ } else {
+ /*
+ * Something on the rele queue; flip the alloc
+ * and rele queues and retry.
+ */
+ vpmflp->vpm_allocq = releq;
+ vpmflp->vpm_releq = allocq;
+ mutex_exit(&allocq->vpmq_mtx);
+ mutex_exit(&releq->vpmq_mtx);
+ if (page_locked) {
+ delay(hz >> 2);
+ page_locked = 0;
+ }
+ goto retry_queue;
+ }
+ } else {
+ int gotnewvpm;
+ kmutex_t *pmtx;
+ uint_t vpmref;
+
+ /*
+ * Fastpath the case we get the vpmap mutex
+ * on the first try.
+ */
+ first = vpm;
+next_vpmap:
+ vmtx = VPMAPMTX(vpm);
+ if (!mutex_tryenter(vmtx)) {
+ /*
+ * Another thread is trying to reclaim this slot.
+ * Skip to the next queue or vpmap.
+ */
+ if ((vpm = vpm->vpm_next) == first) {
+ goto skip_queue;
+ } else {
+ goto next_vpmap;
+ }
+ }
+
+ /*
+ * Assign this vpm to the newpage.
+ */
+ pmtx = PPMTX(newpage);
+ gotnewvpm = 0;
+ mutex_enter(pmtx);
+
+ /*
+ * Check if some other thread already assigned a vpm to
+ * this page.
+ */
+ if ((vpmref = newpage->p_vpmref) == 0) {
+ newpage->p_vpmref = VPMID(vpm);
+ gotnewvpm = 1;
+ } else {
+ VPM_DEBUG(vpmd_contend);
+ mutex_exit(vmtx);
+ }
+ mutex_exit(pmtx);
+
+ if (gotnewvpm) {
+
+ /*
+ * At this point, we've selected the vpm. Remove vpm
+ * from its freelist. If vpm is the first one in
+ * the freelist, update the head of the freelist.
+ */
+ if (first == vpm) {
+ ASSERT(first == allocq->vpmq_free);
+ allocq->vpmq_free = vpm->vpm_next;
+ }
+
+ /*
+ * If the head of the freelist still points to vpm,
+ * then there are no more free vpmaps in that list.
+ */
+ if (allocq->vpmq_free == vpm)
+ /*
+ * Took the last one
+ */
+ allocq->vpmq_free = NULL;
+ else {
+ vpm->vpm_prev->vpm_next = vpm->vpm_next;
+ vpm->vpm_next->vpm_prev = vpm->vpm_prev;
+ }
+ mutex_exit(&allocq->vpmq_mtx);
+ vpm->vpm_prev = vpm->vpm_next = NULL;
+
+ /*
+ * Disassociate the previous page. On x64 systems
+ * p_vpmref is used as a mapping reference to the page.
+ */
+ if ((pp = vpm->vpm_pp) != NULL &&
+ vpm->vpm_vp == pp->p_vnode &&
+ vpm->vpm_off == pp->p_offset) {
+
+ pmtx = PPMTX(pp);
+ if (page_trylock(pp, SE_SHARED)) {
+ /*
+ * Now verify that it is the correct
+ * page. If not someone else stole it,
+ * so just unlock it and leave.
+ */
+ mutex_enter(pmtx);
+ if (PP_ISFREE(pp) ||
+ vpm->vpm_vp != pp->p_vnode ||
+ vpm->vpm_off != pp->p_offset ||
+ pp->p_vpmref != VPMID(vpm)) {
+ mutex_exit(pmtx);
+
+ page_unlock(pp);
+ } else {
+ /*
+ * Release the page.
+ */
+ pp->p_vpmref = 0;
+ mutex_exit(pmtx);
+ hat_kpm_mapout(pp, 0,
+ hat_kpm_page2va(pp, 1));
+ (void) page_release(pp, 1);
+ }
+ } else {
+ /*
+ * If the page cannot be locked, just
+ * clear the p_vpmref and go.
+ */
+ mutex_enter(pmtx);
+ if (pp->p_vpmref == VPMID(vpm)) {
+ pp->p_vpmref = 0;
+ }
+ mutex_exit(pmtx);
+ VPM_DEBUG(vpmd_prevpagelocked);
+ }
+ }
+
+ /*
+ * Setup vpm to point to the new page.
+ */
+ vpm->vpm_pp = newpage;
+ vpm->vpm_vp = newpage->p_vnode;
+ vpm->vpm_off = newpage->p_offset;
+
+ } else {
+ int steal = !VPM_MTBF(steals, steals_mtbf);
+ /*
+ * Page already has a vpm assigned just use that.
+ * Grab the vpm mutex and verify that it is still
+ * the correct one. The pp->p_vpmref should not change
+ * once we have the vpm mutex and the page lock.
+ */
+ mutex_exit(&allocq->vpmq_mtx);
+ vpm = VPMP(vpmref);
+ vmtx = VPMAPMTX(vpm);
+ mutex_enter(vmtx);
+ if ((steal && vpm->vpm_refcnt == 0) ||
+ vpm->vpm_pp != newpage) {
+ /*
+ * The vpm got stolen, retry.
+ * clear the p_vpmref.
+ */
+ pmtx = PPMTX(newpage);
+ mutex_enter(pmtx);
+ if (newpage->p_vpmref == vpmref) {
+ newpage->p_vpmref = 0;
+ }
+ mutex_exit(pmtx);
+
+ mutex_exit(vmtx);
+ VPM_DEBUG(vpmd_steals);
+ goto retry_queue;
+ } else if (vpm->vpm_refcnt == 0) {
+ /*
+ * Remove it from the free list if it
+ * exists there.
+ */
+ VPMAP_RMFREELIST(vpm);
+ }
+ }
+ return (vpm);
+ }
+}
+
+static void
+free_vpmap(struct vpmap *vpm)
+{
+ struct vpmfree *vpmflp;
+ struct vpmap *vpmfreelist;
+ union vpm_freeq *releq;
+
+ ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));
+
+ if (vpm->vpm_refcnt != 0) {
+ panic("free_vpmap");
+ /*NOTREACHED*/
+ }
+
+ vpmflp = &vpmd_free[vpm->vpm_free_ndx];
+ /*
+ * Add to the tail of the release queue
+ * Note that vpm_releq and vpm_allocq could toggle
+ * before we get the lock. This does not affect
+ * correctness as the 2 queues are only maintained
+ * to reduce lock pressure.
+ */
+ releq = vpmflp->vpm_releq;
+ if (releq == &vpmflp->vpm_freeq[0]) {
+ vpm->vpm_ndxflg = 0;
+ } else {
+ vpm->vpm_ndxflg = 1;
+ }
+ mutex_enter(&releq->vpmq_mtx);
+ vpmfreelist = releq->vpmq_free;
+ if (vpmfreelist == 0) {
+ int want;
+
+ releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
+ /*
+ * Both queue mutexes are held to set vpm_want;
+ * snapshot the value before dropping releq mutex.
+ * If vpm_want appears after the releq mutex is dropped,
+ * then the vpmap just freed is already gone.
+ */
+ want = vpmflp->vpm_want;
+ mutex_exit(&releq->vpmq_mtx);
+ /*
+ * See if there was a waiter before dropping the releq mutex
+ * then recheck after obtaining vpm_freeq[0] mutex as
+ * another thread may have already signaled.
+ */
+ if (want) {
+ mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
+ if (vpmflp->vpm_want)
+ cv_signal(&vpmflp->vpm_free_cv);
+ mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
+ }
+ } else {
+ vpm->vpm_next = vpmfreelist;
+ vpm->vpm_prev = vpmfreelist->vpm_prev;
+ vpmfreelist->vpm_prev = vpm;
+ vpm->vpm_prev->vpm_next = vpm;
+ mutex_exit(&releq->vpmq_mtx);
+ }
+}
+
+/*
+ * Get the vpmap for the page.
+ * The refcnt of this vpm is incremented.
+ */
+static struct vpmap *
+get_vpmap(page_t *pp)
+{
+ struct vpmap *vpm = NULL;
+ kmutex_t *vmtx;
+ kmutex_t *pmtx;
+ unsigned int refid;
+
+ ASSERT((pp != NULL) && PAGE_LOCKED(pp));
+
+ if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
+ vpm = VPMP(refid);
+ vmtx = VPMAPMTX(vpm);
+ mutex_enter(vmtx);
+ /*
+ * Since we have the page lock and the vpm mutex, the
+ * pp->p_vpmref cannot change.
+ */
+ if (vpm->vpm_pp != pp) {
+ pmtx = PPMTX(pp);
+
+ /*
+ * Clear the p_vpmref as it is incorrect.
+ * This can happen if the page was stolen.
+ * On x64 this should not happen as p_vpmref
+ * is treated as a mapping on the page. So
+ * if the page is stolen, the mapping would have
+ * been cleared in page_unload().
+ */
+ mutex_enter(pmtx);
+ if (pp->p_vpmref == refid)
+ pp->p_vpmref = 0;
+ mutex_exit(pmtx);
+
+ mutex_exit(vmtx);
+ vpm = NULL;
+ } else if (vpm->vpm_refcnt == 0) {
+ /*
+ * Got the vpm, remove it from the free
+ * list if it exists there.
+ */
+ VPMAP_RMFREELIST(vpm);
+ }
+ }
+ if (vpm == NULL) {
+ /*
+ * get_free_vpmap() returns with the vpmap mutex held.
+ */
+ vpm = get_free_vpmap(pp);
+ vmtx = VPMAPMTX(vpm);
+ vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
+ } else {
+ vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
+ }
+
+ vpm->vpm_refcnt++;
+ mutex_exit(vmtx);
+
+ return (vpm);
+}
+
+/* END --- vpm cache ---- */
+
+/*
+ * The vnode page mapping(vpm) interface routines.
+ */
+
+/*
+ * Find or create the pages starting from baseoff for the specified
+ * length 'len'.
+ */
+static int
+vpm_pagecreate(
+ struct vnode *vp,
+ u_offset_t baseoff,
+ size_t len,
+ vmap_t vml[],
+ int nseg,
+ int *newpage)
+{
+
+ page_t *pp = NULL;
+ caddr_t base;
+ u_offset_t off = baseoff;
+ int i;
+ ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS);
+
+ for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++) {
+ struct vpmap *vpm;
+
+
+ if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
+
+ base = segkpm_create_va(off);
+
+ /*
+ * The seg pointer passed in is just advisory. Just
+ * pass segkmap for now, like segmap does with
+ * segmap_kpm enabled.
+ */
+ if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
+ segkmap, base)) == NULL) {
+ panic("segmap_pagecreate_vpm: "
+ "page_create failed");
+ /*NOTREACHED*/
+ }
+ if (newpage != NULL)
+ *newpage = 1;
+
+ page_io_unlock(pp);
+ }
+
+ /*
+ * Get the vpm for this page_t.
+ */
+ if (vpm_cache_enable) {
+ vpm = get_vpmap(pp);
+ vml[i].vs_data = (void *)&vpm->vpm_pp;
+ } else {
+ vml[i].vs_data = (void *)pp;
+ pp->p_vpmref = 0;
+ }
+
+ vml[i].vs_addr = hat_kpm_mapin(pp, 0);
+ vml[i].vs_len = PAGESIZE;
+
+ off += PAGESIZE;
+ }
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = (caddr_t)NULL;
+ return (0);
+}
+
+
+/*
+ * Returns vpm mappings of pages in the range [off, off+len], where
+ * len is rounded up to the PAGESIZE boundary. The list of pages and
+ * the page addresses are returned in the SGL vml (vmap_t) array passed in.
+ * The nseg is the number of vmap_t entries in the array.
+ *
+ * Currently the max len allowed is MAXBSIZE; therefore it will
+ * either fetch/create one or two pages depending on the PAGESIZE.
+ *
+ * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
+ * For such cases, use the seg_map interfaces.
+ */
+int
+vpm_map_pages(
+ struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ int fetchpage,
+ vmap_t *vml,
+ int nseg,
+ int *newpage,
+ enum seg_rw rw)
+{
+ extern struct vnode *common_specvp();
+ u_offset_t baseoff;
+ uint_t prot;
+ caddr_t base;
+ page_t *pp, *pplist[MAXVMAPS];
+ struct vpmap *vpm;
+ int i, error = 0;
+
+ ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS);
+ baseoff = off & (offset_t)PAGEMASK;
+ vml[0].vs_data = NULL;
+ vml[0].vs_addr = (caddr_t)NULL;
+ /*
+ * For now, let's restrict it to MAXBSIZE. XXX - We can allow
+ * len longer than MAXBSIZE, but there should be a limit
+ * which should be determined by how many pages the VOP_GETPAGE()
+ * can fetch.
+ */
+ if (off + len > baseoff + MAXBSIZE) {
+ panic("vpm_map_pages bad len");
+ /*NOTREACHED*/
+ }
+
+ /*
+ * If this is a block device we have to be sure to use the
+ * "common" block device vnode for the mapping.
+ */
+ if (vp->v_type == VBLK)
+ vp = common_specvp(vp);
+
+
+ if (!fetchpage)
+ return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));
+
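+	/*
+	 * The loop increment expression below pre-NULLs the next
+	 * pplist slot, so the cleanup and mapping loops that follow
+	 * can stop at the first NULL entry.
+	 */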
+ for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++,
+ pplist[i] = NULL) {
+
+ pp = page_lookup(vp, baseoff, SE_SHARED);
+
+ /*
+ * If we did not find the page or if this page was not
+ * in our cache, then let VOP_GETPAGE get all the pages.
+		 * We need to call VOP_GETPAGE so that filesystems can do
+		 * any (un)necessary tracking for sequential access.
+ */
+
+ if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
+ (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
+ != (P_MOD | P_REF))) {
+ if (pp != NULL) {
+ page_unlock(pp);
+ }
+
+ /*
+			 * Pass a dummy address, as it will be required
+			 * by page_create_va(). We pass segkmap as the seg
+			 * because some file systems (e.g. UFS) check it.
+ */
+ base = segkpm_create_va(baseoff);
+
+ error = VOP_GETPAGE(vp, baseoff, len, &prot, &pplist[i],
+ roundup(len, PAGESIZE), segkmap, base, rw, CRED());
+ if (error) {
+ VPM_DEBUG(vpmd_getpagefailed);
+ pplist[i] = NULL;
+ }
+ break;
+ } else {
+ pplist[i] = pp;
+ baseoff += PAGESIZE;
+ }
+ }
+
+ if (error) {
+ for (i = 0; pplist[i] != NULL; i++) {
+ page_unlock(pplist[i]);
+ pplist[i] = NULL;
+ }
+ vml[0].vs_addr = NULL;
+ vml[0].vs_data = NULL;
+ return (FC_MAKE_ERR(error));
+ }
+
+ /*
+ * Get the vpm's for pages.
+ */
+ for (i = 0; pplist[i] != NULL; i++) {
+ if (vpm_cache_enable) {
+ vpm = get_vpmap(pplist[i]);
+ vml[i].vs_data = (void *)&(vpm->vpm_pp);
+ } else {
+ vml[i].vs_data = (void *)pplist[i];
+ pplist[i]->p_vpmref = 0;
+ }
+
+ vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
+ vml[i].vs_len = PAGESIZE;
+ }
+
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = (caddr_t)NULL;
+
+ return (0);
+}
+
+/*
+ * Release the vpm mappings on the pages and unlock them.
+ */
+void
+vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
+{
+ int i;
+ struct vpmap *vpm;
+ kmutex_t *mtx;
+ page_t *pp;
+
+ for (i = 0; vml[i].vs_data != NULL; i++) {
+ ASSERT(IS_KPM_ADDR(vml[i].vs_addr));
+
+ if (vpm_cache_enable) {
+ pp = *(((page_t **)vml[i].vs_data));
+ } else {
+ pp = (page_t *)vml[i].vs_data;
+ }
+
+ /*
+		 * Mark the page as modified or referenced, because vpm pages
+		 * do not take the faults that would normally set these bits.
+ */
+ if (rw == S_WRITE) {
+ hat_setrefmod(pp);
+ } else {
+ ASSERT(rw == S_READ);
+ hat_setref(pp);
+ }
+
+ if (vpm_cache_enable) {
+ page_unlock(pp);
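+			/*
+			 * vs_data points at the embedded vpm_pp member;
+			 * back up to the containing vpmap structure.
+			 */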
+ vpm = (struct vpmap *)((char *)vml[i].vs_data
+ - offsetof(struct vpmap, vpm_pp));
+ mtx = VPMAPMTX(vpm);
+ mutex_enter(mtx);
+
+ if (--vpm->vpm_refcnt == 0) {
+ free_vpmap(vpm);
+ }
+ mutex_exit(mtx);
+ } else {
+ hat_kpm_mapout(pp, 0, vml[i].vs_addr);
+ (void) page_release(pp, 1);
+ }
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = NULL;
+ }
+}
+
+/*
+ * Given the vp, off and the uio structure, this routine does the
+ * copy (uiomove). If the last page created is partially written,
+ * the rest of the page is zeroed out. It also zeros the beginning of
+ * the first page up to the start offset if requested (zerostart).
+ * If pages are to be fetched, it will call the filesystem's getpage
+ * function (VOP_GETPAGE) to get them, otherwise they will be created if
+ * not already present in the page cache.
+ */
+int
+vpm_data_copy(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ struct uio *uio,
+ int fetchpage,
+ int *newpage,
+ int zerostart,
+ enum seg_rw rw)
+{
+ int error;
+ struct vmap vml[MINVMAPS];
+ enum uio_rw uiorw;
+ int npages = 0;
+
+ uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
+ /*
+ * 'off' will be the offset where the I/O starts.
+ * We get the pages starting at the (off & PAGEMASK)
+ * page boundary.
+ */
+ error = vpm_map_pages(vp, off, (uint_t)len,
+ fetchpage, vml, MINVMAPS, &npages, rw);
+
+ if (newpage != NULL)
+ *newpage = npages;
+ if (!error) {
+ int i, pn, slen = len;
+ int pon = off & PAGEOFFSET;
+
+ /*
+ * Clear from the beginning of the page to start offset
+ * if requested.
+ */
+ if (!fetchpage && zerostart) {
+ (void) kzero(vml[0].vs_addr, (uint_t)pon);
+ VPM_DEBUG(vpmd_zerostart);
+ }
+
+ for (i = 0; !error && slen > 0 &&
+ vml[i].vs_addr != NULL; i++) {
+ pn = (int)MIN(slen, (PAGESIZE - pon));
+ error = uiomove(vml[i].vs_addr + pon,
+ (long)pn, uiorw, uio);
+ slen -= pn;
+ pon = 0;
+ }
+
+ /*
+ * When new pages are created, zero out part of the
+ * page we did not copy to.
+ */
+ if (!fetchpage && npages &&
+ uio->uio_loffset < roundup(off + len, PAGESIZE)) {
+ int nzero;
+
+ pon = (uio->uio_loffset & PAGEOFFSET);
+ nzero = PAGESIZE - pon;
+ i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
+ (void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
+ }
+ vpm_unmap_pages(vml, rw);
+ }
+ return (error);
+}
+
+/*
+ * Called to flush the pages of the given vnode covering the
+ * range [off, off + len).
+ */
+int
+vpm_sync_pages(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ uint_t flags)
+{
+ extern struct vnode *common_specvp();
+ int bflags = 0;
+ int error = 0;
+ size_t psize = roundup(len, PAGESIZE);
+
+ /*
+ * If this is a block device we have to be sure to use the
+ * "common" block device vnode for the mapping.
+ */
+ if (vp->v_type == VBLK)
+ vp = common_specvp(vp);
+
+ if ((flags & ~SM_DONTNEED) != 0) {
+ if (flags & SM_ASYNC)
+ bflags |= B_ASYNC;
+ if (flags & SM_INVAL)
+ bflags |= B_INVAL;
+ if (flags & SM_DESTROY)
+ bflags |= (B_INVAL|B_TRUNC);
+ if (flags & SM_FREE)
+ bflags |= B_FREE;
+ if (flags & SM_DONTNEED)
+ bflags |= B_DONTNEED;
+
+ error = VOP_PUTPAGE(vp, off, psize, bflags, CRED());
+ }
+
+ return (error);
+}
+
+
+#else /* SEGKPM_SUPPORT */
+
+/* vpm stubs */
+void
+vpm_init()
+{
+}
+
+/*ARGSUSED*/
+int
+vpm_pagecreate(
+ struct vnode *vp,
+ u_offset_t baseoff,
+ size_t len,
+ vmap_t vml[],
+ int nseg,
+ int *newpage)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+vpm_map_pages(
+ struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ int fetchpage,
+ vmap_t vml[],
+ int nseg,
+ int *newpage,
+ enum seg_rw rw)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+vpm_data_copy(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ struct uio *uio,
+ int fetchpage,
+ int *newpage,
+ int zerostart,
+ enum seg_rw rw)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
+{
+}
+
+/*ARGSUSED*/
+int
+vpm_sync_pages(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ uint_t flags)
+{
+ return (0);
+}
+#endif /* SEGKPM_SUPPORT */
diff --git a/usr/src/uts/common/vm/vpm.h b/usr/src/uts/common/vm/vpm.h
new file mode 100644
index 0000000000..6d9c53b009
--- /dev/null
+++ b/usr/src/uts/common/vm/vpm.h
@@ -0,0 +1,286 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VM_VPM_H
+#define _VM_VPM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The vnode page mappings (VPM) interfaces.
+ * "Commitment level - Consolidation private". They are subject
+ * to change without notice. Use them at your own risk.
+ *
+ * At this stage these interfaces are provided only to utilize the
+ * segkpm mappings and are enabled for Solaris x64. Therefore these
+ * interfaces have to be used under the 'vpm_enable' check, as an
+ * alternative to the segmap interfaces where applicable.
+ *
+ * The VPM interfaces provide temporary mappings to file pages. They
+ * return the mappings in a scatter gather list (SGL).
+ * The SGL elements are of type 'vmap_t'.
+ *
+ * typedef struct vmap {
+ * caddr_t vs_addr; / public /
+ * size_t vs_len; / public - Currently not used /
+ * void *vs_data; / opaque - private data /
+ * } vmap_t;
+ *
+ * An array of this structure has to be passed to the interface routines,
+ * along with the size (# of elements) of the SGL array. Depending on the
+ * requested length and the mapped chunk sizes (PAGESIZE here), the number
+ * of valid mappings returned can be less than the actual size of the SGL
+ * array. The SGL always contains an element with 'vs_addr' set to NULL,
+ * which marks the end of the valid entries in the SGL.
+ *
+ * The vmap_t structure members are populated with the mapped address
+ * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
+ * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
+ * and the caller should not access or modify it.
+ *
+ * Using a scatter gather list to return the mappings and lengths makes it
+ * possible to provide mappings of variable length. Currently only a mapping
+ * length of 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap
+ * interfaces, a maximum length of 'MAXBSIZE' is supported per request for
+ * now. The MAXBSIZE mappings are returned in 1 or 2 vmap_t elements
+ * of the SGL, depending on the PAGESIZE. The scatter gather list array size
+ * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
+ * The MAXBSIZE restriction exists because the filesystems cannot handle
+ * more (disk block allocations at a time) for now.
+ *
+ *
+ * Interfaces:
+ *
+ * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
+ * int fetchpage, vmap_t *vml, int vmlsz,
+ * int *newpagecreated, enum seg_rw rw);
+ *
+ * This function returns mappings to vnode pages.
+ *
+ * It takes a vnode, offset and length and returns mappings to the pages
+ * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
+ * Currently these interfaces are subject to restrictions similar to the
+ * segmap interfaces. The length passed in should satisfy the following
+ * criterion: '(off + len) <= ((off & PAGEMASK) + MAXBSIZE)'.
+ * The mapped addresses returned in 'vs_addr' are page aligned.
+ *
+ * The 'vmlsz' is the size(# elements) of the 'vml' array.
+ *
+ * When the 'fetchpage' flag is set, the vnode (file) pages will be fetched
+ * (via VOP_GETPAGE) from the backing store (disk) if they are not found in
+ * the system page cache. If 'fetchpage == 0', the vnode (file) pages for the
+ * given offset will simply be created if they are not already present in the
+ * system page cache. The 'newpagecreated' flag is set on return if new pages
+ * are created when 'fetchpage == 0' (i.e., the request was to create them).
+ *
+ * The 'seg_rw rw' indicates the intended operation on these mappings
+ * (S_WRITE or S_READ).
+ *
+ * Currently these interfaces only return segkpm mappings. Therefore the
+ * vnode pages that are being accessed will be locked (at least SHARED locked)
+ * for as long as these mappings are in use. After use, the unmap
+ * function, vpm_unmap_pages(), has to be called and the same SGL array
+ * needs to be passed to the unmap function.
+ *
+ *
+ * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
+ *
+ * This function unmaps the pages that were mapped by vpm_map_pages().
+ * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
+ *
+ *
+ * ex:
+ * To copy file data of vnode (file) 'vp' at offset 'off' to a kernel buffer
+ * 'buf', the following code snippet shows how to use the above two interfaces.
+ * Here the copy length runs to the MAXBSIZE boundary. This code can be
+ * executed repeatedly, in a loop, to copy more than MAXBSIZE of data.
+ *
+ * vmap_t vml[MINVMAPS];
+ * int err, i, newpage, len;
+ * int pon;
+ *
+ * pon = (off & PAGEOFFSET);
+ * len = MAXBSIZE - pon;
+ *
+ * if (vpm_enable) {
+ * err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
+ * &newpage, S_WRITE);
+ *
+ * if (err)
+ * return;
+ *
+ *	for (i = 0; vml[i].vs_addr != NULL; i++) {
+ *		bcopy(buf, vml[i].vs_addr + pon,
+ *		    PAGESIZE - pon);
+ *		buf += (PAGESIZE - pon);
+ *		pon = 0;
+ *	}
+ *
+ *	/ Zero the uncopied start of the first (new) page. /
+ *	if (newpage) {
+ *		pon = (off & PAGEOFFSET);
+ *		bzero(vml[0].vs_addr, pon);
+ *	}
+ *
+ * vpm_unmap_pages(vml, S_WRITE);
+ * }
+ *
+ *
+ * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
+ * struct uio *uio, int fetchpage, int *newpagecreated,
+ * int zerostart, enum seg_rw rw);
+ *
+ * This function can be called if the need is to just transfer data to/from
+ * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to
+ * do the data transfer. It can be used in the context of read and write
+ * system calls to transfer data between a user buffer, which is specified
+ * in the uio structure, and the vnode pages. If the data needs to be
+ * transferred between a kernel buffer and the pages, like in the above
+ * example, a uio structure can be set up accordingly and passed. The 'rw'
+ * parameter will determine the direction of the data transfer.
+ *
+ * The 'fetchpage' and 'newpagecreated' arguments are the same as explained
+ * above. The 'zerostart' flag, when set, zero-fills the first page from its
+ * beginning up to the offset 'off', i.e. from 'off & PAGEMASK' to 'off'.
+ * Here too the MAXBSIZE restriction mentioned above applies to the length
+ * requested.
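+ *
+ * ex:
+ * A minimal read-path sketch (an illustration, not taken from an actual
+ * caller; 'uiop' and the copy size 'n' are assumed to be set up by the
+ * caller, with 'n' not crossing the MAXBSIZE boundary of 'off'):
+ *
+ *	if (vpm_enable) {
+ *		error = vpm_data_copy(vp, off, n, uiop,
+ *				1, NULL, 0, S_READ);
+ *	}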
+ *
+ *
+ * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
+ * size_t len, uint_t flags)
+ *
+ * This function can be called to flush or sync the vnode (file) pages that
+ * have been accessed. It will call VOP_PUTPAGE().
+ *
+ * For the given vnode, off and len, the pages covering the range
+ * [off, off + len) are flushed. Currently it uses the same flags that
+ * are used with the segmap_release() interface. Refer to vm/seg_map.h.
+ * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
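+ *
+ * ex:
+ * A sketch (assumed usage, mirroring the segmap_release() flags) of
+ * asynchronously flushing a range that was just written:
+ *
+ *	if (vpm_enable) {
+ *		error = vpm_sync_pages(vp, off, len, SM_ASYNC);
+ *	}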
+ *
+ */
+
+
+/*
+ * vpm cache related definitions.
+ */
+#define VPMAP_MINCACHE (64 * 1024 * 1024)
+
+/*
+ * vpm caching mode
+ */
+#define VPMCACHE_LRU 0
+#define VPMCACHE_RANDOM 1
+/*
+ * Data structures to manage the cache of pages referenced by
+ * the vpm interfaces. There is one vpmap struct per page in the cache.
+ */
+struct vpmap {
+ kmutex_t vpm_mtx; /* protects non list fields */
+ struct vnode *vpm_vp; /* pointer to vnode of cached page */
+ struct vpmap *vpm_next; /* free list pointers */
+ struct vpmap *vpm_prev;
+ u_offset_t vpm_off; /* offset of the page */
+ page_t *vpm_pp; /* page pointer */
+	ushort_t vpm_refcnt;	/* number of active references */
+ ushort_t vpm_ndxflg; /* indicates which queue */
+ ushort_t vpm_free_ndx; /* freelist it belongs to */
+};
+
+/*
+ * Multiple vpmap free lists are maintained so that allocations
+ * scale with cpu count. To further reduce contention between
+ * allocations and deallocations, each list is made up of two queues.
+ */
+#define VPM_FREEQ_PAD 64
+union vpm_freeq {
+ struct {
+ struct vpmap *vpmsq_free;
+ kmutex_t vpmsq_mtx;
+ } vpmfq;
+ char vpmq_pad[VPM_FREEQ_PAD];
+};
+
+#define vpmq_free vpmfq.vpmsq_free
+#define vpmq_mtx vpmfq.vpmsq_mtx
+
+struct vpmfree {
+ union vpm_freeq vpm_freeq[2]; /* alloc and release queue */
+ union vpm_freeq *vpm_allocq; /* current alloc queue */
+ union vpm_freeq *vpm_releq; /* current release queue */
+ kcondvar_t vpm_free_cv;
+ ushort_t vpm_want;
+};
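+
+/*
+ * Allocations are intended to be served from the current alloc queue
+ * while frees are queued on the current release queue; when the alloc
+ * queue runs dry the two are swapped (see get_free_vpmap() and
+ * free_vpmap() in vpm.c for the authoritative logic), so allocating
+ * and releasing cpus mostly take different locks.
+ */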
+
+#define VPMALLOCQ 0
+#define VPMRELEQ 1
+
+/*
+ * VPM Interface definitions.
+ */
+
+/*
+ * This structure is the scatter gather list element. The page
+ * mappings will be returned in this structure. A pointer to an
+ * array of this structure is passed to the interface routines.
+ */
+typedef struct vmap {
+ caddr_t vs_addr; /* mapped address */
+ size_t vs_len; /* length, currently fixed at PAGESIZE */
+ void *vs_data; /* opaque - private data */
+} vmap_t;
+
+/*
+ * The minimum and maximum number of array elements in the scatter
+ * gather list.
+ */
+#define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1): min # of mappings */
+#define	MAXVMAPS   10		/* max # of elements in the SGL array */
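+
+/*
+ * For example, with the common MAXBSIZE of 8192 and 4K pages, a
+ * request maps at most two pages, and one more array element holds
+ * the NULL terminator: 8192/4096 + 1 == 3 == MINVMAPS.
+ */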
+
+#ifdef _KERNEL
+
+extern int vpm_enable;
+/*
+ * vpm page mapping operations.
+ */
+extern void vpm_init(void);
+extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
+ vmap_t *, int, int *, enum seg_rw);
+
+extern void vpm_unmap_pages(vmap_t *, enum seg_rw);
+extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
+extern int vpm_data_copy(struct vnode *, u_offset_t, size_t,
+ struct uio *, int, int *, int, enum seg_rw rw);
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VM_VPM_H */
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index b58cad94f8..8ac9c6ffec 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -1528,7 +1528,7 @@ startup_vm(void)
* between kernelbase and the beginning of segkpm.
*/
kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE;
- kpm_size = mmu_ptob(physmax);
+ kpm_size = mmu_ptob(physmax + 1);
PRM_DEBUG(kpm_vbase);
PRM_DEBUG(kpm_size);
final_kernelheap =
@@ -1763,6 +1763,7 @@ startup_vm(void)
if (kpm_desired) {
kpm_init();
kpm_enable = 1;
+ vpm_enable = 1;
}
/*
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 89fc15e20e..4c06279917 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -3124,6 +3124,7 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
return (hm);
}
+extern int vpm_enable;
/*
* Unload all translations to a page. If the page is a subpage of a large
* page, the large page mappings are also removed.
@@ -3142,6 +3143,14 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
uint_t entry;
level_t level;
+#if defined(__amd64)
+ /*
+	 * Clear the vpm ref, so a stale cached mapping is not
+	 * reused if this page is stolen.
+ */
+ if (vpm_enable) {
+ pp->p_vpmref = 0;
+ }
+#endif
/*
* The loop with next_size handles pages with multiple pagesize mappings
*/
@@ -3488,6 +3497,11 @@ hat_page_getshare(page_t *pp)
{
uint_t cnt;
cnt = hment_mapcnt(pp);
+#if defined(__amd64)
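+	/* An active vpm reference counts as one more mapping. */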
+ if (vpm_enable && pp->p_vpmref) {
+ cnt += 1;
+ }
+#endif
return (cnt);
}