author     praks <none@none>  2006-04-19 18:07:13 -0700
committer  praks <none@none>  2006-04-19 18:07:13 -0700
commit     a5652762e5f7bf683d19f18542e5e39df63bad79 (patch)
tree       113821225c18b190514811f3e27a638333bc2dcd /usr/src
parent     4ab777b1b0f310e59b52a57c79efa0571506942a (diff)
download   illumos-joyent-a5652762e5f7bf683d19f18542e5e39df63bad79.tar.gz
6256083 Need a lightweight file page mapping mechanism to substitute segmap
6387639 segkpm segment set to incorrect size for amd64
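
The same conversion pattern repeats across the nfs, specfs, tmpfs and ufs read and write paths below: when the per-platform vpm_enable flag is set, the segmap slot lookup/copy/release sequence is replaced by the new vpm calls. A condensed sketch of the read-side shape, assembled from the hunks that follow (do_vpm_read() is a hypothetical wrapper, not a function added by this commit):

    /*
     * Read-path shape of the conversion: vpm_data_copy() maps,
     * copies and unmaps in one call; vpm_sync_pages() replaces
     * segmap_release() for the writeback/cleanup step.
     */
    static int
    do_vpm_read(vnode_t *vp, u_offset_t off, size_t on, size_t n,
        struct uio *uiop, int flags)
    {
        caddr_t base = NULL;
        int error;

        if (vpm_enable) {
            error = vpm_data_copy(vp, off + on, n, uiop,
                1, NULL, 0, S_READ);
        } else {
            base = segmap_getmapflt(segkmap, vp, off + on,
                n, 1, S_READ);
            error = uiomove(base + on, n, UIO_READ, uiop);
        }
        if (!error) {
            if (vpm_enable)
                error = vpm_sync_pages(vp, off, n, flags);
            else
                error = segmap_release(segkmap, base, flags);
        } else {
            if (vpm_enable)
                (void) vpm_sync_pages(vp, off, n, 0);
            else
                (void) segmap_release(segkmap, base, 0);
        }
        return (error);
    }
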
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/pkgdefs/SUNWhea/prototype_com        1
-rw-r--r--  usr/src/uts/common/Makefile.files            1
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs3_vnops.c      85
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_client.c     36
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_vnops.c      84
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_client.c      35
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_vnops.c       86
-rw-r--r--  usr/src/uts/common/fs/specfs/specvnops.c    75
-rw-r--r--  usr/src/uts/common/fs/tmpfs/tmp_vnops.c     99
-rw-r--r--  usr/src/uts/common/fs/ufs/ufs_vnops.c       85
-rw-r--r--  usr/src/uts/common/vm/Makefile              11
-rw-r--r--  usr/src/uts/common/vm/page.h                11
-rw-r--r--  usr/src/uts/common/vm/seg_map.c             11
-rw-r--r--  usr/src/uts/common/vm/seg_map.h              8
-rw-r--r--  usr/src/uts/common/vm/vpm.c               1141
-rw-r--r--  usr/src/uts/common/vm/vpm.h                286
-rw-r--r--  usr/src/uts/i86pc/os/startup.c               3
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c              14
18 files changed, 1888 insertions, 184 deletions
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index f265268418..4fec41a28f 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -1230,6 +1230,7 @@ f none usr/include/vm/seg_spt.h 644 root bin
f none usr/include/vm/seg_vn.h 644 root bin
f none usr/include/vm/seg_kpm.h 644 root bin
f none usr/include/vm/vpage.h 644 root bin
+f none usr/include/vm/vpm.h 644 root bin
f none usr/include/volmgt.h 644 root bin
f none usr/include/wait.h 644 root bin
f none usr/include/wchar.h 644 root bin
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index b025f1d7c6..2504b4664c 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -135,6 +135,7 @@ GENUNIX_OBJS += \
flock.o \
fm.o \
fork.o \
+ vpm.o \
fsat.o \
fs_subr.o \
fsflush.o \
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
index a53d2270b5..3b54de5ea7 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -567,9 +566,18 @@ nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
if (diff < n)
n = (size_t)diff;
- base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, off + on, n, uiop,
+ 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
+ S_READ);
- error = uiomove(base + on, n, UIO_READ, uiop);
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
if (!error) {
/*
@@ -583,9 +591,18 @@ nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
else
flags = 0;
mutex_exit(&rp->r_statelock);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ }
} while (!error && uiop->uio_resid > 0);
return (error);
@@ -749,25 +766,35 @@ nfs3_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
- if (segmap_kpm) {
- int pon = uiop->uio_loffset & PAGEOFFSET;
- size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
- int pagecreate;
+ if (vpm_enable) {
+ /*
+ * It will use kpm mappings, so no need to
+ * pass an address.
+ */
+ error = writerp(rp, NULL, n, uiop, 0);
+ } else {
+ if (segmap_kpm) {
+ int pon = uiop->uio_loffset & PAGEOFFSET;
+ size_t pn = MIN(PAGESIZE - pon,
+ uiop->uio_resid);
+ int pagecreate;
- mutex_enter(&rp->r_statelock);
- pagecreate = (pon == 0) && (pn == PAGESIZE ||
- uiop->uio_loffset + pn >= rp->r_size);
- mutex_exit(&rp->r_statelock);
+ mutex_enter(&rp->r_statelock);
+ pagecreate = (pon == 0) && (pn == PAGESIZE ||
+ uiop->uio_loffset + pn >= rp->r_size);
+ mutex_exit(&rp->r_statelock);
- base = segmap_getmapflt(segkmap, vp, off + on,
+ base = segmap_getmapflt(segkmap, vp, off + on,
pn, !pagecreate, S_WRITE);
- error = writerp(rp, base + pon, n, uiop, pagecreate);
+ error = writerp(rp, base + pon, n, uiop,
+ pagecreate);
- } else {
- base = segmap_getmapflt(segkmap, vp, off + on,
- n, 0, S_READ);
- error = writerp(rp, base + on, n, uiop, 0);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on,
+ n, 0, S_READ);
+ error = writerp(rp, base + on, n, uiop, 0);
+ }
}
if (!error) {
@@ -790,9 +817,17 @@ nfs3_fwrite:
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
/*
* In the event that we got an access error while
* faulting in a page for a write-only file just
diff --git a/usr/src/uts/common/fs/nfs/nfs4_client.c b/usr/src/uts/common/fs/nfs/nfs4_client.c
index c95a0cd347..81e01a543b 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_client.c
@@ -2185,10 +2185,13 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
u_offset_t offset;
int error;
int sm_error;
+ vnode_t *vp = RTOV(rp);
ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
- ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
+ if (!vpm_enable) {
+ ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
+ }
/*
* Move bytes in at most PAGESIZE chunks. We must avoid
@@ -2206,8 +2209,7 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* n is the number of bytes required to satisfy the request
* or the number of bytes to fill out the page.
*/
- n = (int)MIN((PAGESIZE - ((uintptr_t)base & PAGEOFFSET)),
- tcount);
+ n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
/*
* Check to see if we can skip reading in the page
@@ -2226,12 +2228,12 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* created and mapped at base.
*/
pagecreate = pgcreated ||
- (((uintptr_t)base & PAGEOFFSET) == 0 &&
+ ((offset & PAGEOFFSET) == 0 &&
(n == PAGESIZE || ((offset + n) >= rp->r_size)));
mutex_exit(&rp->r_statelock);
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
/*
* The last argument tells segmap_pagecreate() to
* always lock the page, as opposed to sometimes
@@ -2267,7 +2269,17 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
rp->r_modaddr = (offset & MAXBMASK);
mutex_exit(&rp->r_statelock);
- error = uiomove(base, n, UIO_WRITE, uio);
+ if (vpm_enable) {
+ /*
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initizliazed
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, offset, n, uio,
+ !pagecreate, NULL, 0, S_WRITE);
+ } else {
+ error = uiomove(base, n, UIO_WRITE, uio);
+ }
/*
* r_size is the maximum number of
@@ -2284,7 +2296,11 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
/* n = # of bytes written */
n = (int)(uio->uio_loffset - offset);
- base += n;
+
+ if (!vpm_enable) {
+ base += n;
+ }
+
tcount -= n;
/*
* If we created pages w/o initializing them completely,
@@ -2292,7 +2308,7 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* This happens on most EOF write cases and if
* we had some sort of error during the uiomove.
*/
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
(void) kzero(base, PAGESIZE - n);
@@ -2310,8 +2326,8 @@ writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* segmap_pagecreate().
*/
sm_error = segmap_fault(kas.a_hat, segkmap,
- saved_base, saved_n,
- F_SOFTUNLOCK, S_WRITE);
+ saved_base, saved_n,
+ F_SOFTUNLOCK, S_WRITE);
if (error == 0)
error = sm_error;
}
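
Because the vpm path calls writerp4() with base == NULL, the chunking arithmetic above now derives from the file offset rather than the mapped address. A worked instance of the new computation (the numbers are illustrative):

    /*
     * With PAGESIZE = 0x1000, offset = 0x2600 and tcount = 0x2000:
     * n = MIN(0x1000 - 0x600, 0x2000) = 0xa00, so the first chunk
     * fills out the current page and later iterations start
     * page-aligned; pagecreate can only be set on aligned chunks.
     */
    n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
    pagecreate = pgcreated ||
        ((offset & PAGEOFFSET) == 0 &&
        (n == PAGESIZE || ((offset + n) >= rp->r_size)));
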
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
index faf08573ba..97fc46809c 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -2617,9 +2616,19 @@ nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
if (diff < n)
n = (uint_t)diff;
- base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, off + on, n, uiop,
+ 1, NULL, 0, S_READ);
- error = uiomove(base + on, n, UIO_READ, uiop);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
+ S_READ);
+
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
if (!error) {
/*
@@ -2633,9 +2642,18 @@ nfs4_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
else
flags = 0;
mutex_exit(&rp->r_statelock);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ }
} while (!error && uiop->uio_resid > 0);
return (error);
@@ -2826,25 +2844,35 @@ nfs4_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
- if (segmap_kpm) {
- int pon = uiop->uio_loffset & PAGEOFFSET;
- size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
- int pagecreate;
+ if (vpm_enable) {
+ /*
+ * It will use kpm mappings, so no need to
+ * pass an address.
+ */
+ error = writerp4(rp, NULL, n, uiop, 0);
+ } else {
+ if (segmap_kpm) {
+ int pon = uiop->uio_loffset & PAGEOFFSET;
+ size_t pn = MIN(PAGESIZE - pon,
+ uiop->uio_resid);
+ int pagecreate;
- mutex_enter(&rp->r_statelock);
- pagecreate = (pon == 0) && (pn == PAGESIZE ||
- uiop->uio_loffset + pn >= rp->r_size);
- mutex_exit(&rp->r_statelock);
+ mutex_enter(&rp->r_statelock);
+ pagecreate = (pon == 0) && (pn == PAGESIZE ||
+ uiop->uio_loffset + pn >= rp->r_size);
+ mutex_exit(&rp->r_statelock);
- base = segmap_getmapflt(segkmap, vp, off + on,
+ base = segmap_getmapflt(segkmap, vp, off + on,
pn, !pagecreate, S_WRITE);
- error = writerp4(rp, base + pon, n, uiop, pagecreate);
+ error = writerp4(rp, base + pon, n, uiop,
+ pagecreate);
- } else {
- base = segmap_getmapflt(segkmap, vp, off + on,
- n, 0, S_READ);
- error = writerp4(rp, base + on, n, uiop, 0);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on,
+ n, 0, S_READ);
+ error = writerp4(rp, base + on, n, uiop, 0);
+ }
}
if (!error) {
@@ -2867,9 +2895,17 @@ nfs4_fwrite:
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
/*
* In the event that we got an access error while
* faulting in a page for a write-only file just
diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c
index d6c0a25d7a..40c886fc85 100644
--- a/usr/src/uts/common/fs/nfs/nfs_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs_client.c
@@ -2104,10 +2104,13 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
u_offset_t offset;
int error;
int sm_error;
+ vnode_t *vp = RTOV(rp);
ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
- ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
+ if (!vpm_enable) {
+ ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
+ }
/*
* Move bytes in at most PAGESIZE chunks. We must avoid
@@ -2125,8 +2128,7 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* n is the number of bytes required to satisfy the request
* or the number of bytes to fill out the page.
*/
- n = (int)MIN((PAGESIZE - ((uintptr_t)base & PAGEOFFSET)),
- tcount);
+ n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
/*
* Check to see if we can skip reading in the page
@@ -2145,11 +2147,11 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* created and mapped at base.
*/
pagecreate = pgcreated ||
- (((uintptr_t)base & PAGEOFFSET) == 0 &&
+ ((offset & PAGEOFFSET) == 0 &&
(n == PAGESIZE || ((offset + n) >= rp->r_size)));
mutex_exit(&rp->r_statelock);
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
/*
* The last argument tells segmap_pagecreate() to
* always lock the page, as opposed to sometimes
@@ -2185,7 +2187,17 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
rp->r_modaddr = (offset & MAXBMASK);
mutex_exit(&rp->r_statelock);
- error = uiomove(base, n, UIO_WRITE, uio);
+ if (vpm_enable) {
+ /*
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initialized
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, offset, n, uio,
+ !pagecreate, NULL, 0, S_WRITE);
+ } else {
+ error = uiomove(base, n, UIO_WRITE, uio);
+ }
/*
* r_size is the maximum number of
@@ -2202,7 +2214,10 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
/* n = # of bytes written */
n = (int)(uio->uio_loffset - offset);
- base += n;
+
+ if (!vpm_enable) {
+ base += n;
+ }
tcount -= n;
/*
* If we created pages w/o initializing them completely,
@@ -2210,7 +2225,7 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* This happens on most EOF write cases and if
* we had some sort of error during the uiomove.
*/
- if (pagecreate) {
+ if (!vpm_enable && pagecreate) {
if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
(void) kzero(base, PAGESIZE - n);
@@ -2228,8 +2243,8 @@ writerp(rnode_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
* segmap_pagecreate().
*/
sm_error = segmap_fault(kas.a_hat, segkmap,
- saved_base, saved_n,
- F_SOFTUNLOCK, S_WRITE);
+ saved_base, saved_n,
+ F_SOFTUNLOCK, S_WRITE);
if (error == 0)
error = sm_error;
}
diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c
index b98b3d280e..583ce42473 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
@@ -472,9 +471,17 @@ nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
if (diff < n)
n = (size_t)diff;
- base = segmap_getmapflt(segkmap, vp, off + on, n, 1, S_READ);
-
- error = uiomove(base + on, n, UIO_READ, uiop);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, off + on, n, uiop,
+ 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on, n,
+ 1, S_READ);
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
if (!error) {
/*
@@ -488,9 +495,18 @@ nfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
else
flags = 0;
mutex_exit(&rp->r_statelock);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ }
} while (!error && uiop->uio_resid > 0);
return (error);
@@ -651,25 +667,35 @@ nfs_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
- if (segmap_kpm) {
- int pon = uiop->uio_loffset & PAGEOFFSET;
- size_t pn = MIN(PAGESIZE - pon, uiop->uio_resid);
- int pagecreate;
+ if (vpm_enable) {
+ /*
+ * It will use kpm mappings, so no need to
+ * pass an address.
+ */
+ error = writerp(rp, NULL, n, uiop, 0);
+ } else {
+ if (segmap_kpm) {
+ int pon = uiop->uio_loffset & PAGEOFFSET;
+ size_t pn = MIN(PAGESIZE - pon,
+ uiop->uio_resid);
+ int pagecreate;
- mutex_enter(&rp->r_statelock);
- pagecreate = (pon == 0) && (pn == PAGESIZE ||
- uiop->uio_loffset + pn >= rp->r_size);
- mutex_exit(&rp->r_statelock);
+ mutex_enter(&rp->r_statelock);
+ pagecreate = (pon == 0) && (pn == PAGESIZE ||
+ uiop->uio_loffset + pn >= rp->r_size);
+ mutex_exit(&rp->r_statelock);
- base = segmap_getmapflt(segkmap, vp, off + on,
+ base = segmap_getmapflt(segkmap, vp, off + on,
pn, !pagecreate, S_WRITE);
- error = writerp(rp, base + pon, n, uiop, pagecreate);
+ error = writerp(rp, base + pon, n, uiop,
+ pagecreate);
- } else {
- base = segmap_getmapflt(segkmap, vp, off + on,
- n, 0, S_READ);
- error = writerp(rp, base + on, n, uiop, 0);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, off + on,
+ n, 0, S_READ);
+ error = writerp(rp, base + on, n, uiop, 0);
+ }
}
if (!error) {
@@ -691,9 +717,17 @@ nfs_fwrite:
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
/*
* In the event that we got an access error while
* faulting in a page for a write-only file just
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index d4ee630b92..6a2d6f73d0 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -880,10 +879,16 @@ spec_read(
if (diff < n)
n = (size_t)diff;
- base = segmap_getmapflt(segkmap, blkvp,
- (u_offset_t)(off + on), n, 1, S_READ);
+ if (vpm_enable) {
+ error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
+ n, uiop, 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, blkvp,
+ (u_offset_t)(off + on), n, 1, S_READ);
- if ((error = uiomove(base + on, n, UIO_READ, uiop)) == 0) {
+ error = uiomove(base + on, n, UIO_READ, uiop);
+ }
+ if (!error) {
int flags = 0;
/*
* If we read a whole block, we won't need this
@@ -891,9 +896,17 @@ spec_read(
*/
if (n + on == MAXBSIZE)
flags = SM_DONTNEED | SM_FREE;
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(blkvp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
} else {
- (void) segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(blkvp, off, n, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
if (bdevsize == UNKNOWN_SIZE) {
error = 0;
break;
@@ -984,22 +997,27 @@ spec_write(
if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize))
pagecreate = 1;
- base = segmap_getmapflt(segkmap, blkvp,
- (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
-
- /*
- * segmap_pagecreate() returns 1 if it calls
- * page_create_va() to allocate any pages.
- */
newpage = 0;
+ if (vpm_enable) {
+ error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
+ n, uiop, !pagecreate, NULL, 0, S_WRITE);
+ } else {
+ base = segmap_getmapflt(segkmap, blkvp,
+ (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
- if (pagecreate)
- newpage = segmap_pagecreate(segkmap, base + on,
- n, 0);
+ /*
+ * segmap_pagecreate() returns 1 if it calls
+ * page_create_va() to allocate any pages.
+ */
+
+ if (pagecreate)
+ newpage = segmap_pagecreate(segkmap, base + on,
+ n, 0);
- error = uiomove(base + on, n, UIO_WRITE, uiop);
+ error = uiomove(base + on, n, UIO_WRITE, uiop);
+ }
- if (pagecreate &&
+ if (!vpm_enable && pagecreate &&
uiop->uio_loffset <
P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) {
/*
@@ -1029,7 +1047,7 @@ spec_write(
* Unlock the pages which have been allocated by
* page_create_va() in segmap_pagecreate().
*/
- if (newpage)
+ if (!vpm_enable && newpage)
segmap_pageunlock(segkmap, base + on,
(size_t)n, S_WRITE);
@@ -1053,9 +1071,18 @@ spec_write(
flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
}
smark(sp, SUPD|SCHG);
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, SM_INVAL);
+ if (vpm_enable) {
+ error = vpm_sync_pages(blkvp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(blkvp, off, n, SM_INVAL);
+ } else {
+ (void) segmap_release(segkmap, base, SM_INVAL);
+ }
+ }
} while (error == 0 && uiop->uio_resid > 0 && n != 0);
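
spec_write() shows the write-side shape of the conversion most clearly: the vpm branch folds page creation into the copy itself, so segmap's pagecreate bookkeeping becomes conditional on !vpm_enable. A condensed sketch of the hunks above (sync and error handling elided):

    newpage = 0;
    if (vpm_enable) {
        /* Page create/fetch and the copy happen inside vpm. */
        error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
            n, uiop, !pagecreate, NULL, 0, S_WRITE);
    } else {
        base = segmap_getmapflt(segkmap, blkvp,
            (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
        if (pagecreate)
            newpage = segmap_pagecreate(segkmap, base + on, n, 0);
        error = uiomove(base + on, n, UIO_WRITE, uiop);
    }
    /* segmap-only fixups are now guarded by !vpm_enable. */
    if (!vpm_enable && newpage)
        segmap_pageunlock(segkmap, base + on, (size_t)n, S_WRITE);
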
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index 490cbfc61c..d623dce3f7 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -261,14 +260,32 @@ wrtmp(
if (!pagecreate)
rw_exit(&tp->tn_contents);
- /* Get offset within the segmap mapping */
- segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
- base = segmap_getmapflt(segkmap, vp, (offset & MAXBMASK),
- PAGESIZE, !pagecreate, S_WRITE);
-
newpage = 0;
+ if (vpm_enable) {
+ /*
+ * XXX Why do we need to hold the contents lock?
+ * The kpm mappings will not cause a fault.
+ *
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initialized
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, offset, bytes, uio,
+ !pagecreate, &newpage, 1, S_WRITE);
+
+ if (pagecreate) {
+ rw_exit(&tp->tn_contents);
+ }
+ } else {
+ /* Get offset within the segmap mapping */
+ segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
+ base = segmap_getmapflt(segkmap, vp,
+ (offset & MAXBMASK),
+ PAGESIZE, !pagecreate, S_WRITE);
+ }
- if (pagecreate) {
+
+ if (!vpm_enable && pagecreate) {
rw_downgrade(&tp->tn_contents);
/*
@@ -287,10 +304,12 @@ wrtmp(
(size_t)pageoffset);
}
- error = uiomove(base + segmap_offset + pageoffset,
+ if (!vpm_enable) {
+ error = uiomove(base + segmap_offset + pageoffset,
(long)bytes, UIO_WRITE, uio);
+ }
- if (pagecreate &&
+ if (!vpm_enable && pagecreate &&
uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) {
long zoffset; /* zero from offset into page */
/*
@@ -310,16 +329,17 @@ wrtmp(
*/
if ((zoffset = pageoffset + nmoved) < PAGESIZE)
(void) kzero(base + segmap_offset + zoffset,
- (size_t)PAGESIZE - zoffset);
+ (size_t)PAGESIZE - zoffset);
}
/*
* Unlock the pages which have been allocated by
* page_create_va() in segmap_pagecreate()
*/
- if (newpage)
+ if (!vpm_enable && newpage) {
segmap_pageunlock(segkmap, base + segmap_offset,
(size_t)PAGESIZE, S_WRITE);
+ }
if (error) {
/*
@@ -327,9 +347,19 @@ wrtmp(
* be sure to invalidate any pages that may have
* been allocated.
*/
- (void) segmap_release(segkmap, base, SM_INVAL);
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, offset,
+ PAGESIZE, SM_INVAL);
+ } else {
+ (void) segmap_release(segkmap, base, SM_INVAL);
+ }
} else {
- error = segmap_release(segkmap, base, 0);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, offset,
+ PAGESIZE, 0);
+ } else {
+ error = segmap_release(segkmap, base, 0);
+ }
}
/*
@@ -468,17 +498,36 @@ rdtmp(
*/
rw_exit(&tp->tn_contents);
- segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
- base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
- bytes, 1, S_READ);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, offset, bytes, uio,
+ 1, NULL, 0, S_READ);
+ } else {
+ segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
+ base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
+ bytes, 1, S_READ);
- error = uiomove(base + segmap_offset + pageoffset,
- (long)bytes, UIO_READ, uio);
+ error = uiomove(base + segmap_offset + pageoffset,
+ (long)bytes, UIO_READ, uio);
+ }
- if (error)
- (void) segmap_release(segkmap, base, 0);
- else
- error = segmap_release(segkmap, base, 0);
+ if (error) {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, offset,
+ PAGESIZE, 0);
+ } else {
+ (void) segmap_release(segkmap, base, 0);
+ }
+ } else {
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, offset,
+ PAGESIZE, 0);
+ } else {
+ error = segmap_release(segkmap, base, 0);
+ }
+ }
/*
* Re-acquire contents lock.
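
Of the filesystems converted here, tmpfs is the only caller that hands vpm_data_copy() a newpage pointer and zerostart = 1. The annotated restatement below maps wrtmp()'s arguments onto the parameter roles from the vpm_data_copy() definition later in this commit (a condensed restatement, not a new call):

    error = vpm_data_copy(vp,      /* vnode to map */
        offset,                    /* byte offset of the write */
        bytes,                     /* length to copy */
        uio,                       /* user I/O descriptor */
        !pagecreate,               /* fetchpage: read page in first? */
        &newpage,                  /* out: set if new pages created */
        1,                         /* zerostart: zero the page head */
        S_WRITE);                  /* access type */
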
diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c
index 5dd9495aa2..db8a0fc09b 100644
--- a/usr/src/uts/common/fs/ufs/ufs_vnops.c
+++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c
@@ -999,21 +999,32 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
rw_exit(&ufsvfsp->vfs_dqrwlock);
}
- base = segmap_getmapflt(segkmap, vp, (off + mapon),
+ newpage = 0;
+ premove_resid = uio->uio_resid;
+ if (vpm_enable) {
+ /*
+ * Copy data. If new pages are created, part of
+ * the page that is not written will be initialized
+ * with zeros.
+ */
+ error = vpm_data_copy(vp, (off + mapon), (uint_t)n,
+ uio, !pagecreate, &newpage, 0, S_WRITE);
+ } else {
+
+ base = segmap_getmapflt(segkmap, vp, (off + mapon),
(uint_t)n, !pagecreate, S_WRITE);
- /*
- * segmap_pagecreate() returns 1 if it calls
- * page_create_va() to allocate any pages.
- */
- newpage = 0;
+ /*
+ * segmap_pagecreate() returns 1 if it calls
+ * page_create_va() to allocate any pages.
+ */
- if (pagecreate)
- newpage = segmap_pagecreate(segkmap, base,
- (size_t)n, 0);
+ if (pagecreate)
+ newpage = segmap_pagecreate(segkmap, base,
+ (size_t)n, 0);
- premove_resid = uio->uio_resid;
- error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
+ error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
+ }
/*
* If "newpage" is set, then a new page was created and it
@@ -1028,7 +1039,7 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
* If uiomove fails because of an error, the old valid data
* is kept instead of filling the rest of the page with zero's.
*/
- if (newpage &&
+ if (!vpm_enable && newpage &&
uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
/*
* We created pages w/o initializing them completely,
@@ -1049,7 +1060,7 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
* Unlock the pages allocated by page_create_va()
* in segmap_pagecreate()
*/
- if (newpage)
+ if (!vpm_enable && newpage)
segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
/*
@@ -1130,7 +1141,15 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
*/
flags = SM_INVAL;
}
- (void) segmap_release(segkmap, base, flags);
+
+ if (vpm_enable) {
+ /*
+ * Flush pages.
+ */
+ (void) vpm_sync_pages(vp, off, n, flags);
+ } else {
+ (void) segmap_release(segkmap, base, flags);
+ }
} else {
flags = 0;
/*
@@ -1163,7 +1182,14 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
*/
flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
}
- error = segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ /*
+ * Flush pages.
+ */
+ (void) vpm_sync_pages(vp, off, n, flags);
+ } else {
+ (void) segmap_release(segkmap, base, flags);
+ }
/*
* If the operation failed and is synchronous,
* then we need to unwind what uiomove() last
@@ -1429,10 +1455,18 @@ rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr)
*/
if (rwtype == RW_READER)
rw_exit(&ip->i_contents);
- base = segmap_getmapflt(segkmap, vp, (off + mapon),
- (uint_t)n, 1, S_READ);
- error = uiomove(base + mapon, (long)n, UIO_READ, uio);
+ if (vpm_enable) {
+ /*
+ * Copy data.
+ */
+ error = vpm_data_copy(vp, (off + mapon), (uint_t)n,
+ uio, 1, NULL, 0, S_READ);
+ } else {
+ base = segmap_getmapflt(segkmap, vp, (off + mapon),
+ (uint_t)n, 1, S_READ);
+ error = uiomove(base + mapon, (long)n, UIO_READ, uio);
+ }
flags = 0;
if (!error) {
@@ -1460,9 +1494,18 @@ rdip(struct inode *ip, struct uio *uio, int ioflag, cred_t *cr)
flags &= ~SM_ASYNC;
flags |= SM_WRITE;
}
- error = segmap_release(segkmap, base, flags);
- } else
- (void) segmap_release(segkmap, base, flags);
+ if (vpm_enable) {
+ error = vpm_sync_pages(vp, off, n, flags);
+ } else {
+ error = segmap_release(segkmap, base, flags);
+ }
+ } else {
+ if (vpm_enable) {
+ (void) vpm_sync_pages(vp, off, n, flags);
+ } else {
+ (void) segmap_release(segkmap, base, flags);
+ }
+ }
if (rwtype == RW_READER)
rw_enter(&ip->i_contents, rwtype);
diff --git a/usr/src/uts/common/vm/Makefile b/usr/src/uts/common/vm/Makefile
index fcd6582985..642c393f8f 100644
--- a/usr/src/uts/common/vm/Makefile
+++ b/usr/src/uts/common/vm/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -29,8 +28,8 @@
# include global definitions
include ../../../Makefile.master
-HDRS= anon.h as.h faultcode.h hat.h kpm.h page.h pvn.h rm.h seg.h vpage.h \
- seg_dev.h seg_enum.h seg_kmem.h seg_kp.h seg_kpm.h seg_map.h \
+HDRS= anon.h as.h faultcode.h vpm.h hat.h kpm.h page.h pvn.h rm.h seg.h \
+ vpage.h seg_dev.h seg_enum.h seg_kmem.h seg_kp.h seg_kpm.h seg_map.h \
seg_vn.h seg_spt.h
ROOTDIRS= $(ROOT)/usr/include/vm
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index e3317b9fea..a1aa25bbbc 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -468,7 +467,7 @@ typedef struct page {
struct vnode *p_vnode; /* vnode that this page is named by */
selock_t p_selock; /* shared/exclusive lock on the page */
#if defined(_LP64)
- int p_selockpad; /* pad for growing selock */
+ uint_t p_vpmref; /* vpm ref - index of the vpmap_t */
#endif
struct page *p_hash; /* hash by [vnode, offset] */
struct page *p_vpnext; /* next page in vnode list */
@@ -506,7 +505,11 @@ typedef struct page {
/* index of entry in p_map when p_embed is set */
uint_t p_mlentry;
#endif
+#if defined(_LP64)
+ kmutex_t p_ilock; /* protects p_vpmref */
+#else
uint64_t p_msresv_2; /* page allocation debugging */
+#endif
} page_t;
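
On _LP64 kernels the page_t change funds both new fields out of existing space: the int pad p_selockpad becomes the uint_t p_vpmref index, and the 8-byte p_msresv_2 debug word becomes the kmutex_t p_ilock protecting it, so the structure does not grow. A minimal compile-time check in that spirit (a sketch, assuming the illumos CTASSERT macro from <sys/debug.h>; it is not part of the commit):

    #include <sys/types.h>
    #include <sys/t_lock.h>
    #include <sys/debug.h>

    /*
     * Both substitutions are size-for-size: an int pad becomes a
     * uint_t, and a uint64_t debug word becomes a kmutex_t (one
     * 8-byte word on LP64), keeping page_t's layout unchanged.
     */
    CTASSERT(sizeof (uint_t) == sizeof (int));
    CTASSERT(sizeof (kmutex_t) == sizeof (uint64_t));
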
diff --git a/usr/src/uts/common/vm/seg_map.c b/usr/src/uts/common/vm/seg_map.c
index 9fd8d37e5a..de27f6e2ff 100644
--- a/usr/src/uts/common/vm/seg_map.c
+++ b/usr/src/uts/common/vm/seg_map.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -466,6 +465,10 @@ segmap_create(struct seg *seg, void *argsp)
scpu->scpu.scpu_last_smap = smd_smap;
}
+ if (vpm_enable) {
+ vpm_init();
+ }
+
#ifdef DEBUG
/*
* Keep track of which colors are used more often.
diff --git a/usr/src/uts/common/vm/seg_map.h b/usr/src/uts/common/vm/seg_map.h
index 339dabe674..0e3cd9bf9b 100644
--- a/usr/src/uts/common/vm/seg_map.h
+++ b/usr/src/uts/common/vm/seg_map.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -65,6 +64,7 @@ struct segmap_crargs {
};
#include <vm/kpm.h>
+#include <vm/vpm.h>
/*
* Each smap struct represents a MAXBSIZE sized mapping to the
diff --git a/usr/src/uts/common/vm/vpm.c b/usr/src/uts/common/vm/vpm.c
new file mode 100644
index 0000000000..1f4f2fdf58
--- /dev/null
+++ b/usr/src/uts/common/vm/vpm.c
@@ -0,0 +1,1141 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * VM - generic vnode page mapping interfaces.
+ *
+ * Mechanism to provide temporary mappings to vnode pages.
+ * The typical use would be to copy/access file data.
+ */
+
+#include <sys/types.h>
+#include <sys/t_lock.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/buf.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/mman.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/vtrace.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/thread.h>
+#include <sys/dumphdr.h>
+#include <sys/bitmap.h>
+#include <sys/lgrp.h>
+
+#include <vm/seg_kmem.h>
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_kpm.h>
+#include <vm/seg_map.h>
+#include <vm/page.h>
+#include <vm/pvn.h>
+#include <vm/rm.h>
+#include <vm/vpm.h>
+
+/*
+ * Needs to be enabled by each platform.
+ */
+int vpm_enable = 0;
+
+#ifdef SEGKPM_SUPPORT
+
+
+int vpm_cache_enable = 1;
+long vpm_cache_percent = 12;
+long vpm_cache_size;
+int vpm_nfreelist = 0;
+int vpmd_freemsk = 0;
+
+#define VPM_S_PAD 64
+union vpm_cpu {
+ struct {
+ int vcpu_free_ndx;
+ ulong_t vcpu_hits;
+ ulong_t vcpu_misses;
+ } vcpu;
+ char vpm_pad[VPM_S_PAD];
+};
+static union vpm_cpu *vpmd_cpu;
+
+#define vfree_ndx vcpu.vcpu_free_ndx
+
+int vpm_cachemode = VPMCACHE_LRU;
+
+#define PPMTX(pp) (&(pp)->p_ilock)
+
+static struct vpmap *vpmd_vpmap; /* list of vpmap structs preallocated */
+static struct vpmfree *vpmd_free;
+#define VPMAPMTX(vpm) (&vpm->vpm_mtx)
+#define VPMAP2VMF(vpm) (&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
+#define VPMAP2VMF_NDX(vpm) (ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
+#define VPMP(id) (&vpmd_vpmap[id - 1])
+#define VPMID(vpm) (uint_t)((vpm - vpmd_vpmap) + 1)
+
+
+#ifdef DEBUG
+
+struct vpm_debug {
+ int vpmd_steals;
+ int vpmd_contend;
+ int vpmd_prevpagelocked;
+ int vpmd_getpagefailed;
+ int vpmd_zerostart;
+ int vpmd_emptyfreelist;
+ int vpmd_nofreevpms;
+} vpm_debug;
+
+#define VPM_DEBUG(x) ((vpm_debug.x)++)
+
+int steals;
+int steals_mtbf = 7;
+int contend;
+int contend_mtbf = 127;
+
+#define VPM_MTBF(v, f) (((++(v)) & (f)) != (f))
+
+#else /* DEBUG */
+
+#define VPM_MTBF(v, f) (1)
+#define VPM_DEBUG(x) /* nothing */
+
+#endif
+
+/*
+ * The vpm cache.
+ *
+ * The main purpose of having a cache here is to speed up page_lookup()
+ * operations and also to provide LRU (the default) behaviour for file
+ * pages. The page_lookup() operation tends to be expensive if a page
+ * has to be reclaimed from the system page cache ("cachelist"). Once we
+ * speed up the page_lookup()->page_reclaim() path, there should be no
+ * need for this cache. The system page cache (cachelist) should
+ * effectively serve the purpose of caching file pages.
+ *
+ * This cache is very similar to segmap's smap cache. Each page in the
+ * cache is tracked by the structure vpmap_t. But unlike segmap, there
+ * is no hash table. The page_t has a reference to the vpmap_t when
+ * cached. For a given vnode and offset, the page is found by means of
+ * a page_lookup() operation. Any page which has a mapping (i.e., when
+ * cached) will not be in the system 'cachelist'. Hence page_lookup()
+ * will not have to do a page_reclaim(). That is how the cache serves
+ * to speed up page_lookup() operations.
+ *
+ * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
+ */
+
+void
+vpm_init()
+{
+ long npages;
+ struct vpmap *vpm;
+ struct vpmfree *vpmflp;
+ int i, ndx;
+ extern void prefetch_smap_w(void *);
+
+ if (!vpm_cache_enable) {
+ return;
+ }
+
+ /*
+ * Set the size of the cache.
+ */
+ vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
+ if (vpm_cache_size < VPMAP_MINCACHE) {
+ vpm_cache_size = VPMAP_MINCACHE;
+ }
+
+ /*
+ * Number of freelists.
+ */
+ if (vpm_nfreelist == 0) {
+ vpm_nfreelist = max_ncpus;
+ } else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
+ cmn_err(CE_WARN, "vpmap create: invalid number of freelists, "
+ "vpm_nfreelist %d, using %d", vpm_nfreelist, 2 * max_ncpus);
+ vpm_nfreelist = 2 * max_ncpus;
+ }
+
+ /*
+ * Round it up to the next power of 2
+ */
+ if (vpm_nfreelist & (vpm_nfreelist - 1)) {
+ vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
+ }
+ vpmd_freemsk = vpm_nfreelist - 1;
+
+ /*
+ * Use a per cpu rotor index to spread the allocations evenly
+ * across the available vpm freelists.
+ */
+ vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
+ ndx = 0;
+ for (i = 0; i < max_ncpus; i++) {
+
+ vpmd_cpu[i].vfree_ndx = ndx;
+ ndx = (ndx + 1) & vpmd_freemsk;
+ }
+
+ /*
+ * Allocate and initialize the freelist.
+ */
+ vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
+ KM_SLEEP);
+ for (i = 0; i < vpm_nfreelist; i++) {
+
+ vpmflp = &vpmd_free[i];
+ /*
+ * Set up initial queue pointers. They will get flipped
+ * back and forth.
+ */
+ vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
+ vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
+ }
+
+ npages = mmu_btop(vpm_cache_size);
+
+
+ /*
+ * Allocate and initialize the vpmap structs.
+ */
+ vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP);
+ for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) {
+ struct vpmfree *vpmflp;
+ union vpm_freeq *releq;
+ struct vpmap *vpmapf;
+
+ /*
+ * Use prefetch as we have to walk thru a large number of
+ * these data structures. We just use the smap's prefetch
+ * routine as it does the same. This should work fine
+ * for x64 (this needs to be modified when enabled on sparc).
+ */
+ prefetch_smap_w((void *)vpm);
+
+ vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);
+
+ vpmflp = VPMAP2VMF(vpm);
+ releq = vpmflp->vpm_releq;
+
+ vpmapf = releq->vpmq_free;
+ if (vpmapf == NULL) {
+ releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
+ } else {
+ vpm->vpm_next = vpmapf;
+ vpm->vpm_prev = vpmapf->vpm_prev;
+ vpmapf->vpm_prev = vpm;
+ vpm->vpm_prev->vpm_next = vpm;
+ releq->vpmq_free = vpm->vpm_next;
+ }
+
+ /*
+ * Indicate that the vpmap is on the releq at start
+ */
+ vpm->vpm_ndxflg = VPMRELEQ;
+ }
+}
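+
+/*
+ * A sizing example with illustrative numbers (not from the commit):
+ * with physmem = 1M 4K pages (4GB of memory) and the default
+ * vpm_cache_percent of 12, vpm_cache_size covers 125829 pages, so
+ * about 125K vpmap structs are preallocated above and spread
+ * round-robin across the max_ncpus freelists.
+ */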
+
+
+/*
+ * unhooks vpm from the freelist if it is still on the freelist.
+ */
+#define VPMAP_RMFREELIST(vpm) \
+ { \
+ if (vpm->vpm_next != NULL) { \
+ union vpm_freeq *freeq; \
+ struct vpmfree *vpmflp; \
+ vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
+ freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
+ mutex_enter(&freeq->vpmq_mtx); \
+ if (freeq->vpmq_free != vpm) { \
+ vpm->vpm_prev->vpm_next = vpm->vpm_next; \
+ vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
+ } else if (vpm == vpm->vpm_next) { \
+ freeq->vpmq_free = NULL; \
+ } else { \
+ freeq->vpmq_free = vpm->vpm_next; \
+ vpm->vpm_prev->vpm_next = vpm->vpm_next; \
+ vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
+ } \
+ mutex_exit(&freeq->vpmq_mtx); \
+ vpm->vpm_next = vpm->vpm_prev = NULL; \
+ } \
+ }
+
+static int
+get_freelndx(int mode)
+{
+ int ndx;
+
+ ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
+ switch (mode) {
+
+ case VPMCACHE_LRU:
+ default:
+ vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
+ break;
+ }
+ return (ndx);
+}
+
+
+/*
+ * Find one vpmap structure from the free lists and use it for the newpage.
+ * The previous page it cached is dissociated and released. The page_t's
+ * p_vpmref is cleared only when the vpm it points to is locked (or,
+ * on AMD64, when the page is exclusively locked in page_unload();
+ * that is because the p_vpmref is treated as a mapping).
+ *
+ * The page's p_vpmref is set when the page is
+ * locked (at least SHARED locked).
+ */
+static struct vpmap *
+get_free_vpmap(page_t *newpage)
+{
+ struct vpmfree *vpmflp;
+ kmutex_t *vmtx;
+ struct vpmap *vpm, *first;
+ union vpm_freeq *allocq, *releq;
+ page_t *pp = NULL;
+ int end_ndx, page_locked = 0;
+ int free_ndx;
+
+ /*
+ * get the freelist bin index.
+ */
+ free_ndx = get_freelndx(vpm_cachemode);
+
+ end_ndx = free_ndx;
+ vpmflp = &vpmd_free[free_ndx];
+
+retry_queue:
+ allocq = vpmflp->vpm_allocq;
+ mutex_enter(&allocq->vpmq_mtx);
+
+ if ((vpm = allocq->vpmq_free) == NULL) {
+
+skip_queue:
+ /*
+ * The alloc list is empty or this queue is being skipped;
+ * first see if the allocq toggled.
+ */
+ if (vpmflp->vpm_allocq != allocq) {
+ /* queue changed */
+ mutex_exit(&allocq->vpmq_mtx);
+ goto retry_queue;
+ }
+ releq = vpmflp->vpm_releq;
+ if (!mutex_tryenter(&releq->vpmq_mtx)) {
+ /* cannot get releq; a free vpmap may be there now */
+ mutex_exit(&allocq->vpmq_mtx);
+
+ /*
+ * This loop could spin forever if this thread has
+ * higher priority than the thread that is holding
+ * releq->vpmq_mtx. In order to force the other thread
+ * to run, we'll lock/unlock the mutex which is safe
+ * since we just unlocked the allocq mutex.
+ */
+ mutex_enter(&releq->vpmq_mtx);
+ mutex_exit(&releq->vpmq_mtx);
+ goto retry_queue;
+ }
+ if (releq->vpmq_free == NULL) {
+ VPM_DEBUG(vpmd_emptyfreelist);
+ /*
+ * This freelist is empty.
+ * This should not happen unless clients
+ * are failing to release the vpmap after
+ * accessing the data. Before resorting
+ * to sleeping, try the next list of the same color.
+ */
+ free_ndx = (free_ndx + 1) & vpmd_freemsk;
+ if (free_ndx != end_ndx) {
+ mutex_exit(&releq->vpmq_mtx);
+ mutex_exit(&allocq->vpmq_mtx);
+ vpmflp = &vpmd_free[free_ndx];
+ goto retry_queue;
+ }
+ /*
+ * Tried all freelists.
+ * wait on this list and hope something gets freed.
+ */
+ vpmflp->vpm_want++;
+ mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
+ cv_wait(&vpmflp->vpm_free_cv,
+ &vpmflp->vpm_freeq[0].vpmq_mtx);
+ vpmflp->vpm_want--;
+ mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
+ vpmflp = &vpmd_free[free_ndx];
+ VPM_DEBUG(vpmd_nofreevpms);
+ goto retry_queue;
+ } else {
+ /*
+ * Something on the rele queue; flip the alloc
+ * and rele queues and retry.
+ */
+ vpmflp->vpm_allocq = releq;
+ vpmflp->vpm_releq = allocq;
+ mutex_exit(&allocq->vpmq_mtx);
+ mutex_exit(&releq->vpmq_mtx);
+ if (page_locked) {
+ delay(hz >> 2);
+ page_locked = 0;
+ }
+ goto retry_queue;
+ }
+ } else {
+ int gotnewvpm;
+ kmutex_t *pmtx;
+ uint_t vpmref;
+
+ /*
+ * Fastpath the case we get the vpmap mutex
+ * on the first try.
+ */
+ first = vpm;
+next_vpmap:
+ vmtx = VPMAPMTX(vpm);
+ if (!mutex_tryenter(vmtx)) {
+ /*
+ * Another thread is trying to reclaim this slot.
+ * Skip to the next queue or vpmap.
+ */
+ if ((vpm = vpm->vpm_next) == first) {
+ goto skip_queue;
+ } else {
+ goto next_vpmap;
+ }
+ }
+
+ /*
+ * Assign this vpm to the newpage.
+ */
+ pmtx = PPMTX(newpage);
+ gotnewvpm = 0;
+ mutex_enter(pmtx);
+
+ /*
+ * Check if some other thread already assigned a vpm to
+ * this page.
+ */
+ if ((vpmref = newpage->p_vpmref) == 0) {
+ newpage->p_vpmref = VPMID(vpm);
+ gotnewvpm = 1;
+ } else {
+ VPM_DEBUG(vpmd_contend);
+ mutex_exit(vmtx);
+ }
+ mutex_exit(pmtx);
+
+ if (gotnewvpm) {
+
+ /*
+ * At this point, we've selected the vpm. Remove vpm
+ * from its freelist. If vpm is the first one in
+ * the freelist, update the head of the freelist.
+ */
+ if (first == vpm) {
+ ASSERT(first == allocq->vpmq_free);
+ allocq->vpmq_free = vpm->vpm_next;
+ }
+
+ /*
+ * If the head of the freelist still points to vpm,
+ * then there are no more free vpmaps in that list.
+ */
+ if (allocq->vpmq_free == vpm)
+ /*
+ * Took the last one
+ */
+ allocq->vpmq_free = NULL;
+ else {
+ vpm->vpm_prev->vpm_next = vpm->vpm_next;
+ vpm->vpm_next->vpm_prev = vpm->vpm_prev;
+ }
+ mutex_exit(&allocq->vpmq_mtx);
+ vpm->vpm_prev = vpm->vpm_next = NULL;
+
+ /*
+ * Disassociate the previous page. On x64 systems
+ * p_vpmref is used as a mapping reference to the page.
+ */
+ if ((pp = vpm->vpm_pp) != NULL &&
+ vpm->vpm_vp == pp->p_vnode &&
+ vpm->vpm_off == pp->p_offset) {
+
+ pmtx = PPMTX(pp);
+ if (page_trylock(pp, SE_SHARED)) {
+ /*
+ * Now verify that it is the correct
+ * page. If not someone else stole it,
+ * so just unlock it and leave.
+ */
+ mutex_enter(pmtx);
+ if (PP_ISFREE(pp) ||
+ vpm->vpm_vp != pp->p_vnode ||
+ vpm->vpm_off != pp->p_offset ||
+ pp->p_vpmref != VPMID(vpm)) {
+ mutex_exit(pmtx);
+
+ page_unlock(pp);
+ } else {
+ /*
+ * Release the page.
+ */
+ pp->p_vpmref = 0;
+ mutex_exit(pmtx);
+ hat_kpm_mapout(pp, 0,
+ hat_kpm_page2va(pp, 1));
+ (void) page_release(pp, 1);
+ }
+ } else {
+ /*
+ * If the page cannot be locked, just
+ * clear the p_vpmref and go.
+ */
+ mutex_enter(pmtx);
+ if (pp->p_vpmref == VPMID(vpm)) {
+ pp->p_vpmref = 0;
+ }
+ mutex_exit(pmtx);
+ VPM_DEBUG(vpmd_prevpagelocked);
+ }
+ }
+
+ /*
+ * Setup vpm to point to the new page.
+ */
+ vpm->vpm_pp = newpage;
+ vpm->vpm_vp = newpage->p_vnode;
+ vpm->vpm_off = newpage->p_offset;
+
+ } else {
+ int steal = !VPM_MTBF(steals, steals_mtbf);
+ /*
+ * Page already has a vpm assigned just use that.
+ * Grab the vpm mutex and verify that it is still
+ * the correct one. The pp->p_vpmref should not change
+ * once we have the vpm mutex and the page lock.
+ */
+ mutex_exit(&allocq->vpmq_mtx);
+ vpm = VPMP(vpmref);
+ vmtx = VPMAPMTX(vpm);
+ mutex_enter(vmtx);
+ if ((steal && vpm->vpm_refcnt == 0) ||
+ vpm->vpm_pp != newpage) {
+ /*
+ * The vpm got stolen, retry.
+ * clear the p_vpmref.
+ */
+ pmtx = PPMTX(newpage);
+ mutex_enter(pmtx);
+ if (newpage->p_vpmref == vpmref) {
+ newpage->p_vpmref = 0;
+ }
+ mutex_exit(pmtx);
+
+ mutex_exit(vmtx);
+ VPM_DEBUG(vpmd_steals);
+ goto retry_queue;
+ } else if (vpm->vpm_refcnt == 0) {
+ /*
+ * Remove it from the free list if it
+ * exists there.
+ */
+ VPMAP_RMFREELIST(vpm);
+ }
+ }
+ return (vpm);
+ }
+}
+
+static void
+free_vpmap(struct vpmap *vpm)
+{
+ struct vpmfree *vpmflp;
+ struct vpmap *vpmfreelist;
+ union vpm_freeq *releq;
+
+ ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));
+
+ if (vpm->vpm_refcnt != 0) {
+ panic("free_vpmap");
+ /*NOTREACHED*/
+ }
+
+ vpmflp = &vpmd_free[vpm->vpm_free_ndx];
+ /*
+ * Add to the tail of the release queue
+ * Note that vpm_releq and vpm_allocq could toggle
+ * before we get the lock. This does not affect
+ * correctness as the 2 queues are only maintained
+ * to reduce lock pressure.
+ */
+ releq = vpmflp->vpm_releq;
+ if (releq == &vpmflp->vpm_freeq[0]) {
+ vpm->vpm_ndxflg = 0;
+ } else {
+ vpm->vpm_ndxflg = 1;
+ }
+ mutex_enter(&releq->vpmq_mtx);
+ vpmfreelist = releq->vpmq_free;
+ if (vpmfreelist == 0) {
+ int want;
+
+ releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
+ /*
+ * Both queue mutexes are held to set vpm_want;
+ * snapshot the value before dropping releq mutex.
+ * If vpm_want appears after the releq mutex is dropped,
+ * then the vpmap just freed is already gone.
+ */
+ want = vpmflp->vpm_want;
+ mutex_exit(&releq->vpmq_mtx);
+ /*
+ * See if there was a waiter before dropping the releq mutex
+ * then recheck after obtaining vpm_freeq[0] mutex as
+ * another thread may have already signaled.
+ */
+ if (want) {
+ mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
+ if (vpmflp->vpm_want)
+ cv_signal(&vpmflp->vpm_free_cv);
+ mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
+ }
+ } else {
+ vpm->vpm_next = vpmfreelist;
+ vpm->vpm_prev = vpmfreelist->vpm_prev;
+ vpmfreelist->vpm_prev = vpm;
+ vpm->vpm_prev->vpm_next = vpm;
+ mutex_exit(&releq->vpmq_mtx);
+ }
+}
+
+/*
+ * Get the vpmap for the page.
+ * The refcnt of this vpm is incremented.
+ */
+static struct vpmap *
+get_vpmap(page_t *pp)
+{
+ struct vpmap *vpm = NULL;
+ kmutex_t *vmtx;
+ kmutex_t *pmtx;
+ unsigned int refid;
+
+ ASSERT((pp != NULL) && PAGE_LOCKED(pp));
+
+ if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
+ vpm = VPMP(refid);
+ vmtx = VPMAPMTX(vpm);
+ mutex_enter(vmtx);
+ /*
+ * Since we have the page lock and the vpm mutex, the
+ * pp->p_vpmref cannot change.
+ */
+ if (vpm->vpm_pp != pp) {
+ pmtx = PPMTX(pp);
+
+ /*
+ * Clear the p_vpmref as it is incorrect.
+ * This can happen if the page was stolen.
+ * On x64 this should not happen as p_vpmref
+ * is treated as a mapping on the page. So
+ * if the page is stolen, the mapping would have
+ * been cleared in page_unload().
+ */
+ mutex_enter(pmtx);
+ if (pp->p_vpmref == refid)
+ pp->p_vpmref = 0;
+ mutex_exit(pmtx);
+
+ mutex_exit(vmtx);
+ vpm = NULL;
+ } else if (vpm->vpm_refcnt == 0) {
+ /*
+ * Got the vpm, remove it from the free
+ * list if it exists there.
+ */
+ VPMAP_RMFREELIST(vpm);
+ }
+ }
+ if (vpm == NULL) {
+ /*
+ * get_free_vpmap() returns with the vpmap mutex held.
+ */
+ vpm = get_free_vpmap(pp);
+ vmtx = VPMAPMTX(vpm);
+ vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
+ } else {
+ vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
+ }
+
+ vpm->vpm_refcnt++;
+ mutex_exit(vmtx);
+
+ return (vpm);
+}
+
+/* END --- vpm cache ---- */
+
+/*
+ * The vnode page mapping(vpm) interface routines.
+ */
+
+/*
+ * Find or create the pages starting from baseoff for the specified
+ * length 'len'.
+ */
+static int
+vpm_pagecreate(
+ struct vnode *vp,
+ u_offset_t baseoff,
+ size_t len,
+ vmap_t vml[],
+ int nseg,
+ int *newpage)
+{
+
+ page_t *pp = NULL;
+ caddr_t base;
+ u_offset_t off = baseoff;
+ int i;
+ ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS);
+
+ for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++) {
+ struct vpmap *vpm;
+
+
+ if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
+
+ base = segkpm_create_va(off);
+
+ /*
+ * The seg pointer passed in is just advisory. Just
+ * pass segkmap for now, like segmap does with
+ * segmap_kpm enabled.
+ */
+ if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
+ segkmap, base)) == NULL) {
+ panic("segmap_pagecreate_vpm: "
+ "page_create failed");
+ /*NOTREACHED*/
+ }
+ if (newpage != NULL)
+ *newpage = 1;
+
+ page_io_unlock(pp);
+ }
+
+ /*
+ * Get the vpm for this page_t.
+ */
+ if (vpm_cache_enable) {
+ vpm = get_vpmap(pp);
+ vml[i].vs_data = (void *)&vpm->vpm_pp;
+ } else {
+ vml[i].vs_data = (void *)pp;
+ pp->p_vpmref = 0;
+ }
+
+ vml[i].vs_addr = hat_kpm_mapin(pp, 0);
+ vml[i].vs_len = PAGESIZE;
+
+ off += PAGESIZE;
+ }
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = (caddr_t)NULL;
+ return (0);
+}
+
+
+/*
+ * Returns vpm mappings of pages in the range [off, off+len], where
+ * len is rounded up to the PAGESIZE boundary. The list of pages and
+ * the page addresses are returned in the SGL vml (vmap_t) array passed in.
+ * The nseg is the number of vmap_t entries in the array.
+ *
+ * Currently the max len allowed is MAXBSIZE; therefore it will
+ * either fetch/create one or two pages depending on the PAGESIZE.
+ *
+ * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
+ * For such cases, use the seg_map interfaces.
+ */
+int
+vpm_map_pages(
+ struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ int fetchpage,
+ vmap_t *vml,
+ int nseg,
+ int *newpage,
+ enum seg_rw rw)
+{
+ extern struct vnode *common_specvp();
+ u_offset_t baseoff;
+ uint_t prot;
+ caddr_t base;
+ page_t *pp, *pplist[MAXVMAPS];
+ struct vpmap *vpm;
+ int i, error = 0;
+
+ ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS);
+ baseoff = off & (offset_t)PAGEMASK;
+ vml[0].vs_data = NULL;
+ vml[0].vs_addr = (caddr_t)NULL;
+ /*
+ * For now, let's restrict it to MAXBSIZE. XXX - We can allow
+ * len longer than MAXBSIZE, but there should be a limit
+ * which should be determined by how many pages the VOP_GETPAGE()
+ * can fetch.
+ */
+ if (off + len > baseoff + MAXBSIZE) {
+ panic("vpm_map_pages bad len");
+ /*NOTREACHED*/
+ }
+
+ /*
+ * If this is a block device we have to be sure to use the
+ * "common" block device vnode for the mapping.
+ */
+ if (vp->v_type == VBLK)
+ vp = common_specvp(vp);
+
+
+ if (!fetchpage)
+ return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));
+
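+	/*
+	 * The loop increment expression below pre-NULLs the next
+	 * pplist slot, so the cleanup and mapping loops that follow
+	 * can stop at the first NULL entry.
+	 */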
+ for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++,
+ pplist[i] = NULL) {
+
+ pp = page_lookup(vp, baseoff, SE_SHARED);
+
+ /*
+ * If we did not find the page or if this page was not
+ * in our cache, then let VOP_GETPAGE get all the pages.
+		 * We need to call VOP_GETPAGE so that filesystems can do
+		 * any (un)necessary tracking for sequential access.
+ */
+
+ if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
+ (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
+ != (P_MOD | P_REF))) {
+ if (pp != NULL) {
+ page_unlock(pp);
+ }
+
+ /*
+			 * Pass a dummy address, as it will be required
+			 * by page_create_va(). We pass segkmap as the seg
+			 * because some file systems (e.g. UFS) check it.
+ */
+ base = segkpm_create_va(baseoff);
+
+ error = VOP_GETPAGE(vp, baseoff, len, &prot, &pplist[i],
+ roundup(len, PAGESIZE), segkmap, base, rw, CRED());
+ if (error) {
+ VPM_DEBUG(vpmd_getpagefailed);
+ pplist[i] = NULL;
+ }
+ break;
+ } else {
+ pplist[i] = pp;
+ baseoff += PAGESIZE;
+ }
+ }
+
+ if (error) {
+ for (i = 0; pplist[i] != NULL; i++) {
+ page_unlock(pplist[i]);
+ pplist[i] = NULL;
+ }
+ vml[0].vs_addr = NULL;
+ vml[0].vs_data = NULL;
+ return (FC_MAKE_ERR(error));
+ }
+
+ /*
+ * Get the vpm's for pages.
+ */
+ for (i = 0; pplist[i] != NULL; i++) {
+ if (vpm_cache_enable) {
+ vpm = get_vpmap(pplist[i]);
+ vml[i].vs_data = (void *)&(vpm->vpm_pp);
+ } else {
+ vml[i].vs_data = (void *)pplist[i];
+ pplist[i]->p_vpmref = 0;
+ }
+
+ vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
+ vml[i].vs_len = PAGESIZE;
+ }
+
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = (caddr_t)NULL;
+
+ return (0);
+}
+
+/*
+ * Release the vpm mappings on the pages and unlock them.
+ */
+void
+vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
+{
+ int i;
+ struct vpmap *vpm;
+ kmutex_t *mtx;
+ page_t *pp;
+
+ for (i = 0; vml[i].vs_data != NULL; i++) {
+ ASSERT(IS_KPM_ADDR(vml[i].vs_addr));
+
+ if (vpm_cache_enable) {
+ pp = *(((page_t **)vml[i].vs_data));
+ } else {
+ pp = (page_t *)vml[i].vs_data;
+ }
+
+ /*
+		 * Mark the page as modified or referenced, because vpm pages
+		 * do not take the faults that would normally set these bits.
+ */
+ if (rw == S_WRITE) {
+ hat_setrefmod(pp);
+ } else {
+ ASSERT(rw == S_READ);
+ hat_setref(pp);
+ }
+
+ if (vpm_cache_enable) {
+ page_unlock(pp);
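+			/*
+			 * vs_data points at the embedded vpm_pp member;
+			 * back up to the containing vpmap structure.
+			 */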
+ vpm = (struct vpmap *)((char *)vml[i].vs_data
+ - offsetof(struct vpmap, vpm_pp));
+ mtx = VPMAPMTX(vpm);
+ mutex_enter(mtx);
+
+ if (--vpm->vpm_refcnt == 0) {
+ free_vpmap(vpm);
+ }
+ mutex_exit(mtx);
+ } else {
+ hat_kpm_mapout(pp, 0, vml[i].vs_addr);
+ (void) page_release(pp, 1);
+ }
+ vml[i].vs_data = NULL;
+ vml[i].vs_addr = NULL;
+ }
+}
+
+/*
+ * Given the vp, off and the uio structure, this routine does the
+ * copy (uiomove). If the last page created is partially written,
+ * the rest of the page is zeroed out. It also zeros the beginning of
+ * the first page up to the start offset if requested (zerostart).
+ * If pages are to be fetched, it will call the filesystem's getpage
+ * function (VOP_GETPAGE) to get them, otherwise they will be created if
+ * not already present in the page cache.
+ */
+int
+vpm_data_copy(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ struct uio *uio,
+ int fetchpage,
+ int *newpage,
+ int zerostart,
+ enum seg_rw rw)
+{
+ int error;
+ struct vmap vml[MINVMAPS];
+ enum uio_rw uiorw;
+ int npages = 0;
+
+ uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
+ /*
+ * 'off' will be the offset where the I/O starts.
+ * We get the pages starting at the (off & PAGEMASK)
+ * page boundary.
+ */
+ error = vpm_map_pages(vp, off, (uint_t)len,
+ fetchpage, vml, MINVMAPS, &npages, rw);
+
+ if (newpage != NULL)
+ *newpage = npages;
+ if (!error) {
+ int i, pn, slen = len;
+ int pon = off & PAGEOFFSET;
+
+ /*
+ * Clear from the beginning of the page to start offset
+ * if requested.
+ */
+ if (!fetchpage && zerostart) {
+ (void) kzero(vml[0].vs_addr, (uint_t)pon);
+ VPM_DEBUG(vpmd_zerostart);
+ }
+
+ for (i = 0; !error && slen > 0 &&
+ vml[i].vs_addr != NULL; i++) {
+ pn = (int)MIN(slen, (PAGESIZE - pon));
+ error = uiomove(vml[i].vs_addr + pon,
+ (long)pn, uiorw, uio);
+ slen -= pn;
+ pon = 0;
+ }
+
+ /*
+ * When new pages are created, zero out part of the
+ * page we did not copy to.
+ */
+ if (!fetchpage && npages &&
+ uio->uio_loffset < roundup(off + len, PAGESIZE)) {
+ int nzero;
+
+ pon = (uio->uio_loffset & PAGEOFFSET);
+ nzero = PAGESIZE - pon;
+ i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
+ (void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
+ }
+ vpm_unmap_pages(vml, rw);
+ }
+ return (error);
+}
+
+/*
+ * Called to flush the pages of the given vnode covering the
+ * range [off, off + len).
+ */
+int
+vpm_sync_pages(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ uint_t flags)
+{
+ extern struct vnode *common_specvp();
+ int bflags = 0;
+ int error = 0;
+ size_t psize = roundup(len, PAGESIZE);
+
+ /*
+ * If this is a block device we have to be sure to use the
+ * "common" block device vnode for the mapping.
+ */
+ if (vp->v_type == VBLK)
+ vp = common_specvp(vp);
+
+ if ((flags & ~SM_DONTNEED) != 0) {
+ if (flags & SM_ASYNC)
+ bflags |= B_ASYNC;
+ if (flags & SM_INVAL)
+ bflags |= B_INVAL;
+ if (flags & SM_DESTROY)
+ bflags |= (B_INVAL|B_TRUNC);
+ if (flags & SM_FREE)
+ bflags |= B_FREE;
+ if (flags & SM_DONTNEED)
+ bflags |= B_DONTNEED;
+
+ error = VOP_PUTPAGE(vp, off, psize, bflags, CRED());
+ }
+
+ return (error);
+}
+
+
+#else /* SEGKPM_SUPPORT */
+
+/* vpm stubs */
+void
+vpm_init()
+{
+}
+
+/*ARGSUSED*/
+int
+vpm_pagecreate(
+ struct vnode *vp,
+ u_offset_t baseoff,
+ size_t len,
+ vmap_t vml[],
+ int nseg,
+ int *newpage)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+vpm_map_pages(
+ struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ int fetchpage,
+ vmap_t vml[],
+ int nseg,
+ int *newpage,
+ enum seg_rw rw)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+vpm_data_copy(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ struct uio *uio,
+ int fetchpage,
+ int *newpage,
+ int zerostart,
+ enum seg_rw rw)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
+{
+}
+
+/*ARGSUSED*/
+int
+vpm_sync_pages(struct vnode *vp,
+ u_offset_t off,
+ size_t len,
+ uint_t flags)
+{
+ return (0);
+}
+#endif /* SEGKPM_SUPPORT */
diff --git a/usr/src/uts/common/vm/vpm.h b/usr/src/uts/common/vm/vpm.h
new file mode 100644
index 0000000000..6d9c53b009
--- /dev/null
+++ b/usr/src/uts/common/vm/vpm.h
@@ -0,0 +1,286 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VM_VPM_H
+#define _VM_VPM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The vnode page mappings (VPM) interfaces.
+ * "Commitment level - Consolidation private". They are subject
+ * to change without notice. Use them at your own risk.
+ *
+ * At this stage these interfaces are provided only to utilize the
+ * segkpm mappings and are enabled for Solaris x64. Therefore these
+ * interfaces have to be used under the 'vpm_enable' check, as an
+ * alternative to the segmap interfaces where applicable.
+ *
+ * The VPM interfaces provide temporary mappings to file pages. They
+ * return the mappings in a scatter gather list (SGL).
+ * The SGL elements are of type 'vmap_t'.
+ *
+ * typedef struct vmap {
+ * caddr_t vs_addr; / public /
+ * size_t vs_len; / public - Currently not used /
+ * void *vs_data; / opaque - private data /
+ * } vmap_t;
+ *
+ * An array of this structure has to be passed to the interface routines,
+ * along with the size (# of elements) of the SGL array. Depending on the
+ * requested length and the mapped chunk sizes (PAGESIZE here), the number
+ * of valid mappings returned can be less than the actual size of the SGL
+ * array. The SGL always contains an element with 'vs_addr' set to NULL,
+ * which marks the end of the valid entries in the SGL.
+ *
+ * The vmap_t structure members are populated with the mapped address
+ * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
+ * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
+ * and the caller should not access or modify it.
+ *
+ * Using a scatter gather list to return the mappings and lengths makes it
+ * possible to provide mappings of variable length. Currently only a mapping
+ * length of 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap
+ * interfaces, a maximum length of 'MAXBSIZE' is supported per request for
+ * now. The MAXBSIZE mappings are returned in 1 or 2 vmap_t elements
+ * of the SGL, depending on the PAGESIZE. The scatter gather list array size
+ * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
+ * The MAXBSIZE restriction exists because the filesystems cannot handle
+ * more (disk block allocations at a time) for now.
+ *
+ *
+ * Interfaces:
+ *
+ * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
+ * int fetchpage, vmap_t *vml, int vmlsz,
+ * int *newpagecreated, enum seg_rw rw);
+ *
+ * This function returns mappings to vnode pages.
+ *
+ * It takes a vnode, offset and length and returns mappings to the pages
+ * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
+ * Currently these interfaces are subject to restrictions similar to the
+ * segmap interfaces. The length passed in should satisfy the following
+ * criterion: '(off + len) <= ((off & PAGEMASK) + MAXBSIZE)'.
+ * The mapped addresses returned in 'vs_addr' are page aligned.
+ *
+ * The 'vmlsz' is the size(# elements) of the 'vml' array.
+ *
+ * When the 'fetchpage' flag is set, the vnode (file) pages will be fetched
+ * (via VOP_GETPAGE) from the backing store (disk) if they are not found in
+ * the system page cache. If 'fetchpage == 0', the vnode (file) pages for the
+ * given offset will simply be created if they are not already present in the
+ * system page cache. The 'newpagecreated' flag is set on return if new pages
+ * are created when 'fetchpage == 0' (i.e., the request was to create them).
+ *
+ * The 'seg_rw rw' indicates the intended operation on these mappings
+ * (S_WRITE or S_READ).
+ *
+ * Currently these interfaces only return segkpm mappings. Therefore the
+ * vnode pages that are being accessed will be locked (at least SHARED locked)
+ * for as long as these mappings are in use. After use, the unmap
+ * function, vpm_unmap_pages(), has to be called and the same SGL array
+ * needs to be passed to the unmap function.
+ *
+ *
+ * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
+ *
+ * This function unmaps the pages that were mapped by vpm_map_pages().
+ * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
+ *
+ *
+ * ex:
+ * To copy file data of vnode (file) 'vp' at offset 'off' to a kernel buffer
+ * 'buf', the following code snippet shows how to use the above two interfaces.
+ * Here the copy length runs to the MAXBSIZE boundary. This code can be
+ * executed repeatedly, in a loop, to copy more than MAXBSIZE of data.
+ *
+ * vmap_t vml[MINVMAPS];
+ * int err, i, newpage, len;
+ * int pon;
+ *
+ * pon = (off & PAGEOFFSET);
+ * len = MAXBSIZE - pon;
+ *
+ * if (vpm_enable) {
+ * err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
+ * &newpage, S_WRITE);
+ *
+ * if (err)
+ * return;
+ *
+ *	for (i = 0; vml[i].vs_addr != NULL; i++) {
+ *		bcopy(buf, vml[i].vs_addr + pon,
+ *		    PAGESIZE - pon);
+ *		buf += (PAGESIZE - pon);
+ *		pon = 0;
+ *	}
+ *
+ *	/ Zero the uncopied start of the first (new) page. /
+ *	if (newpage) {
+ *		pon = (off & PAGEOFFSET);
+ *		bzero(vml[0].vs_addr, pon);
+ *	}
+ *
+ * vpm_unmap_pages(vml, S_WRITE);
+ * }
+ *
+ *
+ * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
+ * struct uio *uio, int fetchpage, int *newpagecreated,
+ * int zerostart, enum seg_rw rw);
+ *
+ * This function can be called if the need is to just transfer data to/from
+ * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to
+ * do the data transfer. It can be used in the context of read and write
+ * system calls to transfer data between a user buffer, which is specified
+ * in the uio structure, and the vnode pages. If the data needs to be
+ * transferred between a kernel buffer and the pages, like in the above
+ * example, a uio structure can be set up accordingly and passed. The 'rw'
+ * parameter will determine the direction of the data transfer.
+ *
+ * The 'fetchpage' and 'newpagecreated' arguments are the same as explained
+ * above. The 'zerostart' flag, when set, zero-fills the first page from its
+ * beginning up to the offset 'off', i.e. from 'off & PAGEMASK' to 'off'.
+ * Here too the MAXBSIZE restriction mentioned above applies to the length
+ * requested.
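+ *
+ * ex:
+ * A minimal read-path sketch (an illustration, not taken from an actual
+ * caller; 'uiop' and the copy size 'n' are assumed to be set up by the
+ * caller, with 'n' not crossing the MAXBSIZE boundary of 'off'):
+ *
+ *	if (vpm_enable) {
+ *		error = vpm_data_copy(vp, off, n, uiop,
+ *				1, NULL, 0, S_READ);
+ *	}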
+ *
+ *
+ * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
+ * size_t len, uint_t flags)
+ *
+ * This function can be called to flush or sync the vnode (file) pages that
+ * have been accessed. It will call VOP_PUTPAGE().
+ *
+ * For the given vnode, off and len, the pages covering the range
+ * [off, off + len) are flushed. Currently it uses the same flags that
+ * are used with the segmap_release() interface. Refer to vm/seg_map.h.
+ * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
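+ *
+ * ex:
+ * A sketch (assumed usage, mirroring the segmap_release() flags) of
+ * asynchronously flushing a range that was just written:
+ *
+ *	if (vpm_enable) {
+ *		error = vpm_sync_pages(vp, off, len, SM_ASYNC);
+ *	}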
+ *
+ */
+
+
+/*
+ * vpm cache related definitions.
+ */
+#define VPMAP_MINCACHE (64 * 1024 * 1024)
+
+/*
+ * vpm caching mode
+ */
+#define VPMCACHE_LRU 0
+#define VPMCACHE_RANDOM 1
+/*
+ * Data structures to manage the cache of pages referenced by
+ * the vpm interfaces. There is one vpmap struct per page in the cache.
+ */
+struct vpmap {
+ kmutex_t vpm_mtx; /* protects non list fields */
+ struct vnode *vpm_vp; /* pointer to vnode of cached page */
+ struct vpmap *vpm_next; /* free list pointers */
+ struct vpmap *vpm_prev;
+ u_offset_t vpm_off; /* offset of the page */
+ page_t *vpm_pp; /* page pointer */
+	ushort_t vpm_refcnt;	/* number of active references */
+ ushort_t vpm_ndxflg; /* indicates which queue */
+ ushort_t vpm_free_ndx; /* freelist it belongs to */
+};
+
+/*
+ * Multiple vpmap free lists are maintained so that allocations
+ * scale with cpu count. To further reduce contention between
+ * allocations and deallocations, each list is made up of two queues.
+ */
+#define VPM_FREEQ_PAD 64
+union vpm_freeq {
+ struct {
+ struct vpmap *vpmsq_free;
+ kmutex_t vpmsq_mtx;
+ } vpmfq;
+ char vpmq_pad[VPM_FREEQ_PAD];
+};
+
+#define vpmq_free vpmfq.vpmsq_free
+#define vpmq_mtx vpmfq.vpmsq_mtx
+
+struct vpmfree {
+ union vpm_freeq vpm_freeq[2]; /* alloc and release queue */
+ union vpm_freeq *vpm_allocq; /* current alloc queue */
+ union vpm_freeq *vpm_releq; /* current release queue */
+ kcondvar_t vpm_free_cv;
+ ushort_t vpm_want;
+};
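+
+/*
+ * Allocations are intended to be served from the current alloc queue
+ * while frees are queued on the current release queue; when the alloc
+ * queue runs dry the two are swapped (see get_free_vpmap() and
+ * free_vpmap() in vpm.c for the authoritative logic), so allocating
+ * and releasing cpus mostly take different locks.
+ */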
+
+#define VPMALLOCQ 0
+#define VPMRELEQ 1
+
+/*
+ * VPM Interface definitions.
+ */
+
+/*
+ * This structure is the scatter gather list element. The page
+ * mappings will be returned in this structure. A pointer to an
+ * array of this structure is passed to the interface routines.
+ */
+typedef struct vmap {
+ caddr_t vs_addr; /* mapped address */
+ size_t vs_len; /* length, currently fixed at PAGESIZE */
+ void *vs_data; /* opaque - private data */
+} vmap_t;
+
+/*
+ * The minimum and maximum number of array elements in the scatter
+ * gather list.
+ */
+#define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1): min # of mappings */
+#define	MAXVMAPS   10		/* max # of elements in the SGL array */
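+
+/*
+ * For example, with the common MAXBSIZE of 8192 and 4K pages, a
+ * request maps at most two pages, and one more array element holds
+ * the NULL terminator: 8192/4096 + 1 == 3 == MINVMAPS.
+ */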
+
+#ifdef _KERNEL
+
+extern int vpm_enable;
+/*
+ * vpm page mapping operations.
+ */
+extern void vpm_init(void);
+extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
+ vmap_t *, int, int *, enum seg_rw);
+
+extern void vpm_unmap_pages(vmap_t *, enum seg_rw);
+extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
+extern int vpm_data_copy(struct vnode *, u_offset_t, size_t,
+ struct uio *, int, int *, int, enum seg_rw rw);
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VM_VPM_H */
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index b58cad94f8..8ac9c6ffec 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -1528,7 +1528,7 @@ startup_vm(void)
* between kernelbase and the beginning of segkpm.
*/
kpm_vbase = final_kernelheap + KERNEL_REDZONE_SIZE;
- kpm_size = mmu_ptob(physmax);
+ kpm_size = mmu_ptob(physmax + 1);
PRM_DEBUG(kpm_vbase);
PRM_DEBUG(kpm_size);
final_kernelheap =
@@ -1763,6 +1763,7 @@ startup_vm(void)
if (kpm_desired) {
kpm_init();
kpm_enable = 1;
+ vpm_enable = 1;
}
/*
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 89fc15e20e..4c06279917 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -3124,6 +3124,7 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
return (hm);
}
+extern int vpm_enable;
/*
* Unload all translations to a page. If the page is a subpage of a large
* page, the large page mappings are also removed.
@@ -3142,6 +3143,14 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
uint_t entry;
level_t level;
+#if defined(__amd64)
+ /*
+	 * Clear the vpm ref, so a stale cached mapping is not
+	 * reused if this page is stolen.
+ */
+ if (vpm_enable) {
+ pp->p_vpmref = 0;
+ }
+#endif
/*
* The loop with next_size handles pages with multiple pagesize mappings
*/
@@ -3488,6 +3497,11 @@ hat_page_getshare(page_t *pp)
{
uint_t cnt;
cnt = hment_mapcnt(pp);
+#if defined(__amd64)
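+	/* An active vpm reference counts as one more mapping. */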
+ if (vpm_enable && pp->p_vpmref) {
+ cnt += 1;
+ }
+#endif
return (cnt);
}