summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author	Donghai Qiao <Donghai.Qiao@Sun.COM>	2008-11-10 13:00:51 -0800
committer	Donghai Qiao <Donghai.Qiao@Sun.COM>	2008-11-10 13:00:51 -0800
commit	6f5f1c638c7bce3a35e88526a88fc78bdfd58ffe (patch)
tree	c54fb15fae9f79d8362c50bac205f3731737fbed
parent	f14f3ae7296249c648ea8c6330e49a8b587c8c7b (diff)
download	illumos-gate-6f5f1c638c7bce3a35e88526a88fc78bdfd58ffe.tar.gz
1246893 mmap and write to the same file deadlocks.
-rw-r--r--	usr/src/uts/common/fs/cachefs/cachefs_vnops.c	12
-rw-r--r--	usr/src/uts/common/fs/nfs/nfs3_vnops.c	8
-rw-r--r--	usr/src/uts/common/fs/nfs/nfs4_vnops.c	8
-rw-r--r--	usr/src/uts/common/fs/nfs/nfs_vnops.c	8
-rw-r--r--	usr/src/uts/common/fs/pcfs/pc_vnops.c	10
-rw-r--r--	usr/src/uts/common/fs/specfs/specvnops.c	12
-rw-r--r--	usr/src/uts/common/fs/tmpfs/tmp_vnops.c	8
-rw-r--r--	usr/src/uts/common/fs/udfs/udf_vnops.c	190
-rw-r--r--	usr/src/uts/common/fs/ufs/ufs_trans.c	71
-rw-r--r--	usr/src/uts/common/fs/ufs/ufs_vnops.c	9
-rw-r--r--	usr/src/uts/common/fs/zfs/zfs_vnops.c	65
-rw-r--r--	usr/src/uts/common/os/move.c	67
-rw-r--r--	usr/src/uts/common/sys/uio.h	1
13 files changed, 241 insertions(+), 228 deletions(-)
diff --git a/usr/src/uts/common/fs/cachefs/cachefs_vnops.c b/usr/src/uts/common/fs/cachefs/cachefs_vnops.c
index 1a3d122eb7..5f5005d565 100644
--- a/usr/src/uts/common/fs/cachefs/cachefs_vnops.c
+++ b/usr/src/uts/common/fs/cachefs/cachefs_vnops.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -987,6 +985,16 @@ cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
n = MAXBSIZE - on;
if (n > uiop->uio_resid)
n = (int)uiop->uio_resid;
+
+ /*
+ * Touch the page and fault it in if it is not in
+ * core before segmap_getmapflt can lock it. This
+ * is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to
+ * write to.
+ */
+ uio_prefaultpages((long)n, uiop);
+
base = segmap_getmap(segkmap, vp, off);
error = cachefs_writepage(vp, (base + on), n, uiop);
if (error == 0) {
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
index 84620044a7..b12cff73a8 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
@@ -802,6 +802,14 @@ nfs3_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
+ /*
+ * Touch the page and fault it in if it is not in core
+ * before segmap_getmapflt or vpm_data_copy can lock it.
+ * This is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to write.
+ */
+ uio_prefaultpages((long)n, uiop);
+
if (vpm_enable) {
/*
* It will use kpm mappings, so no need to
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
index 740a4874f5..ff637c27f1 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
@@ -2885,6 +2885,14 @@ nfs4_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
+ /*
+ * Touch the page and fault it in if it is not in core
+ * before segmap_getmapflt or vpm_data_copy can lock it.
+ * This is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to write.
+ */
+ uio_prefaultpages((long)n, uiop);
+
if (vpm_enable) {
/*
* It will use kpm mappings, so no need to
diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c
index 73a619c238..7fbf2edefa 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c
@@ -695,6 +695,14 @@ nfs_fwrite:
cv_wait(&rp->r_cv, &rp->r_statelock);
mutex_exit(&rp->r_statelock);
+ /*
+ * Touch the page and fault it in if it is not in core
+ * before segmap_getmapflt or vpm_data_copy can lock it.
+ * This is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to write.
+ */
+ uio_prefaultpages((long)n, uiop);
+
if (vpm_enable) {
/*
* It will use kpm mappings, so no need to
diff --git a/usr/src/uts/common/fs/pcfs/pc_vnops.c b/usr/src/uts/common/fs/pcfs/pc_vnops.c
index e3126a24f7..d0ea0004ae 100644
--- a/usr/src/uts/common/fs/pcfs/pc_vnops.c
+++ b/usr/src/uts/common/fs/pcfs/pc_vnops.c
@@ -388,6 +388,16 @@ rwpcp(
}
n = (int)(limit - uio->uio_loffset);
}
+
+ /*
+ * Touch the page and fault it in if it is not in
+ * core before segmap_getmapflt can lock it. This
+ * is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to
+ * write to.
+ */
+ uio_prefaultpages((long)n, uio);
+
base = segmap_getmap(segkmap, vp, (u_offset_t)off);
pagecreate = 0;
newpage = 0;
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index 6c0e69dc1d..94183f2f76 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -36,9 +36,6 @@
* contributors.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/t_lock.h>
@@ -1141,6 +1138,15 @@ spec_write(
pagecreate = 1;
newpage = 0;
+
+ /*
+ * Touch the page and fault it in if it is not in core
+ * before segmap_getmapflt or vpm_data_copy can lock it.
+ * This is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to write.
+ */
+ uio_prefaultpages((long)n, uiop);
+
if (vpm_enable) {
error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
n, uiop, !pagecreate, NULL, 0, S_WRITE);
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index c036d27fec..e2ffee0cde 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -274,6 +274,14 @@ wrtmp(
*/
rw_exit(&tp->tn_contents);
+ /*
+ * Touch the page and fault it in if it is not in core
+ * before segmap_getmapflt or vpm_data_copy can lock it.
+ * This is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to write.
+ */
+ uio_prefaultpages((long)bytes, uio);
+
newpage = 0;
if (vpm_enable) {
/*
diff --git a/usr/src/uts/common/fs/udfs/udf_vnops.c b/usr/src/uts/common/fs/udfs/udf_vnops.c
index 9496e9a86d..afb07d1ee3 100644
--- a/usr/src/uts/common/fs/udfs/udf_vnops.c
+++ b/usr/src/uts/common/fs/udfs/udf_vnops.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
@@ -275,7 +273,7 @@ udf_close(
struct ud_inode *ip = VTOI(vp);
if (ip->i_delaylen) {
(void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
- B_ASYNC | B_FREE, cr);
+ B_ASYNC | B_FREE, cr);
ip->i_delaylen = 0;
}
}
@@ -308,7 +306,7 @@ udf_read(
* udf_getattr ends up being called by chklock
*/
error = chklock(vp, FREAD, uiop->uio_loffset,
- uiop->uio_resid, uiop->uio_fmode, ct);
+ uiop->uio_resid, uiop->uio_fmode, ct);
if (error) {
goto end;
}
@@ -357,7 +355,7 @@ udf_write(
* ud_getattr ends up being called by chklock
*/
error = chklock(vp, FWRITE, uiop->uio_loffset,
- uiop->uio_resid, uiop->uio_fmode, ct);
+ uiop->uio_resid, uiop->uio_fmode, ct);
if (error) {
goto end;
}
@@ -522,7 +520,7 @@ udf_setattr(
ovap.va_uid = ip->i_uid;
ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
- ud_iaccess_vmode, ip);
+ ud_iaccess_vmode, ip);
if (error)
goto update_inode;
@@ -677,9 +675,9 @@ udf_lookup(
ip = xip;
*vpp = ITOV(ip);
if ((ip->i_type != VDIR) &&
- (ip->i_char & ISVTX) &&
- ((ip->i_perm & IEXEC) == 0) &&
- udfs_stickyhack) {
+ (ip->i_char & ISVTX) &&
+ ((ip->i_perm & IEXEC) == 0) &&
+ udfs_stickyhack) {
mutex_enter(&(*vpp)->v_lock);
(*vpp)->v_flag |= VISSWAP;
mutex_exit(&(*vpp)->v_lock);
@@ -691,7 +689,7 @@ udf_lookup(
if (IS_DEVVP(*vpp)) {
struct vnode *newvp;
newvp = specvp(*vpp, (*vpp)->v_rdev,
- (*vpp)->v_type, cr);
+ (*vpp)->v_type, cr);
VN_RELE(*vpp);
if (newvp == NULL) {
error = ENOSYS;
@@ -737,8 +735,8 @@ udf_create(
xip = NULL;
rw_enter(&ip->i_rwlock, RW_WRITER);
error = ud_direnter(ip, name, DE_CREATE,
- (struct ud_inode *)0, (struct ud_inode *)0,
- vap, &xip, cr, ct);
+ (struct ud_inode *)0, (struct ud_inode *)0,
+ vap, &xip, cr, ct);
rw_exit(&ip->i_rwlock);
ITIMES(ip);
ip = xip;
@@ -763,7 +761,7 @@ udf_create(
error = EISDIR;
} else if (mode) {
error = ud_iaccess(ip,
- UD_UPERM2DPERM(mode), cr);
+ UD_UPERM2DPERM(mode), cr);
} else {
error = 0;
}
@@ -773,7 +771,7 @@ udf_create(
VN_RELE(ITOV(ip));
goto out;
} else if ((ip->i_type == VREG) &&
- (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
+ (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
/*
* Truncate regular files, if requested by caller.
* Grab i_rwlock to make sure no one else is
@@ -843,7 +841,7 @@ udf_remove(
rw_enter(&ip->i_rwlock, RW_WRITER);
error = ud_dirremove(ip, nm,
- (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
+ (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
rw_exit(&ip->i_rwlock);
ITIMES(ip);
@@ -886,7 +884,7 @@ udf_link(
rw_enter(&tdp->i_rwlock, RW_WRITER);
error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
- sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
+ sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
rw_exit(&tdp->i_rwlock);
ITIMES(sip);
ITIMES(tdp);
@@ -965,8 +963,8 @@ udf_rename(
* Check for renaming '.' or '..' or alias of '.'
*/
if ((strcmp(snm, ".") == 0) ||
- (strcmp(snm, "..") == 0) ||
- (sdp == sip)) {
+ (strcmp(snm, "..") == 0) ||
+ (sdp == sip)) {
error = EINVAL;
rw_exit(&sip->i_contents);
rw_exit(&sdp->i_contents);
@@ -1041,7 +1039,7 @@ udf_mkdir(
ip = VTOI(dvp);
rw_enter(&ip->i_rwlock, RW_WRITER);
error = ud_direnter(ip, dirname, DE_MKDIR,
- (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
+ (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
rw_exit(&ip->i_rwlock);
ITIMES(ip);
if (error == 0) {
@@ -1073,7 +1071,7 @@ udf_rmdir(
rw_enter(&ip->i_rwlock, RW_WRITER);
error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
- cr, ct);
+ cr, ct);
rw_exit(&ip->i_rwlock);
ITIMES(ip);
@@ -1117,7 +1115,7 @@ udf_readdir(
dirsiz = ip->i_size;
if ((uiop->uio_offset >= dirsiz) ||
- (ip->i_nlink <= 0)) {
+ (ip->i_nlink <= 0)) {
if (eofp) {
*eofp = 1;
}
@@ -1154,7 +1152,7 @@ udf_readdir(
while (offset < dirsiz) {
error = ud_get_next_fid(ip, &fbp,
- offset, &fid, &name, buf);
+ offset, &fid, &name, buf);
if (error != 0) {
break;
}
@@ -1176,10 +1174,10 @@ udf_readdir(
bzero(&nd->d_name[2],
DIRENT64_NAMELEN(len) - 2);
nd = (struct dirent64 *)
- ((char *)nd + nd->d_reclen);
+ ((char *)nd + nd->d_reclen);
} else {
if ((error = ud_uncompress(fid->fid_idlen,
- &length, name, dname)) != 0) {
+ &length, name, dname)) != 0) {
break;
}
if (length == 0) {
@@ -1198,13 +1196,13 @@ udf_readdir(
bzero(&nd->d_name[length],
DIRENT64_NAMELEN(len) - length);
nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
- SWAP_16(fid->fid_icb.lad_ext_prn),
- SWAP_32(fid->fid_icb.lad_ext_loc), 1,
- &dummy);
+ SWAP_16(fid->fid_icb.lad_ext_prn),
+ SWAP_32(fid->fid_icb.lad_ext_loc), 1,
+ &dummy);
nd->d_reclen = (uint16_t)len;
nd->d_off = offset + FID_LEN(fid);
nd = (struct dirent64 *)
- ((char *)nd + nd->d_reclen);
+ ((char *)nd + nd->d_reclen);
}
outcount++;
}
@@ -1260,7 +1258,7 @@ udf_symlink(
rw_enter(&dip->i_rwlock, RW_WRITER);
error = ud_direnter(dip, linkname, DE_CREATE,
- (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
+ (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
rw_exit(&dip->i_rwlock);
if (error == 0) {
dname = kmem_zalloc(1024, KM_SLEEP);
@@ -1297,7 +1295,7 @@ udf_symlink(
pc->pc_type = 4;
pc = (struct path_comp *)(((char *)pc) + 4);
} else if (((target - sp) == 2) &&
- (*sp == '.') && ((*(sp + 1)) == '.')) {
+ (*sp == '.') && ((*(sp + 1)) == '.')) {
/*
* DotDot entry.
*/
@@ -1311,7 +1309,7 @@ udf_symlink(
*/
outlen = 1024; /* set to size of dname */
if (error = ud_compress(target - sp, &outlen,
- (uint8_t *)sp, (uint8_t *)dname)) {
+ (uint8_t *)sp, (uint8_t *)dname)) {
break;
}
pc->pc_type = 5;
@@ -1320,7 +1318,7 @@ udf_symlink(
dname[outlen] = '\0';
(void) strcpy((char *)pc->pc_id, dname);
pc = (struct path_comp *)
- (((char *)pc) + 4 + outlen);
+ (((char *)pc) + 4 + outlen);
}
while (*target == '/') {
target++;
@@ -1337,15 +1335,15 @@ udf_symlink(
ioflag |= FDSYNC;
}
error = ud_rdwri(UIO_WRITE, ioflag, ip,
- uname, ((int8_t *)pc) - uname,
- (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
+ uname, ((int8_t *)pc) - uname,
+ (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
}
if (error) {
ud_idrop(ip);
rw_exit(&ip->i_contents);
rw_enter(&dip->i_rwlock, RW_WRITER);
(void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
- (struct vnode *)0, DR_REMOVE, cr, ct);
+ (struct vnode *)0, DR_REMOVE, cr, ct);
rw_exit(&dip->i_rwlock);
goto update_inode;
}
@@ -1431,7 +1429,7 @@ udf_readlink(
break;
case 5 :
if ((error = ud_uncompress(pc->pc_len, &id_len,
- pc->pc_id, (uint8_t *)dname)) != 0) {
+ pc->pc_id, (uint8_t *)dname)) != 0) {
break;
}
dname[id_len] = '\0';
@@ -1600,7 +1598,7 @@ udf_frlock(
* meaningless to have held tlock in the first place.
*/
if ((ip->i_mapcnt > 0) &&
- (MANDLOCK(vp, ip->i_char))) {
+ (MANDLOCK(vp, ip->i_char))) {
return (EAGAIN);
}
@@ -1734,7 +1732,7 @@ retrylock:
*/
offset = uoff;
while ((offset < uoff + len) &&
- (offset < ip->i_size)) {
+ (offset < ip->i_size)) {
/*
* the variable "bnp" is to simplify the expression for
* the compiler; * just passing in &bn to bmap_write
@@ -1787,7 +1785,7 @@ retrylock:
*/
eoff = (uoff + len);
for (pgoff = uoff, pgaddr = addr, pl = plarr;
- pgoff < eoff; /* empty */) {
+ pgoff < eoff; /* empty */) {
page_t *pp;
u_offset_t nextrio;
se_t se;
@@ -1832,7 +1830,7 @@ retrylock:
* We have to create the page, or read it from disk.
*/
if (error = ud_getpage_miss(vp, pgoff, len,
- seg, pgaddr, pl, plsz, rw, seqmode)) {
+ seg, pgaddr, pl, plsz, rw, seqmode)) {
goto error_out;
}
@@ -1854,7 +1852,7 @@ retrylock:
if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
ASSERT((protp == NULL) ||
- !(has_holes && (*protp & PROT_WRITE)));
+ !(has_holes && (*protp & PROT_WRITE)));
eoff = pgoff + plsz;
while (pgoff < eoff) {
@@ -2029,7 +2027,7 @@ udf_map(
}
if ((off < (offset_t)0) ||
- ((off + len) < (offset_t)0)) {
+ ((off + len) < (offset_t)0)) {
error = EINVAL;
goto end;
}
@@ -2218,7 +2216,7 @@ udf_pageio(
bp = NULL;
contig = 0;
if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
- &bn, &contig)) {
+ &bn, &contig)) {
break;
}
@@ -2263,7 +2261,7 @@ udf_pageio(
(void) bdev_strategy(bp);
} else {
error = ud_multi_strat(ip, cpp, bp,
- (u_offset_t)(io_off + done_len));
+ (u_offset_t)(io_off + done_len));
if (error != 0) {
pageio_done(bp);
break;
@@ -2399,14 +2397,14 @@ ud_getpage_miss(struct vnode *vp, u_offset_t off,
*/
if (rw == S_CREATE) {
if ((pp = page_create_va(vp, off,
- PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
+ PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
cmn_err(CE_WARN, "ud_getpage_miss: page_create");
return (EINVAL);
}
io_len = PAGESIZE;
} else {
pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
- &io_len, off, PAGESIZE, 0);
+ &io_len, off, PAGESIZE, 0);
/*
* Some other thread has entered the page.
@@ -2471,7 +2469,7 @@ ud_getpage_ra(struct vnode *vp,
}
pp = pvn_read_kluster(vp, io_off, seg, addr2,
- &io_off, &io_len, io_off, PAGESIZE, 1);
+ &io_off, &io_len, io_off, PAGESIZE, 1);
/*
* Some other thread has entered the page.
@@ -2508,9 +2506,9 @@ ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
* portions
*/
bp = ud_bread(ip->i_dev,
- ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
+ ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
if ((bp->b_error == 0) &&
- (bp->b_resid == 0)) {
+ (bp->b_resid == 0)) {
caddr = bp->b_un.b_addr + ip->i_data_off;
@@ -2518,7 +2516,7 @@ ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
* mapin to kvm
*/
kaddr = (caddr_t)ppmapin(pp,
- PROT_READ | PROT_WRITE, (caddr_t)-1);
+ PROT_READ | PROT_WRITE, (caddr_t)-1);
(void) kcopy(caddr, kaddr, ip->i_size);
/*
@@ -2551,7 +2549,7 @@ ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
* the page
*/
if (((off + contig) == isize) ||
- (contig == PAGESIZE)) {
+ (contig == PAGESIZE)) {
pagezero(pp->p_prev, 0, PAGESIZE);
goto out;
}
@@ -2679,7 +2677,7 @@ ud_putpages(struct vnode *vp, offset_t off,
* Search the entire vp list for pages >= off.
*/
err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
- flags, cr);
+ flags, cr);
} else {
/*
* Loop over all offsets in the range looking for
@@ -2700,11 +2698,11 @@ ud_putpages(struct vnode *vp, offset_t off,
*/
if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
pp = page_lookup(vp, io_off,
- (flags & (B_INVAL | B_FREE)) ?
- SE_EXCL : SE_SHARED);
+ (flags & (B_INVAL | B_FREE)) ?
+ SE_EXCL : SE_SHARED);
} else {
pp = page_lookup_nowait(vp, io_off,
- (flags & B_FREE) ? SE_EXCL : SE_SHARED);
+ (flags & B_FREE) ? SE_EXCL : SE_SHARED);
}
if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
@@ -2712,7 +2710,7 @@ ud_putpages(struct vnode *vp, offset_t off,
} else {
err = ud_putapage(vp, pp,
- &io_off, &io_len, flags, cr);
+ &io_off, &io_len, flags, cr);
if (err != 0) {
break;
}
@@ -2801,14 +2799,14 @@ ud_putapage(struct vnode *vp,
ASSERT(ip->i_size <= ip->i_max_emb);
pp = pvn_write_kluster(vp, pp, &io_off,
- &io_len, off, PAGESIZE, flags);
+ &io_len, off, PAGESIZE, flags);
if (io_len == 0) {
io_len = PAGESIZE;
}
bp = ud_bread(ip->i_dev,
- ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
- udf_vfsp->udf_lbsize);
+ ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
+ udf_vfsp->udf_lbsize);
fe = (struct file_entry *)bp->b_un.b_addr;
if ((bp->b_flags & B_ERROR) ||
(ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
@@ -2825,19 +2823,19 @@ ud_putapage(struct vnode *vp,
return (error);
}
if ((bp->b_error == 0) &&
- (bp->b_resid == 0)) {
+ (bp->b_resid == 0)) {
caddr = bp->b_un.b_addr + ip->i_data_off;
kaddr = (caddr_t)ppmapin(pp,
- PROT_READ | PROT_WRITE, (caddr_t)-1);
+ PROT_READ | PROT_WRITE, (caddr_t)-1);
(void) kcopy(kaddr, caddr, ip->i_size);
ppmapout(kaddr);
}
crc_len = ((uint32_t)&((struct file_entry *)0)->fe_spec) +
- SWAP_32(fe->fe_len_ear);
+ SWAP_32(fe->fe_len_ear);
crc_len += ip->i_size;
ud_make_tag(ip->i_udf, &fe->fe_tag,
- UD_FILE_ENTRY, ip->i_icb_block, crc_len);
+ UD_FILE_ENTRY, ip->i_icb_block, crc_len);
bwrite(bp);
@@ -2874,7 +2872,7 @@ ud_putapage(struct vnode *vp,
}
pp = pvn_write_kluster(vp, pp, &io_off,
- &io_len, off, contig, flags);
+ &io_len, off, contig, flags);
if (io_len == 0) {
io_len = PAGESIZE;
}
@@ -2981,8 +2979,8 @@ ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
ASSERT(RW_LOCK_HELD(&ip->i_contents));
if ((ip->i_type != VREG) &&
- (ip->i_type != VDIR) &&
- (ip->i_type != VLNK)) {
+ (ip->i_type != VDIR) &&
+ (ip->i_type != VLNK)) {
return (EIO);
}
@@ -2991,7 +2989,7 @@ ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
}
if ((uio->uio_loffset < (offset_t)0) ||
- ((uio->uio_loffset + uio->uio_resid) < 0)) {
+ ((uio->uio_loffset + uio->uio_resid) < 0)) {
return (EINVAL);
}
if (uio->uio_resid == 0) {
@@ -3024,8 +3022,8 @@ ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
n = (int)diff;
}
dofree = ud_freebehind &&
- ip->i_nextr == (off & PAGEMASK) &&
- off > ud_smallfile;
+ ip->i_nextr == (off & PAGEMASK) &&
+ off > ud_smallfile;
#ifndef __lock_lint
if (rwtype == RW_READER) {
@@ -3034,7 +3032,7 @@ ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
#endif
base = segmap_getmapflt(segkmap, vp, (off + mapon),
- (uint32_t)n, 1, S_READ);
+ (uint32_t)n, 1, S_READ);
error = uiomove(base + mapon, (long)n, UIO_READ, uio);
flags = 0;
@@ -3044,7 +3042,7 @@ ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
* won't need this buffer again soon.
*/
if (n + on == MAXBSIZE && ud_freebehind && dofree &&
- freemem < lotsfree + pages_before_pager) {
+ freemem < lotsfree + pages_before_pager) {
flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
}
/*
@@ -3079,7 +3077,8 @@ out:
*/
if (ioflag & FRSYNC) {
if ((ioflag & FSYNC) ||
- ((ioflag & FDSYNC) && (ip->i_flag & (IATTCHG|IBDWRITE)))) {
+ ((ioflag & FDSYNC) &&
+ (ip->i_flag & (IATTCHG|IBDWRITE)))) {
rw_exit(&ip->i_contents);
rw_enter(&ip->i_contents, RW_WRITER);
ud_iupdat(ip, 1);
@@ -3114,8 +3113,8 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
ASSERT(RW_WRITE_HELD(&ip->i_contents));
if ((ip->i_type != VREG) &&
- (ip->i_type != VDIR) &&
- (ip->i_type != VLNK)) {
+ (ip->i_type != VDIR) &&
+ (ip->i_type != VLNK)) {
return (EIO);
}
@@ -3138,7 +3137,7 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
return (EFBIG);
}
if ((uio->uio_loffset < (offset_t)0) ||
- ((uio->uio_loffset + uio->uio_resid) < 0)) {
+ ((uio->uio_loffset + uio->uio_resid) < 0)) {
return (EINVAL);
}
if (uio->uio_resid == 0) {
@@ -3179,7 +3178,7 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
* is done here before we up the file size.
*/
error = ud_bmap_write(ip, uoff,
- (int)(on + n), mapon == 0, cr);
+ (int)(on + n), mapon == 0, cr);
if (error) {
break;
}
@@ -3201,7 +3200,7 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
* needed blocks are allocated first.
*/
error = ud_bmap_write(ip, uoff,
- (int)(on + n), 1, cr);
+ (int)(on + n), 1, cr);
if (error) {
break;
}
@@ -3212,8 +3211,17 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
rw_exit(&ip->i_contents);
+ /*
+ * Touch the page and fault it in if it is not in
+ * core before segmap_getmapflt can lock it. This
+ * is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to
+ * write to.
+ */
+ uio_prefaultpages((long)n, uio);
+
base = segmap_getmapflt(segkmap, vp, (off + mapon),
- (uint32_t)n, !pagecreate, S_WRITE);
+ (uint32_t)n, !pagecreate, S_WRITE);
/*
* segmap_pagecreate() returns 1 if it calls
@@ -3222,14 +3230,14 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
newpage = 0;
if (pagecreate) {
newpage = segmap_pagecreate(segkmap, base,
- (size_t)n, 0);
+ (size_t)n, 0);
}
premove_resid = uio->uio_resid;
error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
if (pagecreate &&
- uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
+ uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
/*
* We created pages w/o initializing them completely,
* thus we need to zero the part that wasn't set up.
@@ -3280,13 +3288,13 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
*/
if (IS_SWAPVP(vp)) {
flags = SM_WRITE | SM_FREE |
- SM_DONTNEED;
+ SM_DONTNEED;
iupdat_flag = 0;
} else {
flags = SM_WRITE;
}
} else if (((mapon + n) == MAXBSIZE) ||
- IS_SWAPVP(vp)) {
+ IS_SWAPVP(vp)) {
/*
* Have written a whole block.
* Start an asynchronous write and
@@ -3315,7 +3323,7 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
*/
if (error) {
if ((ioflag & (FSYNC | FDSYNC)) ||
- ip->i_type == VDIR) {
+ ip->i_type == VDIR) {
uio->uio_resid = premove_resid;
} else {
error = 0;
@@ -3349,7 +3357,7 @@ ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
ip->i_flag |= IATTCHG;
}
if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
- (IEXEC >> 10))) != 0 &&
+ (IEXEC >> 10))) != 0 &&
(ip->i_char & (ISUID | ISGID)) != 0 &&
secpolicy_vnode_setid_retain(cr,
(ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
@@ -3385,7 +3393,7 @@ out:
* we have eliminated nosync
*/
if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
- ((ioflag & FSYNC) && iupdat_flag)) {
+ ((ioflag & FSYNC) && iupdat_flag)) {
ud_iupdat(ip, 1);
}
}
@@ -3423,7 +3431,7 @@ ud_multi_strat(struct ud_inode *ip,
for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
contig = 0;
if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
- &bn, &contig)) {
+ &bn, &contig)) {
goto end;
}
if (contig == 0) {
@@ -3455,7 +3463,7 @@ ud_multi_strat(struct ud_inode *ip,
* required number of buffers
*/
alloc_sz = sizeof (mio_master_t) +
- (sizeof (mio_slave_t) * io_count);
+ (sizeof (mio_slave_t) * io_count);
mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
if (mm == NULL) {
error = ENOMEM;
@@ -3481,8 +3489,8 @@ ud_multi_strat(struct ud_inode *ip,
for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
contig = 0;
if (error = ud_bmap_read(ip,
- (u_offset_t)(start + io_off),
- &bn, &contig)) {
+ (u_offset_t)(start + io_off),
+ &bn, &contig)) {
goto end;
}
ASSERT(contig);
@@ -3497,8 +3505,8 @@ ud_multi_strat(struct ud_inode *ip,
ms->ms_ptr = mm;
bioinit(&ms->ms_buf);
rbp = bioclone(bp, io_off, (size_t)contig,
- bp->b_edev, bn, ud_slave_done,
- &ms->ms_buf, KM_NOSLEEP);
+ bp->b_edev, bn, ud_slave_done,
+ &ms->ms_buf, KM_NOSLEEP);
ASSERT(rbp == &ms->ms_buf);
mm->mm_resid += contig;
io_count++;
diff --git a/usr/src/uts/common/fs/ufs/ufs_trans.c b/usr/src/uts/common/fs/ufs/ufs_trans.c
index 2a8cd134b0..411fad9ace 100644
--- a/usr/src/uts/common/fs/ufs/ufs_trans.c
+++ b/usr/src/uts/common/fs/ufs/ufs_trans.c
@@ -31,8 +31,6 @@
* under license from the Regents of the University of California.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/types.h>
@@ -880,69 +878,6 @@ again:
}
/*
- * Fault in the pages of the first n bytes specified by the uio structure.
- * 1 byte in each page is touched and the uio struct is unmodified.
- * Any error will terminate the process as this is only a best
- * attempt to get the pages resident.
- */
-static void
-ufs_trans_touch(ssize_t n, struct uio *uio)
-{
- struct iovec *iov;
- ulong_t cnt, incr;
- caddr_t p;
- uint8_t tmp;
-
- iov = uio->uio_iov;
-
- while (n) {
- cnt = MIN(iov->iov_len, n);
- if (cnt == 0) {
- /* empty iov entry */
- iov++;
- continue;
- }
- n -= cnt;
- /*
- * touch each page in this segment.
- */
- p = iov->iov_base;
- while (cnt) {
- switch (uio->uio_segflg) {
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- if (fuword8(p, &tmp))
- return;
- break;
- case UIO_SYSSPACE:
- if (kcopy(p, &tmp, 1))
- return;
- break;
- }
- incr = MIN(cnt, PAGESIZE);
- p += incr;
- cnt -= incr;
- }
- /*
- * touch the last byte in case it straddles a page.
- */
- p--;
- switch (uio->uio_segflg) {
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- if (fuword8(p, &tmp))
- return;
- break;
- case UIO_SYSSPACE:
- if (kcopy(p, &tmp, 1))
- return;
- break;
- }
- iov++;
- }
-}
-
-/*
* Calculate the amount of log space that needs to be reserved for this
* write request. If the amount of log space is too large, then
* calculate the size that the requests needs to be split into.
@@ -968,7 +903,7 @@ ufs_trans_write_resv(
resid = MIN(uio->uio_resid, ufs_trans_max_resid);
resv = ufs_log_amt(ip, offset, resid, 0);
if (resv <= ufs_trans_max_resv) {
- ufs_trans_touch(resid, uio);
+ uio_prefaultpages(resid, uio);
if (resid != uio->uio_resid)
*residp = resid;
*resvp = resv;
@@ -982,7 +917,7 @@ ufs_trans_write_resv(
nchunks++;
resid = uio->uio_resid / nchunks;
}
- ufs_trans_touch(resid, uio);
+ uio_prefaultpages(resid, uio);
/*
* If this request takes too much log space, it will be split
*/
@@ -1049,7 +984,7 @@ again:
* Make sure the input buffer is resident before starting
* the next transaction.
*/
- ufs_trans_touch(MIN(resid, realresid), uio);
+ uio_prefaultpages(MIN(resid, realresid), uio);
/*
* Generate BOT for next part of the request
diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c
index 7e45078774..5d90722563 100644
--- a/usr/src/uts/common/fs/ufs/ufs_vnops.c
+++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c
@@ -998,6 +998,15 @@ wrip(struct inode *ip, struct uio *uio, int ioflag, struct cred *cr)
newpage = 0;
premove_resid = uio->uio_resid;
+
+ /*
+ * Touch the page and fault it in if it is not in core
+ * before segmap_getmapflt or vpm_data_copy can lock it.
+ * This is to avoid the deadlock if the buffer is mapped
+ * to the same file through mmap which we want to write.
+ */
+ uio_prefaultpages((long)n, uio);
+
if (vpm_enable) {
/*
* Copy data. If new pages are created, part of
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 0eba0b1940..8e0037e37d 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -560,69 +560,6 @@ out:
}
/*
- * Fault in the pages of the first n bytes specified by the uio structure.
- * 1 byte in each page is touched and the uio struct is unmodified.
- * Any error will exit this routine as this is only a best
- * attempt to get the pages resident. This is a copy of ufs_trans_touch().
- */
-static void
-zfs_prefault_write(ssize_t n, struct uio *uio)
-{
- struct iovec *iov;
- ulong_t cnt, incr;
- caddr_t p;
- uint8_t tmp;
-
- iov = uio->uio_iov;
-
- while (n) {
- cnt = MIN(iov->iov_len, n);
- if (cnt == 0) {
- /* empty iov entry */
- iov++;
- continue;
- }
- n -= cnt;
- /*
- * touch each page in this segment.
- */
- p = iov->iov_base;
- while (cnt) {
- switch (uio->uio_segflg) {
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- if (fuword8(p, &tmp))
- return;
- break;
- case UIO_SYSSPACE:
- if (kcopy(p, &tmp, 1))
- return;
- break;
- }
- incr = MIN(cnt, PAGESIZE);
- p += incr;
- cnt -= incr;
- }
- /*
- * touch the last byte in case it straddles a page.
- */
- p--;
- switch (uio->uio_segflg) {
- case UIO_USERSPACE:
- case UIO_USERISPACE:
- if (fuword8(p, &tmp))
- return;
- break;
- case UIO_SYSSPACE:
- if (kcopy(p, &tmp, 1))
- return;
- break;
- }
- iov++;
- }
-}
-
-/*
* Write the bytes to a file.
*
* IN: vp - vnode of file to be written to.
@@ -689,7 +626,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
* Pre-fault the pages to ensure slow (eg NFS) pages
* don't hold up txg.
*/
- zfs_prefault_write(n, uio);
+ uio_prefaultpages(n, uio);
/*
* If in append mode, set the io offset pointer to eof.
diff --git a/usr/src/uts/common/os/move.c b/usr/src/uts/common/os/move.c
index 9ada82a72f..7e1c2f2d62 100644
--- a/usr/src/uts/common/os/move.c
+++ b/usr/src/uts/common/os/move.c
@@ -112,6 +112,73 @@ uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
}
/*
+ * Fault in the pages of the first n bytes specified by the uio structure.
+ * 1 byte in each page is touched and the uio struct is unmodified. Any
+ * error will terminate the process as this is only a best attempt to get
+ * the pages resident.
+ */
+void
+uio_prefaultpages(ssize_t n, struct uio *uio)
+{
+ struct iovec *iov;
+ ulong_t cnt, incr;
+ caddr_t p;
+ uint8_t tmp;
+ int iovcnt;
+
+ iov = uio->uio_iov;
+ iovcnt = uio->uio_iovcnt;
+
+ while ((n > 0) && (iovcnt > 0)) {
+ cnt = MIN(iov->iov_len, n);
+ if (cnt == 0) {
+ /* empty iov entry */
+ iov++;
+ iovcnt--;
+ continue;
+ }
+ n -= cnt;
+ /*
+ * touch each page in this segment.
+ */
+ p = iov->iov_base;
+ while (cnt) {
+ switch (uio->uio_segflg) {
+ case UIO_USERSPACE:
+ case UIO_USERISPACE:
+ if (fuword8(p, &tmp))
+ return;
+ break;
+ case UIO_SYSSPACE:
+ if (kcopy(p, &tmp, 1))
+ return;
+ break;
+ }
+ incr = MIN(cnt, PAGESIZE);
+ p += incr;
+ cnt -= incr;
+ }
+ /*
+ * touch the last byte in case it straddles a page.
+ */
+ p--;
+ switch (uio->uio_segflg) {
+ case UIO_USERSPACE:
+ case UIO_USERISPACE:
+ if (fuword8(p, &tmp))
+ return;
+ break;
+ case UIO_SYSSPACE:
+ if (kcopy(p, &tmp, 1))
+ return;
+ break;
+ }
+ iov++;
+ iovcnt--;
+ }
+}
+
+/*
* transfer a character value into the address space
* delineated by a uio and update fields within the
* uio for next character. Return 0 for success, EFAULT
diff --git a/usr/src/uts/common/sys/uio.h b/usr/src/uts/common/sys/uio.h
index 8b6b3ef8d6..c58fda95dc 100644
--- a/usr/src/uts/common/sys/uio.h
+++ b/usr/src/uts/common/sys/uio.h
@@ -191,6 +191,7 @@ typedef struct uioasync_s {
#if defined(_KERNEL)
int uiomove(void *, size_t, enum uio_rw, uio_t *);
+void uio_prefaultpages(ssize_t, uio_t *);
int ureadc(int, uio_t *); /* should be errno_t in future */
int uwritec(struct uio *);
void uioskip(uio_t *, size_t);