summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
author maybee <none@none> 2006-04-24 18:28:45 -0700
committer maybee <none@none> 2006-04-24 18:28:45 -0700
commit 5730cc9a43c5f11a472d7536ed81facfd10f1e2e (patch)
tree 93f9154e2ff22af070ec950fb2342ee8ea9aab0b /usr/src
parent 75212827077c6a9a0e60c5f619c961a68d7433ba (diff)
download illumos-gate-5730cc9a43c5f11a472d7536ed81facfd10f1e2e.tar.gz
6413573 deadlock between fsflush() and zfs_create()
6416101 du inside snapshot produces bad sizes and paths
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/zfs_znode.h | 3
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_dir.c | 3
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_vnops.c | 184
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_znode.c | 124
4 files changed, 131 insertions, 183 deletions
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
index 019eefb1a5..c585d2af78 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
@@ -242,8 +242,7 @@ extern void zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx);
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
-extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, dmu_tx_t *,
- cred_t *cr);
+extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
diff --git a/usr/src/uts/common/fs/zfs/zfs_dir.c b/usr/src/uts/common/fs/zfs/zfs_dir.c
index d73315b47d..8262e9d882 100644
--- a/usr/src/uts/common/fs/zfs/zfs_dir.c
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c
@@ -250,7 +250,8 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp)
* If we are a snapshot mounted under .zfs, return
* the vp for the snapshot directory.
*/
- if (zfsvfs->z_parent != zfsvfs) {
+ if (dzp->z_phys->zp_parent == dzp->z_id &&
+ zfsvfs->z_parent != zfsvfs) {
error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
"snapshot", vpp, NULL, 0, NULL, kcred);
return (error);
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 8a953174ec..6edb60c7b0 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -1045,7 +1045,6 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
objset_t *os = zfsvfs->z_os;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
- rl_t *rl;
int error;
uint64_t zoid;
@@ -1144,51 +1143,24 @@ top:
if (mode && (error = zfs_zaccess_rwx(zp, mode, cr))) {
goto out;
}
- /*
- * Truncate regular files if requested.
- */
-
- /*
- * Need to update dzp->z_seq?
- */
mutex_enter(&dzp->z_lock);
dzp->z_seq++;
mutex_exit(&dzp->z_lock);
- if ((ZTOV(zp)->v_type == VREG) && (zp->z_phys->zp_size != 0) &&
+ /*
+ * Truncate regular files if requested.
+ */
+ if ((ZTOV(zp)->v_type == VREG) &&
+ (zp->z_phys->zp_size != 0) &&
(vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
- /*
- * Truncate the file.
- */
- tx = dmu_tx_create(os);
- dmu_tx_hold_bonus(tx, zoid);
- dmu_tx_hold_free(tx, zoid, 0, DMU_OBJECT_END);
- /* Lock the whole range of the file */
- rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- dmu_tx_abort(tx);
- zfs_range_unlock(zp, rl);
- if (dl)
- zfs_dirent_unlock(dl);
- VN_RELE(ZTOV(zp));
- if (error == ERESTART &&
- zfsvfs->z_assign == TXG_NOWAIT) {
- txg_wait_open(dmu_objset_pool(os), 0);
- goto top;
- }
- ZFS_EXIT(zfsvfs);
- return (error);
- }
- error = zfs_freesp(zp, 0, 0, mode, tx, cr);
- if (error == 0) {
- zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
- seq = zfs_log_truncate(zilog, tx,
- TX_TRUNCATE, zp, 0, 0);
+ error = zfs_freesp(zp, 0, 0, mode, TRUE);
+ if (error == ERESTART &&
+ zfsvfs->z_assign == TXG_NOWAIT) {
+ zfs_dirent_unlock(dl);
+ txg_wait_open(dmu_objset_pool(os), 0);
+ goto top;
}
- zfs_range_unlock(zp, rl);
- dmu_tx_commit(tx);
}
}
out:
@@ -1937,12 +1909,10 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
zilog_t *zilog = zfsvfs->z_log;
uint64_t seq = 0;
dmu_tx_t *tx;
- rl_t *rl;
- uint_t mask = vap->va_mask;
- uint_t mask_applied = 0;
vattr_t oldva;
+ uint_t mask = vap->va_mask;
+ uint_t saved_mask;
int trim_mask = FALSE;
- int saved_mask;
uint64_t new_mode;
znode_t *attrzp;
int need_policy = FALSE;
@@ -1963,7 +1933,6 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
ZFS_ENTER(zfsvfs);
top:
- rl = NULL;
attrzp = NULL;
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
@@ -1981,6 +1950,21 @@ top:
ZFS_EXIT(zfsvfs);
return (err);
}
+ /*
+ * XXX - Note, we are not providing any open
+ * mode flags here (like FNDELAY), so we may
+ * block if there are locks present... this
+ * should be addressed in openat().
+ */
+ do {
+ if (err == ERESTART)
+ txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
+ err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
+ } while (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
+ if (err) {
+ ZFS_EXIT(zfsvfs);
+ return (err);
+ }
}
if (mask & (AT_ATIME|AT_MTIME))
@@ -2085,27 +2069,10 @@ top:
0, ZFS_ACL_SIZE(MAX_ACL_SIZE));
}
- if (mask & AT_SIZE) {
- uint64_t off = vap->va_size;
- /*
- * Range lock the entire file, to ensure the truncate
- * is serialised.
- */
- rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
- ASSERT(rl != NULL);
- if (off < zp->z_phys->zp_size)
- dmu_tx_hold_free(tx, zp->z_id, off, DMU_OBJECT_END);
- else if (zp->z_blksz < zfsvfs->z_max_blksz && off > zp->z_blksz)
- /* we will rewrite this block if we grow */
- dmu_tx_hold_write(tx, zp->z_id, 0, zp->z_phys->zp_size);
- }
-
if ((mask & (AT_UID | AT_GID)) && zp->z_phys->zp_xattr != 0) {
err = zfs_zget(zp->z_zfsvfs, zp->z_phys->zp_xattr, &attrzp);
if (err) {
dmu_tx_abort(tx);
- if (rl != NULL)
- zfs_range_unlock(zp, rl);
ZFS_EXIT(zfsvfs);
return (err);
}
@@ -2117,8 +2084,6 @@ top:
if (attrzp)
VN_RELE(ZTOV(attrzp));
dmu_tx_abort(tx);
- if (rl != NULL)
- zfs_range_unlock(zp, rl);
if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
goto top;
@@ -2136,22 +2101,6 @@ top:
* Note: you cannot set ctime directly, although it will be
* updated as a side-effect of calling this function.
*/
- if (mask & AT_SIZE) {
- /*
- * XXX - Note, we are not providing any open
- * mode flags here (like FNDELAY), so we may
- * block if there are locks present... this
- * should be addressed in openat().
- */
- err = zfs_freesp(zp, vap->va_size, 0, 0, tx, cr);
- if (err) {
- mutex_enter(&zp->z_lock);
- goto out;
- }
- mask_applied |= AT_SIZE;
- }
-
- mask_applied = mask; /* no errors after this point */
mutex_enter(&zp->z_lock);
@@ -2185,25 +2134,19 @@ top:
if (mask & AT_MTIME)
ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
- if (mask_applied & AT_SIZE)
+ if (mask & AT_SIZE)
zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx);
- else if (mask_applied != 0)
+ else if (mask != 0)
zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
-out:
-
- if (mask_applied != 0)
- seq = zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap,
- mask_applied);
+ if (mask != 0)
+ seq = zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask);
mutex_exit(&zp->z_lock);
if (attrzp)
VN_RELE(ZTOV(attrzp));
- if (rl != NULL)
- zfs_range_unlock(zp, rl);
-
dmu_tx_commit(tx);
zil_commit(zilog, seq, 0);
@@ -3437,12 +3380,8 @@ static int
zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag,
offset_t offset, cred_t *cr, caller_context_t *ct)
{
- dmu_tx_t *tx;
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- zilog_t *zilog = zfsvfs->z_log;
- rl_t *rl;
- uint64_t seq = 0;
uint64_t off, len;
int error;
@@ -3466,63 +3405,12 @@ top:
off = bfp->l_start;
len = bfp->l_len; /* 0 means from off to end of file */
- tx = dmu_tx_create(zfsvfs->z_os);
- dmu_tx_hold_bonus(tx, zp->z_id);
- /*
- * If we will change zp_size (in zfs_freesp) then lock the whole file,
- * otherwise just lock the range being freed.
- */
- if (len == 0 || off + len > zp->z_phys->zp_size) {
- rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
- } else {
- rl = zfs_range_lock(zp, off, len, RL_WRITER);
- /* recheck, in case zp_size changed */
- if (off + len > zp->z_phys->zp_size) {
- /* lost race: file size changed, lock whole file */
- zfs_range_unlock(zp, rl);
- rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
- }
- }
- if (off + len > zp->z_blksz && zp->z_blksz < zfsvfs->z_max_blksz &&
- off >= zp->z_phys->zp_size) {
- /*
- * We are increasing the length of the file,
- * and this may mean a block size increase.
- */
- dmu_tx_hold_write(tx, zp->z_id, 0,
- MIN(off + len, zfsvfs->z_max_blksz));
- } else if (off < zp->z_phys->zp_size) {
- /*
- * If len == 0, we are truncating the file.
- */
- dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END);
- }
-
- error = dmu_tx_assign(tx, zfsvfs->z_assign);
- if (error) {
- dmu_tx_abort(tx);
- zfs_range_unlock(zp, rl);
- if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
+ do {
+ if (error == ERESTART)
txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0);
- goto top;
- }
- ZFS_EXIT(zfsvfs);
- return (error);
- }
-
- error = zfs_freesp(zp, off, len, flag, tx, cr);
-
- if (error == 0) {
- zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
- seq = zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
- }
-
- zfs_range_unlock(zp, rl);
-
- dmu_tx_commit(tx);
-
- zil_commit(zilog, seq, 0);
+ error = zfs_freesp(zp, off, len, flag, TRUE);
+ } while (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT);
ZFS_EXIT(zfsvfs);
return (error);
diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c
index 15b23b5854..5472481e02 100644
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c
@@ -902,30 +902,51 @@ zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
* Free space in a file.
*
* IN: zp - znode of file to free data in.
- * from - start of section to free.
+ * off - start of section to free.
* len - length of section to free (0 => to EOF).
* flag - current file open mode flags.
- * tx - open transaction.
*
* RETURN: 0 if success
* error code if failure
*/
int
-zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx,
- cred_t *cr)
+zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
vnode_t *vp = ZTOV(zp);
- uint64_t size = zp->z_phys->zp_size;
- uint64_t end = from + len;
+ dmu_tx_t *tx;
+ zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+ zilog_t *zilog = zfsvfs->z_log;
+ rl_t *rl;
+ uint64_t seq = 0;
+ uint64_t end = off + len;
+ uint64_t size, new_blksz;
int error;
if (ZTOV(zp)->v_type == VFIFO)
return (0);
/*
+ * If we will change zp_size then lock the whole file,
+ * otherwise just lock the range being freed.
+ */
+ if (len == 0 || off + len > zp->z_phys->zp_size) {
+ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
+ } else {
+ rl = zfs_range_lock(zp, off, len, RL_WRITER);
+ /* recheck, in case zp_size changed */
+ if (off + len > zp->z_phys->zp_size) {
+ /* lost race: file size changed, lock whole file */
+ zfs_range_unlock(zp, rl);
+ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
+ }
+ }
+
+ /*
* Nothing to do if file already at desired length.
*/
- if (len == 0 && size == from) {
+ size = zp->z_phys->zp_size;
+ if (len == 0 && size == off) {
+ zfs_range_unlock(zp, rl);
return (0);
}
@@ -933,19 +954,26 @@ zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx,
* Check for any locks in the region to be freed.
*/
if (MANDLOCK(vp, (mode_t)zp->z_phys->zp_mode)) {
- uint64_t start;
+ uint64_t start = off;
+ uint64_t extent = len;
- if (size > from)
- start = from;
- else
+ if (off > size) {
start = size;
- if (error = chklock(vp, FWRITE, start, 0, flag, NULL))
+ extent += off - size;
+ } else if (len == 0) {
+ extent = size - off;
+ }
+ if (error = chklock(vp, FWRITE, start, extent, flag, NULL)) {
+ zfs_range_unlock(zp, rl);
return (error);
+ }
}
- if (end > zp->z_blksz && (!ISP2(zp->z_blksz) ||
- zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
- uint64_t new_blksz;
+ tx = dmu_tx_create(zfsvfs->z_os);
+ dmu_tx_hold_bonus(tx, zp->z_id);
+ new_blksz = 0;
+ if (end > size &&
+ (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
/*
* We are growing the file past the current block size.
*/
@@ -955,43 +983,75 @@ zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx,
} else {
new_blksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
}
- zfs_grow_blocksize(zp, new_blksz, tx);
+ dmu_tx_hold_write(tx, zp->z_id, 0, MIN(end, new_blksz));
+ } else if (off < size) {
+ /*
+ * If len == 0, we are truncating the file.
+ */
+ dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END);
+ }
+
+ error = dmu_tx_assign(tx, zfsvfs->z_assign);
+ if (error) {
+ dmu_tx_abort(tx);
+ zfs_range_unlock(zp, rl);
+ return (error);
}
+
+ if (new_blksz)
+ zfs_grow_blocksize(zp, new_blksz, tx);
+
if (end > size || len == 0)
zp->z_phys->zp_size = end;
- if (from > size)
- return (0);
+
+ if (off < size) {
+ objset_t *os = zfsvfs->z_os;
+
+ if (len == 0)
+ len = -1;
+ else if (end > size)
+ len = size - off;
+ VERIFY(0 == dmu_free_range(os, zp->z_id, off, len, tx));
+ }
+
+ if (log) {
+ zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
+ seq = zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
+ }
+
+ zfs_range_unlock(zp, rl);
+
+ dmu_tx_commit(tx);
+
+ if (log)
+ zil_commit(zilog, seq, 0);
/*
- * Clear any mapped pages in the truncated region.
+ * Clear any mapped pages in the truncated region. This has to
+ * happen outside of the transaction to avoid the possibility of
+ * a deadlock with someone trying to push a page that we are
+ * about to invalidate.
*/
rw_enter(&zp->z_map_lock, RW_WRITER);
- if (vn_has_cached_data(vp)) {
+ if (off < size && vn_has_cached_data(vp)) {
page_t *pp;
- uint64_t start = from & PAGEMASK;
- int off = from & PAGEOFFSET;
+ uint64_t start = off & PAGEMASK;
+ int poff = off & PAGEOFFSET;
- if (off != 0 && (pp = page_lookup(vp, start, SE_SHARED))) {
+ if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) {
/*
* We need to zero a partial page.
*/
- pagezero(pp, off, PAGESIZE - off);
+ pagezero(pp, poff, PAGESIZE - poff);
start += PAGESIZE;
page_unlock(pp);
}
error = pvn_vplist_dirty(vp, start, zfs_no_putpage,
- B_INVAL | B_TRUNC, cr);
+ B_INVAL | B_TRUNC, NULL);
ASSERT(error == 0);
}
rw_exit(&zp->z_map_lock);
- if (len == 0)
- len = -1;
- else if (end > size)
- len = size - from;
- VERIFY(0 == dmu_free_range(zp->z_zfsvfs->z_os,
- zp->z_id, from, len, tx));
-
return (0);
}