diff options
author | maybee <none@none> | 2006-04-24 18:28:45 -0700 |
---|---|---|
committer | maybee <none@none> | 2006-04-24 18:28:45 -0700 |
commit | 5730cc9a43c5f11a472d7536ed81facfd10f1e2e (patch) | |
tree | 93f9154e2ff22af070ec950fb2342ee8ea9aab0b /usr/src | |
parent | 75212827077c6a9a0e60c5f619c961a68d7433ba (diff) | |
download | illumos-gate-5730cc9a43c5f11a472d7536ed81facfd10f1e2e.tar.gz |
6413573 deadlock between fsflush() and zfs_create()
6416101 du inside snapshot produces bad sizes and paths
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zfs_znode.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_dir.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_vnops.c | 184 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_znode.c | 124 |
4 files changed, 131 insertions, 183 deletions
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h index 019eefb1a5..c585d2af78 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h @@ -242,8 +242,7 @@ extern void zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx); extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *); extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *); extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *); -extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, dmu_tx_t *, - cred_t *cr); +extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t); extern void zfs_znode_init(void); extern void zfs_znode_fini(void); extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **); diff --git a/usr/src/uts/common/fs/zfs/zfs_dir.c b/usr/src/uts/common/fs/zfs/zfs_dir.c index d73315b47d..8262e9d882 100644 --- a/usr/src/uts/common/fs/zfs/zfs_dir.c +++ b/usr/src/uts/common/fs/zfs/zfs_dir.c @@ -250,7 +250,8 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp) * If we are a snapshot mounted under .zfs, return * the vp for the snapshot directory. */ - if (zfsvfs->z_parent != zfsvfs) { + if (dzp->z_phys->zp_parent == dzp->z_id && + zfsvfs->z_parent != zfsvfs) { error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, "snapshot", vpp, NULL, 0, NULL, kcred); return (error); diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 8a953174ec..6edb60c7b0 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -1045,7 +1045,6 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, objset_t *os = zfsvfs->z_os; zfs_dirlock_t *dl; dmu_tx_t *tx; - rl_t *rl; int error; uint64_t zoid; @@ -1144,51 +1143,24 @@ top: if (mode && (error = zfs_zaccess_rwx(zp, mode, cr))) { goto out; } - /* - * Truncate regular files if requested. - */ - - /* - * Need to update dzp->z_seq? - */ mutex_enter(&dzp->z_lock); dzp->z_seq++; mutex_exit(&dzp->z_lock); - if ((ZTOV(zp)->v_type == VREG) && (zp->z_phys->zp_size != 0) && + /* + * Truncate regular files if requested. + */ + if ((ZTOV(zp)->v_type == VREG) && + (zp->z_phys->zp_size != 0) && (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { - /* - * Truncate the file. - */ - tx = dmu_tx_create(os); - dmu_tx_hold_bonus(tx, zoid); - dmu_tx_hold_free(tx, zoid, 0, DMU_OBJECT_END); - /* Lock the whole range of the file */ - rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); - error = dmu_tx_assign(tx, zfsvfs->z_assign); - if (error) { - dmu_tx_abort(tx); - zfs_range_unlock(zp, rl); - if (dl) - zfs_dirent_unlock(dl); - VN_RELE(ZTOV(zp)); - if (error == ERESTART && - zfsvfs->z_assign == TXG_NOWAIT) { - txg_wait_open(dmu_objset_pool(os), 0); - goto top; - } - ZFS_EXIT(zfsvfs); - return (error); - } - error = zfs_freesp(zp, 0, 0, mode, tx, cr); - if (error == 0) { - zfs_time_stamper(zp, CONTENT_MODIFIED, tx); - seq = zfs_log_truncate(zilog, tx, - TX_TRUNCATE, zp, 0, 0); + error = zfs_freesp(zp, 0, 0, mode, TRUE); + if (error == ERESTART && + zfsvfs->z_assign == TXG_NOWAIT) { + zfs_dirent_unlock(dl); + txg_wait_open(dmu_objset_pool(os), 0); + goto top; } - zfs_range_unlock(zp, rl); - dmu_tx_commit(tx); } } out: @@ -1937,12 +1909,10 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, zilog_t *zilog = zfsvfs->z_log; uint64_t seq = 0; dmu_tx_t *tx; - rl_t *rl; - uint_t mask = vap->va_mask; - uint_t mask_applied = 0; vattr_t oldva; + uint_t mask = vap->va_mask; + uint_t saved_mask; int trim_mask = FALSE; - int saved_mask; uint64_t new_mode; znode_t *attrzp; int need_policy = FALSE; @@ -1963,7 +1933,6 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, ZFS_ENTER(zfsvfs); top: - rl = NULL; attrzp = NULL; if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { @@ -1981,6 +1950,21 @@ top: ZFS_EXIT(zfsvfs); return (err); } + /* + * XXX - Note, we are not providing any open + * mode flags here (like FNDELAY), so we may + * block if there are locks present... this + * should be addressed in openat(). + */ + do { + if (err == ERESTART) + txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0); + err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); + } while (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT); + if (err) { + ZFS_EXIT(zfsvfs); + return (err); + } } if (mask & (AT_ATIME|AT_MTIME)) @@ -2085,27 +2069,10 @@ top: 0, ZFS_ACL_SIZE(MAX_ACL_SIZE)); } - if (mask & AT_SIZE) { - uint64_t off = vap->va_size; - /* - * Range lock the entire file, to ensure the truncate - * is serialised. - */ - rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); - ASSERT(rl != NULL); - if (off < zp->z_phys->zp_size) - dmu_tx_hold_free(tx, zp->z_id, off, DMU_OBJECT_END); - else if (zp->z_blksz < zfsvfs->z_max_blksz && off > zp->z_blksz) - /* we will rewrite this block if we grow */ - dmu_tx_hold_write(tx, zp->z_id, 0, zp->z_phys->zp_size); - } - if ((mask & (AT_UID | AT_GID)) && zp->z_phys->zp_xattr != 0) { err = zfs_zget(zp->z_zfsvfs, zp->z_phys->zp_xattr, &attrzp); if (err) { dmu_tx_abort(tx); - if (rl != NULL) - zfs_range_unlock(zp, rl); ZFS_EXIT(zfsvfs); return (err); } @@ -2117,8 +2084,6 @@ top: if (attrzp) VN_RELE(ZTOV(attrzp)); dmu_tx_abort(tx); - if (rl != NULL) - zfs_range_unlock(zp, rl); if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0); goto top; @@ -2136,22 +2101,6 @@ top: * Note: you cannot set ctime directly, although it will be * updated as a side-effect of calling this function. */ - if (mask & AT_SIZE) { - /* - * XXX - Note, we are not providing any open - * mode flags here (like FNDELAY), so we may - * block if there are locks present... this - * should be addressed in openat(). - */ - err = zfs_freesp(zp, vap->va_size, 0, 0, tx, cr); - if (err) { - mutex_enter(&zp->z_lock); - goto out; - } - mask_applied |= AT_SIZE; - } - - mask_applied = mask; /* no errors after this point */ mutex_enter(&zp->z_lock); @@ -2185,25 +2134,19 @@ top: if (mask & AT_MTIME) ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); - if (mask_applied & AT_SIZE) + if (mask & AT_SIZE) zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); - else if (mask_applied != 0) + else if (mask != 0) zfs_time_stamper_locked(zp, STATE_CHANGED, tx); -out: - - if (mask_applied != 0) - seq = zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, - mask_applied); + if (mask != 0) + seq = zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask); mutex_exit(&zp->z_lock); if (attrzp) VN_RELE(ZTOV(attrzp)); - if (rl != NULL) - zfs_range_unlock(zp, rl); - dmu_tx_commit(tx); zil_commit(zilog, seq, 0); @@ -3437,12 +3380,8 @@ static int zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, cred_t *cr, caller_context_t *ct) { - dmu_tx_t *tx; znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zilog_t *zilog = zfsvfs->z_log; - rl_t *rl; - uint64_t seq = 0; uint64_t off, len; int error; @@ -3466,63 +3405,12 @@ top: off = bfp->l_start; len = bfp->l_len; /* 0 means from off to end of file */ - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_bonus(tx, zp->z_id); - /* - * If we will change zp_size (in zfs_freesp) then lock the whole file, - * otherwise just lock the range being freed. - */ - if (len == 0 || off + len > zp->z_phys->zp_size) { - rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); - } else { - rl = zfs_range_lock(zp, off, len, RL_WRITER); - /* recheck, in case zp_size changed */ - if (off + len > zp->z_phys->zp_size) { - /* lost race: file size changed, lock whole file */ - zfs_range_unlock(zp, rl); - rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); - } - } - if (off + len > zp->z_blksz && zp->z_blksz < zfsvfs->z_max_blksz && - off >= zp->z_phys->zp_size) { - /* - * We are increasing the length of the file, - * and this may mean a block size increase. - */ - dmu_tx_hold_write(tx, zp->z_id, 0, - MIN(off + len, zfsvfs->z_max_blksz)); - } else if (off < zp->z_phys->zp_size) { - /* - * If len == 0, we are truncating the file. - */ - dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END); - } - - error = dmu_tx_assign(tx, zfsvfs->z_assign); - if (error) { - dmu_tx_abort(tx); - zfs_range_unlock(zp, rl); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + do { + if (error == ERESTART) txg_wait_open(dmu_objset_pool(zfsvfs->z_os), 0); - goto top; - } - ZFS_EXIT(zfsvfs); - return (error); - } - - error = zfs_freesp(zp, off, len, flag, tx, cr); - - if (error == 0) { - zfs_time_stamper(zp, CONTENT_MODIFIED, tx); - seq = zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); - } - - zfs_range_unlock(zp, rl); - - dmu_tx_commit(tx); - - zil_commit(zilog, seq, 0); + error = zfs_freesp(zp, off, len, flag, TRUE); + } while (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT); ZFS_EXIT(zfsvfs); return (error); diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c index 15b23b5854..5472481e02 100644 --- a/usr/src/uts/common/fs/zfs/zfs_znode.c +++ b/usr/src/uts/common/fs/zfs/zfs_znode.c @@ -902,30 +902,51 @@ zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, * Free space in a file. * * IN: zp - znode of file to free data in. - * from - start of section to free. + * off - start of section to free. * len - length of section to free (0 => to EOF). * flag - current file open mode flags. - * tx - open transaction. * * RETURN: 0 if success * error code if failure */ int -zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx, - cred_t *cr) +zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { vnode_t *vp = ZTOV(zp); - uint64_t size = zp->z_phys->zp_size; - uint64_t end = from + len; + dmu_tx_t *tx; + zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zilog_t *zilog = zfsvfs->z_log; + rl_t *rl; + uint64_t seq = 0; + uint64_t end = off + len; + uint64_t size, new_blksz; int error; if (ZTOV(zp)->v_type == VFIFO) return (0); /* + * If we will change zp_size then lock the whole file, + * otherwise just lock the range being freed. + */ + if (len == 0 || off + len > zp->z_phys->zp_size) { + rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); + } else { + rl = zfs_range_lock(zp, off, len, RL_WRITER); + /* recheck, in case zp_size changed */ + if (off + len > zp->z_phys->zp_size) { + /* lost race: file size changed, lock whole file */ + zfs_range_unlock(zp, rl); + rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); + } + } + + /* * Nothing to do if file already at desired length. */ - if (len == 0 && size == from) { + size = zp->z_phys->zp_size; + if (len == 0 && size == off) { + zfs_range_unlock(zp, rl); return (0); } @@ -933,19 +954,26 @@ zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx, * Check for any locks in the region to be freed. */ if (MANDLOCK(vp, (mode_t)zp->z_phys->zp_mode)) { - uint64_t start; + uint64_t start = off; + uint64_t extent = len; - if (size > from) - start = from; - else + if (off > size) { start = size; - if (error = chklock(vp, FWRITE, start, 0, flag, NULL)) + extent += off - size; + } else if (len == 0) { + extent = size - off; + } + if (error = chklock(vp, FWRITE, start, extent, flag, NULL)) { + zfs_range_unlock(zp, rl); return (error); + } } - if (end > zp->z_blksz && (!ISP2(zp->z_blksz) || - zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) { - uint64_t new_blksz; + tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_hold_bonus(tx, zp->z_id); + new_blksz = 0; + if (end > size && + (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { /* * We are growing the file past the current block size. */ @@ -955,43 +983,75 @@ zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx, } else { new_blksz = MIN(end, zp->z_zfsvfs->z_max_blksz); } - zfs_grow_blocksize(zp, new_blksz, tx); + dmu_tx_hold_write(tx, zp->z_id, 0, MIN(end, new_blksz)); + } else if (off < size) { + /* + * If len == 0, we are truncating the file. + */ + dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END); + } + + error = dmu_tx_assign(tx, zfsvfs->z_assign); + if (error) { + dmu_tx_abort(tx); + zfs_range_unlock(zp, rl); + return (error); } + + if (new_blksz) + zfs_grow_blocksize(zp, new_blksz, tx); + if (end > size || len == 0) zp->z_phys->zp_size = end; - if (from > size) - return (0); + + if (off < size) { + objset_t *os = zfsvfs->z_os; + + if (len == 0) + len = -1; + else if (end > size) + len = size - off; + VERIFY(0 == dmu_free_range(os, zp->z_id, off, len, tx)); + } + + if (log) { + zfs_time_stamper(zp, CONTENT_MODIFIED, tx); + seq = zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); + } + + zfs_range_unlock(zp, rl); + + dmu_tx_commit(tx); + + if (log) + zil_commit(zilog, seq, 0); /* - * Clear any mapped pages in the truncated region. + * Clear any mapped pages in the truncated region. This has to + * happen outside of the transaction to avoid the possibility of + * a deadlock with someone trying to push a page that we are + * about to invalidate. */ rw_enter(&zp->z_map_lock, RW_WRITER); - if (vn_has_cached_data(vp)) { + if (off < size && vn_has_cached_data(vp)) { page_t *pp; - uint64_t start = from & PAGEMASK; - int off = from & PAGEOFFSET; + uint64_t start = off & PAGEMASK; + int poff = off & PAGEOFFSET; - if (off != 0 && (pp = page_lookup(vp, start, SE_SHARED))) { + if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) { /* * We need to zero a partial page. */ - pagezero(pp, off, PAGESIZE - off); + pagezero(pp, poff, PAGESIZE - poff); start += PAGESIZE; page_unlock(pp); } error = pvn_vplist_dirty(vp, start, zfs_no_putpage, - B_INVAL | B_TRUNC, cr); + B_INVAL | B_TRUNC, NULL); ASSERT(error == 0); } rw_exit(&zp->z_map_lock); - if (len == 0) - len = -1; - else if (end > size) - len = size - from; - VERIFY(0 == dmu_free_range(zp->z_zfsvfs->z_os, - zp->z_id, from, len, tx)); - return (0); } |