summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/fs
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/fs')
-rw-r--r--usr/src/uts/common/fs/nfs/nfs3_srv.c35
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_srv.c100
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_srv_attr.c6
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_subr.c19
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_server.c22
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_srv.c21
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_delete.c3
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_kutil.c112
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_odir.c11
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_pathname.c5
-rw-r--r--usr/src/uts/common/fs/vfs.c40
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c86
-rw-r--r--usr/src/uts/common/fs/zfs/dbuf.c19
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_tx.c2
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c6
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c75
-rw-r--r--usr/src/uts/common/fs/zfs/sys/arc.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/sa_impl.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa_boot.h6
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa_impl.h4
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h10
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_context.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h12
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zio.h4
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c38
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_disk.c21
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_file.c67
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_queue.c14
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vfsops.c34
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_znode.c34
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c2
-rw-r--r--usr/src/uts/common/fs/zfs/zio_inject.c65
34 files changed, 606 insertions, 276 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c
index c72f823cd3..4acbe92ad9 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c
@@ -433,16 +433,25 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
goto out1;
}
+ exi_hold(exi);
+
/*
* If the public filehandle is used then allow
* a multi-component lookup
*/
if (PUBLIC_FH3(&args->what.dir)) {
+ struct exportinfo *new;
+
publicfh_flag = TRUE;
+
error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
- &exi, &sec);
- if (error && exi != NULL)
- exi_rele(exi); /* See comment below Re: publicfh_flag */
+ &new, &sec);
+
+ if (error == 0) {
+ exi_rele(exi);
+ exi = new;
+ }
+
/*
* Since WebNFS may bypass MOUNT, we need to ensure this
* request didn't come from an unlabeled admin_low client.
@@ -464,8 +473,6 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
if (tp == NULL || tp->tpc_tp.tp_doi !=
l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
SUN_CIPSO) {
- if (exi != NULL)
- exi_rele(exi);
VN_RELE(vp);
resp->status = NFS3ERR_ACCES;
error = 1;
@@ -491,8 +498,6 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
if (!blequal(&l_admin_low->tsl_label, clabel)) {
if (!do_rfs_label_check(clabel, dvp,
DOMINANCE_CHECK, exi)) {
- if (publicfh_flag && exi != NULL)
- exi_rele(exi);
VN_RELE(vp);
resp->status = NFS3ERR_ACCES;
error = 1;
@@ -519,18 +524,10 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
goto out;
}
- /*
- * If publicfh_flag is true then we have called rfs_publicfh_mclookup
- * and have obtained a new exportinfo in exi which needs to be
- * released. Note the the original exportinfo pointed to by exi
- * will be released by the caller, common_dispatch.
- */
- if (publicfh_flag)
- exi_rele(exi);
-
va.va_mask = AT_ALL;
vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
+ exi_rele(exi);
VN_RELE(vp);
resp->status = NFS3_OK;
@@ -552,6 +549,12 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
return;
out:
+ /*
+ * The passed argument exportinfo is released by the
+ * caller, common_dispatch
+ */
+ exi_rele(exi);
+
if (curthread->t_flag & T_WOULDBLOCK) {
curthread->t_flag &= ~T_WOULDBLOCK;
resp->status = NFS3ERR_JUKEBOX;
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c
index 29a9d67497..f2a9734541 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c
@@ -21,6 +21,9 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
@@ -1131,6 +1134,7 @@ rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
char *nm;
struct sockaddr *ca;
char *name = NULL;
+ nfsstat4 status = NFS4_OK;
DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
SECINFO4args *, args);
@@ -1154,11 +1158,12 @@ rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
* do not error out if the component name is a "..".
* SECINFO will return its parents secinfo data for SECINFO "..".
*/
- if (!utf8_dir_verify(utfnm)) {
+ status = utf8_dir_verify(utfnm);
+ if (status != NFS4_OK) {
if (utfnm->utf8string_len != 2 ||
utfnm->utf8string_val[0] != '.' ||
utfnm->utf8string_val[1] != '.') {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ *cs->statusp = resp->status = status;
goto out;
}
}
@@ -1336,7 +1341,8 @@ rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
blequal(clabel, slabel)))
resp->access |=
(args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
- resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
+ resp->supported |=
+ resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
}
if (checkwriteperm &&
@@ -1570,8 +1576,9 @@ rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
*cs->statusp = resp->status = NFS4ERR_NOTDIR;
goto out;
}
- if (!utf8_dir_verify(&args->objname)) {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ status = utf8_dir_verify(&args->objname);
+ if (status != NFS4_OK) {
+ *cs->statusp = resp->status = status;
goto out;
}
@@ -2446,6 +2453,7 @@ rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
uint_t len;
struct sockaddr *ca;
char *name = NULL;
+ nfsstat4 status;
DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
LINK4args *, args);
@@ -2495,8 +2503,9 @@ rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
goto out;
}
- if (!utf8_dir_verify(&args->newname)) {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ status = utf8_dir_verify(&args->newname);
+ if (status != NFS4_OK) {
+ *cs->statusp = resp->status = status;
goto out;
}
@@ -2886,6 +2895,7 @@ rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
uint_t len;
struct sockaddr *ca;
char *name = NULL;
+ nfsstat4 status;
DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
LOOKUP4args *, args);
@@ -2905,8 +2915,9 @@ rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
goto out;
}
- if (!utf8_dir_verify(&args->objname)) {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ status = utf8_dir_verify(&args->objname);
+ if (status != NFS4_OK) {
+ *cs->statusp = resp->status = status;
goto out;
}
@@ -3655,30 +3666,6 @@ out:
}
/*
- * A directory entry is a valid nfsv4 entry if
- * - it has a non-zero ino
- * - it is not a dot or dotdot name
- * - it is visible in a pseudo export or in a real export that can
- * only have a limited view.
- */
-static bool_t
-valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
- int *expseudo, int check_visible)
-{
- if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
- *expseudo = 0;
- return (FALSE);
- }
-
- if (! check_visible) {
- *expseudo = 0;
- return (TRUE);
- }
-
- return (nfs_visible_inode(exi, dp->d_ino, expseudo));
-}
-
-/*
* set_rdattr_params sets up the variables used to manage what information
* to get for each directory entry.
*/
@@ -4101,6 +4088,7 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
bslabel_t *clabel;
struct sockaddr *ca;
char *name = NULL;
+ nfsstat4 status;
DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
REMOVE4args *, args);
@@ -4131,8 +4119,9 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
goto out;
}
- if (!utf8_dir_verify(&args->target)) {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ status = utf8_dir_verify(&args->target);
+ if (status != NFS4_OK) {
+ *cs->statusp = resp->status = status;
goto out;
}
@@ -4398,6 +4387,7 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
struct sockaddr *ca;
char *converted_onm = NULL;
char *converted_nnm = NULL;
+ nfsstat4 status;
DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
RENAME4args *, args);
@@ -4454,13 +4444,15 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
goto out;
}
- if (!utf8_dir_verify(&args->oldname)) {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ status = utf8_dir_verify(&args->oldname);
+ if (status != NFS4_OK) {
+ *cs->statusp = resp->status = status;
goto out;
}
- if (!utf8_dir_verify(&args->newname)) {
- *cs->statusp = resp->status = NFS4ERR_INVAL;
+ status = utf8_dir_verify(&args->newname);
+ if (status != NFS4_OK) {
+ *cs->statusp = resp->status = status;
goto out;
}
@@ -5789,6 +5781,8 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
cs.statusp = &resp->status;
cs.req = req;
+ resp->array = NULL;
+ resp->array_len = 0;
/*
* XXX for now, minorversion should be zero
@@ -5796,14 +5790,17 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
if (args->minorversion != NFS4_MINORVERSION) {
DTRACE_NFSV4_2(compound__start, struct compound_state *,
&cs, COMPOUND4args *, args);
- resp->array_len = 0;
- resp->array = NULL;
resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
DTRACE_NFSV4_2(compound__done, struct compound_state *,
&cs, COMPOUND4res *, resp);
return;
}
+ if (args->array_len == 0) {
+ resp->status = NFS4_OK;
+ return;
+ }
+
ASSERT(exi == NULL);
ASSERT(cr == NULL);
@@ -6079,8 +6076,9 @@ rfs4_lookup(component4 *component, struct svc_req *req,
return (NFS4ERR_NOTDIR);
}
- if (!utf8_dir_verify(component))
- return (NFS4ERR_INVAL);
+ status = utf8_dir_verify(component);
+ if (status != NFS4_OK)
+ return (status);
nm = utf8_to_fn(component, &len, NULL);
if (nm == NULL) {
@@ -6372,8 +6370,9 @@ rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
* the including directory on success.
*/
component = &args->open_claim4_u.file;
- if (!utf8_dir_verify(component))
- return (NFS4ERR_INVAL);
+ status = utf8_dir_verify(component);
+ if (status != NFS4_OK)
+ return (status);
nm = utf8_to_fn(component, &buflen, NULL);
@@ -7594,6 +7593,12 @@ rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
goto out;
}
+ if (cs->vp->v_type != VREG) {
+ *cs->statusp = resp->status =
+ cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
+ return;
+ }
+
status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
if (status != NFS4_OK) {
*cs->statusp = resp->status = status;
@@ -7709,6 +7714,11 @@ rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
goto out;
}
+ if (cs->vp->v_type != VREG) {
+ *cs->statusp = resp->status = NFS4ERR_INVAL;
+ return;
+ }
+
status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
if (status != NFS4_OK) {
*cs->statusp = resp->status = status;
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c
index dbd3263608..855cd8cd92 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c
@@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
#include <sys/systm.h>
#include <sys/cmn_err.h>
@@ -1585,7 +1588,8 @@ rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg,
case NFS4ATTR_GETIT:
fsl = fetch_referral(sarg->cs->vp, sarg->cs->cr);
if (fsl == NULL)
- error = EINVAL;
+ (void) memset(&(na->fs_locations), 0,
+ sizeof (fs_locations4));
else {
na->fs_locations = *fsl;
kmem_free(fsl, sizeof (fs_locations4));
diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c
index c14117c009..cfac742707 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c
@@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ */
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
@@ -713,33 +716,33 @@ utf8_compare(const utf8string *a, const utf8string *b)
/*
* utf8_dir_verify - checks that the utf8 string is valid
*/
-int
+nfsstat4
utf8_dir_verify(utf8string *str)
{
char *nm;
int len;
if (str == NULL)
- return (0);
+ return (NFS4ERR_INVAL);
nm = str->utf8string_val;
len = str->utf8string_len;
if (nm == NULL || len == 0) {
- return (0);
+ return (NFS4ERR_INVAL);
}
if (len == 1 && nm[0] == '.')
- return (0);
+ return (NFS4ERR_BADNAME);
if (len == 2 && nm[0] == '.' && nm[1] == '.')
- return (0);
+ return (NFS4ERR_BADNAME);
if (utf8_strchr(str, '/') != NULL)
- return (0);
+ return (NFS4ERR_BADNAME);
if (utf8_strchr(str, '\0') != NULL)
- return (0);
+ return (NFS4ERR_BADNAME);
- return (1);
+ return (NFS4_OK);
}
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c
index bb625bb175..22d1ad4d68 100644
--- a/usr/src/uts/common/fs/nfs/nfs_server.c
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c
@@ -22,6 +22,7 @@
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
* Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -2804,8 +2805,8 @@ rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
*/
/* Release the reference on the old exi value */
- ASSERT(*exi != NULL);
exi_rele(*exi);
+ *exi = NULL;
if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
VN_RELE(*vpp);
@@ -2818,6 +2819,9 @@ publicfh_done:
if (mc_dvp)
VN_RELE(mc_dvp);
+ if (error && *exi != NULL)
+ exi_rele(*exi);
+
return (error);
}
@@ -2963,16 +2967,19 @@ URLparse(char *str)
/*
* Get the export information for the lookup vnode, and verify its
* useable.
+ *
+ * Set @exip only in success
*/
int
nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
- struct exportinfo **exi)
+ struct exportinfo **exip)
{
int walk;
int error = 0;
+ struct exportinfo *exi;
- *exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
- if (*exi == NULL)
+ exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
+ if (exi == NULL)
error = EACCES;
else {
/*
@@ -2981,10 +2988,13 @@ nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
* must not terminate below the
* exported directory.
*/
- if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
+ if (exi->exi_export.ex_flags & EX_NOSUB && walk > 0) {
error = EACCES;
+ exi_rele(exi);
+ }
}
-
+ if (error == 0)
+ *exip = exi;
return (error);
}
diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c
index 8ca8ee5d1d..f0cd9633aa 100644
--- a/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs_srv.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -399,6 +400,8 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
return;
}
+ exi_hold(exi);
+
/*
* If the public filehandle is used then allow
* a multi-component lookup, i.e. evaluate
@@ -409,9 +412,16 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
* which is OK as long as the filesystem is exported.
*/
if (PUBLIC_FH2(fhp)) {
+ struct exportinfo *new;
+
publicfh_flag = TRUE;
- error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
+ error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new,
&sec);
+
+ if (error == 0) {
+ exi_rele(exi);
+ exi = new;
+ }
} else {
/*
* Do a normal single component lookup.
@@ -452,13 +462,10 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
VN_RELE(dvp);
/*
- * If publicfh_flag is true then we have called rfs_publicfh_mclookup
- * and have obtained a new exportinfo in exi which needs to be
- * released. Note the the original exportinfo pointed to by exi
- * will be released by the caller, comon_dispatch.
+ * The passed argument exportinfo is released by the
+ * caller, comon_dispatch
*/
- if (publicfh_flag && exi != NULL)
- exi_rele(exi);
+ exi_rele(exi);
/*
* If it's public fh, no 0x81, and client's flavor is
diff --git a/usr/src/uts/common/fs/smbsrv/smb_delete.c b/usr/src/uts/common/fs/smbsrv/smb_delete.c
index 43f6d733bd..8a27b7408e 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_delete.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_delete.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include <smbsrv/smb_kproto.h>
@@ -553,7 +554,7 @@ smb_delete_check_path(smb_request_t *sr)
/* fname component is, or resolves to, '.' (dot) */
if ((strcmp(pn->pn_fname, ".") == 0) ||
(SMB_SEARCH_DIRECTORY(fqi->fq_sattr) &&
- (smb_match(pn->pn_fname, ".")))) {
+ (smb_match(pn->pn_fname, ".", B_FALSE)))) {
smbsr_error(sr, NT_STATUS_OBJECT_NAME_INVALID,
ERRDOS, ERROR_INVALID_NAME);
return (-1);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_kutil.c b/usr/src/uts/common/fs/smbsrv/smb_kutil.c
index 5d45081e2e..aed58277be 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_kutil.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_kutil.c
@@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -98,116 +100,34 @@ smb_ascii_or_unicode_null_len(struct smb_request *sr)
}
/*
- * Return B_TRUE if pattern contains wildcards
- */
-boolean_t
-smb_contains_wildcards(const char *pattern)
-{
- static const char *wildcards = "*?";
-
- return (strpbrk(pattern, wildcards) != NULL);
-}
-
-/*
- * When converting wildcards a '.' in a name is treated as a base and
- * extension separator even if the name is longer than 8.3.
- *
- * The '*' character matches an entire part of the name. For example,
- * "*.abc" matches any name with an extension of "abc".
*
- * The '?' character matches a single character.
- * If the base contains all ? (8 or more) then it is treated as *.
- * If the extension contains all ? (3 or more) then it is treated as *.
- *
- * Clients convert ASCII wildcards to Unicode wildcards as follows:
+ * Convert old-style (DOS, LanMan) wildcard strings to NT style.
+ * This should ONLY happen to patterns that come from old clients,
+ * meaning dialect LANMAN2_1 etc. (dialect < NT_LM_0_12).
*
* ? is converted to >
- * . is converted to " if it is followed by ? or *
* * is converted to < if it is followed by .
+ * . is converted to " if it is followed by ? or * or end of pattern
*
- * Note that clients convert "*." to '< and drop the '.' but "*.txt"
- * is sent as "<.TXT", i.e.
- *
- * dir *. -> dir <
- * dir *.txt -> dir <.TXT
- *
- * Since " and < are illegal in Windows file names, we always convert
- * these Unicode wildcards without checking the following character.
+ * Note: modifies pattern in place.
*/
void
smb_convert_wildcards(char *pattern)
{
- static char *match_all[] = {
- "*.",
- "*.*"
- };
- char *extension;
char *p;
- int len;
- int i;
- /*
- * Special case "<" for "dir *.", and fast-track for "*".
- */
- if ((*pattern == '<') || (*pattern == '*')) {
- if (*(pattern + 1) == '\0') {
- *pattern = '*';
- return;
- }
- }
-
- for (p = pattern; *p != '\0'; ++p) {
+ for (p = pattern; *p != '\0'; p++) {
switch (*p) {
- case '<':
- *p = '*';
- break;
- case '>':
- *p = '?';
+ case '?':
+ *p = '>';
break;
- case '\"':
- *p = '.';
+ case '*':
+ if (p[1] == '.')
+ *p = '<';
break;
- default:
- break;
- }
- }
-
- /*
- * Replace "????????.ext" with "*.ext".
- */
- p = pattern;
- p += strspn(p, "?");
- if (*p == '.') {
- *p = '\0';
- len = strlen(pattern);
- *p = '.';
- if (len >= SMB_NAME83_BASELEN) {
- *pattern = '*';
- (void) strlcpy(pattern + 1, p, MAXPATHLEN - 1);
- }
- }
-
- /*
- * Replace "base.???" with 'base.*'.
- */
- if ((extension = strrchr(pattern, '.')) != NULL) {
- p = ++extension;
- p += strspn(p, "?");
- if (*p == '\0') {
- len = strlen(extension);
- if (len >= SMB_NAME83_EXTLEN) {
- *extension = '\0';
- (void) strlcat(pattern, "*", MAXPATHLEN);
- }
- }
- }
-
- /*
- * Replace anything that matches an entry in match_all with "*".
- */
- for (i = 0; i < sizeof (match_all) / sizeof (match_all[0]); ++i) {
- if (strcmp(pattern, match_all[i]) == 0) {
- (void) strlcpy(pattern, "*", MAXPATHLEN);
+ case '.':
+ if (p[1] == '?' || p[1] == '*' || p[1] == '\0')
+ *p = '\"';
break;
}
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_odir.c b/usr/src/uts/common/fs/smbsrv/smb_odir.c
index ea9b505f0d..610126753b 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_odir.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_odir.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -286,7 +287,8 @@ smb_odir_open(smb_request_t *sr, char *path, uint16_t sattr, uint32_t flags)
tree = sr->tid_tree;
- smb_convert_wildcards(path);
+ if (sr->session->dialect < NT_LM_0_12)
+ smb_convert_wildcards(path);
rc = smb_pathname_reduce(sr, sr->user_cr, path,
tree->t_snode, tree->t_snode, &dnode, pattern);
@@ -1278,22 +1280,23 @@ smb_odir_lookup_link(smb_request_t *sr, smb_odir_t *od,
* - If shortnames are supported, generate the shortname from
* odirent->od_name and check if it matches od->d_pattern.
*/
-boolean_t
+static boolean_t
smb_odir_match_name(smb_odir_t *od, smb_odirent_t *odirent)
{
char *name = odirent->od_name;
char shortname[SMB_SHORTNAMELEN];
ino64_t ino = odirent->od_ino;
+ boolean_t ci = (od->d_flags & SMB_ODIR_FLAG_IGNORE_CASE) != 0;
if (smb_is_reserved_dos_name(name))
return (B_FALSE);
- if (smb_match_ci(od->d_pattern, name))
+ if (smb_match(od->d_pattern, name, ci))
return (B_TRUE);
if (od->d_flags & SMB_ODIR_FLAG_SHORTNAMES) {
smb_mangle(name, ino, shortname, SMB_SHORTNAMELEN);
- if (smb_match_ci(od->d_pattern, shortname))
+ if (smb_match(od->d_pattern, shortname, ci))
return (B_TRUE);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_pathname.c b/usr/src/uts/common/fs/smbsrv/smb_pathname.c
index e3ae3ffba2..db9883667e 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_pathname.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_pathname.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#include <smbsrv/smb_kproto.h>
@@ -732,8 +733,8 @@ smb_pathname_preprocess(smb_request_t *sr, smb_pathname_t *pn)
return;
}
- /* perform unicode wildcard conversion */
- smb_convert_wildcards(pn->pn_path);
+ if (sr->session->dialect < NT_LM_0_12)
+ smb_convert_wildcards(pn->pn_path);
/* treat '/' as '\\' */
(void) strsubst(pn->pn_path, '/', '\\');
diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c
index abe3a23e75..8d5c741428 100644
--- a/usr/src/uts/common/fs/vfs.c
+++ b/usr/src/uts/common/fs/vfs.c
@@ -936,29 +936,33 @@ vfs_mountroot(void)
}
#endif /* __sparc */
- /*
- * Look up the root device via devfs so that a dv_node is
- * created for it. The vnode is never VN_RELE()ed.
- * We allocate more than MAXPATHLEN so that the
- * buffer passed to i_ddi_prompath_to_devfspath() is
- * exactly MAXPATHLEN (the function expects a buffer
- * of that length).
- */
- plen = strlen("/devices");
- path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP);
- (void) strcpy(path, "/devices");
+ if (strcmp(rootfs.bo_fstype, "zfs") != 0) {
+ /*
+ * Look up the root device via devfs so that a dv_node is
+ * created for it. The vnode is never VN_RELE()ed.
+ * We allocate more than MAXPATHLEN so that the
+ * buffer passed to i_ddi_prompath_to_devfspath() is
+ * exactly MAXPATHLEN (the function expects a buffer
+ * of that length).
+ */
+ plen = strlen("/devices");
+ path = kmem_alloc(plen + MAXPATHLEN, KM_SLEEP);
+ (void) strcpy(path, "/devices");
- if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
- != DDI_SUCCESS ||
- lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {
+ if (i_ddi_prompath_to_devfspath(rootfs.bo_name, path + plen)
+ != DDI_SUCCESS ||
+ lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &rvp)) {
- /* NUL terminate in case "path" has garbage */
- path[plen + MAXPATHLEN - 1] = '\0';
+ /* NUL terminate in case "path" has garbage */
+ path[plen + MAXPATHLEN - 1] = '\0';
#ifdef DEBUG
- cmn_err(CE_WARN, "!Cannot lookup root device: %s", path);
+ cmn_err(CE_WARN, "!Cannot lookup root device: %s",
+ path);
#endif
+ }
+ kmem_free(path, plen + MAXPATHLEN);
}
- kmem_free(path, plen + MAXPATHLEN);
+
vfs_mnttabvp_setup();
}
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 5caabf8260..d8e9f26bdb 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -190,6 +190,7 @@ uint64_t zfs_arc_meta_limit = 0;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
+int zfs_disable_dup_eviction = 0;
/*
* Note that buffers can be in one of 6 states:
@@ -292,6 +293,9 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_size;
kstat_named_t arcstat_l2_hdr_size;
kstat_named_t arcstat_memory_throttle_count;
+ kstat_named_t arcstat_duplicate_buffers;
+ kstat_named_t arcstat_duplicate_buffers_size;
+ kstat_named_t arcstat_duplicate_reads;
} arc_stats_t;
static arc_stats_t arc_stats = {
@@ -347,7 +351,10 @@ static arc_stats_t arc_stats = {
{ "l2_io_error", KSTAT_DATA_UINT64 },
{ "l2_size", KSTAT_DATA_UINT64 },
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
- { "memory_throttle_count", KSTAT_DATA_UINT64 }
+ { "memory_throttle_count", KSTAT_DATA_UINT64 },
+ { "duplicate_buffers", KSTAT_DATA_UINT64 },
+ { "duplicate_buffers_size", KSTAT_DATA_UINT64 },
+ { "duplicate_reads", KSTAT_DATA_UINT64 }
};
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@@ -1362,6 +1369,17 @@ arc_buf_clone(arc_buf_t *from)
hdr->b_buf = buf;
arc_get_data_buf(buf);
bcopy(from->b_data, buf->b_data, size);
+
+ /*
+ * This buffer already exists in the arc so create a duplicate
+ * copy for the caller. If the buffer is associated with user data
+ * then track the size and number of duplicates. These stats will be
+ * updated as duplicate buffers are created and destroyed.
+ */
+ if (hdr->b_type == ARC_BUFC_DATA) {
+ ARCSTAT_BUMP(arcstat_duplicate_buffers);
+ ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
+ }
hdr->b_datacnt += 1;
return (buf);
}
@@ -1460,6 +1478,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
ASSERT3U(state->arcs_size, >=, size);
atomic_add_64(&state->arcs_size, -size);
buf->b_data = NULL;
+
+ /*
+ * If we're destroying a duplicate buffer make sure
+ * that the appropriate statistics are updated.
+ */
+ if (buf->b_hdr->b_datacnt > 1 &&
+ buf->b_hdr->b_type == ARC_BUFC_DATA) {
+ ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
+ ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
+ }
ASSERT(buf->b_hdr->b_datacnt > 0);
buf->b_hdr->b_datacnt -= 1;
}
@@ -1644,6 +1672,48 @@ arc_buf_size(arc_buf_t *buf)
}
/*
+ * Called from the DMU to determine if the current buffer should be
+ * evicted. In order to ensure proper locking, the eviction must be initiated
+ * from the DMU. Return true if the buffer is associated with user data and
+ * duplicate buffers still exist.
+ */
+boolean_t
+arc_buf_eviction_needed(arc_buf_t *buf)
+{
+ arc_buf_hdr_t *hdr;
+ boolean_t evict_needed = B_FALSE;
+
+ if (zfs_disable_dup_eviction)
+ return (B_FALSE);
+
+ mutex_enter(&buf->b_evict_lock);
+ hdr = buf->b_hdr;
+ if (hdr == NULL) {
+ /*
+ * We are in arc_do_user_evicts(); let that function
+ * perform the eviction.
+ */
+ ASSERT(buf->b_data == NULL);
+ mutex_exit(&buf->b_evict_lock);
+ return (B_FALSE);
+ } else if (buf->b_data == NULL) {
+ /*
+ * We have already been added to the arc eviction list;
+ * recommend eviction.
+ */
+ ASSERT3P(hdr, ==, &arc_eviction_hdr);
+ mutex_exit(&buf->b_evict_lock);
+ return (B_TRUE);
+ }
+
+ if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
+ evict_needed = B_TRUE;
+
+ mutex_exit(&buf->b_evict_lock);
+ return (evict_needed);
+}
+
+/*
* Evict buffers from list until we've removed the specified number of
* bytes. Move the removed buffers to the appropriate evict state.
* If the recycle flag is set, then attempt to "recycle" a buffer:
@@ -2638,8 +2708,10 @@ arc_read_done(zio_t *zio)
abuf = buf;
for (acb = callback_list; acb; acb = acb->acb_next) {
if (acb->acb_done) {
- if (abuf == NULL)
+ if (abuf == NULL) {
+ ARCSTAT_BUMP(arcstat_duplicate_reads);
abuf = arc_buf_clone(buf);
+ }
acb->acb_buf = abuf;
abuf = NULL;
}
@@ -3186,6 +3258,16 @@ arc_release(arc_buf_t *buf, void *tag)
ASSERT3U(*size, >=, hdr->b_size);
atomic_add_64(size, -hdr->b_size);
}
+
+ /*
+ * We're releasing a duplicate user data buffer, update
+ * our statistics accordingly.
+ */
+ if (hdr->b_type == ARC_BUFC_DATA) {
+ ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
+ ARCSTAT_INCR(arcstat_duplicate_buffers_size,
+ -hdr->b_size);
+ }
hdr->b_datacnt -= 1;
arc_cksum_verify(buf);
arc_buf_unwatch(buf);
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index 437e0ac85c..e8bf55c321 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -2089,7 +2089,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
- if (!DBUF_IS_CACHEABLE(db))
+
+ /*
+ * A dbuf will be eligible for eviction if either the
+ * 'primarycache' property is set or a duplicate
+ * copy of this buffer is already cached in the arc.
+ *
+ * In the case of the 'primarycache' a buffer
+ * is considered for eviction if it matches the
+ * criteria set in the property.
+ *
+ * To decide if our buffer is considered a
+ * duplicate, we must call into the arc to determine
+ * if multiple buffers are referencing the same
+ * block on-disk. If so, then we simply evict
+ * ourselves.
+ */
+ if (!DBUF_IS_CACHEABLE(db) ||
+ arc_buf_eviction_needed(db->db_buf))
dbuf_clear(db);
else
mutex_exit(&db->db_mtx);
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 190b26e5bf..a9308b0c08 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -574,7 +574,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
(dn->dn_indblkshift - SPA_BLKPTRSHIFT);
while (level++ < maxlevel) {
- txh->txh_memory_tohold += MIN(blkcnt, (nl1blks >> epbs))
+ txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1)
<< dn->dn_indblkshift;
blkcnt = 1 + (blkcnt >> epbs);
}
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index d9cd70f1c8..968fbd80d6 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -5983,6 +5983,10 @@ spa_sync(spa_t *spa, uint64_t txg)
tx = dmu_tx_create_assigned(dp, txg);
+ spa->spa_sync_starttime = gethrtime();
+ VERIFY(cyclic_reprogram(spa->spa_deadman_cycid,
+ spa->spa_sync_starttime + spa->spa_deadman_synctime));
+
/*
* If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
* set spa_deflate if we have no raid-z vdevs.
@@ -6111,6 +6115,8 @@ spa_sync(spa_t *spa, uint64_t txg)
}
dmu_tx_commit(tx);
+ VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY));
+
/*
* Clear the dirty config list.
*/
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 30681b6464..a254c8d656 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -26,6 +26,7 @@
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
+#include <sys/spa_boot.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
@@ -249,6 +250,26 @@ int zfs_flags = 0;
*/
int zfs_recover = 0;
+extern int zfs_txg_synctime_ms;
+
+/*
+ * Expiration time in units of zfs_txg_synctime_ms. This value has two
+ * meanings. First it is used to determine when the spa_deadman logic
+ * should fire. By default the spa_deadman will fire if spa_sync has
+ * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
+ * Secondly, the value determines if an I/O is considered "hung".
+ * Any I/O that has not completed in zfs_deadman_synctime is considered
+ * "hung" resulting in a system panic.
+ * 1000 zfs_txg_synctime_ms (i.e. 1000 seconds).
+ */
+uint64_t zfs_deadman_synctime = 1000ULL;
+
+/*
+ * Override the zfs deadman behavior via /etc/system. By default the
+ * deadman is enabled except on VMware and sparc deployments.
+ */
+int zfs_deadman_enabled = -1;
+
/*
* ==========================================================================
@@ -418,6 +439,23 @@ spa_lookup(const char *name)
}
/*
+ * Fires when spa_sync has not completed within zfs_deadman_synctime_ms.
+ * If the zfs_deadman_enabled flag is set then it inspects all vdev queues
+ * looking for potentially hung I/Os.
+ */
+void
+spa_deadman(void *arg)
+{
+ spa_t *spa = arg;
+
+ zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
+ (gethrtime() - spa->spa_sync_starttime) / NANOSEC,
+ ++spa->spa_deadman_calls);
+ if (zfs_deadman_enabled)
+ vdev_deadman(spa->spa_root_vdev);
+}
+
+/*
* Create an uninitialized spa_t with the given name. Requires
* spa_namespace_lock. The caller must ensure that the spa_t doesn't already
* exist by calling spa_lookup() first.
@@ -427,6 +465,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
{
spa_t *spa;
spa_config_dirent_t *dp;
+ cyc_handler_t hdlr;
+ cyc_time_t when;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -458,6 +498,25 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_proc = &p0;
spa->spa_proc_state = SPA_PROC_NONE;
+ hdlr.cyh_func = spa_deadman;
+ hdlr.cyh_arg = spa;
+ hdlr.cyh_level = CY_LOW_LEVEL;
+
+ spa->spa_deadman_synctime = zfs_deadman_synctime *
+ zfs_txg_synctime_ms * MICROSEC;
+
+ /*
+ * This determines how often we need to check for hung I/Os after
+ * the cyclic has already fired. Since checking for hung I/Os is
+ * an expensive operation we don't want to check too frequently.
+ * Instead wait for 5 synctimes before checking again.
+ */
+ when.cyt_interval = 5ULL * zfs_txg_synctime_ms * MICROSEC;
+ when.cyt_when = CY_INFINITY;
+ mutex_enter(&cpu_lock);
+ spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
+ mutex_exit(&cpu_lock);
+
refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
@@ -540,6 +599,12 @@ spa_remove(spa_t *spa)
nvlist_free(spa->spa_load_info);
spa_config_set(spa, NULL);
+ mutex_enter(&cpu_lock);
+ if (spa->spa_deadman_cycid != CYCLIC_NONE)
+ cyclic_remove(spa->spa_deadman_cycid);
+ mutex_exit(&cpu_lock);
+ spa->spa_deadman_cycid = CYCLIC_NONE;
+
refcount_destroy(&spa->spa_refcount);
spa_config_lock_destroy(spa);
@@ -1507,6 +1572,12 @@ spa_prev_software_version(spa_t *spa)
}
uint64_t
+spa_deadman_synctime(spa_t *spa)
+{
+ return (spa->spa_deadman_synctime);
+}
+
+uint64_t
dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
{
uint64_t asize = DVA_GET_ASIZE(dva);
@@ -1600,7 +1671,9 @@ spa_init(int mode)
spa_mode_global = mode;
-#ifndef _KERNEL
+#ifdef _KERNEL
+ spa_arch_init();
+#else
if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
arc_procfd = open("/proc/self/ctl", O_WRONLY);
if (arc_procfd == -1) {
diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h
index 28dbc57275..b109dcafbc 100644
--- a/usr/src/uts/common/fs/zfs/sys/arc.h
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h
@@ -99,6 +99,7 @@ int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
+boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
diff --git a/usr/src/uts/common/fs/zfs/sys/sa_impl.h b/usr/src/uts/common/fs/zfs/sys/sa_impl.h
index 6661e47cfc..8ae05ce364 100644
--- a/usr/src/uts/common/fs/zfs/sys/sa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/sa_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_SA_IMPL_H
@@ -181,7 +182,7 @@ typedef struct sa_hdr_phys {
*/
#define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10)
-#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0)
+#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 6, 3, 0)
#define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
{ \
BF32_SET_SB(x, 10, 6, 3, 0, size); \
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 1043f4038a..172a9f141e 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -604,6 +604,7 @@ extern boolean_t spa_suspended(spa_t *spa);
extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
+extern uint64_t spa_deadman_synctime(spa_t *spa);
/* Miscellaneous support routines */
extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_boot.h b/usr/src/uts/common/fs/zfs/sys/spa_boot.h
index 1d3622f5a1..8df5072a55 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_boot.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_boot.h
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
#ifndef _SYS_SPA_BOOT_H
#define _SYS_SPA_BOOT_H
@@ -35,6 +39,8 @@ extern "C" {
extern char *spa_get_bootprop(char *prop);
extern void spa_free_bootprop(char *prop);
+extern void spa_arch_init(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index 027832e858..42ce5556d3 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -227,6 +227,10 @@ struct spa {
uint64_t spa_feat_for_write_obj; /* required to write to pool */
uint64_t spa_feat_for_read_obj; /* required to read from pool */
uint64_t spa_feat_desc_obj; /* Feature descriptions */
+ cyclic_id_t spa_deadman_cycid; /* cyclic id */
+ uint64_t spa_deadman_calls; /* number of deadman calls */
+ uint64_t spa_sync_starttime; /* starting time fo spa_sync */
+ uint64_t spa_deadman_synctime; /* deadman expiration timer */
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index 7e34889b61..5a7836612b 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -79,6 +79,7 @@ extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
+extern void vdev_deadman(vdev_t *vd);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index c772d954bb..e4c02bde1d 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -105,6 +105,8 @@ struct vdev_queue {
avl_tree_t vq_write_tree;
avl_tree_t vq_pending_tree;
zoneid_t vq_last_zone_id;
+ uint64_t vq_io_complete_ts;
+ uint64_t vq_io_delta_ts;
kmutex_t vq_lock;
};
@@ -321,6 +323,14 @@ extern void vdev_set_min_asize(vdev_t *vd);
*/
extern int zfs_vdev_cache_size;
+/*
+ * The vdev_buf_t is used to translate between zio_t and buf_t, and back again.
+ */
+typedef struct vdev_buf {
+ buf_t vb_buf; /* buffer that describes the io */
+ zio_t *vb_io; /* pointer back to the original zio_t */
+} vdev_buf_t;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_context.h b/usr/src/uts/common/fs/zfs/sys/zfs_context.h
index fdd0412fee..0dc8d8859c 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_context.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_context.h
@@ -22,8 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_CONTEXT_H
@@ -67,6 +69,7 @@ extern "C" {
#include <sys/sysevent/dev.h>
#include <sys/fm/util.h>
#include <sys/sunddi.h>
+#include <sys/cyclic.h>
#define CPU_SEQID (CPU->cpu_seqid)
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 4d781ad2a4..86e901be0d 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -240,12 +240,24 @@ typedef struct zinject_record {
uint32_t zi_iotype;
int32_t zi_duration;
uint64_t zi_timer;
+ uint32_t zi_cmd;
+ uint32_t zi_pad;
} zinject_record_t;
#define ZINJECT_NULL 0x1
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
+typedef enum zinject_type {
+ ZINJECT_UNINITIALIZED,
+ ZINJECT_DATA_FAULT,
+ ZINJECT_DEVICE_FAULT,
+ ZINJECT_LABEL_FAULT,
+ ZINJECT_IGNORED_WRITES,
+ ZINJECT_PANIC,
+ ZINJECT_DELAY_IO,
+} zinject_type_t;
+
typedef struct zfs_share {
uint64_t z_exportdata;
uint64_t z_sharedata;
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index ce3a983d9f..9c718f691a 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -21,8 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2011 Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
@@ -406,6 +404,7 @@ struct zio {
uint64_t io_offset;
uint64_t io_deadline;
+ uint64_t io_timestamp;
avl_node_t io_offset_node;
avl_node_t io_deadline_node;
avl_tree_t *io_vdev_tree;
@@ -554,6 +553,7 @@ extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
+extern uint64_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index fa0a579e66..18180ecad3 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -3153,3 +3153,41 @@ vdev_split(vdev_t *vd)
}
vdev_propagate_state(cvd);
}
+
+void
+vdev_deadman(vdev_t *vd)
+{
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+
+ vdev_deadman(cvd);
+ }
+
+ if (vd->vdev_ops->vdev_op_leaf) {
+ vdev_queue_t *vq = &vd->vdev_queue;
+
+ mutex_enter(&vq->vq_lock);
+ if (avl_numnodes(&vq->vq_pending_tree) > 0) {
+ spa_t *spa = vd->vdev_spa;
+ zio_t *fio;
+ uint64_t delta;
+
+ /*
+ * Look at the head of all the pending queues,
+ * if any I/O has been outstanding for longer than
+ * the spa_deadman_synctime we panic the system.
+ */
+ fio = avl_first(&vq->vq_pending_tree);
+ delta = ddi_get_lbolt64() - fio->io_timestamp;
+ if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) {
+ zfs_dbgmsg("SLOW IO: zio timestamp %llu, "
+ "delta %llu, last io %llu",
+ fio->io_timestamp, delta,
+ vq->vq_io_complete_ts);
+ fm_panic("I/O to pool '%s' appears to be "
+ "hung.", spa_name(spa));
+ }
+ }
+ mutex_exit(&vq->vq_lock);
+ }
+}
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index 1ba343226f..dfadeca9d4 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -42,11 +42,6 @@
extern ldi_ident_t zfs_li;
-typedef struct vdev_disk_buf {
- buf_t vdb_buf;
- zio_t *vdb_io;
-} vdev_disk_buf_t;
-
static void
vdev_disk_hold(vdev_t *vd)
{
@@ -170,7 +165,7 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
/*
* When opening a disk device, we want to preserve the user's original
* intent. We always want to open the device by the path the user gave
- * us, even if it is one of multiple paths to the save device. But we
+ * us, even if it is one of multiple paths to the same device. But we
* also want to be able to survive disks being removed/recabled.
* Therefore the sequence of opening devices is:
*
@@ -416,8 +411,8 @@ vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
static void
vdev_disk_io_intr(buf_t *bp)
{
- vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
- zio_t *zio = vdb->vdb_io;
+ vdev_buf_t *vb = (vdev_buf_t *)bp;
+ zio_t *zio = vb->vb_io;
/*
* The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
@@ -429,7 +424,7 @@ vdev_disk_io_intr(buf_t *bp)
if (zio->io_error == 0 && bp->b_resid != 0)
zio->io_error = EIO;
- kmem_free(vdb, sizeof (vdev_disk_buf_t));
+ kmem_free(vb, sizeof (vdev_buf_t));
zio_interrupt(zio);
}
@@ -460,7 +455,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
vdev_disk_t *dvd = vd->vdev_tsd;
- vdev_disk_buf_t *vdb;
+ vdev_buf_t *vb;
struct dk_callback *dkc;
buf_t *bp;
int error;
@@ -524,10 +519,10 @@ vdev_disk_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
- vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
+ vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
- vdb->vdb_io = zio;
- bp = &vdb->vdb_buf;
+ vb->vb_io = zio;
+ bp = &vb->vb_buf;
bioinit(bp);
bp->b_flags = B_BUSY | B_NOCACHE |
diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c
index 043fa51294..1fbce5e542 100644
--- a/usr/src/uts/common/fs/zfs/vdev_file.c
+++ b/usr/src/uts/common/fs/zfs/vdev_file.c
@@ -25,6 +25,7 @@
#include <sys/zfs_context.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/vdev_file.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
@@ -140,12 +141,55 @@ vdev_file_close(vdev_t *vd)
vd->vdev_tsd = NULL;
}
+/*
+ * Implements the interrupt side for file vdev types. This routine will be
+ * called when the I/O completes allowing us to transfer the I/O to the
+ * interrupt taskqs. For consistency, the code structure mimics disk vdev
+ * types.
+ */
+static void
+vdev_file_io_intr(buf_t *bp)
+{
+ vdev_buf_t *vb = (vdev_buf_t *)bp;
+ zio_t *zio = vb->vb_io;
+
+ zio->io_error = (geterror(bp) != 0 ? EIO : 0);
+ if (zio->io_error == 0 && bp->b_resid != 0)
+ zio->io_error = ENOSPC;
+
+ kmem_free(vb, sizeof (vdev_buf_t));
+ zio_interrupt(zio);
+}
+
+static void
+vdev_file_io_strategy(void *arg)
+{
+ buf_t *bp = arg;
+ vnode_t *vp = bp->b_private;
+ ssize_t resid;
+ int error;
+
+ error = vn_rdwr((bp->b_flags & B_READ) ? UIO_READ : UIO_WRITE,
+ vp, bp->b_un.b_addr, bp->b_bcount, ldbtob(bp->b_lblkno),
+ UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
+
+ if (error == 0) {
+ bp->b_resid = resid;
+ biodone(bp);
+ } else {
+ bioerror(bp, error);
+ biodone(bp);
+ }
+}
+
static int
vdev_file_io_start(zio_t *zio)
{
+ spa_t *spa = zio->io_spa;
vdev_t *vd = zio->io_vd;
vdev_file_t *vf = vd->vdev_tsd;
- ssize_t resid;
+ vdev_buf_t *vb;
+ buf_t *bp;
if (zio->io_type == ZIO_TYPE_IOCTL) {
/* XXPOLICY */
@@ -166,15 +210,22 @@ vdev_file_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
- zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
- UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
- zio->io_size, zio->io_offset, UIO_SYSSPACE,
- 0, RLIM64_INFINITY, kcred, &resid);
+ vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
- if (resid != 0 && zio->io_error == 0)
- zio->io_error = ENOSPC;
+ vb->vb_io = zio;
+ bp = &vb->vb_buf;
- zio_interrupt(zio);
+ bioinit(bp);
+ bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
+ bp->b_bcount = zio->io_size;
+ bp->b_un.b_addr = zio->io_data;
+ bp->b_lblkno = lbtodb(zio->io_offset);
+ bp->b_bufsize = zio->io_size;
+ bp->b_private = vf->vf_vnode;
+ bp->b_iodone = (int (*)())vdev_file_io_intr;
+
+ taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE],
+ vdev_file_io_strategy, bp, 0, &zio->io_tqent);
return (ZIO_PIPELINE_STOP);
}
diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c
index 4ea958a9f6..8dec283fee 100644
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c
@@ -24,6 +24,10 @@
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
#include <sys/zfs_context.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
@@ -298,6 +302,7 @@ again:
zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL);
+ aio->io_timestamp = fio->io_timestamp;
nio = fio;
do {
@@ -369,7 +374,8 @@ vdev_queue_io(zio_t *zio)
mutex_enter(&vq->vq_lock);
- zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) +
+ zio->io_timestamp = ddi_get_lbolt64();
+ zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
zio->io_priority;
vdev_queue_io_add(vq, zio);
@@ -394,10 +400,16 @@ vdev_queue_io_done(zio_t *zio)
{
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
+ if (zio_injection_enabled)
+ delay(SEC_TO_TICK(zio_handle_io_delay(zio)));
+
mutex_enter(&vq->vq_lock);
avl_remove(&vq->vq_pending_tree, zio);
+ vq->vq_io_complete_ts = ddi_get_lbolt64();
+ vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
+
for (int i = 0; i < zfs_vdev_ramp_rate; i++) {
zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending);
if (nio == NULL)
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index 2292f658b3..c7bfbbaec4 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -50,6 +50,7 @@
#include <sys/spa.h>
#include <sys/zap.h>
#include <sys/sa.h>
+#include <sys/sa_impl.h>
#include <sys/varargs.h>
#include <sys/policy.h>
#include <sys/atomic.h>
@@ -64,7 +65,6 @@
#include <sys/dnlc.h>
#include <sys/dmu_objset.h>
#include <sys/spa_boot.h>
-#include <sys/sa.h>
#include "zfs_comutil.h"
int zfsfstype;
@@ -578,7 +578,6 @@ static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
uint64_t *userp, uint64_t *groupp)
{
- znode_phys_t *znp = data;
int error = 0;
/*
@@ -597,20 +596,18 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
return (EEXIST);
if (bonustype == DMU_OT_ZNODE) {
+ znode_phys_t *znp = data;
*userp = znp->zp_uid;
*groupp = znp->zp_gid;
} else {
int hdrsize;
+ sa_hdr_phys_t *sap = data;
+ sa_hdr_phys_t sa = *sap;
+ boolean_t swap = B_FALSE;
ASSERT(bonustype == DMU_OT_SA);
- hdrsize = sa_hdrsize(data);
- if (hdrsize != 0) {
- *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
- SA_UID_OFFSET));
- *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
- SA_GID_OFFSET));
- } else {
+ if (sa.sa_magic == 0) {
/*
* This should only happen for newly created
* files that haven't had the znode data filled
@@ -618,6 +615,25 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
*/
*userp = 0;
*groupp = 0;
+ return (0);
+ }
+ if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
+ sa.sa_magic = SA_MAGIC;
+ sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
+ swap = B_TRUE;
+ } else {
+ VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
+ }
+
+ hdrsize = sa_hdrsize(&sa);
+ VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
+ *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_UID_OFFSET));
+ *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
+ SA_GID_OFFSET));
+ if (swap) {
+ *userp = BSWAP_64(*userp);
+ *groupp = BSWAP_64(*groupp);
}
}
return (error);
diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c
index 0c86cac427..92dc05f4a0 100644
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c
@@ -1947,13 +1947,16 @@ zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
* or not the object is an extended attribute directory.
*/
static int
-zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp,
- int *is_xattrdir)
+zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
+ uint64_t *pobjp, int *is_xattrdir)
{
uint64_t parent;
uint64_t pflags;
uint64_t mode;
+ uint64_t parent_mode;
sa_bulk_attr_t bulk[3];
+ sa_handle_t *sa_hdl;
+ dmu_buf_t *sa_db;
int count = 0;
int error;
@@ -1967,9 +1970,32 @@ zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp,
if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
return (error);
- *pobjp = parent;
+ /*
+ * When a link is removed its parent pointer is not changed and will
+ * be invalid. There are two cases where a link is removed but the
+ * file stays around, when it goes to the delete queue and when there
+ * are additional links.
+ */
+ error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
+ if (error != 0)
+ return (error);
+
+ error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
+ zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
+ if (error != 0)
+ return (error);
+
*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
+ /*
+ * Extended attributes can be applied to files, directories, etc.
+ * Otherwise the parent must be a directory.
+ */
+ if (!*is_xattrdir && !S_ISDIR(parent_mode))
+ return (EINVAL);
+
+ *pobjp = parent;
+
return (0);
}
@@ -2018,7 +2044,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
if (prevdb)
zfs_release_sa_handle(prevhdl, prevdb, FTAG);
- if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj,
+ if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
&is_xattrdir)) != 0)
break;
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index e2e98b7896..00964aa83f 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -2928,7 +2928,7 @@ zio_done(zio_t *zio)
* Hand it off to the otherwise-unused claim taskq.
*/
ASSERT(zio->io_tqent.tqent_next == NULL);
- (void) taskq_dispatch_ent(
+ taskq_dispatch_ent(
spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
(task_func_t *)zio_reexecute, zio, 0,
&zio->io_tqent);
diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c
index 9ae7d1f697..a9d4ab4070 100644
--- a/usr/src/uts/common/fs/zfs/zio_inject.c
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -147,14 +148,8 @@ zio_handle_fault_injection(zio_t *zio, int error)
for (handler = list_head(&inject_handlers); handler != NULL;
handler = list_next(&inject_handlers, handler)) {
- /* Ignore errors not destined for this pool */
- if (zio->io_spa != handler->zi_spa)
- continue;
-
- /* Ignore device errors and panic injection */
- if (handler->zi_record.zi_guid != 0 ||
- handler->zi_record.zi_func[0] != '\0' ||
- handler->zi_record.zi_duration != 0)
+ if (zio->io_spa != handler->zi_spa ||
+ handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT)
continue;
/* If this handler matches, return EIO */
@@ -197,10 +192,7 @@ zio_handle_label_injection(zio_t *zio, int error)
uint64_t start = handler->zi_record.zi_start;
uint64_t end = handler->zi_record.zi_end;
- /* Ignore device only faults or panic injection */
- if (handler->zi_record.zi_start == 0 ||
- handler->zi_record.zi_func[0] != '\0' ||
- handler->zi_record.zi_duration != 0)
+ if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT)
continue;
/*
@@ -246,13 +238,7 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
for (handler = list_head(&inject_handlers); handler != NULL;
handler = list_next(&inject_handlers, handler)) {
- /*
- * Ignore label specific faults, panic injection
- * or fake writes
- */
- if (handler->zi_record.zi_start != 0 ||
- handler->zi_record.zi_func[0] != '\0' ||
- handler->zi_record.zi_duration != 0)
+ if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT)
continue;
if (vd->vdev_guid == handler->zi_record.zi_guid) {
@@ -316,10 +302,8 @@ zio_handle_ignored_writes(zio_t *zio)
handler = list_next(&inject_handlers, handler)) {
/* Ignore errors not destined for this pool */
- if (zio->io_spa != handler->zi_spa)
- continue;
-
- if (handler->zi_record.zi_duration == 0)
+ if (zio->io_spa != handler->zi_spa ||
+ handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
continue;
/*
@@ -355,11 +339,8 @@ spa_handle_ignored_writes(spa_t *spa)
for (handler = list_head(&inject_handlers); handler != NULL;
handler = list_next(&inject_handlers, handler)) {
- /* Ignore errors not destined for this pool */
- if (spa != handler->zi_spa)
- continue;
-
- if (handler->zi_record.zi_duration == 0)
+ if (spa != handler->zi_spa ||
+ handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
continue;
if (handler->zi_record.zi_duration > 0) {
@@ -379,6 +360,34 @@ spa_handle_ignored_writes(spa_t *spa)
rw_exit(&inject_lock);
}
+uint64_t
+zio_handle_io_delay(zio_t *zio)
+{
+ vdev_t *vd = zio->io_vd;
+ inject_handler_t *handler;
+ uint64_t seconds = 0;
+
+ if (zio_injection_enabled == 0)
+ return (0);
+
+ rw_enter(&inject_lock, RW_READER);
+
+ for (handler = list_head(&inject_handlers); handler != NULL;
+ handler = list_next(&inject_handlers, handler)) {
+
+ if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
+ continue;
+
+ if (vd->vdev_guid == handler->zi_record.zi_guid) {
+ seconds = handler->zi_record.zi_timer;
+ break;
+ }
+
+ }
+ rw_exit(&inject_lock);
+ return (seconds);
+}
+
/*
* Create a new handler for the given record. We add it to the list, adding
* a reference to the spa_t in the process. We increment zio_injection_enabled,