Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/lib/libiscsit/common/libiscsit.h | 4
-rw-r--r--  usr/src/lib/libsmbfs/smb/ctx.c | 8
-rw-r--r--  usr/src/lib/libsmbfs/smb/rcfile.c | 3
-rw-r--r--  usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c | 116
-rw-r--r--  usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c | 10
-rw-r--r--  usr/src/uts/common/Makefile.files | 2
-rw-r--r--  usr/src/uts/common/fs/doorfs/door_sys.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c | 124
-rw-r--r--  usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c | 144
-rw-r--r--  usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c | 32
-rw-r--r--  usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h | 8
-rw-r--r--  usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h | 7
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c | 944
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h | 157
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c | 125
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h | 32
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c | 120
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c | 670
-rw-r--r--  usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h | 8
-rw-r--r--  usr/src/uts/common/io/comstar/port/fct/fct.c | 39
-rw-r--r--  usr/src/uts/common/io/comstar/port/iscsit/iscsit.c | 55
-rw-r--r--  usr/src/uts/common/io/comstar/port/iscsit/iscsit.h | 301
-rw-r--r--  usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c | 48
-rw-r--r--  usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c | 9
-rw-r--r--  usr/src/uts/common/io/comstar/port/pppt/pppt.h | 103
-rw-r--r--  usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c | 4
-rw-r--r--  usr/src/uts/common/io/comstar/stmf/lun_map.c | 173
-rw-r--r--  usr/src/uts/common/io/comstar/stmf/lun_map.h | 2
-rw-r--r--  usr/src/uts/common/io/comstar/stmf/stmf.c | 1613
-rw-r--r--  usr/src/uts/common/io/comstar/stmf/stmf_impl.h | 54
-rw-r--r--  usr/src/uts/common/io/comstar/stmf/stmf_state.h | 7
-rw-r--r--  usr/src/uts/common/io/comstar/stmf/stmf_stats.h | 17
-rw-r--r--  usr/src/uts/common/io/idm/idm.c | 54
-rw-r--r--  usr/src/uts/common/io/idm/idm_conn_sm.c | 6
-rw-r--r--  usr/src/uts/common/io/idm/idm_so.c | 8
-rw-r--r--  usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c | 3
-rw-r--r--  usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h | 9
-rw-r--r--  usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c | 11
-rw-r--r--  usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c | 7
-rw-r--r--  usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c | 175
-rw-r--r--  usr/src/uts/common/netsmb/smb_dev.h | 2
-rw-r--r--  usr/src/uts/common/sys/idm/idm.h | 79
-rw-r--r--  usr/src/uts/common/sys/idm/idm_conn_sm.h | 218
-rw-r--r--  usr/src/uts/common/sys/idm/idm_impl.h | 20
-rw-r--r--  usr/src/uts/common/sys/lpif.h | 6
-rw-r--r--  usr/src/uts/common/sys/portif.h | 4
-rw-r--r--  usr/src/uts/common/sys/scsi/generic/commands.h | 2
-rw-r--r--  usr/src/uts/common/sys/scsi/generic/mode.h | 2
-rw-r--r--  usr/src/uts/common/sys/scsi/scsi_names.h | 14
-rw-r--r--  usr/src/uts/common/sys/stmf.h | 14
-rw-r--r--  usr/src/uts/common/sys/stmf_defines.h | 2
51 files changed, 4025 insertions, 1554 deletions
diff --git a/usr/src/lib/libiscsit/common/libiscsit.h b/usr/src/lib/libiscsit/common/libiscsit.h
index 704c790edc..6666b4613e 100644
--- a/usr/src/lib/libiscsit/common/libiscsit.h
+++ b/usr/src/lib/libiscsit/common/libiscsit.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _LIBISCSIT_H
@@ -40,7 +40,7 @@
extern "C" {
#endif
-#define MAX_TARGETS 255 /* maximum targets that may be created */
+#define MAX_TARGETS 4095 /* maximum targets that may be created */
#define MAX_TPGT 256
#define CFG_TPGTLIST "tpgt-list"
diff --git a/usr/src/lib/libsmbfs/smb/ctx.c b/usr/src/lib/libsmbfs/smb/ctx.c
index 3aa67fd5f5..64122e3416 100644
--- a/usr/src/lib/libsmbfs/smb/ctx.c
+++ b/usr/src/lib/libsmbfs/smb/ctx.c
@@ -1411,10 +1411,16 @@ smb_cf_minauth_from_str(char *str)
return (-1);
}
-
+/*
+ * SMB 2.1 is the oldest SMB2 dialect implemented (we skipped SMB 2.002),
+ * so if we see a protocol value of just "2", assume they meant 2.1.
+ */
static struct nv
smbver_table[] = {
+ { "3.02", SMB2_DIALECT_0302 },
+ { "3.0", SMB2_DIALECT_0300 },
{ "2.1", SMB2_DIALECT_0210 },
+ { "2", SMB2_DIALECT_0210 },
{ "1", 1 },
{ NULL, 0 }
};
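
[Editor's note] The table above follows the NULL-terminated name/value
convention of the surrounding code, so the consumer presumably reduces to a
linear scan. A minimal sketch, under that assumption (the struct layout and
nv_lookup() name are illustrative, not the libsmbfs API):

	struct nv {
		const char *name;
		int value;
	};

	/* Hypothetical lookup helper; not part of the patch. */
	static int
	nv_lookup(const struct nv *tbl, const char *str)
	{
		for (; tbl->name != NULL; tbl++) {
			if (strcmp(tbl->name, str) == 0)
				return (tbl->value);
		}
		return (-1);	/* unknown version string */
	}

With the new entries, a lookup of "2" returns SMB2_DIALECT_0210, matching
the comment about treating a bare "2" as 2.1.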
diff --git a/usr/src/lib/libsmbfs/smb/rcfile.c b/usr/src/lib/libsmbfs/smb/rcfile.c
index d7ee2d15af..da96b13c34 100644
--- a/usr/src/lib/libsmbfs/smb/rcfile.c
+++ b/usr/src/lib/libsmbfs/smb/rcfile.c
@@ -492,8 +492,7 @@ rc_parse(struct rcfile *rcp)
if (home_nsmbrc != 0 && (
strcmp(buf, "nbns") == 0 ||
strcmp(buf, "nbns_enable") == 0 ||
- strcmp(buf, "nbns_broadcast") == 0 ||
- strcmp(buf, "signing") == 0)) {
+ strcmp(buf, "nbns_broadcast") == 0)) {
fprintf(stderr, dgettext(TEXT_DOMAIN,
"option %s may not be set "
"in user .nsmbrc file\n"), buf);
diff --git a/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c b/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c
index 24bb8fccbc..44c6666313 100644
--- a/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c
+++ b/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c
@@ -23,22 +23,46 @@
#include <stdlib.h>
#include <strings.h>
+#include <sys/cmn_err.h>
#include <netsmb/smb_signing.h>
#include <security/cryptoki.h>
#include <security/pkcs11.h>
/*
+ * Common function to see if a mech is available.
+ */
+static int
+find_mech(smb_sign_mech_t *mech, ulong_t mid)
+{
+ CK_SESSION_HANDLE hdl;
+ CK_RV rv;
+
+ rv = SUNW_C_GetMechSession(mid, &hdl);
+ if (rv != CKR_OK) {
+ cmn_err(CE_NOTE, "PKCS#11: no mech 0x%x",
+ (unsigned int)mid);
+ return (-1);
+ }
+ (void) C_CloseSession(hdl);
+
+ mech->mechanism = mid;
+ mech->pParameter = NULL;
+ mech->ulParameterLen = 0;
+ return (0);
+}
+
+/*
* SMB1 signing helpers:
* (getmech, init, update, final)
*/
+/*
+ * Find out if we have this mech.
+ */
int
smb_md5_getmech(smb_sign_mech_t *mech)
{
- mech->mechanism = CKM_MD5;
- mech->pParameter = NULL;
- mech->ulParameterLen = 0;
- return (0);
+ return (find_mech(mech, CKM_MD5));
}
/*
@@ -93,13 +117,13 @@ smb_md5_final(smb_sign_ctx_t ctx, uint8_t *digest16)
* (getmech, init, update, final)
*/
+/*
+ * Find out if we have this mech.
+ */
int
smb2_hmac_getmech(smb_sign_mech_t *mech)
{
- mech->mechanism = CKM_SHA256_HMAC;
- mech->pParameter = NULL;
- mech->ulParameterLen = 0;
- return (0);
+ return (find_mech(mech, CKM_SHA256_HMAC));
}
/*
@@ -161,3 +185,79 @@ smb2_hmac_final(smb_sign_ctx_t ctx, uint8_t *digest16)
return (rv == CKR_OK ? 0 : -1);
}
+
+/*
+ * SMB3 signing helpers:
+ * (getmech, init, update, final)
+ */
+
+/*
+ * Find out if we have this mech.
+ */
+int
+smb3_cmac_getmech(smb_sign_mech_t *mech)
+{
+ return (find_mech(mech, CKM_AES_CMAC));
+}
+
+/*
+ * Start PKCS#11 session, load the key.
+ */
+int
+smb3_cmac_init(smb_sign_ctx_t *ctxp, smb_sign_mech_t *mech,
+ uint8_t *key, size_t key_len)
+{
+ CK_OBJECT_HANDLE hkey = 0;
+ CK_RV rv;
+
+ rv = SUNW_C_GetMechSession(mech->mechanism, ctxp);
+ if (rv != CKR_OK)
+ return (-1);
+
+ rv = SUNW_C_KeyToObject(*ctxp, mech->mechanism,
+ key, key_len, &hkey);
+ if (rv != CKR_OK) {
+ (void) C_CloseSession(*ctxp);
+ return (-1);
+ }
+
+ rv = C_SignInit(*ctxp, mech, hkey);
+ (void) C_DestroyObject(*ctxp, hkey);
+ if (rv != CKR_OK) {
+ (void) C_CloseSession(*ctxp);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Digest one segment
+ */
+int
+smb3_cmac_update(smb_sign_ctx_t ctx, uint8_t *in, size_t len)
+{
+ CK_RV rv;
+
+ rv = C_SignUpdate(ctx, in, len);
+ if (rv != CKR_OK)
+ (void) C_CloseSession(ctx);
+
+ return (rv == CKR_OK ? 0 : -1);
+}
+
+/*
+ * Note, the SMB2 signature is just the AES CMAC digest.
+ * (both are 16 bytes long)
+ */
+int
+smb3_cmac_final(smb_sign_ctx_t ctx, uint8_t *digest)
+{
+ CK_ULONG len = SMB2_SIG_SIZE;
+ CK_RV rv;
+
+ rv = C_SignFinal(ctx, digest, &len);
+ (void) C_CloseSession(ctx);
+
+ return (rv == CKR_OK ? 0 : -1);
+}
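
[Editor's note] Taken together, the four smb3_cmac_* helpers above form a
one-shot signing sequence: getmech once to probe availability, then
init/update/final per message. A minimal sketch of a hypothetical caller
(the function name and error handling are illustrative, not part of the
patch):

	static int
	sign_one_message(uint8_t *key, size_t keylen,
	    uint8_t *msg, size_t msglen, uint8_t *sig16)
	{
		smb_sign_mech_t mech;
		smb_sign_ctx_t ctx = 0;

		if (smb3_cmac_getmech(&mech) != 0)
			return (-1);	/* CKM_AES_CMAC not available */
		if (smb3_cmac_init(&ctx, &mech, key, keylen) != 0)
			return (-1);	/* init closed the session itself */
		if (smb3_cmac_update(ctx, msg, msglen) != 0)
			return (-1);	/* update closed it on failure too */
		return (smb3_cmac_final(ctx, sig16));	/* always closes */
	}

Note the cleanup discipline in the patch: each stage closes the PKCS#11
session on failure, so the caller needs no explicit teardown path.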
diff --git a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c
index e018c6eb0c..f0eb05093c 100644
--- a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c
+++ b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c
@@ -215,13 +215,19 @@ smb3_cmac_init(smb_sign_ctx_t *ctxp, smb_crypto_mech_t *mech,
rv = SUNW_C_KeyToObject(*ctxp, mech->mechanism,
key, key_len, &hkey);
- if (rv != CKR_OK)
+ if (rv != CKR_OK) {
+ (void) C_CloseSession(*ctxp);
return (-1);
+ }
rv = C_SignInit(*ctxp, mech, hkey);
(void) C_DestroyObject(*ctxp, hkey);
+ if (rv != CKR_OK) {
+ (void) C_CloseSession(*ctxp);
+ return (-1);
+ }
- return (rv == CKR_OK ? 0 : -1);
+ return (0);
}
/*
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 78894e23f2..2b614cd7cd 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -944,7 +944,7 @@ PPPT_OBJS += alua_ic_if.o pppt.o pppt_msg.o pppt_tgt.o
STMF_OBJS += lun_map.o stmf.o
-STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o
+STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o ats_copy_mgr.o
SYSMSG_OBJS += sysmsg.o
diff --git a/usr/src/uts/common/fs/doorfs/door_sys.c b/usr/src/uts/common/fs/doorfs/door_sys.c
index f2c22ec249..68a7a11d82 100644
--- a/usr/src/uts/common/fs/doorfs/door_sys.c
+++ b/usr/src/uts/common/fs/doorfs/door_sys.c
@@ -81,7 +81,7 @@ size_t door_max_arg = 16 * 1024;
* door_upcall. Need to guard against a process returning huge amounts
* of data and getting the kernel stuck in kmem_alloc.
*/
-size_t door_max_upcall_reply = 1024 * 1024;
+size_t door_max_upcall_reply = 4 * 1024 * 1024;
/*
* Maximum number of descriptors allowed to be passed in a single
@@ -2725,7 +2725,7 @@ door_translate_out(void)
*/
static int
door_results(kthread_t *caller, caddr_t data_ptr, size_t data_size,
- door_desc_t *desc_ptr, uint_t desc_num)
+ door_desc_t *desc_ptr, uint_t desc_num)
{
door_client_t *ct = DOOR_CLIENT(caller->t_door);
door_upcall_t *dup = ct->d_upcall;
diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c b/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c
index 2be033a8fc..4235c94a06 100644
--- a/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c
+++ b/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c
@@ -29,23 +29,34 @@
#include <netsmb/smb_signing.h>
/*
- * SMB1 signing helpers:
- * (getmech, init, update, final)
+ * Common function to see if a mech is available.
*/
-
-int
-smb_md5_getmech(smb_sign_mech_t *mech)
+static int
+find_mech(smb_sign_mech_t *mech, crypto_mech_name_t name)
{
crypto_mech_type_t t;
- t = crypto_mech2id(SUN_CKM_MD5);
- if (t == CRYPTO_MECH_INVALID)
+ t = crypto_mech2id(name);
+ if (t == CRYPTO_MECH_INVALID) {
+ cmn_err(CE_NOTE, "nsmb: no kcf mech: %s", name);
return (-1);
+ }
mech->cm_type = t;
return (0);
}
/*
+ * SMB1 signing helpers:
+ * (getmech, init, update, final)
+ */
+
+int
+smb_md5_getmech(smb_sign_mech_t *mech)
+{
+ return (find_mech(mech, SUN_CKM_MD5));
+}
+
+/*
* Start the KCF session, load the key
*/
int
@@ -75,7 +86,12 @@ smb_md5_update(smb_sign_ctx_t ctx, void *buf, size_t len)
rv = crypto_digest_update(ctx, &data, 0);
- return (rv == CRYPTO_SUCCESS ? 0 : -1);
+ if (rv != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx(ctx);
+ return (-1);
+ }
+
+ return (0);
}
/*
@@ -106,13 +122,7 @@ smb_md5_final(smb_sign_ctx_t ctx, uint8_t *digest16)
int
smb2_hmac_getmech(smb_sign_mech_t *mech)
{
- crypto_mech_type_t t;
-
- t = crypto_mech2id(SUN_CKM_SHA256_HMAC);
- if (t == CRYPTO_MECH_INVALID)
- return (-1);
- mech->cm_type = t;
- return (0);
+ return (find_mech(mech, SUN_CKM_SHA256_HMAC));
}
/*
@@ -152,7 +162,12 @@ smb2_hmac_update(smb_sign_ctx_t ctx, uint8_t *in, size_t len)
rv = crypto_mac_update(ctx, &data, 0);
- return (rv == CRYPTO_SUCCESS ? 0 : -1);
+ if (rv != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx(ctx);
+ return (-1);
+ }
+
+ return (0);
}
/*
@@ -178,3 +193,80 @@ smb2_hmac_final(smb_sign_ctx_t ctx, uint8_t *digest16)
return (rv == CRYPTO_SUCCESS ? 0 : -1);
}
+
+/*
+ * SMB3 signing helpers:
+ * (getmech, init, update, final)
+ */
+
+int
+smb3_cmac_getmech(smb_sign_mech_t *mech)
+{
+ return (find_mech(mech, SUN_CKM_AES_CMAC));
+}
+
+/*
+ * Start the KCF session, load the key
+ */
+int
+smb3_cmac_init(smb_sign_ctx_t *ctxp, smb_sign_mech_t *mech,
+ uint8_t *key, size_t key_len)
+{
+ crypto_key_t ckey;
+ int rv;
+
+ bzero(&ckey, sizeof (ckey));
+ ckey.ck_format = CRYPTO_KEY_RAW;
+ ckey.ck_data = key;
+ ckey.ck_length = key_len * 8; /* in bits */
+
+ rv = crypto_mac_init(mech, &ckey, NULL, ctxp, NULL);
+
+ return (rv == CRYPTO_SUCCESS ? 0 : -1);
+}
+
+/*
+ * Digest one segment
+ */
+int
+smb3_cmac_update(smb_sign_ctx_t ctx, uint8_t *in, size_t len)
+{
+ crypto_data_t data;
+ int rv;
+
+ bzero(&data, sizeof (data));
+ data.cd_format = CRYPTO_DATA_RAW;
+ data.cd_length = len;
+ data.cd_raw.iov_base = (void *)in;
+ data.cd_raw.iov_len = len;
+
+ rv = crypto_mac_update(ctx, &data, 0);
+
+ if (rv != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx(ctx);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Note, the SMB2 signature is just the AES CMAC digest.
+ * (both are 16 bytes long)
+ */
+int
+smb3_cmac_final(smb_sign_ctx_t ctx, uint8_t *digest16)
+{
+ crypto_data_t out;
+ int rv;
+
+ bzero(&out, sizeof (out));
+ out.cd_format = CRYPTO_DATA_RAW;
+ out.cd_length = SMB2_SIG_SIZE;
+ out.cd_raw.iov_len = SMB2_SIG_SIZE;
+ out.cd_raw.iov_base = (void *)digest16;
+
+ rv = crypto_mac_final(ctx, &out, 0);
+
+ return (rv == CRYPTO_SUCCESS ? 0 : -1);
+}
diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c
index 46bf28c370..f7f20bdb5a 100644
--- a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c
+++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c
@@ -54,10 +54,45 @@
#define SMB2_SIG_OFF 48
#define SMB2_SIG_LEN 16
+typedef struct smb_mac_ops {
+ int (*mac_init)(smb_sign_ctx_t *, smb_sign_mech_t *,
+ uint8_t *, size_t);
+ int (*mac_update)(smb_sign_ctx_t, uint8_t *, size_t);
+ int (*mac_final)(smb_sign_ctx_t, uint8_t *);
+} smb_mac_ops_t;
+
+static smb_mac_ops_t
+smb2_sign_ops = {
+ smb2_hmac_init,
+ smb2_hmac_update,
+ smb2_hmac_final
+};
+
+static struct smb_mac_ops
+smb3_sign_ops = {
+ smb3_cmac_init,
+ smb3_cmac_update,
+ smb3_cmac_final
+};
+
+/*
+ * Input to KDF for SigningKey.
+ * See comment for smb3_do_kdf for content.
+ */
+static uint8_t sign_kdf_input[29] = {
+ 0, 0, 0, 1, 'S', 'M', 'B', '2',
+ 'A', 'E', 'S', 'C', 'M', 'A', 'C', 0,
+ 0, 'S', 'm', 'b', 'S', 'i', 'g', 'n',
+ 0, 0, 0, 0, 0x80 };
+
+int smb3_do_kdf(void *outbuf, size_t outbuf_len,
+ void *input, size_t input_len,
+ uint8_t *key, uint32_t key_len);
+
/*
* smb2_sign_init
*
- * Get the mechanism info and initilize SMB2 signing.
+ * Get the mechanism info and initialize SMB2 or SMB3 signing.
*/
int
smb2_sign_init(smb_vc_t *vcp)
@@ -68,31 +103,103 @@ smb2_sign_init(smb_vc_t *vcp)
ASSERT(vcp->vc_ssnkey != NULL);
ASSERT(vcp->vc_mackey == NULL);
- rc = smb2_hmac_getmech(&vcp->vc_signmech);
- if (rc != 0) {
- cmn_err(CE_NOTE, "smb2 can't get signing mechanism");
+ if (SMB_DIALECT(vcp) < SMB2_DIALECT_0300)
+ rc = smb2_hmac_getmech(&vcp->vc_signmech);
+ else
+ rc = smb3_cmac_getmech(&vcp->vc_signmech);
+ if (rc != 0)
return (EAUTH);
- }
/*
* Convert the session key to the MAC key.
*
* For SMB2, the signing key is just the first 16 bytes
* of the session key (truncated or padded with zeros).
- * [MS-SMB2] 3.2.5.3.1
- *
- * SMB3 would do KDF here.
+ * For SMB3, the signing key is a "KDF" hash of the
+ * session key. [MS-SMB2] 3.2.5.3.1
*/
vcp->vc_mackeylen = SMB2_SIG_LEN;
vcp->vc_mackey = kmem_zalloc(vcp->vc_mackeylen, KM_SLEEP);
- copysize = vcp->vc_ssnkeylen;
- if (copysize > vcp->vc_mackeylen)
- copysize = vcp->vc_mackeylen;
- bcopy(vcp->vc_ssnkey, vcp->vc_mackey, copysize);
+ if (SMB_DIALECT(vcp) < SMB2_DIALECT_0300) {
+ copysize = vcp->vc_ssnkeylen;
+ if (copysize > vcp->vc_mackeylen)
+ copysize = vcp->vc_mackeylen;
+ bcopy(vcp->vc_ssnkey, vcp->vc_mackey, copysize);
+
+ vcp->vc_sign_ops = &smb2_sign_ops;
+ } else {
+ rc = smb3_do_kdf(vcp->vc_mackey, vcp->vc_mackeylen,
+ sign_kdf_input, sizeof (sign_kdf_input),
+ vcp->vc_ssnkey, vcp->vc_ssnkeylen);
+ if (rc != 0)
+ return (EAUTH);
+ vcp->vc_sign_ops = &smb3_sign_ops;
+ }
return (0);
}
+/*
+ * Derive SMB3 key as described in [MS-SMB2] 3.1.4.2
+ * and [NIST SP800-108]
+ *
+ * r = 32, L = 128, PRF = HMAC-SHA256, key = (session key)
+ *
+ * Note that these describe pre-3.1.1 inputs.
+ *
+ * Session.SigningKey for binding a session:
+ * - Session.SessionKey as K1
+ * - label = SMB2AESCMAC (size 12)
+ * - context = SmbSign (size 8)
+ * Channel.SigningKey for all other requests
+ * - if SMB2_SESSION_FLAG_BINDING, GSS key (in Session.SessionKey?) as K1;
+ * - otherwise, Session.SessionKey as K1
+ * - label = SMB2AESCMAC (size 12)
+ * - context = SmbSign (size 8)
+ * Session.ApplicationKey for ... (not sure what yet)
+ * - Session.SessionKey as K1
+ * - label = SMB2APP (size 8)
+ * - context = SmbRpc (size 7)
+ * Session.EncryptionKey for encrypting server messages
+ * - Session.SessionKey as K1
+ * - label = "SMB2AESCCM" (size 11)
+ * - context = "ServerOut" (size 10)
+ * Session.DecryptionKey for decrypting client requests
+ * - Session.SessionKey as K1
+ * - label = "SMB2AESCCM" (size 11)
+ * - context = "ServerIn " (size 10) (Note the space)
+ */
+int
+smb3_do_kdf(void *outbuf, size_t outbuf_len,
+ void *input, size_t input_len,
+ uint8_t *key, uint32_t key_len)
+{
+ uint8_t digest32[SHA256_DIGEST_LENGTH];
+ smb_sign_mech_t mech;
+ smb_sign_ctx_t hctx = 0;
+ int rc;
+
+ bzero(&mech, sizeof (mech));
+ if ((rc = smb2_hmac_getmech(&mech)) != 0)
+ return (rc);
+
+ /* Limit the SessionKey input to its maximum size (16 bytes) */
+ if (key_len > SMB2_SIG_SIZE)
+ key_len = SMB2_SIG_SIZE;
+ rc = smb2_hmac_init(&hctx, &mech, key, key_len);
+ if (rc != 0)
+ return (rc);
+
+ if ((rc = smb2_hmac_update(hctx, input, input_len)) != 0)
+ return (rc);
+
+ if ((rc = smb2_hmac_final(hctx, digest32)) != 0)
+ return (rc);
+
+ /* Output is first 16 bytes of digest. */
+ bcopy(digest32, outbuf, outbuf_len);
+ return (0);
+}
/*
* Compute MAC signature of packet data, using the stored MAC key.
@@ -109,14 +216,17 @@ smb2_compute_MAC(struct smb_vc *vcp, mblk_t *mp, uchar_t *signature)
{
uint8_t tmp_hdr[SMB2_HDR_SIZE];
smb_sign_ctx_t ctx = 0;
+ smb_mac_ops_t *ops;
mblk_t *m = mp;
int size;
int rc;
if (vcp->vc_mackey == NULL)
return (-1);
+ if ((ops = vcp->vc_sign_ops) == NULL)
+ return (-1);
- rc = smb2_hmac_init(&ctx, &vcp->vc_signmech,
+ rc = ops->mac_init(&ctx, &vcp->vc_signmech,
vcp->vc_mackey, vcp->vc_mackeylen);
if (rc != 0)
return (rc);
@@ -131,7 +241,7 @@ smb2_compute_MAC(struct smb_vc *vcp, mblk_t *mp, uchar_t *signature)
size = SMB2_HDRLEN;
bcopy(m->b_rptr, tmp_hdr, size);
bzero(tmp_hdr + SMB2_SIG_OFF, SMB2_SIG_LEN);
- rc = smb2_hmac_update(ctx, tmp_hdr, size);
+ rc = ops->mac_update(ctx, tmp_hdr, size);
if (rc != 0)
return (rc);
@@ -140,7 +250,7 @@ smb2_compute_MAC(struct smb_vc *vcp, mblk_t *mp, uchar_t *signature)
* the data just after the SMB2 header.
*/
size = MBLKL(m) - SMB2_HDRLEN;
- rc = smb2_hmac_update(ctx, m->b_rptr + SMB2_HDRLEN, size);
+ rc = ops->mac_update(ctx, m->b_rptr + SMB2_HDRLEN, size);
if (rc != 0)
return (rc);
m = m->b_cont;
@@ -149,13 +259,13 @@ smb2_compute_MAC(struct smb_vc *vcp, mblk_t *mp, uchar_t *signature)
while (m != NULL) {
size = MBLKL(m);
if (size > 0) {
- rc = smb2_hmac_update(ctx, m->b_rptr, size);
+ rc = ops->mac_update(ctx, m->b_rptr, size);
if (rc != 0)
return (rc);
}
m = m->b_cont;
}
- rc = smb2_hmac_final(ctx, signature);
+ rc = ops->mac_final(ctx, signature);
return (rc);
}
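
[Editor's note] The 29-byte sign_kdf_input[] above is the fixed input block
for the SP800-108 counter-mode KDF described in the big comment, laid out as
i || Label || 0x00 || Context || L. Decoded:

	bytes  0..3   i = 0x00000001          (big-endian iteration counter)
	bytes  4..15  Label = "SMB2AESCMAC\0"  (12 bytes, NUL included)
	byte   16     0x00 separator
	bytes 17..24  Context = "SmbSign\0"    (8 bytes, NUL included)
	bytes 25..28  L = 0x00000080           (128 output bits, big-endian)

A hedged sketch of building such a block generically (kdf_build_input() is
an editorial helper, not part of the patch):

	static size_t
	kdf_build_input(uint8_t *buf, const void *label, size_t label_len,
	    const void *context, size_t context_len, uint32_t bits)
	{
		uint8_t *p = buf;

		*p++ = 0; *p++ = 0; *p++ = 0; *p++ = 1;	/* i = 1 */
		bcopy(label, p, label_len);	/* label, incl. its NUL */
		p += label_len;
		*p++ = 0;			/* separator */
		bcopy(context, p, context_len);	/* context, incl. its NUL */
		p += context_len;
		*p++ = (bits >> 24) & 0xff;	/* L, big-endian */
		*p++ = (bits >> 16) & 0xff;
		*p++ = (bits >> 8) & 0xff;
		*p++ = bits & 0xff;
		return ((size_t)(p - buf));
	}

Calling kdf_build_input(buf, "SMB2AESCMAC", 12, "SmbSign", 8, 128)
reproduces sign_kdf_input[] byte for byte.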
diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c
index c3df18faa9..52d8661b7a 100644
--- a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c
+++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c
@@ -50,9 +50,15 @@
#include <netsmb/smb_rq.h>
#include <netsmb/smb2_rq.h>
-#define NDIALECTS 1
-static const uint16_t smb2_dialects[1] = {
- SMB2_DIALECT_0210
+/*
+ * Supported dialects. Keep sorted by number because of how the
+ * vc_maxver check below may truncate this list.
+ */
+#define NDIALECTS 3
+static const uint16_t smb2_dialects[NDIALECTS] = {
+ SMB2_DIALECT_0210,
+ SMB2_DIALECT_0300,
+ SMB2_DIALECT_0302,
};
/* Optional capabilities we advertise (none yet). */
@@ -113,13 +119,13 @@ smb2_parse_smb1nego_resp(struct smb_rq *rqp)
md_get_uint16le(mdp, &sp->sv2_security_mode);
/* Get Dialect. */
- error = md_get_uint16le(mdp, &sp->sv2_dialect);
+ error = md_get_uint16le(mdp, &sp->sv_proto);
if (error != 0)
return (error);
/* What dialect did we get? */
- if (sp->sv2_dialect != SMB2_DIALECT_02ff) {
- SMBERROR("Unknown dialect 0x%x\n", sp->sv2_dialect);
+ if (sp->sv_proto != SMB2_DIALECT_02ff) {
+ SMBERROR("Unknown dialect 0x%x\n", sp->sv_proto);
return (EINVAL);
}
/* Set our (internal) SMB1 dialect also. */
@@ -148,6 +154,7 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
struct smb_rq *rqp = NULL;
struct mbchain *mbp = NULL;
struct mdchain *mdp = NULL;
+ uint16_t *ndialects_p;
uint16_t ndialects = NDIALECTS;
boolean_t will_sign = B_FALSE;
uint16_t length = 0;
@@ -174,15 +181,18 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
*/
smb_rq_getrequest(rqp, &mbp);
mb_put_uint16le(mbp, 36); /* Struct Size */
- mb_put_uint16le(mbp, ndialects); /* Dialect Count */
+ ndialects_p = mb_reserve(mbp, 2); /* Dialect Count */
mb_put_uint16le(mbp, security_mode);
mb_put_uint16le(mbp, 0); /* Reserved */
mb_put_uint32le(mbp, smb2_clnt_caps);
mb_put_mem(mbp, vcp->vc_cl_guid, 16, MB_MSYSTEM);
mb_put_uint64le(mbp, 0); /* Start Time */
for (i = 0; i < ndialects; i++) { /* Dialects */
+ if (smb2_dialects[i] > vcp->vc_maxver)
+ break;
mb_put_uint16le(mbp, smb2_dialects[i]);
}
+ *ndialects_p = htoles(i);
/*
* Do the OTW call.
@@ -209,7 +219,7 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
}
md_get_uint16le(mdp, &sp->sv2_security_mode);
- md_get_uint16le(mdp, &sp->sv2_dialect);
+ md_get_uint16le(mdp, &sp->sv_proto); /* dialect */
md_get_uint16le(mdp, NULL); /* reserved */
md_get_mem(mdp, sp->sv2_guid, 16, MB_MSYSTEM);
md_get_uint32le(mdp, &sp->sv2_capabilities);
@@ -254,7 +264,7 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
}
wk->wk_u_auth_rlen = sec_buf_len;
err = md_get_mem(mdp, wk->wk_u_auth_rbuf.lp_ptr,
- sec_buf_len, MB_MUSER);
+ sec_buf_len, MB_MUSER);
if (err) {
goto errout;
}
@@ -472,7 +482,7 @@ smb2_smb_ssnsetup(struct smb_vc *vcp, struct smb_cred *scred)
}
wk->wk_u_auth_rlen = sec_buf_len;
err = md_get_mem(mdp, wk->wk_u_auth_rbuf.lp_ptr,
- sec_buf_len, MB_MUSER);
+ sec_buf_len, MB_MUSER);
if (err != 0) {
ret = err;
goto out;
@@ -813,7 +823,7 @@ smb2_smb_ntcreate(
mb_put_uint16le(mbp, StructSize);
mb_put_uint8(mbp, 0); /* Security flags */
mb_put_uint8(mbp, SMB2_OPLOCK_LEVEL_NONE); /* Oplock level */
- mb_put_uint32le(mbp, impersonate); /* Impersonation Level */
+ mb_put_uint32le(mbp, impersonate); /* Impersonation Level */
mb_put_uint64le(mbp, cr_flags);
mb_put_uint64le(mbp, 0); /* Reserved */
mb_put_uint32le(mbp, req_acc);
diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h b/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h
index d0a8a1dca0..b88e32d781 100644
--- a/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h
+++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h
@@ -70,7 +70,7 @@ typedef struct smb_cred {
#define SMBV_EXT_SEC 0x0080 /* conn to use extended security */
#define SMBV_SIGNING 0x0100 /* negotiated signing */
#define SMBV_SMB2 0x0200 /* VC using SMB 2 or 3 */
-#define SMBV_HAS_FILEIDS 0x0400 /* Use File IDs for hash and inode numbers */
+#define SMBV_HAS_FILEIDS 0x0400 /* Use File IDs for hash and inums */
#define SMBV_NO_WRITE_THRU 0x0800 /* Can't use ... */
/*
@@ -165,7 +165,7 @@ enum smbco_level {
* SMB1 Negotiated protocol parameters
*/
struct smb_sopt {
- int16_t sv_proto; /* protocol dialect */
+ uint16_t sv_proto; /* protocol dialect */
uchar_t sv_sm; /* security mode */
int16_t sv_tz; /* offset in min relative to UTC */
uint16_t sv_maxmux; /* max number of outstanding rq's */
@@ -178,13 +178,12 @@ struct smb_sopt {
/* SMB2+ fields */
uint32_t sv2_sessflags; /* final session setup reply flags */
- uint16_t sv2_dialect; /* dialect (non zero for SMB 2/3 */
uint32_t sv2_capabilities; /* capabilities */
uint32_t sv2_maxtransact; /* max transact size */
uint32_t sv2_maxread; /* max read size */
uint32_t sv2_maxwrite; /* max write size */
- uint8_t sv2_guid[16]; /* GUID */
uint16_t sv2_security_mode; /* security mode */
+ uint8_t sv2_guid[16]; /* GUID */
};
typedef struct smb_sopt smb_sopt_t;
@@ -226,6 +225,7 @@ typedef struct smb_vc {
uint8_t *vc_mackey; /* MAC key buffer */
uint8_t *vc_ssnkey; /* session key buffer */
smb_sign_mech_t vc_signmech;
+ struct smb_mac_ops *vc_sign_ops;
struct smb_tran_desc *vc_tdesc; /* transport ops. vector */
void *vc_tdata; /* transport control block */
diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h b/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h
index e1799e3383..81217d5182 100644
--- a/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h
+++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h
@@ -48,6 +48,8 @@ typedef CK_MECHANISM smb_sign_mech_t;
typedef CK_SESSION_HANDLE smb_sign_ctx_t;
#endif /* _KERNEL */
+struct smb_mac_ops;
+
/*
* SMB signing routines used in smb_signing.c
*/
@@ -66,6 +68,11 @@ int smb2_hmac_init(smb_sign_ctx_t *, smb_sign_mech_t *, uint8_t *, size_t);
int smb2_hmac_update(smb_sign_ctx_t, uint8_t *, size_t);
int smb2_hmac_final(smb_sign_ctx_t, uint8_t *);
+int smb3_cmac_getmech(smb_sign_mech_t *);
+int smb3_cmac_init(smb_sign_ctx_t *, smb_sign_mech_t *, uint8_t *, size_t);
+int smb3_cmac_update(smb_sign_ctx_t, uint8_t *, size_t);
+int smb3_cmac_final(smb_sign_ctx_t, uint8_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c
new file mode 100644
index 0000000000..6623012fc8
--- /dev/null
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c
@@ -0,0 +1,944 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
+ */
+
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/scsi/scsi.h>
+#include <sys/scsi/impl/scsi_reset_notify.h>
+#include <sys/scsi/generic/mode.h>
+#include <sys/disp.h>
+#include <sys/byteorder.h>
+#include <sys/atomic.h>
+#include <sys/sdt.h>
+#include <sys/dkio.h>
+
+#include <sys/dmu.h>
+#include <sys/txg.h>
+#include <sys/refcount.h>
+#include <sys/zvol.h>
+
+#include <sys/stmf.h>
+#include <sys/lpif.h>
+#include <sys/portif.h>
+#include <sys/stmf_ioctl.h>
+#include <sys/stmf_sbd_ioctl.h>
+
+#include "stmf_sbd.h"
+#include "sbd_impl.h"
+
+/* ATS tuning parameters */
+#define OVERLAP_OFF 0
+#define OVERLAP_LOW 1
+#define OVERLAP_MEDIUM 2
+#define OVERLAP_HIGH 3
+uint8_t ats_overlap_check = OVERLAP_LOW; /* check for rw overlap with ATS */
+
+uint8_t HardwareAcceleratedLocking = 1; /* 0 for disabled */
+uint8_t HardwareAcceleratedMove = 1;
+uint64_t sbd_list_length = 0;
+
+#define SBD_ATS_MAX_NBLKS 32
+/* ATS routines. */
+uint8_t
+sbd_ats_max_nblks(void)
+{
+ if (HardwareAcceleratedLocking == 0)
+ return (0);
+ return (SBD_ATS_MAX_NBLKS);
+}
+
+#define is_overlapping(start1, len1, start2, len2) \
+ ((start2) > (start1) ? ((start2) - (start1)) < (len1) : \
+ ((start1) - (start2)) < (len2))
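/*
 * Editor's worked example (not part of the patch): is_overlapping()
 * treats each range as the half-open interval [start, start + len):
 *
 *   is_overlapping(10, 4, 12, 4)  ->  12 - 10 = 2 < 4   -> overlap
 *   is_overlapping(10, 4, 14, 4)  ->  14 - 10 = 4 !< 4  -> adjacent, none
 *   is_overlapping(12, 2, 10, 8)  ->  12 - 10 = 2 < 8   -> overlap
 */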
+
+/*ARGSUSED*/
+static sbd_status_t
+sbd_ats_do_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
+ uint64_t lba, uint64_t count, uint32_t flags)
+{
+ sbd_status_t ret = SBD_SUCCESS;
+ ats_state_t *ats_state, *ats_state_ret;
+ sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+ uint8_t cdb0 = task->task_cdb[0];
+
+ if (scmd == NULL)
+ return (SBD_SUCCESS);
+
+ if (HardwareAcceleratedLocking == 0)
+ return (SBD_SUCCESS);
+	/*
+	 * If ATS overlap checking is disabled just return.  The check
+	 * is not done in the function that removes items from the list,
+	 * which allows this value to be changed at runtime.  If checking
+	 * is turned on at runtime, the remove will just start taking
+	 * items off the list; if it is turned off at runtime, the list
+	 * is still cleaned up.
+	 */
+ if (ats_overlap_check == OVERLAP_OFF)
+ return (SBD_SUCCESS);
+
+ /* overlap checking for compare and write only */
+ if (ats_overlap_check == OVERLAP_LOW) {
+ if (cdb0 != SCMD_COMPARE_AND_WRITE)
+ return (SBD_SUCCESS);
+ }
+
+ /* overlap checking for compare and write and write only */
+ if (ats_overlap_check == OVERLAP_MEDIUM) {
+ if ((cdb0 != SCMD_COMPARE_AND_WRITE) && (cdb0 != SCMD_WRITE))
+ return (SBD_SUCCESS);
+ }
+
+ mutex_enter(&sl->sl_lock);
+	/*
+	 * If the list is empty, just add the element to the list and
+	 * return success; there is no overlap.  This is done for every
+	 * read, write, or compare and write.
+	 */
+ if (list_is_empty(&sl->sl_ats_io_list)) {
+ goto done;
+ }
+
+ /*
+ * There are inflight operations. As a result the list must be scanned
+ * and if there are any overlaps then SBD_BUSY should be returned.
+ *
+ * Duplicate reads and writes are allowed and kept on the list
+ * since there is no reason that overlapping IO operations should
+ * be delayed.
+ *
+ * A command that conflicts with a running compare and write will
+ * be rescheduled and rerun. This is handled by stmf_task_poll_lu.
+ * There is a possibility that a command can be starved and still
+ * return busy, which is valid in the SCSI protocol.
+ */
+
+ for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL;
+ ats_state = list_next(&sl->sl_ats_io_list, ats_state)) {
+
+ if (is_overlapping(ats_state->as_cur_ats_lba,
+ ats_state->as_cur_ats_len, lba, count) == 0)
+ continue;
+
+ /* if the task is already listed just return */
+ if (task == ats_state->as_cur_ats_task) {
+ cmn_err(CE_WARN, "sbd_ats_handling_before_io: "
+ "task %p already on list", (void *) task);
+ ret = SBD_SUCCESS;
+ goto exit;
+ }
+		/*
+		 * If either the current command or the listed command is
+		 * a compare and write, any overlap is an error.
+		 */
+
+ if ((cdb0 == SCMD_COMPARE_AND_WRITE) ||
+ (ats_state->as_cmd == SCMD_COMPARE_AND_WRITE)) {
+ ret = SBD_BUSY;
+ goto exit;
+ }
+ }
+done:
+ ats_state_ret =
+ (ats_state_t *)kmem_zalloc(sizeof (ats_state_t), KM_SLEEP);
+ ats_state_ret->as_cur_ats_lba = lba;
+ ats_state_ret->as_cur_ats_len = count;
+ ats_state_ret->as_cmd = cdb0;
+ ats_state_ret->as_cur_ats_task = task;
+ if (list_is_empty(&sl->sl_ats_io_list)) {
+ list_insert_head(&sl->sl_ats_io_list, ats_state_ret);
+ } else {
+ list_insert_tail(&sl->sl_ats_io_list, ats_state_ret);
+ }
+ scmd->flags |= SBD_SCSI_CMD_ATS_RELATED;
+ scmd->ats_state = ats_state;
+ sbd_list_length++;
+ mutex_exit(&sl->sl_lock);
+ return (SBD_SUCCESS);
+
+exit:
+ mutex_exit(&sl->sl_lock);
+ return (ret);
+}
+
+sbd_status_t
+sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
+ uint64_t lba, uint64_t count)
+{
+ return (sbd_ats_do_handling_before_io(task, sl, lba, count, 0));
+}
+
+void
+sbd_ats_remove_by_task(scsi_task_t *task)
+{
+ ats_state_t *ats_state;
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ sbd_cmd_t *scmd = task->task_lu_private;
+
+ if (scmd == NULL)
+ return;
+ /*
+ * Scan the list and take the task off of the list. It is possible
+ * that the call is made in a situation where the task is not
+ * listed. That is a valid but unlikely case. If it happens
+ * just fall through and return. The list removal is done by
+ * task not LBA range and a task cannot be active for more than
+ * one command so there is never an issue about removing the
+ * wrong element.
+ */
+ mutex_enter(&sl->sl_lock);
+ if (list_is_empty(&sl->sl_ats_io_list)) {
+ mutex_exit(&sl->sl_lock);
+ return;
+ }
+
+ for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL;
+ ats_state = list_next(&sl->sl_ats_io_list, ats_state)) {
+
+ if (ats_state->as_cur_ats_task == task) {
+ list_remove(&sl->sl_ats_io_list, ats_state);
+ kmem_free(ats_state, sizeof (ats_state_t));
+ scmd->flags &= ~SBD_SCSI_CMD_ATS_RELATED;
+ scmd->ats_state = NULL;
+ sbd_list_length--;
+ break;
+ }
+ }
+ mutex_exit(&sl->sl_lock);
+}
+
+static sbd_status_t
+sbd_compare_and_write(struct scsi_task *task, sbd_cmd_t *scmd,
+ uint32_t *ret_off)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ uint8_t *buf;
+ sbd_status_t ret;
+ uint64_t addr;
+ uint32_t len, i;
+
+ addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
+ len = (uint32_t)task->task_cdb[13];
+
+ addr <<= sl->sl_data_blocksize_shift;
+ len <<= sl->sl_data_blocksize_shift;
+ buf = kmem_alloc(len, KM_SLEEP);
+ ret = sbd_data_read(sl, task, addr, (uint64_t)len, buf);
+ if (ret != SBD_SUCCESS) {
+ goto compare_and_write_done;
+ }
+	/*
+	 * Can't use bcmp here.  We need the mismatch offset.
+	 */
+ for (i = 0; i < len; i++) {
+ if (buf[i] != scmd->trans_data[i])
+ break;
+ }
+ if (i != len) {
+ *ret_off = i;
+ ret = SBD_COMPARE_FAILED;
+ goto compare_and_write_done;
+ }
+
+ ret = sbd_data_write(sl, task, addr, (uint64_t)len,
+ scmd->trans_data + len);
+
+compare_and_write_done:
+ kmem_free(buf, len);
+ return (ret);
+}
+
+static void
+sbd_send_miscompare_status(struct scsi_task *task, uint32_t miscompare_off)
+{
+ uint8_t sd[18];
+
+ task->task_scsi_status = STATUS_CHECK;
+ bzero(sd, 18);
+ sd[0] = 0xF0;
+ sd[2] = 0xe;
+ SCSI_WRITE32(&sd[3], miscompare_off);
+ sd[7] = 10;
+ sd[12] = 0x1D;
+ task->task_sense_data = sd;
+ task->task_sense_length = 18;
+ (void) stmf_send_scsi_status(task, STMF_IOF_LU_DONE);
+}
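/*
 * Editor's annotation (not part of the patch): the 18 bytes built above
 * follow the SPC fixed-format sense layout:
 *
 *   sd[0]    = 0xF0  valid bit set, current error, fixed format
 *   sd[2]    = 0x0E  sense key MISCOMPARE
 *   sd[3..6]         INFORMATION field = byte offset of the miscompare
 *   sd[7]    = 10    additional sense length (bytes that follow)
 *   sd[12]   = 0x1D  ASC: MISCOMPARE DURING VERIFY OPERATION (ASCQ 0)
 */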
+
+static void
+sbd_ats_release_resources(struct scsi_task *task)
+{
+ sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+
+	/*
+	 * A few basic checks here to be sure that there are not multiple
+	 * calls going on.  If scmd is NULL just return; this is very
+	 * unlikely, but could happen if the task is freed by an abort.
+	 * If nbufs is invalid, warn but ignore the error.  Last, if
+	 * trans_data is either NULL or the length is zero, just skip
+	 * the operation and leak the memory buffer.
+	 */
+ if (scmd == NULL)
+ return;
+
+ if (scmd->nbufs == 0xFF)
+ cmn_err(CE_WARN, "%s invalid buffer count %x", __func__,
+ scmd->nbufs);
+
+ if ((scmd->trans_data != NULL) && (scmd->trans_data_len != 0))
+ kmem_free(scmd->trans_data, scmd->trans_data_len);
+
+ scmd->trans_data = NULL; /* force panic later if re-entered */
+ scmd->trans_data_len = 0;
+ scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
+}
+
+void
+sbd_handle_ats_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
+ struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
+{
+ uint64_t laddr;
+ uint32_t buflen, iolen, miscompare_off;
+ int ndx;
+ sbd_status_t ret;
+
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
+ atomic_dec_8(&scmd->nbufs);
+ }
+
+ if (dbuf->db_xfer_status != STMF_SUCCESS) {
+ sbd_ats_remove_by_task(task);
+ sbd_ats_release_resources(task);
+ stmf_abort(STMF_QUEUE_TASK_ABORT, task,
+ dbuf->db_xfer_status, NULL);
+ return;
+ }
+
+ if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ goto ATS_XFER_DONE;
+ }
+
+ /* if state is confused drop the command */
+ if ((scmd->trans_data == NULL) ||
+ ((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
+ ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0)) {
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ return;
+ }
+
+ if (ATOMIC32_GET(scmd->len) != 0) {
+		/*
+		 * Initiate the next port xfer to occur in parallel
+		 * with writing this buf.  A side effect of sbd_do_ats_xfer
+		 * is that it may set scmd->len to 0, which means all the
+		 * data transfers have been started, not that they are done.
+		 */
+ sbd_do_ats_xfer(task, scmd, NULL, 0);
+ }
+
+ /*
+ * move the most recent data transfer to the temporary buffer
+ * used for the compare and write function.
+ */
+ laddr = dbuf->db_relative_offset;
+ for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
+ (ndx < dbuf->db_sglist_length); ndx++) {
+ iolen = min(dbuf->db_data_size - buflen,
+ dbuf->db_sglist[ndx].seg_length);
+ if (iolen == 0)
+ break;
+ bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
+ iolen);
+ buflen += iolen;
+ laddr += (uint64_t)iolen;
+ }
+ task->task_nbytes_transferred += buflen;
+
+ATS_XFER_DONE:
+ if (ATOMIC32_GET(scmd->len) == 0 ||
+ scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ stmf_free_dbuf(task, dbuf);
+		/*
+		 * If this is not the last buffer to be transferred then
+		 * exit and wait for the next buffer.  Once nbufs is 0,
+		 * all the data has arrived and the compare can be done.
+		 */
+ if (ATOMIC8_GET(scmd->nbufs) > 0)
+ return;
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ sbd_ats_remove_by_task(task);
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_ERROR);
+ } else {
+ ret = sbd_compare_and_write(task, scmd,
+ &miscompare_off);
+ sbd_ats_remove_by_task(task);
+ sbd_ats_release_resources(task);
+ if (ret != SBD_SUCCESS) {
+ if (ret != SBD_COMPARE_FAILED) {
+ stmf_scsilib_send_status(task,
+ STATUS_CHECK, STMF_SAA_WRITE_ERROR);
+ } else {
+ sbd_send_miscompare_status(task,
+ miscompare_off);
+ }
+ } else {
+ stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+ }
+ }
+ return;
+ }
+ sbd_do_ats_xfer(task, scmd, dbuf, dbuf_reusable);
+}
+
+void
+sbd_do_ats_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
+ struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
+{
+ uint32_t len;
+
+ if (ATOMIC32_GET(scmd->len) == 0) {
+ if (dbuf != NULL) {
+ stmf_free_dbuf(task, dbuf);
+ }
+ return;
+ }
+
+ if ((dbuf != NULL) &&
+ ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
+ /* free current dbuf and allocate a new one */
+ stmf_free_dbuf(task, dbuf);
+ dbuf = NULL;
+ }
+ if (dbuf == NULL) {
+ uint32_t maxsize, minsize, old_minsize;
+
+ maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
+ ATOMIC32_GET(scmd->len);
+ minsize = maxsize >> 2;
+ do {
+ old_minsize = minsize;
+ dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
+ } while ((dbuf == NULL) && (old_minsize > minsize) &&
+ (minsize >= 512));
+ if (dbuf == NULL) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
+ sbd_ats_remove_by_task(task);
+ sbd_ats_release_resources(task);
+ stmf_abort(STMF_QUEUE_TASK_ABORT, task,
+ STMF_ALLOC_FAILURE, NULL);
+ }
+ return;
+ }
+ }
+
+ len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
+ ATOMIC32_GET(scmd->len);
+
+ dbuf->db_relative_offset = scmd->current_ro;
+ dbuf->db_data_size = len;
+ dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
+ (void) stmf_xfer_data(task, dbuf, 0);
+	/*
+	 * scmd->nbufs is the number of outstanding transfers;
+	 * scmd->len is the number of bytes remaining to be requested.
+	 */
+ atomic_inc_8(&scmd->nbufs);
+ atomic_add_32(&scmd->len, -len);
+ scmd->current_ro += len;
+}
+
+void
+sbd_handle_ats(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
+ uint64_t addr, len;
+ sbd_cmd_t *scmd;
+ stmf_data_buf_t *dbuf;
+ uint8_t do_immediate_data = 0;
+ /* int ret; */
+
+ if (HardwareAcceleratedLocking == 0) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_OPCODE);
+ return;
+ }
+
+ task->task_cmd_xfer_length = 0;
+ if (task->task_additional_flags &
+ TASK_AF_NO_EXPECTED_XFER_LENGTH) {
+ task->task_expected_xfer_length = 0;
+ }
+ if (sl->sl_flags & SL_WRITE_PROTECTED) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_PROTECTED);
+ return;
+ }
+ addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
+ len = (uint64_t)task->task_cdb[13];
+
+ if ((task->task_cdb[1]) || (len > SBD_ATS_MAX_NBLKS)) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_FIELD_IN_CDB);
+ return;
+ }
+ if (len == 0) {
+ stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+ return;
+ }
+
+ /*
+ * This can be called again. It will return the same handle again.
+ */
+ if (sbd_ats_handling_before_io(task, sl, addr, len) != SBD_SUCCESS) {
+ if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
+ stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+ }
+ return;
+ }
+
+ addr <<= sl->sl_data_blocksize_shift;
+ len <<= sl->sl_data_blocksize_shift;
+
+ task->task_cmd_xfer_length = len << 1; /* actual amt of data is 2x */
+ if (task->task_additional_flags &
+ TASK_AF_NO_EXPECTED_XFER_LENGTH) {
+ task->task_expected_xfer_length = task->task_cmd_xfer_length;
+ }
+ if ((addr + len) > sl->sl_lu_size) {
+ sbd_ats_remove_by_task(task);
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_LBA_OUT_OF_RANGE);
+ return;
+ }
+
+ len <<= 1;
+
+ if (len != task->task_expected_xfer_length) {
+ sbd_ats_remove_by_task(task);
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_FIELD_IN_CDB);
+ return;
+ }
+
+ if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
+ if (initial_dbuf->db_data_size > len) {
+ if (initial_dbuf->db_data_size >
+ task->task_expected_xfer_length) {
+ /* protocol error */
+ sbd_ats_remove_by_task(task);
+ stmf_abort(STMF_QUEUE_TASK_ABORT, task,
+ STMF_INVALID_ARG, NULL);
+ return;
+ }
+ ASSERT(len <= 0xFFFFFFFFull);
+ initial_dbuf->db_data_size = (uint32_t)len;
+ }
+ do_immediate_data = 1;
+ }
+ dbuf = initial_dbuf;
+
+ if (task->task_lu_private) {
+ scmd = (sbd_cmd_t *)task->task_lu_private;
+ } else {
+ scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
+ task->task_lu_private = scmd;
+ }
+
+	/* We don't set the ATS_RELATED flag here */
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
+ scmd->cmd_type = SBD_CMD_SCSI_WRITE;
+ scmd->nbufs = 0;
+ ASSERT(len <= 0xFFFFFFFFull);
+ scmd->len = (uint32_t)len;
+ scmd->trans_data_len = (uint32_t)len;
+ scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
+ scmd->current_ro = 0;
+
+ if (do_immediate_data) {
+ /*
+ * Account for data passed in this write command
+ */
+ (void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
+ atomic_add_32(&scmd->len, -dbuf->db_data_size);
+ scmd->current_ro += dbuf->db_data_size;
+ dbuf->db_xfer_status = STMF_SUCCESS;
+ sbd_handle_ats_xfer_completion(task, scmd, dbuf, 0);
+ } else {
+ sbd_do_ats_xfer(task, scmd, dbuf, 0);
+ }
+}
+
+/*
+ * SCSI Copy Manager
+ *
+ * SCSI copy manager is the state machine which implements
+ * SCSI extended copy functionality (SPC). There is one
+ * cpmgr instance per extended copy command.
+ *
+ * Exported block-copy functions:
+ * cpmgr_create() - Creates the state machine.
+ * cpmgr_destroy() - Cleans up a completed cpmgr.
+ * cpmgr_run() - Performs time bound copy.
+ * cpmgr_abort() - Aborts a cpmgr(if not already completed).
+ * cpmgr_done() - Tests if the copy is done.
+ */
+
+static void cpmgr_completion_cleanup(cpmgr_t *cm);
+int sbd_check_reservation_conflict(sbd_lu_t *sl, scsi_task_t *task);
+
+static uint8_t sbd_recv_copy_results_op_params[] = {
+ 0, 0, 0, 42, 1, 0, 0, 0,
+ 0, 2, 0, 1, 0, 0, 0xFF, 0xFF, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0xFF, 0xFF, 0, 9, 0, 0, 0, 0, 0,
+ 2, 2, 0xE4
+};
+
+cpmgr_handle_t
+cpmgr_create(scsi_task_t *task, uint8_t *params)
+{
+ cpmgr_t *cm = NULL;
+ uint8_t *p;
+ uint32_t plist_len;
+ uint32_t dbl;
+ int i;
+ uint16_t tdlen;
+ uint16_t n;
+
+ cm = kmem_zalloc(sizeof (*cm), KM_NOSLEEP);
+ if (cm == NULL)
+ return (CPMGR_INVALID_HANDLE);
+
+ cm->cm_task = task;
+ p = task->task_cdb;
+ plist_len = READ_SCSI32(&p[10], uint32_t);
+
+	/*
+	 * Assume failure for now; if all checks pass we will change
+	 * this to CM_COPYING.
+	 */
+ cm->cm_state = CM_COMPLETE;
+
+ if (plist_len == 0) {
+ cm->cm_status = 0;
+ goto cpmgr_create_done;
+ }
+
+ if (plist_len < CPMGR_PARAM_HDR_LEN) {
+ cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
+ goto cpmgr_create_done;
+ } else if ((params[0] != 0) || ((params[1] & 0x18) != 0x18)) {
+		/*
+		 * The current implementation does not allow the use
+		 * of the list ID field.
+		 */
+ cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+ goto cpmgr_create_done;
+ }
+ /* No inline data either */
+ if (*((uint32_t *)(&params[12])) != 0) {
+ cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+ goto cpmgr_create_done;
+ }
+
+ tdlen = READ_SCSI16(&params[2], uint16_t);
+ if ((tdlen == 0) || (tdlen % CPMGR_TARGET_DESCRIPTOR_SIZE) ||
+ (plist_len < (CPMGR_PARAM_HDR_LEN + tdlen))) {
+ cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
+ goto cpmgr_create_done;
+ }
+ cm->cm_td_count = tdlen / CPMGR_TARGET_DESCRIPTOR_SIZE;
+ if (cm->cm_td_count > CPMGR_MAX_TARGET_DESCRIPTORS) {
+ cm->cm_status = CPMGR_TOO_MANY_TARGET_DESCRIPTORS;
+ goto cpmgr_create_done;
+ }
+ if (plist_len != (CPMGR_PARAM_HDR_LEN + tdlen +
+ CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE)) {
+ cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR;
+ goto cpmgr_create_done;
+ }
+ for (i = 0; i < cm->cm_td_count; i++) {
+ p = params + CPMGR_PARAM_HDR_LEN;
+ p += i * CPMGR_TARGET_DESCRIPTOR_SIZE;
+ if ((p[0] != CPMGR_IDENT_TARGET_DESCRIPTOR) ||
+ ((p[5] & 0x30) != 0) || (p[7] != 16)) {
+ cm->cm_status = CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR;
+ goto cpmgr_create_done;
+ }
+		/*
+		 * stmf should be able to find this LU and lock it.  Also
+		 * make sure that it is indeed an sbd LU.
+		 */
+ if (((cm->cm_tds[i].td_lu =
+ stmf_check_and_hold_lu(task, &p[8])) == NULL) ||
+ (!sbd_is_valid_lu(cm->cm_tds[i].td_lu))) {
+ cm->cm_status = CPMGR_COPY_TARGET_NOT_REACHABLE;
+ goto cpmgr_create_done;
+ }
+ dbl = p[29];
+ dbl <<= 8;
+ dbl |= p[30];
+ dbl <<= 8;
+ dbl |= p[31];
+ cm->cm_tds[i].td_disk_block_len = dbl;
+ cm->cm_tds[i].td_lbasize_shift =
+ sbd_get_lbasize_shift(cm->cm_tds[i].td_lu);
+ }
+ /* p now points to segment descriptor */
+ p += CPMGR_TARGET_DESCRIPTOR_SIZE;
+
+ if (p[0] != CPMGR_B2B_SEGMENT_DESCRIPTOR) {
+ cm->cm_status = CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR;
+ goto cpmgr_create_done;
+ }
+ n = READ_SCSI16(&p[2], uint16_t);
+ if (n != (CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE - 4)) {
+ cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+ goto cpmgr_create_done;
+ }
+
+ n = READ_SCSI16(&p[4], uint16_t);
+ if (n >= cm->cm_td_count) {
+ cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+ goto cpmgr_create_done;
+ }
+ cm->cm_src_td_ndx = n;
+
+ n = READ_SCSI16(&p[6], uint16_t);
+ if (n >= cm->cm_td_count) {
+ cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST;
+ goto cpmgr_create_done;
+ }
+ cm->cm_dst_td_ndx = n;
+
+ cm->cm_copy_size = READ_SCSI16(&p[10], uint64_t);
+ cm->cm_copy_size *= (uint64_t)(cm->cm_tds[(p[1] & 2) ?
+ cm->cm_dst_td_ndx : cm->cm_src_td_ndx].td_disk_block_len);
+ cm->cm_src_offset = (READ_SCSI64(&p[12], uint64_t)) <<
+ cm->cm_tds[cm->cm_src_td_ndx].td_lbasize_shift;
+ cm->cm_dst_offset = (READ_SCSI64(&p[20], uint64_t)) <<
+ cm->cm_tds[cm->cm_dst_td_ndx].td_lbasize_shift;
+
+ /* Allocate the xfer buffer. */
+ cm->cm_xfer_buf = kmem_alloc(CPMGR_XFER_BUF_SIZE, KM_NOSLEEP);
+ if (cm->cm_xfer_buf == NULL) {
+ cm->cm_status = CPMGR_INSUFFICIENT_RESOURCES;
+ goto cpmgr_create_done;
+ }
+
+ /*
+ * No need to check block limits. cpmgr_run() will
+ * take care of that.
+ */
+
+ /* All checks passed */
+ cm->cm_state = CM_COPYING;
+
+cpmgr_create_done:
+ if (cm->cm_state == CM_COMPLETE) {
+ cpmgr_completion_cleanup(cm);
+ }
+ return (cm);
+}
+
+void
+cpmgr_destroy(cpmgr_handle_t h)
+{
+ cpmgr_t *cm = (cpmgr_t *)h;
+
+ ASSERT(cm->cm_state == CM_COMPLETE);
+ kmem_free(cm, sizeof (*cm));
+}
+
+static void
+cpmgr_completion_cleanup(cpmgr_t *cm)
+{
+ int i;
+
+ for (i = 0; i < cm->cm_td_count; i++) {
+ if (cm->cm_tds[i].td_lu) {
+ stmf_release_lu(cm->cm_tds[i].td_lu);
+ cm->cm_tds[i].td_lu = NULL;
+ }
+ }
+ if (cm->cm_xfer_buf) {
+ kmem_free(cm->cm_xfer_buf, CPMGR_XFER_BUF_SIZE);
+ cm->cm_xfer_buf = NULL;
+ }
+}
+
+void
+cpmgr_run(cpmgr_t *cm, clock_t preemption_point)
+{
+ stmf_lu_t *lu;
+ sbd_lu_t *src_slu, *dst_slu;
+ uint64_t xfer_size, start, end;
+ sbd_status_t ret;
+
+ /*
+ * XXX: Handle reservations and read-only LU here.
+ */
+ ASSERT(cm->cm_state == CM_COPYING);
+ lu = cm->cm_tds[cm->cm_src_td_ndx].td_lu;
+ src_slu = (sbd_lu_t *)lu->lu_provider_private;
+ if (sbd_check_reservation_conflict(src_slu, cm->cm_task)) {
+ cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT);
+ return;
+ }
+
+ lu = cm->cm_tds[cm->cm_dst_td_ndx].td_lu;
+ dst_slu = (sbd_lu_t *)lu->lu_provider_private;
+ if (sbd_check_reservation_conflict(dst_slu, cm->cm_task)) {
+ cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT);
+ return;
+ }
+ if (dst_slu->sl_flags & SL_WRITE_PROTECTED) {
+ cpmgr_abort(cm, STMF_SAA_WRITE_PROTECTED);
+ return;
+ }
+
+ while (cm->cm_size_done < cm->cm_copy_size) {
+ xfer_size = ((cm->cm_copy_size - cm->cm_size_done) >
+ CPMGR_XFER_BUF_SIZE) ? CPMGR_XFER_BUF_SIZE :
+ (cm->cm_copy_size - cm->cm_size_done);
+ start = cm->cm_src_offset + cm->cm_size_done;
+ ret = sbd_data_read(src_slu, cm->cm_task, start, xfer_size,
+ cm->cm_xfer_buf);
+ if (ret != SBD_SUCCESS) {
+ if (ret == SBD_IO_PAST_EOF) {
+ cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE);
+ } else {
+ cpmgr_abort(cm,
+ CPMGR_THIRD_PARTY_DEVICE_FAILURE);
+ }
+ break;
+ }
+ end = cm->cm_dst_offset + cm->cm_size_done;
+ ret = sbd_data_write(dst_slu, cm->cm_task, end, xfer_size,
+ cm->cm_xfer_buf);
+ if (ret != SBD_SUCCESS) {
+ if (ret == SBD_IO_PAST_EOF) {
+ cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE);
+ } else {
+ cpmgr_abort(cm,
+ CPMGR_THIRD_PARTY_DEVICE_FAILURE);
+ }
+ break;
+ }
+ cm->cm_size_done += xfer_size;
+ if (ddi_get_lbolt() >= preemption_point)
+ break;
+ }
+ if (cm->cm_size_done == cm->cm_copy_size) {
+ cm->cm_state = CM_COMPLETE;
+ cm->cm_status = 0;
+ cpmgr_completion_cleanup(cm);
+ }
+}
+
+void
+cpmgr_abort(cpmgr_t *cm, uint32_t s)
+{
+ if (cm->cm_state == CM_COPYING) {
+ cm->cm_state = CM_COMPLETE;
+ cm->cm_status = s;
+ cpmgr_completion_cleanup(cm);
+ }
+}
+
+void
+sbd_handle_xcopy(scsi_task_t *task, stmf_data_buf_t *dbuf)
+{
+ uint32_t cmd_xfer_len;
+
+ if (HardwareAcceleratedMove == 0) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_OPCODE);
+ return;
+ }
+
+ cmd_xfer_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
+
+ if (cmd_xfer_len == 0) {
+ task->task_cmd_xfer_length = 0;
+ if (task->task_additional_flags &
+ TASK_AF_NO_EXPECTED_XFER_LENGTH) {
+ task->task_expected_xfer_length = 0;
+ }
+ stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+ return;
+ }
+
+ sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
+}
+
+void
+sbd_handle_xcopy_xfer(scsi_task_t *task, uint8_t *buf)
+{
+ cpmgr_handle_t h;
+ uint32_t s;
+ clock_t tic, end;
+
+	/*
+	 * No need to pass the buffer size; it's taken from the CDB.
+	 */
+ h = cpmgr_create(task, buf);
+ if (h == CPMGR_INVALID_HANDLE) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ CPMGR_INSUFFICIENT_RESOURCES);
+ return;
+ }
+ tic = drv_usectohz(1000000);
+ end = ddi_get_lbolt() + (CPMGR_DEFAULT_TIMEOUT * tic);
+ while (!cpmgr_done(h)) {
+ if (stmf_is_task_being_aborted(task) || (ddi_get_lbolt() > end))
+ cpmgr_abort(h, CPMGR_THIRD_PARTY_DEVICE_FAILURE);
+ else
+ cpmgr_run(h, ddi_get_lbolt() + tic);
+ }
+ s = cpmgr_status(h);
+ if (s) {
+ if (s == CPMGR_RESERVATION_CONFLICT) {
+ stmf_scsilib_send_status(task,
+ STATUS_RESERVATION_CONFLICT, 0);
+ } else {
+ stmf_scsilib_send_status(task, STATUS_CHECK, s);
+ }
+ } else {
+ stmf_scsilib_send_status(task, STATUS_GOOD, 0);
+ }
+ cpmgr_destroy(h);
+}
+
+void
+sbd_handle_recv_copy_results(struct scsi_task *task,
+ struct stmf_data_buf *initial_dbuf)
+{
+ uint32_t cdb_len;
+
+ cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
+ if ((task->task_cdb[1] & 0x1F) != 3) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_FIELD_IN_CDB);
+ return;
+ }
+ sbd_handle_short_read_transfers(task, initial_dbuf,
+ sbd_recv_copy_results_op_params, cdb_len,
+ sizeof (sbd_recv_copy_results_op_params));
+}
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h
new file mode 100644
index 0000000000..549b17aacf
--- /dev/null
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h
@@ -0,0 +1,157 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ */
+
+#ifndef _ATS_COPY_MGR_H
+#define _ATS_COPY_MGR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ATS structures and functions. */
+
+typedef struct ats_state_s {
+	/*
+	 * We actually don't allow I/O which conflicts with the current ATS.
+	 * The conflicting_rw_count is for those I/Os which are currently
+	 * running and are potentially conflicting.
+	 */
+ list_node_t as_next;
+ uint8_t as_cmd;
+ uint32_t as_conflicting_rw_count;
+ uint32_t as_non_conflicting_rw_count;
+ uint32_t as_ats_gen_ndx;
+ uint32_t as_cur_ats_handle;
+ uint64_t as_cur_ats_lba;
+ uint64_t as_cur_ats_lba_end;
+ uint64_t as_cur_ats_len; /* in nblks */
+ struct scsi_task *as_cur_ats_task;
+} ats_state_t;
+
+/* Since we're technically part of stmf_sbd.h, use some defines here. */
+#define sl_conflicting_rw_count sl_ats_state.as_conflicting_rw_count
+#define sl_non_conflicting_rw_count sl_ats_state.as_non_conflicting_rw_count
+#define sl_ats_gen_ndx sl_ats_state.as_ats_gen_ndx
+#define sl_cur_ats_handle sl_ats_state.as_cur_ats_handle
+#define sl_cur_ats_lba sl_ats_state.as_cur_ats_lba
+#define sl_cur_ats_len sl_ats_state.as_cur_ats_len
+#define sl_cur_ats_task sl_ats_state.as_cur_ats_task
+
+struct sbd_cmd;
+struct sbd_lu;
+
+void sbd_handle_ats_xfer_completion(struct scsi_task *, struct sbd_cmd *,
+ struct stmf_data_buf *, uint8_t);
+void sbd_do_ats_xfer(struct scsi_task *, struct sbd_cmd *,
+ struct stmf_data_buf *, uint8_t);
+void sbd_handle_ats(scsi_task_t *, struct stmf_data_buf *);
+void sbd_handle_recv_copy_results(struct scsi_task *, struct stmf_data_buf *);
+void sbd_free_ats_handle(struct scsi_task *, struct sbd_cmd *);
+void sbd_handle_ats(scsi_task_t *, struct stmf_data_buf *);
+uint8_t sbd_ats_max_nblks(void);
+void sbd_ats_remove_by_task(scsi_task_t *);
+sbd_status_t sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl,
+ uint64_t lba, uint64_t count);
+
+/* Block-copy structures and functions. */
+
+struct scsi_task;
+typedef void *cpmgr_handle_t;
+
+#define CPMGR_INVALID_HANDLE ((cpmgr_handle_t)NULL)
+
+#define CPMGR_DEFAULT_TIMEOUT 30
+
+#define CPMGR_PARAM_HDR_LEN 16
+#define CPMGR_IDENT_TARGET_DESCRIPTOR 0xE4
+#define CPMGR_MAX_TARGET_DESCRIPTORS 2
+#define CPMGR_TARGET_DESCRIPTOR_SIZE 32
+
+#define CPMGR_B2B_SEGMENT_DESCRIPTOR 2
+#define CPMGR_MAX_SEGMENT_DESCRIPTORS 1
+#define CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE 28
+
+/*
+ * SCSI errors before copy starts.
+ */
+#define CPMGR_PARAM_LIST_LEN_ERROR 0x051A00
+#define CPMGR_INVALID_FIELD_IN_PARAM_LIST 0x052600
+#define CPMGR_TOO_MANY_TARGET_DESCRIPTORS 0x052606
+#define CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR 0x052607
+#define CPMGR_TOO_MANY_SEGMENT_DESCRIPTORS 0x052608
+#define CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR 0x052609
+#define CPMGR_COPY_TARGET_NOT_REACHABLE 0x050D02
+#define CPMGR_INSUFFICIENT_RESOURCES 0x0B5503
+
+/*
+ * SCSI errors after copy has started.
+ */
+#define CPMGR_LBA_OUT_OF_RANGE 0x0A2100
+#define CPMGR_THIRD_PARTY_DEVICE_FAILURE 0x0A0D01
+
+/*
+ * SCSI errors which don't result in STATUS_CHECK.
+ * Use an invalid sense key to mark these.
+ */
+#define CPMGR_RESERVATION_CONFLICT 0xF00001
+
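/*
 * Editor's annotation (an assumption, not in the original header): the
 * CPMGR_* error values above appear to pack a SCSI sense triple the same
 * way the STMF_SAA_* codes do, i.e. (key << 16) | (ASC << 8) | ASCQ.
 * E.g. 0x052600 is ILLEGAL REQUEST (0x05) / ASC 0x26 / ASCQ 0x00, and the
 * post-start errors use sense key 0x0A, COPY ABORTED.  A hypothetical
 * packer under that assumption:
 */
#define	CPMGR_SENSE(key, asc, ascq) \
	(((uint32_t)(key) << 16) | ((uint32_t)(asc) << 8) | (ascq))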
+typedef enum cm_state {
+ CM_STARTING = 0,
+ CM_COPYING,
+ CM_COMPLETE
+} cm_state_t;
+
+#define CPMGR_XFER_BUF_SIZE (128 * 1024)
+
+typedef struct cm_target_desc {
+ stmf_lu_t *td_lu;
+ uint32_t td_disk_block_len;
+ uint8_t td_lbasize_shift;
+} cm_target_desc_t;
+
+/*
+ * Current implementation supports 2 target descriptors (identification type)
+ * for src and dst and one segment descriptor (block -> block).
+ */
+typedef struct cpmgr {
+ cm_target_desc_t cm_tds[CPMGR_MAX_TARGET_DESCRIPTORS];
+ uint8_t cm_td_count;
+ uint16_t cm_src_td_ndx;
+ uint16_t cm_dst_td_ndx;
+ cm_state_t cm_state;
+ uint32_t cm_status;
+ uint64_t cm_src_offset;
+ uint64_t cm_dst_offset;
+ uint64_t cm_copy_size;
+ uint64_t cm_size_done;
+ void *cm_xfer_buf;
+ scsi_task_t *cm_task;
+} cpmgr_t;
+
+#define cpmgr_done(cm) (((cpmgr_t *)(cm))->cm_state == CM_COMPLETE)
+#define cpmgr_status(cm) (((cpmgr_t *)(cm))->cm_status)
+
+cpmgr_handle_t cpmgr_create(struct scsi_task *task, uint8_t *params);
+void cpmgr_destroy(cpmgr_handle_t h);
+void cpmgr_run(cpmgr_t *cm, clock_t preemption_point);
+void cpmgr_abort(cpmgr_t *cm, uint32_t s);
+void sbd_handle_xcopy_xfer(scsi_task_t *, uint8_t *);
+void sbd_handle_xcopy(scsi_task_t *, stmf_data_buf_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ATS_COPY_MGR_H */
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
index 2d1f443dda..1b66e80a22 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c
@@ -25,8 +25,8 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
-#include <sys/sysmacros.h>
#include <sys/conf.h>
+#include <sys/list.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
@@ -56,6 +56,10 @@
extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
extern void sbd_pgr_reset(sbd_lu_t *sl);
+extern int HardwareAcceleratedLocking;
+extern int HardwareAcceleratedInit;
+extern int HardwareAcceleratedMove;
+extern uint8_t sbd_unmap_enable;
static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
void **result);
@@ -109,6 +113,7 @@ static sbd_lu_t *sbd_lu_list = NULL;
static kmutex_t sbd_lock;
static dev_info_t *sbd_dip;
static uint32_t sbd_lu_count = 0;
+uint8_t sbd_enable_unmap_sync = 0;
/* Global property settings for the logical unit */
char sbd_vendor_id[] = "SUN ";
@@ -155,7 +160,7 @@ static struct dev_ops sbd_ops = {
NULL /* power */
};
-#define SBD_NAME "COMSTAR SBD"
+#define SBD_NAME "COMSTAR SBD+ "
static struct modldrv modldrv = {
&mod_driverops,
@@ -194,6 +199,14 @@ _init(void)
}
mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
+
+ if (HardwareAcceleratedLocking == 0)
+ cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
+ if (HardwareAcceleratedMove == 0)
+ cmn_err(CE_NOTE, "HardwareAcceleratedMove Disabled");
+ if (HardwareAcceleratedInit == 0)
+ cmn_err(CE_NOTE, "HardwareAcceleratedInit Disabled");
+
return (0);
}
@@ -272,6 +285,8 @@ sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
static int
sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
+ char *prop;
+
switch (cmd) {
case DDI_ATTACH:
sbd_dip = dip;
@@ -281,6 +296,23 @@ sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
break;
}
ddi_report_dev(dip);
+
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
+ (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
+ ddi_prop_free(prop);
+ }
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
+ (void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
+ ddi_prop_free(prop);
+ }
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
+ (void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
+ ddi_prop_free(prop);
+ }
+
return (DDI_SUCCESS);
}
@@ -1396,7 +1428,10 @@ sbd_write_lu_info(sbd_lu_t *sl)
static void
do_unmap_setup(sbd_lu_t *sl)
{
- ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0);
+ if (sbd_unmap_enable == 0) {
+ sl->sl_flags &= ~(SL_UNMAP_ENABLED);
+ return;
+ }
if ((sl->sl_flags & SL_ZFS_META) == 0)
return; /* No UNMAP for you. */
@@ -1441,8 +1476,10 @@ sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
lu->lu_send_status_done = sbd_send_status_done;
lu->lu_task_free = sbd_task_free;
lu->lu_abort = sbd_abort;
+ lu->lu_task_poll = sbd_task_poll;
lu->lu_dbuf_free = sbd_dbuf_free;
lu->lu_ctl = sbd_ctl;
+ lu->lu_task_done = sbd_ats_remove_by_task;
lu->lu_info = sbd_info;
sl->sl_state = STMF_STATE_OFFLINE;
@@ -1455,6 +1492,12 @@ sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
return (EIO);
}
+ /*
+	 * Set up the ATS (compare and write) list to handle multiple
+	 * ATS commands simultaneously.
+ */
+ list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
+ offsetof(ats_state_t, as_next));
*err_ret = 0;
return (0);
}
@@ -1561,6 +1604,7 @@ odf_over_open:
sl->sl_lu_size = vattr.va_size;
}
}
+
if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
*err_ret = SBD_RET_FILE_SIZE_ERROR;
ret = EINVAL;
@@ -1837,7 +1881,7 @@ sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
sl->sl_flags |= SL_WRITE_PROTECTED;
}
if (slu->slu_blksize_valid) {
- if (!ISP2(slu->slu_blksize) ||
+ if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
(slu->slu_blksize > (32 * 1024)) ||
(slu->slu_blksize == 0)) {
*err_ret = SBD_RET_INVALID_BLKSIZE;
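The open-coded expression replacing ISP2 relies on the usual power-of-two identity: a nonzero v is a power of two exactly when (v & (v - 1)) == 0, so a nonzero AND result marks an invalid block size. For illustration:

    #include <stdint.h>

    /* Equivalent of the ISP2 test used above for slu_blksize. */
    static int
    is_power_of_two(uint32_t v)
    {
            return (v != 0 && (v & (v - 1)) == 0);
    }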
@@ -2997,8 +3041,10 @@ sbd_status_t
sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
uint64_t offset, uint64_t size, uint8_t *buf)
{
- int ret;
+ int ret, ioflag = 0;
long resid;
+ hrtime_t xfer_start;
+ uint8_t op = task->task_cdb[0];
if ((offset + size) > sl->sl_lu_size) {
return (SBD_IO_PAST_EOF);
@@ -3006,6 +3052,16 @@ sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
offset += sl->sl_data_offset;
+ /*
+ * Check to see if the command is READ(10), READ(12), or READ(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, the FSYNC flag will be set
+ * on the read.
+ */
+ if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+ (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
+ ioflag = FSYNC;
+ }
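The FUA test keys off byte 1, bit 3 of the CDB for the Group 1/4/5 READ opcodes. Restated as a standalone predicate, using the standard opcode values 0x28, 0xa8 and 0x88 for READ(10), READ(12) and READ(16), and assuming BIT_3 is 0x08:

    #include <stdint.h>

    #define BIT_3   0x08    /* FUA bit in CDB byte 1 */

    /* Nonzero when a READ(10)/(12)/(16) CDB requests Forced Unit Access. */
    static int
    cdb_read_fua(const uint8_t *cdb)
    {
            uint8_t op = cdb[0];

            return ((op == 0x28 || op == 0xa8 || op == 0x88) &&
                (cdb[1] & BIT_3) != 0);
    }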
if ((offset + size) > sl->sl_data_readable_size) {
uint64_t store_end;
if (offset > sl->sl_data_readable_size) {
@@ -3017,6 +3073,7 @@ sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
size = store_end;
}
+ xfer_start = gethrtime();
DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
scsi_task_t *, task);
@@ -3032,11 +3089,14 @@ sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
rw_exit(&sl->sl_access_state_lock);
return (SBD_FAILURE);
}
+
ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
- (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
+ (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
&resid);
rw_exit(&sl->sl_access_state_lock);
+ stmf_lu_xfer_done(task, B_TRUE /* read */,
+ (gethrtime() - xfer_start));
DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
int, ret, scsi_task_t *, task);
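The new accounting brackets the backing-store I/O with gethrtime() and reports the elapsed nanoseconds through stmf_lu_xfer_done for the LU transfer statistics. A minimal sketch of the same bracketing, with hrtime_ns() as a user-space stand-in for gethrtime():

    #include <stdint.h>
    #include <time.h>

    static int64_t
    hrtime_ns(void)
    {
            struct timespec ts;

            (void) clock_gettime(CLOCK_MONOTONIC, &ts);
            return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
    }

    /* Run an I/O callback and hand the elapsed time to an accounting hook. */
    static void
    timed_io(void (*do_io)(void), void (*account)(int64_t elapsed_ns))
    {
            int64_t start = hrtime_ns();

            do_io();
            account(hrtime_ns() - start);
    }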
@@ -3059,6 +3119,9 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
long resid;
sbd_status_t sret = SBD_SUCCESS;
int ioflag;
+ hrtime_t xfer_start;
+ uint8_t op = task->task_cdb[0];
+ boolean_t fua_bit = B_FALSE;
if ((offset + size) > sl->sl_lu_size) {
return (SBD_IO_PAST_EOF);
@@ -3066,13 +3129,24 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
offset += sl->sl_data_offset;
- if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
- (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
+ /*
+ * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, the FSYNC flag will be set
+ * on the write.
+ */
+ if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+ (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+ fua_bit = B_TRUE;
+ }
+ if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
+ (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
ioflag = FSYNC;
} else {
ioflag = 0;
}
+ xfer_start = gethrtime();
DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
scsi_task_t *, task);
@@ -3093,6 +3167,8 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
&resid);
rw_exit(&sl->sl_access_state_lock);
+ stmf_lu_xfer_done(task, B_FALSE /* write */,
+ (gethrtime() - xfer_start));
DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
uint8_t *, buf, uint64_t, size, uint64_t, offset,
int, ret, scsi_task_t *, task);
@@ -3103,7 +3179,6 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
sret = sbd_flush_data_cache(sl, 1);
}
over_sl_data_write:
-
if ((ret || resid) || (sret != SBD_SUCCESS)) {
return (SBD_FAILURE);
} else if ((offset + size) > sl->sl_data_readable_size) {
@@ -3639,7 +3714,8 @@ again:
}
}
out:
- nvlist_free(nv);
+ if (nv != NULL)
+ nvlist_free(nv);
kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
kmem_free(zc, sizeof (zfs_cmd_t));
(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
@@ -3696,7 +3772,7 @@ int
sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
{
vnode_t *vp;
- int unused;
+ int unused, ret;
/* Nothing to do */
if (dfl->dfl_num_exts == 0)
@@ -3717,6 +3793,29 @@ sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
return (EIO);
}
- return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
- &unused, NULL));
+ ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
+ &unused, NULL);
+
+ return (ret);
+}
+
+/*
+ * Check if this lu belongs to sbd or some other lu
+ * provider. A simple check for one of the module
+ * entry points is sufficient.
+ */
+int
+sbd_is_valid_lu(stmf_lu_t *lu)
+{
+ if (lu->lu_new_task == sbd_new_task)
+ return (1);
+ return (0);
+}
+
+uint8_t
+sbd_get_lbasize_shift(stmf_lu_t *lu)
+{
+ sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
+
+ return (sl->sl_data_blocksize_shift);
}
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h
index e3d3b2eb6f..8b9ec3646e 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SBD_IMPL_H
@@ -36,6 +36,11 @@ struct modify_lu_cmd;
struct sbd_lu_attr;
struct sbd_it_data;
+#define ATOMIC8_GET(val) ( \
+ (atomic_add_8_nv(&(val), 0)))
+#define ATOMIC32_GET(val) ( \
+ (atomic_add_32_nv(&(val), 0)))
+
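ATOMIC8_GET and ATOMIC32_GET read a counter that other threads update by atomically adding zero: atomic_add_32_nv returns the resulting value, and adding zero makes that the current value, read with full atomicity. The same idiom in portable C11, for illustration only:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Atomically read a shared 32-bit counter by adding zero. */
    static inline uint32_t
    atomic32_get(_Atomic uint32_t *val)
    {
            /* fetch_add returns the prior value; with 0 it is the value. */
            return (atomic_fetch_add(val, 0));
    }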
/*
 * sms endianness
*/
@@ -206,16 +211,36 @@ typedef struct sbd_cmd {
uint32_t len; /* len left */
uint32_t current_ro; /* running relative offset */
uint8_t *trans_data; /* Any transient data */
+ ats_state_t *ats_state;
+ uint32_t rsvd;
} sbd_cmd_t;
/*
* flags for sbd_cmd
+ *
+ * SBD_SCSI_CMD_ACTIVE means that a command is running. This is the time
+ * between when sbd_new_task is called and when either the command
+ * completion is sent (stmf_scsilib_send_status) or an abort is
+ * issued.
+ *
+ * SBD_SCSI_CMD_ABORT_REQUESTED is set when a command is being aborted. It
+ * may be set before the task is dispatched or at any point while the
+ * command is being processed.
+ *
+ * SBD_SCSI_CMD_XFER_FAIL is set when a command data buffer transfer
+ * failed. It usually leads to an abort.
+ *
+ * SBD_SCSI_CMD_SYNC_WRITE means a synchronous write is being done.
+ *
+ * SBD_SCSI_CMD_TRANS_DATA means that a buffer has been allocated to
+ * be used for the transfer of data.
*/
#define SBD_SCSI_CMD_ACTIVE 0x01
#define SBD_SCSI_CMD_ABORT_REQUESTED 0x02
#define SBD_SCSI_CMD_XFER_FAIL 0x04
#define SBD_SCSI_CMD_SYNC_WRITE 0x08
#define SBD_SCSI_CMD_TRANS_DATA 0x10
+#define SBD_SCSI_CMD_ATS_RELATED 0x20
/*
* cmd types
@@ -269,7 +294,7 @@ typedef struct sbd_create_standby_lu {
typedef struct sbd_zvol_io {
uint64_t zvio_offset; /* offset into volume */
int zvio_flags; /* flags */
- void *zvio_dbp; /* array of dmu buffers */
+ void *zvio_dbp; /* array of dmu buffers */
void *zvio_abp; /* array of arc buffers */
uio_t *zvio_uio; /* for copy operations */
} sbd_zvol_io_t;
@@ -300,10 +325,13 @@ void sbd_send_status_done(struct scsi_task *task);
void sbd_task_free(struct scsi_task *task);
stmf_status_t sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg,
uint32_t flags);
+void sbd_task_poll(struct scsi_task *task);
void sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf);
void sbd_ctl(struct stmf_lu *lu, int cmd, void *arg);
stmf_status_t sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg,
uint8_t *buf, uint32_t *bufsizep);
+uint8_t sbd_get_lbasize_shift(stmf_lu_t *lu);
+int sbd_is_valid_lu(stmf_lu_t *lu);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c
index b8e74375e2..1edd7b4706 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/atomic.h>
@@ -39,7 +40,7 @@
#define MAX_PGR_PARAM_LIST_LENGTH (256 * 1024)
-int sbd_pgr_reservation_conflict(scsi_task_t *);
+int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
void sbd_pgr_reset(sbd_lu_t *);
void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
@@ -82,6 +83,7 @@ static void sbd_pgr_do_release(sbd_lu_t *, sbd_it_data_t *, uint8_t);
static void sbd_pgr_do_reserve(sbd_pgr_t *, sbd_pgr_key_t *, sbd_it_data_t *it,
stmf_scsi_session_t *, scsi_cdb_prout_t *);
+static boolean_t sbd_pgr_should_save(sbd_lu_t *);
extern sbd_status_t sbd_write_meta_section(sbd_lu_t *, sm_section_hdr_t *);
extern sbd_status_t sbd_read_meta_section(sbd_lu_t *, sm_section_hdr_t **,
uint16_t);
@@ -195,7 +197,7 @@ extern char sbd_ctoi(char c);
/* actions for PERSISTENT RESERVE OUT command */ \
(((cdb[0]) == SCMD_PERSISTENT_RESERVE_OUT) && ( \
(((cdb[1]) & 0x1F) == PR_OUT_REGISTER_AND_IGNORE_EXISTING_KEY) || \
- (((cdb[1]) & 0x1F) == PR_OUT_REGISTER))) || \
+ (((cdb[1]) & 0x1F) == PR_OUT_REGISTER))) || \
/* ----------------------- */ \
/* SBC-3 (rev 17) Table 3 */ \
/* ----------------------- */ \
@@ -280,7 +282,7 @@ sbd_status_t
sbd_pgr_meta_init(sbd_lu_t *slu)
{
sbd_pgr_info_t *spi = NULL;
- uint32_t sz;
+ uint32_t sz;
sbd_status_t ret;
sz = sizeof (sbd_pgr_info_t);
@@ -295,6 +297,22 @@ sbd_pgr_meta_init(sbd_lu_t *slu)
return (ret);
}
+/*
+ * Evaluate common cases where a PERSISTENT RESERVE OUT CDB handler should call
+ * sbd_pgr_meta_write().
+ */
+static boolean_t
+sbd_pgr_should_save(sbd_lu_t *slu)
+{
+ sbd_pgr_t *pgr = slu->sl_pgr;
+
+ if (stmf_is_pgr_aptpl_always() == B_TRUE ||
+ (pgr->pgr_flags & (SBD_PGR_APTPL)))
+ return (B_TRUE);
+ else
+ return (B_FALSE);
+}
+
sbd_status_t
sbd_pgr_meta_load(sbd_lu_t *slu)
{
@@ -323,7 +341,12 @@ sbd_pgr_meta_load(sbd_lu_t *slu)
}
pgr->pgr_flags = spi->pgr_flags;
- if (pgr->pgr_flags & SBD_PGR_APTPL) {
+ /*
+ * We reload APTPL reservations when:
+ * 1. Global override is enabled
+ * 2. APTPL was explicitly asserted in the PERSISTENT RESERVE OUT CDB
+ */
+ if (stmf_is_pgr_aptpl_always() || (pgr->pgr_flags & SBD_PGR_APTPL)) {
pgr->pgr_rsv_type = spi->pgr_rsv_type;
pgr->pgr_rsv_scope = spi->pgr_rsv_scope;
} else {
@@ -436,7 +459,7 @@ sbd_pgr_meta_write(sbd_lu_t *slu)
/* Calculate total pgr meta section size needed */
sz = sizeof (sbd_pgr_info_t);
- if (pgr->pgr_flags & SBD_PGR_APTPL) {
+ if ((pgr->pgr_flags & SBD_PGR_APTPL) || stmf_is_pgr_aptpl_always()) {
key = pgr->pgr_keylist;
while (key != NULL) {
sz = ALIGNED_TO_8BYTE_BOUNDARY(sz +
@@ -458,7 +481,7 @@ sbd_pgr_meta_write(sbd_lu_t *slu)
spi->pgr_sms_header.sms_id = SMS_ID_PGR_INFO;
spi->pgr_sms_header.sms_data_order = SMS_DATA_ORDER;
- if (pgr->pgr_flags & SBD_PGR_APTPL) {
+ if ((pgr->pgr_flags & SBD_PGR_APTPL) || stmf_is_pgr_aptpl_always()) {
uint8_t *ptr;
key = pgr->pgr_keylist;
sz = sizeof (sbd_pgr_info_t);
@@ -484,8 +507,12 @@ sbd_pgr_meta_write(sbd_lu_t *slu)
key = key->pgr_key_next;
}
}
-
+ rw_downgrade(&pgr->pgr_lock);
ret = sbd_write_meta_section(slu, (sm_section_hdr_t *)spi);
+ if (!rw_tryupgrade(&pgr->pgr_lock)) {
+ rw_exit(&pgr->pgr_lock);
+ rw_enter(&pgr->pgr_lock, RW_WRITER);
+ }
kmem_free(spi, totalsz);
if (ret != SBD_SUCCESS) {
sbd_pgr_key_t *tmp_list;
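The locking change above downgrades pgr_lock to a read lock around the slow metadata write, then tries to upgrade back; if the upgrade loses a race it must drop the lock entirely and re-acquire as writer, at which point the PGR state may have changed. The shape of that dance, sketched with the illumos rwlock primitives and a hypothetical lock:

    static krwlock_t lock;      /* assumed already set up with rw_init() */

    static void
    write_meta_locked(void)
    {
            rw_enter(&lock, RW_WRITER);
            /* ... mutate in-core state ... */
            rw_downgrade(&lock);    /* let readers in during the disk write */
            /* ... perform the slow metadata write ... */
            if (!rw_tryupgrade(&lock)) {
                    rw_exit(&lock); /* lost the race; start over as writer */
                    rw_enter(&lock, RW_WRITER);
                    /* state may have changed while the lock was dropped */
            }
            rw_exit(&lock);
    }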
@@ -513,7 +540,7 @@ sbd_pgr_meta_write(sbd_lu_t *slu)
static sbd_pgr_key_t *
sbd_pgr_key_alloc(scsi_devid_desc_t *lptid, scsi_transport_id_t *rptid,
- int16_t lpt_len, int16_t rpt_len)
+ int16_t lpt_len, int16_t rpt_len)
{
sbd_pgr_key_t *key;
@@ -578,7 +605,8 @@ sbd_pgr_reset(sbd_lu_t *slu)
sbd_pgr_t *pgr = slu->sl_pgr;
rw_enter(&pgr->pgr_lock, RW_WRITER);
- if (!(pgr->pgr_flags & SBD_PGR_APTPL)) {
+ if (!(pgr->pgr_flags & SBD_PGR_APTPL) &&
+ stmf_is_pgr_aptpl_always() == B_FALSE) {
sbd_pgr_keylist_dealloc(slu);
pgr->pgr_PRgeneration = 0;
pgr->pgr_rsvholder = NULL;
@@ -630,7 +658,7 @@ sbd_pgr_remove_key(sbd_lu_t *slu, sbd_pgr_key_t *key)
*/
static uint32_t
sbd_pgr_remove_keys(sbd_lu_t *slu, sbd_it_data_t *my_it, sbd_pgr_key_t *my_key,
- uint64_t svc_key, boolean_t match)
+ uint64_t svc_key, boolean_t match)
{
sbd_pgr_t *pgr = slu->sl_pgr;
sbd_it_data_t *it;
@@ -708,7 +736,7 @@ sbd_pgr_set_pgr_check_flag(sbd_lu_t *slu, boolean_t registered)
static boolean_t
sbd_pgr_key_compare(sbd_pgr_key_t *key, scsi_devid_desc_t *lpt,
- stmf_remote_port_t *rpt)
+ stmf_remote_port_t *rpt)
{
scsi_devid_desc_t *id;
@@ -732,7 +760,7 @@ sbd_pgr_key_compare(sbd_pgr_key_t *key, scsi_devid_desc_t *lpt,
sbd_pgr_key_t *
sbd_pgr_key_registered(sbd_pgr_t *pgr, scsi_devid_desc_t *lpt,
- stmf_remote_port_t *rpt)
+ stmf_remote_port_t *rpt)
{
sbd_pgr_key_t *key;
@@ -812,9 +840,8 @@ sbd_pgr_initialize_it(scsi_task_t *task, sbd_it_data_t *it)
* Check for any PGR Reservation conflict. return 0 if access allowed
*/
int
-sbd_pgr_reservation_conflict(scsi_task_t *task)
+sbd_pgr_reservation_conflict(scsi_task_t *task, sbd_lu_t *slu)
{
- sbd_lu_t *slu = (sbd_lu_t *)task->task_lu->lu_provider_private;
sbd_pgr_t *pgr = slu->sl_pgr;
sbd_it_data_t *it = (sbd_it_data_t *)task->task_lu_itl_handle;
@@ -1129,7 +1156,7 @@ sbd_pgr_in_read_reservation(scsi_task_t *task, stmf_data_buf_t *initial_dbuf)
static void
sbd_pgr_in_report_capabilities(scsi_task_t *task,
- stmf_data_buf_t *initial_dbuf)
+ stmf_data_buf_t *initial_dbuf)
{
sbd_lu_t *slu = (sbd_lu_t *)task->task_lu->lu_provider_private;
sbd_pgr_t *pgr = slu->sl_pgr;
@@ -1167,14 +1194,14 @@ sbd_pgr_in_report_capabilities(scsi_task_t *task,
static void
sbd_pgr_in_read_full_status(scsi_task_t *task,
- stmf_data_buf_t *initial_dbuf)
+ stmf_data_buf_t *initial_dbuf)
{
sbd_lu_t *slu = (sbd_lu_t *)task->task_lu->lu_provider_private;
sbd_pgr_t *pgr = slu->sl_pgr;
sbd_pgr_key_t *key;
- scsi_prin_status_t *sts;
+ scsi_prin_status_t *sts;
scsi_prin_full_status_t *buf;
- uint32_t i, buf_size, cdb_len;
+ uint32_t i, buf_size, cdb_len;
uint8_t *offset;
ASSERT(task->task_cdb[0] == SCMD_PERSISTENT_RESERVE_IN);
@@ -1203,7 +1230,7 @@ sbd_pgr_in_read_full_status(scsi_task_t *task,
if ((pgr->pgr_flags & SBD_PGR_RSVD_ALL_REGISTRANTS) ||
(pgr->pgr_rsvholder && pgr->pgr_rsvholder == key)) {
sts->r_holder = 1;
- sts->type = pgr->pgr_rsv_type;
+ sts->type = pgr->pgr_rsv_type;
sts->scope = pgr->pgr_rsv_scope;
}
@@ -1386,7 +1413,7 @@ sbd_pgr_out_register(scsi_task_t *task, stmf_data_buf_t *dbuf)
sbd_pgr_reg_done:
- if (pgr->pgr_flags & SBD_PGR_APTPL || plist->aptpl) {
+ if (plist->aptpl || (sbd_pgr_should_save(slu) == B_TRUE)) {
if (plist->aptpl)
PGR_SET_FLAG(pgr->pgr_flags, SBD_PGR_APTPL);
else
@@ -1394,7 +1421,7 @@ sbd_pgr_reg_done:
if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+ STMF_SAA_INSUFFICIENT_REG_RESRCS);
return;
}
}
@@ -1405,7 +1432,7 @@ sbd_pgr_reg_done:
static sbd_pgr_key_t *
sbd_pgr_do_register(sbd_lu_t *slu, sbd_it_data_t *it, scsi_devid_desc_t *lpt,
- stmf_remote_port_t *rpt, uint8_t keyflag, uint64_t svc_key)
+ stmf_remote_port_t *rpt, uint8_t keyflag, uint64_t svc_key)
{
sbd_pgr_t *pgr = slu->sl_pgr;
sbd_pgr_key_t *key;
@@ -1491,10 +1518,10 @@ sbd_pgr_out_reserve(scsi_task_t *task)
	/* In case no reservation exists */
} else {
sbd_pgr_do_reserve(pgr, key, it, ses, pr_out);
- if (pgr->pgr_flags & SBD_PGR_APTPL) {
+ if (sbd_pgr_should_save(slu) == B_TRUE) {
if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+ STMF_SAA_INSUFFICIENT_REG_RESRCS);
return;
}
}
@@ -1505,7 +1532,7 @@ sbd_pgr_out_reserve(scsi_task_t *task)
static void
sbd_pgr_do_reserve(sbd_pgr_t *pgr, sbd_pgr_key_t *key, sbd_it_data_t *it,
- stmf_scsi_session_t *ses, scsi_cdb_prout_t *pr_out)
+ stmf_scsi_session_t *ses, scsi_cdb_prout_t *pr_out)
{
scsi_devid_desc_t *lpt;
uint16_t lpt_len;
@@ -1548,6 +1575,11 @@ sbd_pgr_out_release(scsi_task_t *task)
ASSERT(key);
+ /*
+ * XXX this does not honor APTPL
+ * (i.e., changes made to a formerly-persistent reservation are not
+ * updated here!!!)
+ */
if (SBD_PGR_RSVD(pgr)) {
if (pgr->pgr_flags & SBD_PGR_RSVD_ALL_REGISTRANTS ||
pgr->pgr_rsvholder == key) {
@@ -1559,6 +1591,27 @@ sbd_pgr_out_release(scsi_task_t *task)
}
sbd_pgr_do_release(slu, it,
SBD_UA_RESERVATIONS_RELEASED);
+
+ /*
+ * XXX T10 SPC-3 5.6.10.2 says nothing about what to
+ * do in the event of a failure updating the
+ * PGR nvram store for a reservation associated with
+ * an APTPL-enabled (see SPC-3 5.6.4.1) I_T
+ * registration during a RELEASE service action.
+ *
+ * Technically, the CDB completed successfully, as per
+ * the spec, but at some point we may need to enter
+ * a recovery mode on the initiator(s) if we power cycle
+ * the target at the wrong instant...
+ */
+ if (sbd_pgr_should_save(slu) == B_TRUE) {
+ if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
+ stmf_scsilib_send_status(task,
+ STATUS_CHECK,
+ STMF_SAA_INSUFFICIENT_REG_RESRCS);
+ return;
+ }
+ }
}
}
stmf_scsilib_send_status(task, STATUS_GOOD, 0);
@@ -1602,10 +1655,10 @@ sbd_pgr_out_clear(scsi_task_t *task)
mutex_exit(&slu->sl_lock);
sbd_pgr_keylist_dealloc(slu);
sbd_pgr_set_ua_conditions(slu, it, SBD_UA_RESERVATIONS_PREEMPTED);
- if (pgr->pgr_flags & SBD_PGR_APTPL) {
+ if (sbd_pgr_should_save(slu) == B_TRUE) {
if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+ STMF_SAA_INSUFFICIENT_REG_RESRCS);
return;
}
}
@@ -1717,10 +1770,10 @@ sbd_pgr_out_preempt(scsi_task_t *task, stmf_data_buf_t *dbuf)
}
}
- if (pgr->pgr_flags & SBD_PGR_APTPL) {
+ if (sbd_pgr_should_save(slu) == B_TRUE) {
if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+ STMF_SAA_INSUFFICIENT_REG_RESRCS);
return;
}
}
@@ -1831,8 +1884,8 @@ sbd_pgr_out_register_and_move(scsi_task_t *task, stmf_data_buf_t *dbuf)
}
- /* Write to disk if currenty aptpl is set or given task is setting it */
- if (pgr->pgr_flags & SBD_PGR_APTPL || plist->aptpl) {
+ /* Write to disk if aptpl is currently set or this task is setting it */
+ if (plist->aptpl || (sbd_pgr_should_save(slu) == B_TRUE)) {
if (plist->aptpl)
PGR_SET_FLAG(pgr->pgr_flags, SBD_PGR_APTPL);
else
@@ -1840,7 +1893,7 @@ sbd_pgr_out_register_and_move(scsi_task_t *task, stmf_data_buf_t *dbuf)
if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_INSUFFICIENT_REG_RESOURCES);
+ STMF_SAA_INSUFFICIENT_REG_RESRCS);
return;
}
}
@@ -1850,7 +1903,8 @@ sbd_pgr_out_register_and_move(scsi_task_t *task, stmf_data_buf_t *dbuf)
}
void
-sbd_pgr_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *my_it) {
+sbd_pgr_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *my_it)
+{
sbd_it_data_t *it;
rw_enter(&sl->sl_pgr->pgr_lock, RW_WRITER);
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c
index a2e6fe74fa..8c6cac96c9 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc. All rights reserved.
@@ -87,6 +88,21 @@
(((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
/* End of SCSI2_CONFLICT_FREE_CMDS */
+uint8_t HardwareAcceleratedInit = 1;
+uint8_t sbd_unmap_enable = 1; /* allow unmap by default */
+
+/*
+ * An /etc/system tunable which specifies the maximum number of LBAs supported
+ * in a single UNMAP operation. Default is 0x002000 (8192) blocks, or 4MB
+ * with 512-byte blocks.
+ */
+int stmf_sbd_unmap_max_nblks = 0x002000;
+
+/*
+ * An /etc/system tunable which indicates if READ ops can run on the standby
+ * path or return an error.
+ */
+int stmf_standby_fail_reads = 0;
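Like other sbd knobs, these can be overridden at boot. A hypothetical /etc/system entry, assuming the variables live in the stmf_sbd module, might look like:

    set stmf_sbd:stmf_sbd_unmap_max_nblks = 0x4000
    set stmf_sbd:stmf_standby_fail_reads = 1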
+
stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
static void sbd_handle_sync_cache(struct scsi_task *task,
struct stmf_data_buf *initial_dbuf);
@@ -94,8 +110,6 @@ void sbd_handle_read_xfer_completion(struct scsi_task *task,
sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
stmf_data_buf_t *dbuf);
-void sbd_handle_short_write_transfers(scsi_task_t *task,
- stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size);
void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
uint32_t buflen);
void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
@@ -106,7 +120,7 @@ static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);
extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
-extern int sbd_pgr_reservation_conflict(scsi_task_t *);
+extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
extern void sbd_pgr_reset(sbd_lu_t *);
extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
@@ -144,7 +158,8 @@ sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
(task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
task->task_max_nbufs;
- len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len;
+ len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ?
+ dbuf->db_buf_size : ATOMIC32_GET(scmd->len);
laddr = scmd->addr + scmd->current_ro;
for (buflen = 0, ndx = 0; (buflen < len) &&
@@ -168,12 +183,14 @@ sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
dbuf->db_data_size = buflen;
dbuf->db_flags = DB_DIRECTION_TO_RPORT;
(void) stmf_xfer_data(task, dbuf, 0);
- scmd->len -= buflen;
+ atomic_add_32(&scmd->len, -buflen);
scmd->current_ro += buflen;
- if (scmd->len && (scmd->nbufs < bufs_to_take)) {
+ if (ATOMIC32_GET(scmd->len) &&
+ (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
uint32_t maxsize, minsize, old_minsize;
- maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
+ maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 :
+ ATOMIC32_GET(scmd->len);
minsize = maxsize >> 2;
do {
/*
@@ -188,7 +205,7 @@ sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
if (dbuf == NULL) {
return;
}
- scmd->nbufs++;
+ atomic_inc_8(&scmd->nbufs);
sbd_do_read_xfer(task, scmd, dbuf);
}
}
@@ -219,6 +236,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
uint_t nblks;
uint64_t blksize = sl->sl_blksize;
size_t db_private_sz;
+ hrtime_t xfer_start;
uintptr_t pad;
ASSERT(rw_read_held(&sl->sl_access_state_lock));
@@ -260,14 +278,15 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
first_len = 0;
}
- while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
+ while (ATOMIC32_GET(scmd->len) &&
+ ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
- xfer_len = MIN(max_len, scmd->len);
+ xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
if (first_len) {
xfer_len = MIN(xfer_len, first_len);
first_len = 0;
}
- if (scmd->len == xfer_len) {
+ if (ATOMIC32_GET(scmd->len) == xfer_len) {
final_xfer = 1;
} else {
/*
@@ -334,12 +353,15 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
* Accounting for start of read.
* Note there is no buffer address for the probe yet.
*/
+ xfer_start = gethrtime();
DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
uint8_t *, NULL, uint64_t, xfer_len,
uint64_t, offset, scsi_task_t *, task);
ret = sbd_zvol_alloc_read_bufs(sl, dbuf);
+ stmf_lu_xfer_done(task, B_TRUE /* read */,
+ (gethrtime() - xfer_start));
DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
uint8_t *, NULL, uint64_t, xfer_len,
uint64_t, offset, int, ret, scsi_task_t *, task);
@@ -349,9 +371,10 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
* Read failure from the backend.
*/
stmf_free(dbuf);
- if (scmd->nbufs == 0) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
/* nothing queued, just finish */
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_READ_ERROR);
rw_exit(&sl->sl_access_state_lock);
@@ -362,7 +385,6 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
return;
}
-
/*
* Allow PP to do setup
*/
@@ -376,7 +398,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
*/
sbd_zvol_rele_read_bufs(sl, dbuf);
stmf_free(dbuf);
- if (scmd->nbufs > 0) {
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
/* completion of previous dbuf will retry */
return;
}
@@ -384,6 +406,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
* Done with this command.
*/
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if (first_xfer)
stmf_scsilib_send_status(task, STATUS_QFULL, 0);
else
@@ -395,7 +418,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
/*
* dbuf is now queued on task
*/
- scmd->nbufs++;
+ atomic_inc_8(&scmd->nbufs);
/* XXX leave this in for FW? */
DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
@@ -417,8 +440,8 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
sbd_zvol_rele_read_bufs(sl, dbuf);
stmf_teardown_dbuf(task, dbuf);
stmf_free(dbuf);
- scmd->nbufs--;
- if (scmd->nbufs > 0) {
+ atomic_dec_8(&scmd->nbufs);
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
/* completion of previous dbuf will retry */
return;
}
@@ -427,6 +450,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
*/
rw_exit(&sl->sl_access_state_lock);
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if (first_xfer)
stmf_scsilib_send_status(task, STATUS_QFULL, 0);
else
@@ -438,13 +462,14 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
* Completion from task_done will cleanup
*/
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
return;
}
/*
* Update the xfer progress.
*/
ASSERT(scmd->len >= xfer_len);
- scmd->len -= xfer_len;
+ atomic_add_32(&scmd->len, -xfer_len);
scmd->current_ro += xfer_len;
}
}
@@ -459,12 +484,14 @@ sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
return;
}
task->task_nbytes_transferred += dbuf->db_data_size;
- if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ if (ATOMIC32_GET(scmd->len) == 0 ||
+ scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
stmf_free_dbuf(task, dbuf);
- scmd->nbufs--;
- if (scmd->nbufs)
+ atomic_dec_8(&scmd->nbufs);
+ if (ATOMIC8_GET(scmd->nbufs))
return; /* wait for all buffers to complete */
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_READ_ERROR);
@@ -477,7 +504,8 @@ sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
uint32_t maxsize, minsize, old_minsize;
stmf_free_dbuf(task, dbuf);
- maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len;
+ maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ?
+ 128 * 1024 : ATOMIC32_GET(scmd->len);
minsize = maxsize >> 2;
do {
old_minsize = minsize;
@@ -485,8 +513,8 @@ sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
} while ((dbuf == NULL) && (old_minsize > minsize) &&
(minsize >= 512));
if (dbuf == NULL) {
- scmd->nbufs --;
- if (scmd->nbufs == 0) {
+ atomic_dec_8(&scmd->nbufs);
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
STMF_ALLOC_FAILURE, NULL);
}
@@ -514,7 +542,7 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
ASSERT(dbuf->db_lu_private);
ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);
- scmd->nbufs--; /* account for this dbuf */
+ atomic_dec_8(&scmd->nbufs); /* account for this dbuf */
/*
* Release the DMU resources.
*/
@@ -535,7 +563,8 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
(scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
(xfer_status != STMF_SUCCESS));
- if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
+ if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
+ (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
/* all DMU state has been released */
rw_exit(&sl->sl_access_state_lock);
}
@@ -549,7 +578,8 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
* This chunk completed successfully
*/
task->task_nbytes_transferred += data_size;
- if (scmd->nbufs == 0 && scmd->len == 0) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0 &&
+ ATOMIC32_GET(scmd->len) == 0) {
/*
* This command completed successfully
*
@@ -557,6 +587,7 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
* completion will occur. Tell stmf we are done.
*/
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
stmf_task_lu_done(task);
return;
}
@@ -575,8 +606,9 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
* and wait for the last completion to send the status check.
*/
if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
- if (scmd->nbufs == 0) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_READ_ERROR);
}
@@ -586,7 +618,14 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
* Must have been a failure on current dbuf
*/
ASSERT(xfer_status != STMF_SUCCESS);
+
+ /*
+	 * This is actually a bug: stmf_abort should have reset the
+	 * active flag, but since it has been this way for some time,
+	 * we won't change it.
+ */
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
}
}
@@ -601,6 +640,7 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
int scmd_err, scmd_xfer_done;
stmf_status_t xfer_status = dbuf->db_xfer_status;
uint32_t data_size = dbuf->db_data_size;
+ hrtime_t xfer_start;
ASSERT(zvio);
@@ -610,16 +650,18 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
*/
stmf_teardown_dbuf(task, dbuf);
- scmd->nbufs--; /* account for this dbuf */
+ atomic_dec_8(&scmd->nbufs); /* account for this dbuf */
/*
* All data was queued and this is the last completion,
* but there could still be an error.
*/
- scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0);
+ scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 &&
+ (ATOMIC8_GET(scmd->nbufs) == 0));
scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
(scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
(xfer_status != STMF_SUCCESS));
+ xfer_start = gethrtime();
DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
uint8_t *, NULL, uint64_t, data_size,
uint64_t, zvio->zvio_offset, scsi_task_t *, task);
@@ -637,6 +679,8 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
ret = sbd_zvol_rele_write_bufs(sl, dbuf);
}
+ stmf_lu_xfer_done(task, B_FALSE /* write */,
+ (gethrtime() - xfer_start));
DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
uint8_t *, NULL, uint64_t, data_size,
uint64_t, zvio->zvio_offset, int, ret, scsi_task_t *, task);
@@ -656,7 +700,8 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
* transferred or an error encountered, then no more dbufs
* will be queued.
*/
- if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) {
+ if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
+ (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
/* all DMU state has been released */
rw_exit(&sl->sl_access_state_lock);
}
@@ -670,6 +715,7 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
if (scmd_xfer_done) {
/* This command completed successfully */
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
(sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
stmf_scsilib_send_status(task, STATUS_CHECK,
@@ -690,8 +736,9 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
*/
if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
- if (scmd->nbufs == 0) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_WRITE_ERROR);
}
@@ -701,6 +748,7 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
return;
}
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
ASSERT(xfer_status != STMF_SUCCESS);
stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
}
@@ -726,6 +774,7 @@ sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
struct iovec *iov, *tiov, iov1[8];
uint32_t len, resid;
int ret, i, iovcnt, flags;
+ hrtime_t xfer_start;
boolean_t is_read;
ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);
@@ -763,6 +812,7 @@ sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
uio.uio_resid = (uint64_t)len;
uio.uio_llimit = RLIM64_INFINITY;
+ xfer_start = gethrtime();
if (is_read == B_TRUE) {
uio.uio_fmode = FREAD;
uio.uio_extflg = UIO_COPY_CACHED;
@@ -791,6 +841,8 @@ sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
scsi_task_t *, task);
}
+ /* finalize accounting */
+ stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start));
if (iov != &iov1[0])
kmem_free(iov, iovcnt * sizeof (*iov));
@@ -805,13 +857,26 @@ void
sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
uint64_t lba, laddr;
+ uint64_t blkcount;
uint32_t len;
uint8_t op = task->task_cdb[0];
sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
sbd_cmd_t *scmd;
stmf_data_buf_t *dbuf;
int fast_path;
+ boolean_t fua_bit = B_FALSE;
+ /*
+ * Check to see if the command is READ(10), READ(12), or READ(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, we'll bypass the use of
+	 * DMU buffers to simplify support of this feature.
+ */
+ if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
+ (op == SCMD_READ_G5)) &&
+ (task->task_cdb[1] & BIT_3)) {
+ fua_bit = B_TRUE;
+ }
if (op == SCMD_READ) {
lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
len = (uint32_t)task->task_cdb[4];
@@ -835,6 +900,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
}
laddr = lba << sl->sl_data_blocksize_shift;
+ blkcount = len;
len <<= sl->sl_data_blocksize_shift;
if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
@@ -861,6 +927,13 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
return;
}
+ if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
+ SBD_SUCCESS) {
+ if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
+ stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+ }
+ return;
+ }
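When the ATS gate reports a conflict here, the task is handed back to STMF for a delayed retry via stmf_task_poll_lu; BUSY is returned to the initiator only when the retry cannot be queued. The decision restated as a small sketch with hypothetical callbacks:

    /* Model of the conflict gate: retry if possible, else report BUSY. */
    static int
    gate_before_io(int conflict, int (*poll_retry)(int ticks),
        void (*send_busy)(void))
    {
            if (conflict) {
                    if (poll_retry(10) != 0)
                            send_busy();
                    return (-1);    /* caller must not start this I/O yet */
            }
            return (0);
    }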
/*
* Determine if this read can directly use DMU buffers.
*/
@@ -868,8 +941,8 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
initial_dbuf == NULL && /* No PP buffer passed in */
sl->sl_flags & SL_CALL_ZVOL && /* zvol backing store */
(task->task_additional_flags &
- TASK_AF_ACCEPT_LU_DBUF)) /* PP allows it */
- {
+ TASK_AF_ACCEPT_LU_DBUF) && /* PP allows it */
+ !fua_bit) {
/*
* Reduced copy path
*/
@@ -884,6 +957,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
rw_enter(&sl->sl_access_state_lock, RW_READER);
if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
rw_exit(&sl->sl_access_state_lock);
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_READ_ERROR);
return;
@@ -904,6 +978,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
SBD_CMD_SCSI_READ, 0);
/* done with the backend */
rw_exit(&sl->sl_access_state_lock);
+ sbd_ats_remove_by_task(task);
if (ret != 0) {
/* backend error */
stmf_scsilib_send_status(task, STATUS_CHECK,
@@ -936,13 +1011,12 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
/*
* Setup scmd to track read progress.
*/
- scmd->flags = SBD_SCSI_CMD_ACTIVE;
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
scmd->cmd_type = SBD_CMD_SCSI_READ;
scmd->nbufs = 0;
scmd->addr = laddr;
scmd->len = len;
scmd->current_ro = 0;
-
/*
* Kick-off the read.
*/
@@ -962,6 +1036,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
} while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
(minsize >= 512));
if (initial_dbuf == NULL) {
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_QFULL, 0);
return;
}
@@ -985,6 +1060,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_READ_ERROR);
}
+ sbd_ats_remove_by_task(task);
return;
}
@@ -994,7 +1070,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
task->task_lu_private = scmd;
}
- scmd->flags = SBD_SCSI_CMD_ACTIVE;
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
scmd->cmd_type = SBD_CMD_SCSI_READ;
scmd->nbufs = 1;
scmd->addr = laddr;
@@ -1011,7 +1087,7 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
uint32_t len;
int bufs_to_take;
- if (scmd->len == 0) {
+ if (ATOMIC32_GET(scmd->len) == 0) {
goto DO_WRITE_XFER_DONE;
}
@@ -1026,14 +1102,14 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
stmf_free_dbuf(task, dbuf);
dbuf = NULL;
}
- if (scmd->nbufs >= bufs_to_take) {
+ if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) {
goto DO_WRITE_XFER_DONE;
}
if (dbuf == NULL) {
uint32_t maxsize, minsize, old_minsize;
- maxsize = (scmd->len > (128*1024)) ? 128*1024 :
- scmd->len;
+ maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
+ ATOMIC32_GET(scmd->len);
minsize = maxsize >> 2;
do {
old_minsize = minsize;
@@ -1041,7 +1117,7 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
} while ((dbuf == NULL) && (old_minsize > minsize) &&
(minsize >= 512));
if (dbuf == NULL) {
- if (scmd->nbufs == 0) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
STMF_ALLOC_FAILURE, NULL);
}
@@ -1049,18 +1125,20 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
}
}
- len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
- scmd->len;
+ len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
+ ATOMIC32_GET(scmd->len);
dbuf->db_relative_offset = scmd->current_ro;
dbuf->db_data_size = len;
dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
(void) stmf_xfer_data(task, dbuf, 0);
- scmd->nbufs++; /* outstanding port xfers and bufs used */
- scmd->len -= len;
+ /* outstanding port xfers and bufs used */
+ atomic_inc_8(&scmd->nbufs);
+ atomic_add_32(&scmd->len, -len);
scmd->current_ro += len;
- if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) {
+ if ((ATOMIC32_GET(scmd->len) != 0) &&
+ (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
sbd_do_write_xfer(task, scmd, NULL, 0);
}
return;
@@ -1126,14 +1204,14 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
}
- while (scmd->len && scmd->nbufs < task->task_max_nbufs) {
-
- xfer_len = MIN(max_len, scmd->len);
+ while (ATOMIC32_GET(scmd->len) &&
+ ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
+ xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
if (first_len) {
xfer_len = MIN(xfer_len, first_len);
first_len = 0;
}
- if (xfer_len < scmd->len) {
+ if (xfer_len < ATOMIC32_GET(scmd->len)) {
/*
* Attempt to end xfer on a block boundary.
* The only way this does not happen is if the
@@ -1197,10 +1275,11 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
*/
stmf_free(dbuf);
scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
- if (scmd->nbufs == 0) {
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
/*
* Nothing queued, so no completions coming
*/
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_WRITE_ERROR);
rw_exit(&sl->sl_access_state_lock);
@@ -1224,7 +1303,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
*/
sbd_zvol_rele_write_bufs_abort(sl, dbuf);
stmf_free(dbuf);
- if (scmd->nbufs > 0) {
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
/* completion of previous dbuf will retry */
return;
}
@@ -1232,6 +1311,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
* Done with this command.
*/
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if (first_xfer)
stmf_scsilib_send_status(task, STATUS_QFULL, 0);
else
@@ -1244,7 +1324,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
/*
* dbuf is now queued on task
*/
- scmd->nbufs++;
+ atomic_inc_8(&scmd->nbufs);
xstat = stmf_xfer_data(task, dbuf, 0);
switch (xstat) {
@@ -1258,8 +1338,8 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
sbd_zvol_rele_write_bufs_abort(sl, dbuf);
stmf_teardown_dbuf(task, dbuf);
stmf_free(dbuf);
- scmd->nbufs--;
- if (scmd->nbufs > 0) {
+ atomic_dec_8(&scmd->nbufs);
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
/* completion of previous dbuf will retry */
return;
}
@@ -1267,6 +1347,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
* Done with this command.
*/
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if (first_xfer)
stmf_scsilib_send_status(task, STATUS_QFULL, 0);
else
@@ -1284,7 +1365,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
/*
* Update the xfer progress.
*/
- scmd->len -= xfer_len;
+ atomic_add_32(&scmd->len, -xfer_len);
scmd->current_ro += xfer_len;
}
}
@@ -1297,17 +1378,20 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
uint64_t laddr;
uint32_t buflen, iolen;
int ndx;
+ uint8_t op = task->task_cdb[0];
+ boolean_t fua_bit = B_FALSE;
- if (scmd->nbufs > 0) {
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
/*
* Decrement the count to indicate the port xfer
* into the dbuf has completed even though the buf is
* still in use here in the LU provider.
*/
- scmd->nbufs--;
+ atomic_dec_8(&scmd->nbufs);
}
if (dbuf->db_xfer_status != STMF_SUCCESS) {
+ sbd_ats_remove_by_task(task);
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
dbuf->db_xfer_status, NULL);
return;
@@ -1317,7 +1401,7 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
goto WRITE_XFER_DONE;
}
- if (scmd->len != 0) {
+ if (ATOMIC32_GET(scmd->len) != 0) {
/*
* Initiate the next port xfer to occur in parallel
* with writing this buf.
@@ -1325,6 +1409,16 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
sbd_do_write_xfer(task, scmd, NULL, 0);
}
+ /*
+ * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, we'll bypass the direct
+ * call and handle it in sbd_data_write().
+ */
+ if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+ (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+ fua_bit = B_TRUE;
+ }
laddr = scmd->addr + dbuf->db_relative_offset;
/*
@@ -1334,13 +1428,17 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
*/
if (sl->sl_flags & SL_CALL_ZVOL &&
(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
- (sbd_zcopy & (4|1))) {
+ (sbd_zcopy & (4|1)) && !fua_bit) {
int commit;
- commit = (scmd->len == 0 && scmd->nbufs == 0);
- if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
+ commit = (ATOMIC32_GET(scmd->len) == 0 &&
+ ATOMIC8_GET(scmd->nbufs) == 0);
+ rw_enter(&sl->sl_access_state_lock, RW_READER);
+ if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 ||
+ sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
commit) != STMF_SUCCESS)
scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
+ rw_exit(&sl->sl_access_state_lock);
buflen = dbuf->db_data_size;
} else {
for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
@@ -1360,11 +1458,13 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
}
task->task_nbytes_transferred += buflen;
WRITE_XFER_DONE:
- if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ if (ATOMIC32_GET(scmd->len) == 0 ||
+ scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
stmf_free_dbuf(task, dbuf);
- if (scmd->nbufs)
+ if (ATOMIC8_GET(scmd->nbufs))
return; /* wait for all buffers to complete */
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ sbd_ats_remove_by_task(task);
if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_WRITE_ERROR);
@@ -1430,13 +1530,25 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
sbd_cmd_t *scmd;
stmf_data_buf_t *dbuf;
+ uint64_t blkcount;
uint8_t sync_wr_flag = 0;
+ boolean_t fua_bit = B_FALSE;
if (sl->sl_flags & SL_WRITE_PROTECTED) {
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_WRITE_PROTECTED);
return;
}
+ /*
+ * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
+ * If it is then check for bit 3 being set to indicate if Forced
+ * Unit Access is being requested. If so, we'll bypass the fast path
+ * code to simplify support of this feature.
+ */
+ if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
+ (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
+ fua_bit = B_TRUE;
+ }
if (op == SCMD_WRITE) {
lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
len = (uint32_t)task->task_cdb[4];
@@ -1472,6 +1584,7 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
}
laddr = lba << sl->sl_data_blocksize_shift;
+ blkcount = len;
len <<= sl->sl_data_blocksize_shift;
if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
@@ -1493,12 +1606,21 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
return;
}
+ if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
+ SBD_SUCCESS) {
+ if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
+ stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+ }
+ return;
+ }
+
if (sbd_zcopy & (4|1) && /* Debug switch */
initial_dbuf == NULL && /* No PP buf passed in */
sl->sl_flags & SL_CALL_ZVOL && /* zvol backing store */
(task->task_additional_flags &
TASK_AF_ACCEPT_LU_DBUF) && /* PP allows it */
- sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) {
+ sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) &&
+ !fua_bit) {
/*
* XXX Note that disallowing initial_dbuf will eliminate
@@ -1510,6 +1632,7 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
rw_enter(&sl->sl_access_state_lock, RW_READER);
if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
rw_exit(&sl->sl_access_state_lock);
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_READ_ERROR);
return;
@@ -1524,7 +1647,8 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
KM_SLEEP);
task->task_lu_private = scmd;
}
- scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
+ sync_wr_flag;
scmd->cmd_type = SBD_CMD_SCSI_WRITE;
scmd->nbufs = 0;
scmd->addr = laddr;
@@ -1555,7 +1679,8 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
task->task_lu_private = scmd;
}
- scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag;
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
+ sync_wr_flag;
scmd->cmd_type = SBD_CMD_SCSI_WRITE;
scmd->nbufs = 0;
scmd->addr = laddr;
@@ -1567,7 +1692,7 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
* Account for data passed in this write command
*/
(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
- scmd->len -= dbuf->db_data_size;
+ atomic_add_32(&scmd->len, -dbuf->db_data_size);
scmd->current_ro += dbuf->db_data_size;
dbuf->db_xfer_status = STMF_SUCCESS;
sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
@@ -1750,6 +1875,9 @@ sbd_handle_short_write_xfer_completion(scsi_task_t *task,
sbd_handle_unmap_xfer(task,
dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
break;
+ case SCMD_EXTENDED_COPY:
+ sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr);
+ break;
case SCMD_PERSISTENT_RESERVE_OUT:
if (sl->sl_access_state == SBD_LU_STANDBY) {
st_ret = stmf_proxy_scsi_cmd(task, dbuf);
@@ -1844,7 +1972,7 @@ sbd_handle_mode_sense(struct scsi_task *task,
uint8_t *cdb;
uint32_t ncyl;
uint8_t nsectors, nheads;
- uint8_t page, ctrl, header_size, pc_valid;
+ uint8_t page, ctrl, header_size;
uint16_t nbytes;
uint8_t *p;
uint64_t s = sl->sl_lu_size;
@@ -1855,25 +1983,21 @@ sbd_handle_mode_sense(struct scsi_task *task,
cdb = &task->task_cdb[0];
page = cdb[2] & 0x3F;
ctrl = (cdb[2] >> 6) & 3;
- cmd_size = (cdb[0] == SCMD_MODE_SENSE) ? cdb[4] :
- READ_SCSI16(&cdb[7], uint32_t);
if (cdb[0] == SCMD_MODE_SENSE) {
+ cmd_size = cdb[4];
header_size = 4;
dev_spec_param_offset = 2;
} else {
+ cmd_size = READ_SCSI16(&cdb[7], uint32_t);
header_size = 8;
dev_spec_param_offset = 3;
}
/* Now validate the command */
- if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) ||
- (page == 0x0A) || (page == 0x03) || (page == 0x04)) {
- pc_valid = 1;
- } else {
- pc_valid = 0;
- }
- if ((cmd_size < header_size) || (pc_valid == 0)) {
+ if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) &&
+ (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) &&
+ (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) {
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_INVALID_FIELD_IN_CDB);
return;
@@ -1891,7 +2015,7 @@ sbd_handle_mode_sense(struct scsi_task *task,
nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
- if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) {
+ if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) {
p[n] = 0x03;
p[n+1] = 0x16;
if (ctrl != 1) {
@@ -1902,7 +2026,7 @@ sbd_handle_mode_sense(struct scsi_task *task,
}
n += 24;
}
- if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) {
+ if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) {
p[n] = 0x04;
p[n + 1] = 0x16;
if (ctrl != 1) {
@@ -1977,11 +2101,11 @@ sbd_handle_mode_sense(struct scsi_task *task,
* of bytes in the length field, so adjust the count.
* Byte count minus header length field size.
*/
- buf[0] = (n - 1) & 0xff;
+ buf[0] = (n - header_size) & 0xff;
} else {
/* Byte count minus header length field size. */
- buf[1] = (n - 2) & 0xff;
- buf[0] = ((n - 2) >> 8) & 0xff;
+ buf[1] = (n - header_size) & 0xff;
+ buf[0] = ((n - header_size) >> 8) & 0xff;
}
sbd_handle_short_read_transfers(task, initial_dbuf, buf,
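The corrected length computation above derives the mode data length from the total byte count n and the header size actually in use (4 bytes for MODE SENSE(6), 8 for MODE SENSE(10)). A sketch mirroring that computation, not the SPC definition itself:

    #include <stdint.h>

    /* Write the mode data length field for a 6- or 10-byte MODE SENSE. */
    static void
    set_mode_data_length(uint8_t *buf, uint32_t n, uint8_t header_size)
    {
            if (header_size == 4) {
                    buf[0] = (n - header_size) & 0xff;
            } else {
                    buf[0] = ((n - header_size) >> 8) & 0xff;
                    buf[1] = (n - header_size) & 0xff;
            }
    }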
@@ -2302,6 +2426,21 @@ sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
}
static void
+sbd_write_same_release_resources(struct scsi_task *task)
+{
+ sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
+
+	if (scmd->nbufs == 0xFF)
+ cmn_err(CE_WARN, "%s invalid buffer count %x",
+ __func__, scmd->nbufs);
+ if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL))
+ kmem_free(scmd->trans_data, scmd->trans_data_len);
+ scmd->trans_data = NULL;
+ scmd->trans_data_len = 0;
+ scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
+}
+
+static void
sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
@@ -2309,7 +2448,13 @@ sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
uint32_t buflen, iolen;
int ndx, ret;
+ if (ATOMIC8_GET(scmd->nbufs) > 0) {
+ atomic_dec_8(&scmd->nbufs);
+ }
+
if (dbuf->db_xfer_status != STMF_SUCCESS) {
+ sbd_write_same_release_resources(task);
+ sbd_ats_remove_by_task(task);
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
dbuf->db_xfer_status, NULL);
return;
@@ -2319,7 +2464,16 @@ sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
goto write_same_xfer_done;
}
- if (scmd->len != 0) {
+	/* If this is an unnecessary callback, just return. */
+ if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
+ ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
+ (scmd->trans_data == NULL)) {
+ sbd_ats_remove_by_task(task);
+ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
+ return;
+ }
+
+ if (ATOMIC32_GET(scmd->len) != 0) {
/*
* Initiate the next port xfer to occur in parallel
* with writing this buf.
@@ -2343,14 +2497,21 @@ sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
task->task_nbytes_transferred += buflen;
write_same_xfer_done:
- if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ if (ATOMIC32_GET(scmd->len) == 0 ||
+ scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
stmf_free_dbuf(task, dbuf);
+ if (ATOMIC8_GET(scmd->nbufs) > 0)
+ return;
scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
+ sbd_ats_remove_by_task(task);
+ sbd_write_same_release_resources(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_WRITE_ERROR);
} else {
ret = sbd_write_same_data(task, scmd);
+ sbd_ats_remove_by_task(task);
+ sbd_write_same_release_resources(task);
if (ret != SBD_SUCCESS) {
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_WRITE_ERROR);
@@ -2358,14 +2519,6 @@ write_same_xfer_done:
stmf_scsilib_send_status(task, STATUS_GOOD, 0);
}
}
- /*
- * Only way we should get here is via handle_write_same(),
- * and that should make the following assertion always pass.
- */
- ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) &&
- scmd->trans_data != NULL);
- kmem_free(scmd->trans_data, scmd->trans_data_len);
- scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
return;
}
sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
@@ -2377,7 +2530,7 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
{
uint32_t len;
- if (scmd->len == 0) {
+ if (ATOMIC32_GET(scmd->len) == 0) {
if (dbuf != NULL)
stmf_free_dbuf(task, dbuf);
return;
@@ -2392,8 +2545,8 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
if (dbuf == NULL) {
uint32_t maxsize, minsize, old_minsize;
- maxsize = (scmd->len > (128*1024)) ? 128*1024 :
- scmd->len;
+ maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
+ ATOMIC32_GET(scmd->len);
minsize = maxsize >> 2;
do {
old_minsize = minsize;
@@ -2401,7 +2554,9 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
} while ((dbuf == NULL) && (old_minsize > minsize) &&
(minsize >= 512));
if (dbuf == NULL) {
- if (scmd->nbufs == 0) {
+ sbd_ats_remove_by_task(task);
+ sbd_write_same_release_resources(task);
+ if (ATOMIC8_GET(scmd->nbufs) == 0) {
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
STMF_ALLOC_FAILURE, NULL);
}
@@ -2409,15 +2564,16 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
}
}
- len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size :
- scmd->len;
+ len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
+ ATOMIC32_GET(scmd->len);
dbuf->db_relative_offset = scmd->current_ro;
dbuf->db_data_size = len;
dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
(void) stmf_xfer_data(task, dbuf, 0);
- scmd->nbufs++; /* outstanding port xfers and bufs used */
- scmd->len -= len;
+ /* outstanding port xfers and bufs used */
+ atomic_inc_8(&scmd->nbufs);
+ atomic_add_32(&scmd->len, -len);
scmd->current_ro += len;
}
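The switch to atomics in the hunks above exists because write-same completion callbacks and the abort path can now run concurrently against the same sbd_cmd_t. A minimal userland sketch of the same accounting pattern, assuming the illumos <atomic.h> routines and treating the driver's ATOMIC8_GET/ATOMIC32_GET as simple volatile-read wrappers (their definitions are not part of this hunk):

#include <sys/types.h>
#include <atomic.h>
#include <stdio.h>

static volatile uint8_t nbufs;		/* outstanding port transfers */
static volatile uint32_t len = 4096;	/* bytes still to transfer */

static void
start_xfer(uint32_t chunk)
{
	atomic_inc_8(&nbufs);			/* one more buf in flight */
	atomic_add_32(&len, -(int32_t)chunk);	/* consume part of the I/O */
}

static boolean_t
last_xfer_done(void)
{
	/* B_TRUE only for the thread that retires the final buffer */
	return (atomic_dec_8_nv(&nbufs) == 0 ? B_TRUE : B_FALSE);
}

int
main(void)
{
	start_xfer(4096);
	if (last_xfer_done() && len == 0)
		(void) printf("command complete\n");
	return (0);
}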
@@ -2431,6 +2587,12 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
uint8_t unmap;
uint8_t do_immediate_data = 0;
+ if (HardwareAcceleratedInit == 0) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_OPCODE);
+ return;
+ }
+
task->task_cmd_xfer_length = 0;
if (task->task_additional_flags &
TASK_AF_NO_EXPECTED_XFER_LENGTH) {
@@ -2447,11 +2609,13 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
return;
}
unmap = task->task_cdb[1] & 0x08;
+
if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_INVALID_FIELD_IN_CDB);
return;
}
+
if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
len = READ_SCSI16(&task->task_cdb[7], uint64_t);
@@ -2459,11 +2623,20 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
len = READ_SCSI32(&task->task_cdb[10], uint64_t);
}
+
if (len == 0) {
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_INVALID_FIELD_IN_CDB);
return;
}
+
+ if (sbd_ats_handling_before_io(task, sl, addr, len) !=
+ SBD_SUCCESS) {
+ if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
+ stmf_scsilib_send_status(task, STATUS_BUSY, 0);
+ return;
+ }
+
addr <<= sl->sl_data_blocksize_shift;
len <<= sl->sl_data_blocksize_shift;
@@ -2492,6 +2665,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
task->task_expected_xfer_length = task->task_cmd_xfer_length;
}
if ((addr + len) > sl->sl_lu_size) {
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_LBA_OUT_OF_RANGE);
return;
@@ -2502,6 +2676,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
/* Some basic checks */
if ((len == 0) || (len != task->task_expected_xfer_length)) {
+ sbd_ats_remove_by_task(task);
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_INVALID_FIELD_IN_CDB);
return;
@@ -2513,6 +2688,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
if (initial_dbuf->db_data_size >
task->task_expected_xfer_length) {
/* protocol error */
+ sbd_ats_remove_by_task(task);
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
STMF_INVALID_ARG, NULL);
return;
@@ -2529,7 +2705,8 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
task->task_lu_private = scmd;
}
- scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA;
+ scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA |
+ SBD_SCSI_CMD_ATS_RELATED;
scmd->cmd_type = SBD_CMD_SCSI_WRITE;
scmd->nbufs = 0;
scmd->len = (uint32_t)len;
@@ -2542,7 +2719,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
* Account for data passed in this write command
*/
(void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
- scmd->len -= dbuf->db_data_size;
+ atomic_add_32(&scmd->len, -dbuf->db_data_size);
scmd->current_ro += dbuf->db_data_size;
dbuf->db_xfer_status = STMF_SUCCESS;
sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
@@ -2554,8 +2731,20 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
static void
sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
{
+ sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
uint32_t cmd_xfer_len;
+ if (sbd_unmap_enable == 0) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_INVALID_OPCODE);
+ return;
+ }
+
+ if (sl->sl_flags & SL_WRITE_PROTECTED) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_WRITE_PROTECTED);
+ return;
+ }
cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
if (task->task_cdb[1] & 1) {
@@ -2605,14 +2794,26 @@ sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
dfl->dfl_num_exts = num_desc;
+ /*
+	 * This should use ATS locking, but that was disabled by the
+	 * changes to ZFS to take advantage of TRIM in SSDs.
+	 *
+	 * Since the entire list is passed to ZFS at once, ATS locking
+	 * is not done. This may be detectable, and if it is, the entire
+	 * list needs to be locked and, after the unmap completes, the
+	 * entire list must be unlocked.
+ */
for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
addr = READ_SCSI64(p, uint64_t);
- addr <<= sl->sl_data_blocksize_shift;
len = READ_SCSI32(p+8, uint64_t);
+ addr <<= sl->sl_data_blocksize_shift;
len <<= sl->sl_data_blocksize_shift;
+
/* Prepare a list of extents to unmap */
dfl->dfl_exts[i].dfle_start = addr;
dfl->dfl_exts[i].dfle_length = len;
+
+		/* release the overlap (a no-op while ATS locking is disabled) */
}
ASSERT(i == dfl->dfl_num_exts);
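For reference, the parameter data this loop walks is the SBC-3 UNMAP format: an 8-byte header followed by 16-byte block descriptors, each a big-endian 8-byte LBA, a 4-byte block count and 4 reserved bytes; READ_SCSI64/READ_SCSI32 are the driver's big-endian extractors. A self-contained sketch of the same parse, with invented helper names:

#include <stdint.h>

typedef struct {
	uint64_t lba;
	uint32_t nblks;
} unmap_ext_t;

static uint64_t
be64(const uint8_t *p)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return (v);
}

static uint32_t
be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	    (uint32_t)p[2] << 8 | p[3]);
}

/*
 * Parse 'num' descriptors from UNMAP parameter data 'buf';
 * descriptors start at offset 8 and are 16 bytes each.
 */
static void
parse_unmap(const uint8_t *buf, int num, unmap_ext_t *out)
{
	const uint8_t *p = buf + 8;

	for (int i = 0; i < num; i++, p += 16) {
		out[i].lba = be64(p);
		out[i].nblks = be32(p + 8);
	}
}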
@@ -2695,6 +2896,7 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
inq->inq_cmdque = 1;
+ inq->inq_3pc = 1;
if (sl->sl_flags & SL_VID_VALID) {
bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
@@ -2793,13 +2995,15 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
switch (cdbp[2]) {
case 0x00:
- page_length = 4 + (mgmt_url_size ? 1 : 0);
+ page_length = 5 + (mgmt_url_size ? 1 : 0);
+
if (sl->sl_flags & SL_UNMAP_ENABLED)
- page_length += 2;
+ page_length += 1;
p[0] = byte0;
p[3] = page_length;
/* Supported VPD pages in ascending order */
+ /* CSTYLED */
{
uint8_t i = 5;
@@ -2808,8 +3012,8 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
if (mgmt_url_size != 0)
p[i++] = 0x85;
p[i++] = 0x86;
+ p[i++] = 0xb0;
if (sl->sl_flags & SL_UNMAP_ENABLED) {
- p[i++] = 0xb0;
p[i++] = 0xb2;
}
}
@@ -2842,7 +3046,7 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
stmf_scsilib_send_status(task, STATUS_CHECK,
STMF_SAA_INVALID_FIELD_IN_CDB);
goto err_done;
- }
+ } /* CSTYLED */
{
uint16_t idx, newidx, sz, url_size;
char *url;
@@ -2903,17 +3107,23 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
break;
case 0xb0:
- if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
- stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_INVALID_FIELD_IN_CDB);
- goto err_done;
- }
page_length = 0x3c;
p[0] = byte0;
p[1] = 0xb0;
p[3] = page_length;
- p[20] = p[21] = p[22] = p[23] = 0xFF;
- p[24] = p[25] = p[26] = p[27] = 0xFF;
+ p[4] = 1;
+ p[5] = sbd_ats_max_nblks();
+ if (sl->sl_flags & SL_UNMAP_ENABLED && sbd_unmap_enable) {
+ p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff;
+ p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff;
+ p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff;
+ p[23] = stmf_sbd_unmap_max_nblks & 0xff;
+
+ p[24] = 0;
+ p[25] = 0;
+ p[26] = 0;
+ p[27] = 0xFF;
+ }
xfer_size = page_length + 4;
break;
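The hunk above populates SBC-3 Block Limits VPD (page 0xB0) fields: byte 4 carries the WSNZ bit, byte 5 the MAXIMUM COMPARE AND WRITE LENGTH (in blocks, here taken from sbd_ats_max_nblks()), bytes 20-23 the MAXIMUM UNMAP LBA COUNT and bytes 24-27 the MAXIMUM UNMAP BLOCK DESCRIPTOR COUNT. A sketch of the encoding, with max_caw_blocks and max_unmap_lbas as hypothetical stand-ins for the driver's tunables:

#include <stdint.h>
#include <string.h>

static void
fill_block_limits(uint8_t *p, uint8_t max_caw_blocks, uint32_t max_unmap_lbas)
{
	(void) memset(p, 0, 0x3c + 4);	/* caller supplies >= 64 bytes */
	p[1] = 0xb0;		/* page code: Block Limits */
	p[3] = 0x3c;		/* page length */
	p[4] = 1;		/* WSNZ: WRITE SAME with zero blocks rejected */
	p[5] = max_caw_blocks;	/* MAXIMUM COMPARE AND WRITE LENGTH */

	/* bytes 20-23: MAXIMUM UNMAP LBA COUNT, big-endian */
	p[20] = (max_unmap_lbas >> 24) & 0xff;
	p[21] = (max_unmap_lbas >> 16) & 0xff;
	p[22] = (max_unmap_lbas >> 8) & 0xff;
	p[23] = max_unmap_lbas & 0xff;

	/* bytes 24-27: MAXIMUM UNMAP BLOCK DESCRIPTOR COUNT */
	p[27] = 0xff;
}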
@@ -2935,7 +3145,7 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
exp++;
}
p[4] = exp;
- p[5] = 0xc0;
+ p[5] = 0xc0; /* Logical provisioning UNMAP and WRITE SAME */
xfer_size = page_length + 4;
break;
@@ -2956,7 +3166,7 @@ stmf_status_t
sbd_task_alloc(struct scsi_task *task)
{
if ((task->task_lu_private =
- kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
+ kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
scmd->flags = 0;
return (STMF_SUCCESS);
@@ -3022,8 +3232,40 @@ sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
mutex_exit(&sl->sl_lock);
}
+/*
+ * Given a LU and a task, check whether the task causes a reservation
+ * conflict. Returns 1 in case of conflict, 0 otherwise.
+ * Note that the LU might not be the same LU as in the task, but the
+ * caller makes sure that the LU can be accessed.
+ */
+int
+sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task)
+{
+ sbd_it_data_t *it;
+ it = task->task_lu_itl_handle;
+ ASSERT(it);
+ if (sl->sl_access_state == SBD_LU_ACTIVE) {
+ if (SBD_PGR_RSVD(sl->sl_pgr)) {
+ if (sbd_pgr_reservation_conflict(task, sl)) {
+ return (1);
+ }
+ } else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
+ ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
+ if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
+ return (1);
+ }
+ }
+ }
+ return (0);
+}
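With both checks folded into one predicate, the reservation handling in sbd_new_task() (see the hunk later in this diff) reduces to a single test:

	/* Reservation conflict checks */
	if (sbd_check_reservation_conflict(sl, task)) {
		stmf_scsilib_send_status(task,
		    STATUS_RESERVATION_CONFLICT, 0);
		return;
	}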
+
+/*
+ * Keep in mind that sbd_new_task can be called multiple times for the same
+ * task because calling stmf_task_poll_lu results in a later call to
+ * sbd_task_poll().
+ */
void
sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
@@ -3087,18 +3329,85 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_LU_NO_ACCESS_UNAVAIL);
+ STMF_SAA_LU_NO_ACCESS_TRANSITION);
return;
}
- /* Checking ua conditions as per SAM3R14 5.3.2 specified order */
- if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
+ cdb0 = task->task_cdb[0];
+ cdb1 = task->task_cdb[1];
+ /*
+ * Special case for different versions of Windows.
+ * 1) Windows 2012 and VMWare will fail to discover LU's if a READ
+ * operation sent down the standby path returns an error. By default
+ * standby_fail_reads will be set to 0.
+	 * 2) Windows 2008 R2 has a severe performance problem if READ ops
+ * aren't rejected on the standby path. 2008 sends commands
+ * down the standby path which then must be proxied over to the
+ * active node and back.
+ */
+ if ((sl->sl_access_state == SBD_LU_STANDBY) &&
+ stmf_standby_fail_reads &&
+ (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 ||
+ cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_LU_NO_ACCESS_STANDBY);
+ return;
+ }
+
+ /*
+ * Don't go further if cmd is unsupported in standby mode
+ */
+ if (sl->sl_access_state == SBD_LU_STANDBY) {
+ if (cdb0 != SCMD_INQUIRY &&
+ cdb0 != SCMD_MODE_SENSE &&
+ cdb0 != SCMD_MODE_SENSE_G1 &&
+ cdb0 != SCMD_MODE_SELECT &&
+ cdb0 != SCMD_MODE_SELECT_G1 &&
+ cdb0 != SCMD_RESERVE &&
+ cdb0 != SCMD_RELEASE &&
+ cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
+ cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
+ cdb0 != SCMD_REQUEST_SENSE &&
+ cdb0 != SCMD_READ_CAPACITY &&
+ cdb0 != SCMD_TEST_UNIT_READY &&
+ cdb0 != SCMD_START_STOP &&
+ cdb0 != SCMD_READ &&
+ cdb0 != SCMD_READ_G1 &&
+ cdb0 != SCMD_READ_G4 &&
+ cdb0 != SCMD_READ_G5 &&
+ !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
+ cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
+ !(cdb0 == SCMD_MAINTENANCE_IN &&
+ (cdb1 & 0x1F) == 0x05) &&
+ !(cdb0 == SCMD_MAINTENANCE_IN &&
+ (cdb1 & 0x1F) == 0x0A)) {
+ stmf_scsilib_send_status(task, STATUS_CHECK,
+ STMF_SAA_LU_NO_ACCESS_STANDBY);
+ return;
+ }
+ }
+
+ /*
+ * Checking ua conditions as per SAM3R14 5.3.2 specified order. During
+	 * MPIO/ALUA failover, cmds come in through both local ports and the
+	 * proxy port provider (i.e. pppt). We want to report unit attention
+	 * only on local cmds, since initiators (Windows MPIO/DSM) would
+	 * continue sending I/O to the target that reported unit attention.
+ */
+ if ((it->sbd_it_ua_conditions) &&
+ !(task->task_additional_flags & TASK_AF_PPPT_TASK) &&
+ (task->task_cdb[0] != SCMD_INQUIRY)) {
uint32_t saa = 0;
mutex_enter(&sl->sl_lock);
if (it->sbd_it_ua_conditions & SBD_UA_POR) {
it->sbd_it_ua_conditions &= ~SBD_UA_POR;
saa = STMF_SAA_POR;
+ } else if (it->sbd_it_ua_conditions &
+ SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
+ it->sbd_it_ua_conditions &=
+ ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
+ saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
}
mutex_exit(&sl->sl_lock);
if (saa) {
@@ -3108,21 +3417,10 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
}
/* Reservation conflict checks */
- if (sl->sl_access_state == SBD_LU_ACTIVE) {
- if (SBD_PGR_RSVD(sl->sl_pgr)) {
- if (sbd_pgr_reservation_conflict(task)) {
- stmf_scsilib_send_status(task,
- STATUS_RESERVATION_CONFLICT, 0);
- return;
- }
- } else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
- ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
- if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
- stmf_scsilib_send_status(task,
- STATUS_RESERVATION_CONFLICT, 0);
- return;
- }
- }
+ if (sbd_check_reservation_conflict(sl, task)) {
+ stmf_scsilib_send_status(task,
+ STATUS_RESERVATION_CONFLICT, 0);
+ return;
}
	/* Rest of the ua condition checks */
@@ -3147,9 +3445,9 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
} else if (it->sbd_it_ua_conditions &
SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
- it->sbd_it_ua_conditions &=
- ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
- saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
+ saa = 0;
+ } else if (it->sbd_it_ua_conditions & SBD_UA_POR) {
+ saa = 0;
} else if (it->sbd_it_ua_conditions &
SBD_UA_ACCESS_STATE_TRANSITION) {
it->sbd_it_ua_conditions &=
@@ -3166,38 +3464,7 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
}
}
- cdb0 = task->task_cdb[0];
- cdb1 = task->task_cdb[1];
-
if (sl->sl_access_state == SBD_LU_STANDBY) {
- if (cdb0 != SCMD_INQUIRY &&
- cdb0 != SCMD_MODE_SENSE &&
- cdb0 != SCMD_MODE_SENSE_G1 &&
- cdb0 != SCMD_MODE_SELECT &&
- cdb0 != SCMD_MODE_SELECT_G1 &&
- cdb0 != SCMD_RESERVE &&
- cdb0 != SCMD_RELEASE &&
- cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
- cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
- cdb0 != SCMD_REQUEST_SENSE &&
- cdb0 != SCMD_READ_CAPACITY &&
- cdb0 != SCMD_TEST_UNIT_READY &&
- cdb0 != SCMD_START_STOP &&
- cdb0 != SCMD_READ &&
- cdb0 != SCMD_READ_G1 &&
- cdb0 != SCMD_READ_G4 &&
- cdb0 != SCMD_READ_G5 &&
- !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
- cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
- !(cdb0 == SCMD_MAINTENANCE_IN &&
- (cdb1 & 0x1F) == 0x05) &&
- !(cdb0 == SCMD_MAINTENANCE_IN &&
- (cdb1 & 0x1F) == 0x0A)) {
- stmf_scsilib_send_status(task, STATUS_CHECK,
- STMF_SAA_LU_NO_ACCESS_STANDBY);
- return;
- }
-
/*
* is this a short write?
* if so, we'll need to wait until we have the buffer
@@ -3375,6 +3642,21 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
return;
}
+ if (cdb0 == SCMD_COMPARE_AND_WRITE) {
+ sbd_handle_ats(task, initial_dbuf);
+ return;
+ }
+
+ if (cdb0 == SCMD_EXTENDED_COPY) {
+ sbd_handle_xcopy(task, initial_dbuf);
+ return;
+ }
+
+ if (cdb0 == SCMD_RECV_COPY_RESULTS) {
+ sbd_handle_recv_copy_results(task, initial_dbuf);
+ return;
+ }
+
if (cdb0 == SCMD_TEST_UNIT_READY) { /* Test unit ready */
task->task_cmd_xfer_length = 0;
stmf_scsilib_send_status(task, STATUS_GOOD, 0);
@@ -3441,7 +3723,6 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
sbd_handle_write(task, initial_dbuf);
return;
}
-
stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
}
@@ -3481,12 +3762,18 @@ sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
break;
case (SBD_CMD_SCSI_WRITE):
- if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) ||
- (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) {
+ switch (task->task_cdb[0]) {
+ case SCMD_WRITE_SAME_G1:
+ case SCMD_WRITE_SAME_G4:
sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
1);
- } else {
+ break;
+ case SCMD_COMPARE_AND_WRITE:
+ sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1);
+ break;
+ default:
sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
+		break;
}
break;
@@ -3552,6 +3839,7 @@ sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
task = (scsi_task_t *)arg;
+ sbd_ats_remove_by_task(task);
if (task->task_lu_private) {
sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
@@ -3569,6 +3857,15 @@ sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
return (STMF_NOT_FOUND);
}
+void
+sbd_task_poll(struct scsi_task *task)
+{
+ stmf_data_buf_t *initial_dbuf;
+
+ initial_dbuf = stmf_handle_to_buf(task, 0);
+ sbd_new_task(task, initial_dbuf);
+}
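sbd_task_poll() is the re-entry point for the park-and-retry pattern this patch introduces for ATS range conflicts: sbd_handle_write_same() and friends call stmf_task_poll_lu(task, 10) when sbd_ats_handling_before_io() fails, and the framework later re-drives the task through here into sbd_new_task(), which therefore must tolerate repeat invocation (see its comment above). A stand-alone sketch of the control flow, with invented names standing in for the driver calls:

#include <stdio.h>

enum { OK, LOCKED };

static int attempts;

static int
try_range_lock(void)
{
	/* pretend the range stays locked for the first two attempts */
	return (attempts++ < 2 ? LOCKED : OK);
}

static int
schedule_poll(void)
{
	return (OK);	/* rearm; the framework calls back later */
}

static void
new_task(void)
{
	if (try_range_lock() != OK) {
		/* conflict: park the task instead of failing it */
		if (schedule_poll() != OK)
			(void) printf("report BUSY to the initiator\n");
		return;
	}
	(void) printf("range locked on attempt %d; do the I/O\n", attempts);
}

int
main(void)
{
	new_task();	/* initial dispatch: conflict, parked */
	new_task();	/* poll retry: still conflicting */
	new_task();	/* poll retry: lock acquired */
	return (0);
}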
+
/*
* This function is called during task clean-up if the
* DB_LU_FLAG is set on the dbuf. This should only be called for
@@ -3581,7 +3878,7 @@ sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
ASSERT(dbuf->db_lu_private);
- ASSERT(scmd && scmd->nbufs > 0);
+ ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0);
ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
@@ -3595,7 +3892,7 @@ sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
scmd->cmd_type, (void *)task);
}
- if (--scmd->nbufs == 0)
+ if (atomic_dec_8_nv(&scmd->nbufs) == 0)
rw_exit(&sl->sl_access_state_lock);
stmf_teardown_dbuf(task, dbuf);
stmf_free(dbuf);
@@ -3692,13 +3989,20 @@ sbd_status_t
sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
{
int r = 0;
- int ret;
+ sbd_status_t ret;
+ rw_enter(&sl->sl_access_state_lock, RW_READER);
+ if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
+ ret = SBD_FILEIO_FAILURE;
+ goto flush_fail;
+ }
if (fsync_done)
goto over_fsync;
if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
- if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL))
- return (SBD_FAILURE);
+ if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) {
+ ret = SBD_FAILURE;
+ goto flush_fail;
+ }
}
over_fsync:
if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
@@ -3709,12 +4013,14 @@ over_fsync:
mutex_enter(&sl->sl_lock);
sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
mutex_exit(&sl->sl_lock);
- } else if (ret != 0) {
- return (SBD_FAILURE);
+ } else {
+ ret = (ret != 0) ? SBD_FAILURE : SBD_SUCCESS;
}
}
+flush_fail:
+ rw_exit(&sl->sl_access_state_lock);
- return (SBD_SUCCESS);
+ return (ret);
}
/* ARGSUSED */
diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h b/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h
index f3ab44f8f4..09c672d16b 100644
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h
+++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h
@@ -34,6 +34,7 @@ extern "C" {
#endif
typedef stmf_status_t sbd_status_t;
+#include "ats_copy_mgr.h"
extern char sbd_vendor_id[];
extern char sbd_product_id[];
extern char sbd_revision[];
@@ -56,6 +57,7 @@ extern krwlock_t sbd_global_prop_lock;
#define SBD_FILEIO_FAILURE (SBD_FAILURE | STMF_FSC(7))
#define SBD_IO_PAST_EOF (SBD_FAILURE | STMF_FSC(8))
#define SBD_BUSY (SBD_FAILURE | STMF_FSC(9))
+#define SBD_COMPARE_FAILED (SBD_FAILURE | STMF_FSC(10))
#define SHARED_META_DATA_SIZE 65536
#define SBD_META_OFFSET 4096
@@ -246,6 +248,7 @@ typedef struct sbd_lu {
struct sbd_it_data *sl_it_list;
struct sbd_pgr *sl_pgr;
uint64_t sl_rs_owner_session_id;
+ list_t sl_ats_io_list;
} sbd_lu_t;
/*
@@ -304,6 +307,11 @@ sbd_status_t sbd_wcd_set(int wcd, sbd_lu_t *sl);
void sbd_wcd_get(int *wcd, sbd_lu_t *sl);
int sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl);
+void sbd_handle_short_write_transfers(scsi_task_t *, stmf_data_buf_t *,
+ uint32_t);
+void sbd_handle_short_read_transfers(scsi_task_t *, stmf_data_buf_t *,
+ uint8_t *, uint32_t, uint32_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/io/comstar/port/fct/fct.c b/usr/src/uts/common/io/comstar/port/fct/fct.c
index cdb8a00aa3..9f20636a27 100644
--- a/usr/src/uts/common/io/comstar/port/fct/fct.c
+++ b/usr/src/uts/common/io/comstar/port/fct/fct.c
@@ -113,6 +113,14 @@ static int max_cached_ncmds = FCT_MAX_CACHED_CMDS;
static fct_i_local_port_t *fct_iport_list = NULL;
static kmutex_t fct_global_mutex;
uint32_t fct_rscn_options = RSCN_OPTION_VERIFY;
+/*
+ * This is to keep fibre channel from hanging if syseventd is
+ * not working correctly and the queue fills. It is a tunable that
+ * lets the user force event logging to always happen (the historical
+ * behavior); by default events are logged without blocking and may
+ * be dropped if the queue is full.
+ */
+static uint8_t fct_force_log = 0; /* use DDI_SLEEP on ddi_log_sysevent */
+
int
_init(void)
@@ -1612,7 +1620,8 @@ fct_local_port_cleanup_done(fct_i_local_port_t *iport)
fct_cmd_t *
fct_scsi_task_alloc(fct_local_port_t *port, uint16_t rp_handle,
- uint32_t rportid, uint8_t *lun, uint16_t cdb_length, uint16_t task_ext)
+ uint32_t rportid, uint8_t *lun, uint16_t cdb_length,
+ uint16_t task_ext)
{
fct_cmd_t *cmd;
fct_i_cmd_t *icmd;
@@ -2829,8 +2838,8 @@ fct_cmd_fca_aborted(fct_cmd_t *cmd, fct_status_t s, uint32_t ioflags)
/* For non FCP Rest of the work is done by the terminator */
/* For FCP stuff just call stmf */
if (cmd->cmd_type == FCT_CMD_FCP_XCHG) {
- stmf_task_lport_aborted((scsi_task_t *)cmd->cmd_specific,
- s, STMF_IOF_LPORT_DONE);
+ stmf_task_lport_aborted_unlocked(
+ (scsi_task_t *)cmd->cmd_specific, s, STMF_IOF_LPORT_DONE);
}
}
@@ -3432,6 +3441,7 @@ fct_log_local_port_event(fct_local_port_t *port, char *subclass)
{
nvlist_t *attr_list;
int port_instance;
+ int rc, sleep = DDI_SLEEP;
if (!fct_dip)
return;
@@ -3452,8 +3462,15 @@ fct_log_local_port_event(fct_local_port_t *port, char *subclass)
goto error;
}
- (void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
- subclass, attr_list, NULL, DDI_SLEEP);
+ if (fct_force_log == 0) {
+ sleep = DDI_NOSLEEP;
+ }
+ rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
+ subclass, attr_list, NULL, sleep);
+ if (rc != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s:event dropped", __func__);
+ goto error;
+ }
nvlist_free(attr_list);
return;
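Usage note: when dropped events are unacceptable (for example while debugging syseventd), the tunable can presumably be set at boot through the usual /etc/system module:variable mechanism; a hypothetical example:

* Force fct to log sysevents with DDI_SLEEP (never drop them)
set fct:fct_force_log = 1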
@@ -3471,6 +3488,7 @@ fct_log_remote_port_event(fct_local_port_t *port, char *subclass,
{
nvlist_t *attr_list;
int port_instance;
+ int rc, sleep = DDI_SLEEP;
if (!fct_dip)
return;
@@ -3501,8 +3519,15 @@ fct_log_remote_port_event(fct_local_port_t *port, char *subclass,
goto error;
}
- (void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
- subclass, attr_list, NULL, DDI_SLEEP);
+ if (fct_force_log == 0) {
+ sleep = DDI_NOSLEEP;
+ }
+ rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC,
+ subclass, attr_list, NULL, sleep);
+ if (rc != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s: queue full event lost", __func__);
+ goto error;
+ }
nvlist_free(attr_list);
return;
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c
index 3fd198a135..1a3483f189 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*
- * Copyright 2014, 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2017, Joyent, Inc. All rights reserved.
*/
@@ -564,7 +564,8 @@ cleanup:
it_config_free_cmn(cfg);
if (cfg_pnvlist)
kmem_free(cfg_pnvlist, setcfg.set_cfg_pnvlist_len);
- nvlist_free(cfg_nvlist);
+ if (cfg_nvlist)
+ nvlist_free(cfg_nvlist);
/*
* Now that the reconfig is complete set our state back to
@@ -992,7 +993,7 @@ iscsit_task_aborted(idm_task_t *idt, idm_status_t status)
* STMF_ABORTED, the code actually looks for
* STMF_ABORT_SUCCESS.
*/
- stmf_task_lport_aborted(itask->it_stmf_task,
+ stmf_task_lport_aborted_unlocked(itask->it_stmf_task,
STMF_ABORT_SUCCESS, STMF_IOF_LPORT_DONE);
return;
} else {
@@ -1224,6 +1225,7 @@ iscsit_conn_accept(idm_conn_t *ic)
mutex_init(&ict->ict_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&ict->ict_statsn_mutex, NULL, MUTEX_DRIVER, NULL);
idm_refcnt_init(&ict->ict_refcnt, ict);
+ idm_refcnt_init(&ict->ict_dispatch_refcnt, ict);
/*
* Initialize login state machine
@@ -1369,6 +1371,9 @@ iscsit_conn_lost(idm_conn_t *ic)
* Make sure there aren't any PDU's transitioning from the receive
* handler to the dispatch taskq.
*/
+ if (idm_refcnt_is_held(&ict->ict_dispatch_refcnt) < 0) {
+ cmn_err(CE_WARN, "Possible hang in iscsit_conn_lost");
+ }
idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt);
return (IDM_STATUS_SUCCESS);
@@ -1385,13 +1390,10 @@ iscsit_conn_destroy(idm_conn_t *ic)
/* Generate session state machine event */
if (ict->ict_sess != NULL) {
- /*
- * Session state machine will call iscsit_conn_destroy_done()
- * when it has removed references to this connection.
- */
iscsit_sess_sm_event(ict->ict_sess, SE_CONN_FAIL, ict);
}
+ idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt);
idm_refcnt_wait_ref(&ict->ict_refcnt);
/*
* The session state machine does not need to post
@@ -1407,6 +1409,7 @@ iscsit_conn_destroy(idm_conn_t *ic)
iscsit_text_cmd_fini(ict);
mutex_destroy(&ict->ict_mutex);
+ idm_refcnt_destroy(&ict->ict_dispatch_refcnt);
idm_refcnt_destroy(&ict->ict_refcnt);
kmem_free(ict, sizeof (*ict));
@@ -1888,20 +1891,8 @@ iscsit_abort(stmf_local_port_t *lport, int abort_cmd, void *arg, uint32_t flags)
* Call IDM to abort the task. Due to a variety of
* circumstances the task may already be in the process of
* aborting.
- * We'll let IDM worry about rationalizing all that except
- * for one particular instance. If the state of the task
- * is TASK_COMPLETE, we need to indicate to the framework
- * that we are in fact done. This typically happens with
- * framework-initiated task management type requests
- * (e.g. abort task).
*/
- if (idt->idt_state == TASK_COMPLETE) {
- idm_refcnt_wait_ref(&idt->idt_refcnt);
- return (STMF_ABORT_SUCCESS);
- } else {
- idm_task_abort(idt->idt_ic, idt, AT_TASK_MGMT_ABORT);
- return (STMF_SUCCESS);
- }
+ return (idm_task_abort(idt->idt_ic, idt, AT_TASK_MGMT_ABORT));
}
/*NOTREACHED*/
@@ -1962,6 +1953,21 @@ iscsit_op_scsi_cmd(idm_conn_t *ic, idm_pdu_t *rx_pdu)
iscsit_process_pdu_in_queue(ict->ict_sess);
}
+static int
+iscsit_validate_idm_pdu(idm_pdu_t *rx_pdu)
+{
+ iscsi_scsi_cmd_hdr_t *iscsi_scsi =
+ (iscsi_scsi_cmd_hdr_t *)rx_pdu->isp_hdr;
+
+ if ((iscsi_scsi->scb[0] == SCMD_READ) ||
+ (iscsi_scsi->scb[0] == SCMD_READ_G1) ||
+ (iscsi_scsi->scb[0] == SCMD_READ_G4)) {
+ if (iscsi_scsi->flags & ISCSI_FLAG_CMD_WRITE)
+ return (IDM_STATUS_FAIL);
+ }
+ return (IDM_STATUS_SUCCESS);
+}
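The check rejects a self-contradictory PDU up front: per RFC 3720, byte 1 of the SCSI Command BHS carries the F/R/W bits, and a READ CDB should arrive with R set, not W. Illustration (flag values as defined in the iSCSI protocol headers):

byte 1 of the SCSI Command BHS:  F = 0x80, R = 0x40, W = 0x20
well-formed READ(10):  scb[0] = 0x28, flags carry R
rejected above:        scb[0] = 0x28, flags carry W  -> CHECK CONDITION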
+
/*
* ISCSI protocol
*/
@@ -1979,6 +1985,15 @@ iscsit_post_scsi_cmd(idm_conn_t *ic, idm_pdu_t *rx_pdu)
uint16_t addl_cdb_len = 0;
ict = ic->ic_handle;
+ if (iscsit_validate_idm_pdu(rx_pdu) != IDM_STATUS_SUCCESS) {
+ /* Finish processing request */
+ iscsit_set_cmdsn(ict, rx_pdu);
+
+ iscsit_send_direct_scsi_resp(ict, rx_pdu,
+ ISCSI_STATUS_CMD_COMPLETED, STATUS_CHECK);
+ idm_pdu_complete(rx_pdu, IDM_STATUS_PROTOCOL_ERROR);
+ return;
+ }
itask = iscsit_task_alloc(ict);
if (itask == NULL) {
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h
index 4718e98e77..9e4d84f722 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h
@@ -56,9 +56,6 @@
#define ISCSI_MAX_TSIH 0xffff
#define ISCSI_UNSPEC_TSIH 0
-/* Max targets per system */
-#define ISCSIT_MAX_TARGETS 1024
-
#define ISCSIT_MAX_WINDOW 1024
#define ISCSIT_RXPDU_QUEUE_LEN 2048
@@ -124,74 +121,63 @@ typedef struct {
} iscsit_portal_t;
+#define TGT_STATE_LIST() \
+ item(TS_UNDEFINED) \
+ item(TS_CREATED) \
+ item(TS_ONLINING) \
+ item(TS_ONLINE) \
+ item(TS_STMF_ONLINE) \
+ item(TS_DELETING_NEED_OFFLINE) \
+ item(TS_OFFLINING) \
+ item(TS_OFFLINE) \
+ item(TS_STMF_OFFLINE) \
+ item(TS_DELETING_STMF_DEREG) \
+ item(TS_DELETING_STMF_DEREG_FAIL) \
+ item(TS_DELETING) \
+ item(TS_MAX_STATE)
+
/* Target states and events, update iscsit_ts_name table whenever modified */
typedef enum {
- TS_UNDEFINED = 0,
- TS_CREATED,
- TS_ONLINING,
- TS_ONLINE,
- TS_STMF_ONLINE,
- TS_DELETING_NEED_OFFLINE,
- TS_OFFLINING,
- TS_OFFLINE,
- TS_STMF_OFFLINE,
- TS_DELETING_STMF_DEREG,
- TS_DELETING_STMF_DEREG_FAIL,
- TS_DELETING,
- TS_MAX_STATE
+#define item(a) a,
+ TGT_STATE_LIST()
+#undef item
} iscsit_tgt_state_t;
#ifdef ISCSIT_TGT_SM_STRINGS
-static const char *iscsit_ts_name[TS_MAX_STATE+1] = {
- "TS_UNDEFINED",
- "TS_CREATED",
- "TS_ONLINING",
- "TS_ONLINE",
- "TS_STMF_ONLINE",
- "TS_DELETING_NEED_OFFLINE",
- "TS_OFFLINING",
- "TS_OFFLINE",
- "TS_STMF_OFFLINE",
- "TS_DELETING_STMF_DEREG",
- "TS_DELETING_STMF_DEREG_FAIL",
- "TS_DELETING",
- "TS_MAX_STATE"
+static const char *iscsit_ts_name[TS_MAX_STATE + 1] = {
+#define item(a) #a,
+ TGT_STATE_LIST()
+#undef item
};
#endif
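The conversions in this header replace hand-maintained parallel enum/string tables with a single list macro (the classic X-macro idiom), so a state added to the list automatically shows up in both the enum and its name table. A minimal stand-alone illustration with hypothetical names:

#include <stdio.h>

#define COLOR_LIST() \
	item(COLOR_RED) \
	item(COLOR_GREEN) \
	item(COLOR_BLUE) \
	item(COLOR_MAX)

typedef enum {
#define item(a) a,
	COLOR_LIST()
#undef item
} color_t;

static const char *color_name[COLOR_MAX + 1] = {
#define item(a) #a,
	COLOR_LIST()
#undef item
};

int
main(void)
{
	(void) printf("%d -> %s\n", COLOR_GREEN, color_name[COLOR_GREEN]);
	return (0);
}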
+#define TGT_EVENT_LIST() \
+ item(TE_UNDEFINED) \
+ item(TE_STMF_ONLINE_REQ) \
+ item(TE_ONLINE_SUCCESS) \
+ item(TE_ONLINE_FAIL) \
+ item(TE_STMF_ONLINE_COMPLETE_ACK) \
+ item(TE_STMF_OFFLINE_REQ) \
+ item(TE_OFFLINE_COMPLETE) \
+ item(TE_STMF_OFFLINE_COMPLETE_ACK) \
+ item(TE_DELETE) \
+ item(TE_STMF_DEREG_SUCCESS) \
+ item(TE_STMF_DEREG_FAIL) \
+ item(TE_STMF_DEREG_RETRY) \
+ item(TE_WAIT_REF_COMPLETE) \
+ item(TE_MAX_EVENT)
+
typedef enum {
- TE_UNDEFINED = 0,
- TE_STMF_ONLINE_REQ,
- TE_ONLINE_SUCCESS,
- TE_ONLINE_FAIL,
- TE_STMF_ONLINE_COMPLETE_ACK,
- TE_STMF_OFFLINE_REQ,
- TE_OFFLINE_COMPLETE,
- TE_STMF_OFFLINE_COMPLETE_ACK,
- TE_DELETE,
- TE_STMF_DEREG_SUCCESS,
- TE_STMF_DEREG_FAIL,
- TE_STMF_DEREG_RETRY,
- TE_WAIT_REF_COMPLETE,
- TE_MAX_EVENT
+#define item(a) a,
+ TGT_EVENT_LIST()
+#undef item
} iscsit_tgt_event_t;
#ifdef ISCSIT_TGT_SM_STRINGS
-static const char *iscsit_te_name[TE_MAX_EVENT+1] = {
- "TE_UNDEFINED",
- "TE_STMF_ONLINE_REQ",
- "TE_ONLINE_SUCCESS",
- "TE_ONLINE_FAIL",
- "TE_STMF_ONLINE_COMPLETE_ACK",
- "TE_STMF_OFFLINE_REQ",
- "TE_OFFLINE_COMPLETE",
- "TE_STMF_OFFLINE_COMPLETE_ACK",
- "TE_DELETE",
- "TE_STMF_DEREG_SUCCESS",
- "TE_STMF_DEREG_FAIL",
- "TE_STMF_DEREG_RETRY",
- "TE_WAIT_REF_COMPLETE",
- "TE_MAX_EVENT"
+static const char *iscsit_te_name[TE_MAX_EVENT + 1] = {
+#define item(a) #a,
+ TGT_EVENT_LIST()
+#undef item
};
#endif
@@ -238,7 +224,7 @@ typedef struct conn_auth {
uint8_t ca_ini_chapsecret[iscsitAuthStringMaxLength];
int ca_ini_chapsecretlen;
- /* RADIUS authentication information */
+ /* RADIUS authentication information */
boolean_t ca_use_radius;
struct sockaddr_storage ca_radius_server;
uint8_t ca_radius_secret[iscsitAuthStringMaxLength];
@@ -284,67 +270,62 @@ typedef struct conn_auth {
struct iscsit_conn_s;
+/* Add new session states above SS_MAX_STATE */
+#define SESSION_STATE_LIST() \
+ item(SS_UNDEFINED) \
+ item(SS_Q1_FREE) \
+ item(SS_Q2_ACTIVE) \
+ item(SS_Q3_LOGGED_IN) \
+ item(SS_Q4_FAILED) \
+ item(SS_Q5_CONTINUE) \
+ item(SS_Q6_DONE) \
+ item(SS_Q7_ERROR) \
+ item(SS_MAX_STATE)
+
/* Update iscsit_ss_name table whenever session states are modified */
typedef enum {
- SS_UNDEFINED = 0,
- SS_Q1_FREE,
- SS_Q2_ACTIVE,
- SS_Q3_LOGGED_IN,
- SS_Q4_FAILED,
- SS_Q5_CONTINUE,
- SS_Q6_DONE,
- SS_Q7_ERROR,
- /* Add new session states above SS_MAX_STATE */
- SS_MAX_STATE
+#define item(a) a,
+ SESSION_STATE_LIST()
+#undef item
} iscsit_session_state_t;
#ifdef ISCSIT_SESS_SM_STRINGS
/* An array of state text values, for use in logging state transitions */
-static const char *iscsit_ss_name[SS_MAX_STATE+1] = {
- "SS_UNDEFINED",
- "SS_Q1_FREE",
- "SS_Q2_ACTIVE",
- "SS_Q3_LOGGED_IN",
- "SS_Q4_FAILED",
- "SS_Q5_CONTINUE",
- "SS_Q6_DONE",
- "SS_Q7_ERROR",
- "SS_MAX_STATE"
+static const char *iscsit_ss_name[SS_MAX_STATE + 1] = {
+#define item(a) #a,
+ SESSION_STATE_LIST()
+#undef item
};
#endif
+/* Add new events above SE_MAX_EVENT */
+#define SESSION_EVENT_LIST() \
+ item(SE_UNDEFINED) \
+ item(SE_CONN_IN_LOGIN) /* From login state machine */ \
+ item(SE_CONN_LOGGED_IN) /* FFP enabled client notification */ \
+ item(SE_CONN_FFP_FAIL) /* FFP disabled client notification */ \
+ item(SE_CONN_FFP_DISABLE) /* FFP disabled client notification */ \
+ item(SE_CONN_FAIL) /* Conn destroy client notification */ \
+ item(SE_SESSION_CLOSE) /* FFP disabled client notification */ \
+ item(SE_SESSION_REINSTATE) /* From login state machine */ \
+ item(SE_SESSION_TIMEOUT) /* Internal */ \
+ item(SE_SESSION_CONTINUE) /* From login state machine */ \
+ item(SE_SESSION_CONTINUE_FAIL) /* From login state machine? */ \
+ item(SE_MAX_EVENT)
+
/* Update iscsit_se_name table whenever session events are modified */
typedef enum {
- SE_UNDEFINED = 0,
- SE_CONN_IN_LOGIN, /* From login state machine */
- SE_CONN_LOGGED_IN, /* FFP enabled client notification */
- SE_CONN_FFP_FAIL, /* FFP disabled client notification */
- SE_CONN_FFP_DISABLE, /* FFP disabled client notification */
- SE_CONN_FAIL, /* Conn destroy client notification */
- SE_SESSION_CLOSE, /* FFP disabled client notification */
- SE_SESSION_REINSTATE, /* From login state machine */
- SE_SESSION_TIMEOUT, /* Internal */
- SE_SESSION_CONTINUE, /* From login state machine */
- SE_SESSION_CONTINUE_FAIL, /* From login state machine? */
- /* Add new events above SE_MAX_EVENT */
- SE_MAX_EVENT
+#define item(a) a,
+ SESSION_EVENT_LIST()
+#undef item
} iscsit_session_event_t;
#ifdef ISCSIT_SESS_SM_STRINGS
/* An array of event text values, for use in logging events */
-static const char *iscsit_se_name[SE_MAX_EVENT+1] = {
- "SE_UNDEFINED",
- "SE_CONN_IN_LOGIN",
- "SE_CONN_LOGGED_IN",
- "SE_CONN_FFP_FAIL",
- "SE_CONN_FFP_DISABLE",
- "SE_CONN_FAIL",
- "SE_SESSION_CLOSE",
- "SE_SESSION_REINSTATE",
- "SE_SESSION_TIMEOUT",
- "SE_SESSION_CONTINUE",
- "SE_SESSION_CONTINUE_FAIL",
- "SE_MAX_EVENT"
+static const char *iscsit_se_name[SE_MAX_EVENT + 1] = {
+#define item(a) #a,
+ SESSION_EVENT_LIST()
+#undef item
};
#endif
@@ -389,61 +370,59 @@ typedef struct {
iscsit_cbuf_t *ist_rxpdu_queue;
} iscsit_sess_t;
+/* Add new login states above ILS_MAX_STATE */
+#define LOGIN_STATE_LIST() \
+ item(ILS_UNDEFINED) \
+ item(ILS_LOGIN_INIT) \
+ item(ILS_LOGIN_WAITING) /* Waiting for more login PDU's */ \
+ item(ILS_LOGIN_PROCESSING) /* Processing login request */ \
+ item(ILS_LOGIN_RESPONDING) /* Sending login response */ \
+ item(ILS_LOGIN_RESPONDED) /* Sent login response (no trans. to FFP) */ \
+ item(ILS_LOGIN_FFP) /* Sending last login PDU for final response */ \
+ item(ILS_LOGIN_DONE) /* Last login PDU sent (so we can free it) */ \
+ item(ILS_LOGIN_ERROR) /* Login error, login failed */ \
+ item(ILS_MAX_STATE)
+
/* Update iscsit_ils_name table whenever login states are modified */
typedef enum {
- ILS_UNDEFINED = 0,
- ILS_LOGIN_INIT,
- ILS_LOGIN_WAITING, /* Waiting for more login PDU's */
- ILS_LOGIN_PROCESSING, /* Processing login request */
- ILS_LOGIN_RESPONDING, /* Sending login response */
- ILS_LOGIN_RESPONDED, /* Sent login response (no trans. to FFP) */
- ILS_LOGIN_FFP, /* Sending last login PDU for final response */
- ILS_LOGIN_DONE, /* Last login PDU sent (so we can free it) */
- ILS_LOGIN_ERROR, /* Login error, login failed */
- /* Add new login states above ILS_MAX_STATE */
- ILS_MAX_STATE
+#define item(a) a,
+ LOGIN_STATE_LIST()
+#undef item
} iscsit_login_state_t;
#ifdef ISCSIT_LOGIN_SM_STRINGS
-/* An array of login state text values, for use in logging login progress */
-static const char *iscsit_ils_name[ILS_MAX_STATE+1] = {
- "ILS_UNDEFINED",
- "ILS_LOGIN_INIT",
- "ILS_LOGIN_WAITING",
- "ILS_LOGIN_PROCESSING",
- "ILS_LOGIN_RESPONDING",
- "ILS_LOGIN_RESPONDED",
- "ILS_LOGIN_FFP",
- "ILS_LOGIN_DONE",
- "ILS_LOGIN_ERROR",
- "ILS_MAX_STATE"
+/* An array of login state text values, for use in logging login progress */
+static const char *iscsit_ils_name[ILS_MAX_STATE + 1] = {
+#define item(a) #a,
+ LOGIN_STATE_LIST()
+#undef item
};
#endif
+/* Add new login events above ILE_MAX_EVENT */
+#define LOGIN_EVENT_LIST() \
+ item(ILE_UNDEFINED) \
+ item(ILE_LOGIN_RCV) \
+ item(ILE_LOGIN_RESP_READY) \
+ item(ILE_LOGIN_FFP) \
+ item(ILE_LOGIN_RESP_COMPLETE) \
+ item(ILE_LOGIN_ERROR) \
+ item(ILE_LOGIN_CONN_ERROR) \
+ item(ILE_MAX_EVENT)
+
/* Update iscsit_ile_name table whenever login events are modified */
typedef enum {
- ILE_UNDEFINED = 0,
- ILE_LOGIN_RCV,
- ILE_LOGIN_RESP_READY,
- ILE_LOGIN_FFP,
- ILE_LOGIN_RESP_COMPLETE,
- ILE_LOGIN_ERROR,
- ILE_LOGIN_CONN_ERROR,
- /* Add new login events above ILE_MAX_EVENT */
- ILE_MAX_EVENT
+#define item(a) a,
+ LOGIN_EVENT_LIST()
+#undef item
} iscsit_login_event_t;
#ifdef ISCSIT_LOGIN_SM_STRINGS
-/* An array of login event text values, for use in logging login events */
-static const char *iscsit_ile_name[ILE_MAX_EVENT+1] = {
- "ILE_UNDEFINED",
- "ILE_LOGIN_RCV",
- "ILE_LOGIN_RESP_READY",
- "ILE_LOGIN_FFP",
- "ILE_LOGIN_RESP_COMPLETE",
- "ILE_LOGIN_ERROR",
- "ILE_LOGIN_CONN_ERROR",
- "ILE_MAX_EVENT"
+/* An array of login event text values, for use in logging login events */
+static const char *iscsit_ile_name[ILE_MAX_EVENT + 1] = {
+#define item(a) #a,
+ LOGIN_EVENT_LIST()
+#undef item
};
#endif
@@ -466,8 +445,8 @@ typedef struct {
} iscsit_op_params_t;
typedef struct {
- iscsit_login_state_t icl_login_state;
- iscsit_login_state_t icl_login_last_state;
+ iscsit_login_state_t icl_login_state;
+ iscsit_login_state_t icl_login_last_state;
sm_audit_buf_t icl_state_audit;
boolean_t icl_busy;
boolean_t icl_login_complete;
@@ -565,17 +544,22 @@ typedef struct iscsit_isns_cfg {
list_t isns_svrs;
} iscsit_isns_cfg_t;
+#define SERVICE_ENABLED_LIST() \
+ item(ISE_UNDEFINED) \
+ item(ISE_DETACHED) \
+ item(ISE_DISABLED) \
+ item(ISE_ENABLING) \
+ item(ISE_ENABLED) \
+ item(ISE_BUSY) \
+ item(ISE_DISABLING)
+
/*
* State values for the iscsit service
*/
typedef enum {
- ISE_UNDEFINED = 0,
- ISE_DETACHED,
- ISE_DISABLED,
- ISE_ENABLING,
- ISE_ENABLED,
- ISE_BUSY,
- ISE_DISABLING
+#define item(a) a,
+ SERVICE_ENABLED_LIST()
+#undef item
} iscsit_service_enabled_t;
@@ -682,9 +666,6 @@ idm_status_t
iscsit_conn_reinstate(iscsit_conn_t *existing_ict, iscsit_conn_t *ict);
void
-iscsit_conn_destroy_done(iscsit_conn_t *ict);
-
-void
iscsit_conn_set_auth(iscsit_conn_t *ict);
void
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c
index 32b1ae3b09..4571fc55de 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c
@@ -46,6 +46,7 @@
#include <sys/portif.h>
#include <sys/idm/idm.h>
#include <sys/idm/idm_text.h>
+#include <sys/idm/idm_so.h>
#define ISCSIT_LOGIN_SM_STRINGS
#include "iscsit.h"
@@ -733,7 +734,37 @@ login_sm_new_state(iscsit_conn_t *ict, login_event_ctx_t *ctx,
lsm->icl_login_state = new_state;
mutex_exit(&lsm->icl_mutex);
- switch (lsm->icl_login_state) {
+ /*
+	 * A tale of caution here. The use of new_state below, rather than
+	 * lsm->icl_login_state (which the code originally used), is
+	 * deliberate. Since the icl_mutex is dropped under the right
+	 * circumstances, the login state can change between setting the
+	 * state and examining the state to proceed. No big surprise, since
+	 * the lock was being used in the first place to prevent just that
+	 * type of change.
+	 *
+	 * There has been a case where network errors occurred while a client
+	 * was attempting to reinstate the connection, causing multiple
+	 * login packets to arrive into the state machine. Those multiple
+	 * packets, which were processed incorrectly, caused the reference
+	 * count on the connection to be one higher than it should be, and
+	 * from then on the connection couldn't close correctly, causing a
+	 * hang.
+	 *
+	 * Upon examination of the core it was found that the connection
+	 * audit data had calls looking like:
+	 *	login_sm_event_dispatch
+	 *	login_sm_processing
+	 *	login_sm_new_state
+	 * That call sequence means the new state was/is ILS_LOGIN_ERROR,
+	 * yet the audit trail continues with a call to
+	 * login_sm_send_next_response, which could only occur if
+	 * icl_login_state had changed. Had the design of COMSTAR taken this
+	 * into account, the code would originally have held the icl_mutex
+	 * across the state processing. Lock order and calls which sleep
+	 * prevent that from being possible. The next best solution is to
+	 * use the local variable which holds the state.
+ */
+ switch (new_state) {
case ILS_LOGIN_WAITING:
/* Do nothing, waiting for more login PDU's */
break;
@@ -1749,6 +1780,8 @@ login_sm_session_register(iscsit_conn_t *ict)
stmf_scsi_session_t *ss;
iscsi_transport_id_t *iscsi_tptid;
uint16_t ident_len, adn_len, tptid_sz;
+ char prop_buf[KSTAT_STRLEN + 1];
+ char peer_buf[IDM_SA_NTOP_BUFSIZ];
/*
* Hold target mutex until we have finished registering with STMF
@@ -1809,6 +1842,11 @@ login_sm_session_register(iscsit_conn_t *ict)
ss->ss_port_private = ict->ict_sess;
ict->ict_sess->ist_stmf_sess = ss;
mutex_exit(&ist->ist_tgt->target_mutex);
+ (void) snprintf(prop_buf, sizeof (prop_buf), "peername_%"PRIxPTR"",
+ (uintptr_t)ict->ict_sess);
+ (void) idm_sa_ntop(&ict->ict_ic->ic_raddr, peer_buf,
+ sizeof (peer_buf));
+ (void) stmf_add_rport_info(ss, prop_buf, peer_buf);
return (IDM_STATUS_SUCCESS);
}
@@ -2727,15 +2765,15 @@ iscsit_fold_name(char *name, size_t *buflen)
/* Check for one of the supported name types */
if (strncasecmp(name, SNS_EUI ".", strlen(SNS_EUI) + 1) == 0) {
sns = SNS_EUI;
- *buflen = SNS_EUI_U8_LEN_MAX + 1;
+ *buflen = SNS_EUI_LEN_MAX + 1;
flag |= U8_TEXTPREP_TOUPPER;
} else if (strncasecmp(name, SNS_IQN ".", strlen(SNS_IQN) + 1) == 0) {
sns = SNS_IQN;
- *buflen = SNS_IQN_U8_LEN_MAX + 1;
+ *buflen = SNS_IQN_LEN_MAX + 1;
flag |= U8_TEXTPREP_TOLOWER;
} else if (strncasecmp(name, SNS_NAA ".", strlen(SNS_NAA) + 1) == 0) {
sns = SNS_NAA;
- *buflen = SNS_NAA_U8_LEN_MAX + 1;
+ *buflen = SNS_NAA_LEN_MAX + 1;
flag |= U8_TEXTPREP_TOUPPER;
} else {
return (NULL);
@@ -2744,7 +2782,7 @@ iscsit_fold_name(char *name, size_t *buflen)
ret = kmem_zalloc(*buflen, KM_SLEEP);
coff = strlen(sns);
inlen = strlen(name) - coff;
- outlen = *buflen - coff;
+ outlen = *buflen - coff - 1;
/* Fold the case and normalize string */
if (u8_textprep_str(name + coff, &inlen, ret + coff, &outlen, flag,
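The "- 1" matters: the buffer from kmem_zalloc is *buflen bytes, and u8_textprep_str must not be allowed to fill all of the space past the prefix, or the folded name loses its NUL terminator. With the earlier outlen = *buflen - coff the conversion could write up to the final byte; reserving one byte keeps the last zeroed byte as the terminator. For example, assuming an eui name where SNS_EUI_LEN_MAX is 20 ("eui." plus 16 hex digits), *buflen is 21 and coff is 3, so outlen is now 21 - 3 - 1 = 17 rather than 18.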
diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c
index d35a1b327b..c58ff4186c 100644
--- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c
+++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c
@@ -37,6 +37,7 @@
#include <sys/strsubr.h>
#include <sys/note.h>
#include <sys/sdt.h>
+#include <sys/kstat.h>
#include <sys/stmf.h>
#include <sys/stmf_ioctl.h>
@@ -54,7 +55,7 @@ typedef struct {
static void
sess_sm_event_locked(iscsit_sess_t *ist, iscsit_session_event_t event,
-iscsit_conn_t *ict);
+ iscsit_conn_t *ict);
static void
sess_sm_event_dispatch(iscsit_sess_t *ist, sess_event_ctx_t *ctx);
@@ -211,6 +212,7 @@ iscsit_sess_unref(void *ist_void)
{
iscsit_sess_t *ist = ist_void;
stmf_scsi_session_t *iss;
+ char prop_buf[KSTAT_STRLEN + 1];
/*
* State machine has run to completion, destroy session
@@ -225,6 +227,9 @@ iscsit_sess_unref(void *ist_void)
ASSERT(ist->ist_conn_count == 0);
iss = ist->ist_stmf_sess;
if (iss != NULL) {
+ (void) snprintf(prop_buf, sizeof (prop_buf),
+ "peername_%"PRIxPTR"", (uintptr_t)ist);
+ stmf_remove_rport_info(iss, prop_buf);
stmf_deregister_scsi_session(ist->ist_lport, iss);
kmem_free(iss->ss_rport_id, sizeof (scsi_devid_desc_t) +
strlen(ist->ist_initiator_name) + 1);
@@ -397,7 +402,7 @@ iscsit_sess_avl_compare(const void *void_sess1, const void *void_sess2)
{
const iscsit_sess_t *sess1 = void_sess1;
const iscsit_sess_t *sess2 = void_sess2;
- int result;
+ int result;
/*
* Sort by initiator name, then ISID then portal group tag
diff --git a/usr/src/uts/common/io/comstar/port/pppt/pppt.h b/usr/src/uts/common/io/comstar/port/pppt/pppt.h
index 260eb94a8c..4d584c0ab7 100644
--- a/usr/src/uts/common/io/comstar/port/pppt/pppt.h
+++ b/usr/src/uts/common/io/comstar/port/pppt/pppt.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2013, 2015 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _PPPT_H
#define _PPPT_H
@@ -49,74 +49,63 @@ typedef enum {
#define PPPT_MODNAME "pppt"
+#define TGT_STATE_LIST() \
+ item(TS_UNDEFINED) \
+ item(TS_CREATED) \
+ item(TS_ONLINING) \
+ item(TS_ONLINE) \
+ item(TS_STMF_ONLINE) \
+ item(TS_DELETING_NEED_OFFLINE) \
+ item(TS_OFFLINING) \
+ item(TS_OFFLINE) \
+ item(TS_STMF_OFFLINE) \
+ item(TS_DELETING_STMF_DEREG) \
+ item(TS_DELETING_STMF_DEREG_FAIL) \
+ item(TS_DELETING) \
+ item(TS_MAX_STATE)
+
/* Target states and events, update pppt_ts_name table whenever modified */
typedef enum {
- TS_UNDEFINED = 0,
- TS_CREATED,
- TS_ONLINING,
- TS_ONLINE,
- TS_STMF_ONLINE,
- TS_DELETING_NEED_OFFLINE,
- TS_OFFLINING,
- TS_OFFLINE,
- TS_STMF_OFFLINE,
- TS_DELETING_STMF_DEREG,
- TS_DELETING_STMF_DEREG_FAIL,
- TS_DELETING,
- TS_MAX_STATE
+#define item(a) a,
+ TGT_STATE_LIST()
+#undef item
} pppt_tgt_state_t;
#ifdef PPPT_TGT_SM_STRINGS
-static const char *pppt_ts_name[TS_MAX_STATE+1] = {
- "TS_UNDEFINED",
- "TS_CREATED",
- "TS_ONLINING",
- "TS_ONLINE",
- "TS_STMF_ONLINE",
- "TS_DELETING_NEED_OFFLINE",
- "TS_OFFLINING",
- "TS_OFFLINE",
- "TS_STMF_OFFLINE",
- "TS_DELETING_STMF_DEREG",
- "TS_DELETING_STMF_DEREG_FAIL",
- "TS_DELETING",
- "TS_MAX_STATE"
+static const char *pppt_ts_name[TS_MAX_STATE + 1] = {
+#define item(a) #a,
+ TGT_STATE_LIST()
+#undef item
};
#endif
+#define TGT_EVENT_LIST() \
+ item(TE_UNDEFINED) \
+ item(TE_STMF_ONLINE_REQ) \
+ item(TE_ONLINE_SUCCESS) \
+ item(TE_ONLINE_FAIL) \
+ item(TE_STMF_ONLINE_COMPLETE_ACK) \
+ item(TE_STMF_OFFLINE_REQ) \
+ item(TE_OFFLINE_COMPLETE) \
+ item(TE_STMF_OFFLINE_COMPLETE_ACK) \
+ item(TE_DELETE) \
+ item(TE_STMF_DEREG_SUCCESS) \
+ item(TE_STMF_DEREG_FAIL) \
+ item(TE_STMF_DEREG_RETRY) \
+ item(TE_WAIT_REF_COMPLETE) /* XXX */ \
+ item(TE_MAX_EVENT)
+
typedef enum {
- TE_UNDEFINED = 0,
- TE_STMF_ONLINE_REQ,
- TE_ONLINE_SUCCESS,
- TE_ONLINE_FAIL,
- TE_STMF_ONLINE_COMPLETE_ACK,
- TE_STMF_OFFLINE_REQ,
- TE_OFFLINE_COMPLETE,
- TE_STMF_OFFLINE_COMPLETE_ACK,
- TE_DELETE,
- TE_STMF_DEREG_SUCCESS,
- TE_STMF_DEREG_FAIL,
- TE_STMF_DEREG_RETRY,
- TE_WAIT_REF_COMPLETE, /* XXX */
- TE_MAX_EVENT
+#define item(a) a,
+ TGT_EVENT_LIST()
+#undef item
} pppt_tgt_event_t;
#ifdef PPPT_TGT_SM_STRINGS
-static const char *pppt_te_name[TE_MAX_EVENT+1] = {
- "TE_UNDEFINED",
- "TE_STMF_ONLINE_REQ",
- "TE_ONLINE_SUCCESS",
- "TE_ONLINE_FAIL",
- "TE_STMF_ONLINE_COMPLETE_ACK",
- "TE_STMF_OFFLINE_REQ",
- "TE_OFFLINE_COMPLETE",
- "TE_STMF_OFFLINE_COMPLETE_ACK",
- "TE_DELETE",
- "TE_STMF_DEREG_SUCCESS",
- "TE_STMF_DEREG_FAIL",
- "TE_STMF_DEREG_RETRY",
- "TE_WAIT_REF_COMPLETE",
- "TE_MAX_EVENT"
+static const char *pppt_te_name[TE_MAX_EVENT + 1] = {
+#define item(a) #a,
+ TGT_EVENT_LIST()
+#undef item
};
#endif
diff --git a/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c b/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c
index 7d7f571ec0..fa930ec682 100644
--- a/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c
+++ b/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/cpuvar.h>
@@ -331,7 +331,7 @@ pppt_msg_scsi_cmd(stmf_ic_msg_t *msg)
(void) pppt_task_hold(ptask);
task->task_port_private = ptask;
task->task_flags = scmd->icsc_task_flags;
- task->task_additional_flags = 0;
+ task->task_additional_flags = TASK_AF_PPPT_TASK;
task->task_priority = 0;
/*
diff --git a/usr/src/uts/common/io/comstar/stmf/lun_map.c b/usr/src/uts/common/io/comstar/stmf/lun_map.c
index c709a2688a..41d5f7e9c8 100644
--- a/usr/src/uts/common/io/comstar/stmf/lun_map.c
+++ b/usr/src/uts/common/io/comstar/stmf/lun_map.c
@@ -47,8 +47,9 @@ void stmf_update_sessions_per_ve(stmf_view_entry_t *ve,
void stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport,
stmf_i_scsi_session_t *iss, stmf_lun_map_t *vemap);
stmf_id_data_t *stmf_lookup_group_for_host(uint8_t *ident, uint16_t ident_size);
-stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *sm, void *ent, uint8_t *lun);
-stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *sm, uint8_t *lun);
+static stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *sm, void *ent,
+ uint8_t *lun);
+static stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *sm, uint8_t *lun);
uint16_t stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun);
stmf_status_t stmf_add_tg(uint8_t *tg_name, uint16_t tg_name_size,
int allow_special, uint32_t *err_detail);
@@ -179,10 +180,11 @@ stmf_view_clear_config()
/*
* Create luns map for session based on the view
+ * iss_lockp is held
*/
stmf_status_t
stmf_session_create_lun_map(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss)
+ stmf_i_scsi_session_t *iss)
{
stmf_id_data_t *tg;
stmf_id_data_t *hg;
@@ -236,52 +238,8 @@ stmf_session_create_lun_map(stmf_i_local_port_t *ilport,
}
/*
- * destroy lun map for session
- */
-/* ARGSUSED */
-stmf_status_t
-stmf_session_destroy_lun_map(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss)
-{
- stmf_lun_map_t *sm;
- stmf_i_lu_t *ilu;
- uint16_t n;
- stmf_lun_map_ent_t *ent;
-
- ASSERT(mutex_owned(&stmf_state.stmf_lock));
- /*
- * to avoid conflict with updating session's map,
- * which only grab stmf_lock
- */
- sm = iss->iss_sm;
- iss->iss_sm = NULL;
- iss->iss_hg = NULL;
- if (sm->lm_nentries) {
- for (n = 0; n < sm->lm_nentries; n++) {
- if ((ent = (stmf_lun_map_ent_t *)sm->lm_plus[n])
- != NULL) {
- if (ent->ent_itl_datap) {
- stmf_do_itl_dereg(ent->ent_lu,
- ent->ent_itl_datap,
- STMF_ITL_REASON_IT_NEXUS_LOSS);
- }
- ilu = (stmf_i_lu_t *)
- ent->ent_lu->lu_stmf_private;
- atomic_dec_32(&ilu->ilu_ref_cnt);
- kmem_free(sm->lm_plus[n],
- sizeof (stmf_lun_map_ent_t));
- }
- }
- kmem_free(sm->lm_plus,
- sizeof (stmf_lun_map_ent_t *) * sm->lm_nentries);
- }
-
- kmem_free(sm, sizeof (*sm));
- return (STMF_SUCCESS);
-}
-
-/*
* Expects the session lock to be held.
+ * iss_lockp is held
*/
stmf_xfer_data_t *
stmf_session_prepare_report_lun_data(stmf_lun_map_t *sm)
@@ -390,12 +348,13 @@ stmf_session_lu_unmapall(stmf_lu_t *lu)
}
/*
* add lu to a session, stmf_lock is already held
+ * iss_lockp/ilport_lock already held
*/
-stmf_status_t
+static stmf_status_t
stmf_add_lu_to_session(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss,
- stmf_lu_t *lu,
- uint8_t *lu_nbr)
+ stmf_i_scsi_session_t *iss,
+ stmf_lu_t *lu,
+ uint8_t *lu_nbr)
{
stmf_lun_map_t *sm = iss->iss_sm;
stmf_status_t ret;
@@ -434,13 +393,11 @@ stmf_add_lu_to_session(stmf_i_local_port_t *ilport,
/*
 * remove lu from a session, stmf_lock is already held
+ * iss_lockp held
*/
-/* ARGSUSED */
-stmf_status_t
-stmf_remove_lu_from_session(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss,
- stmf_lu_t *lu,
- uint8_t *lu_nbr)
+static void
+stmf_remove_lu_from_session(stmf_i_scsi_session_t *iss,
+ stmf_lu_t *lu, uint8_t *lu_nbr)
{
stmf_status_t ret;
stmf_i_lu_t *ilu;
@@ -451,7 +408,10 @@ stmf_remove_lu_from_session(stmf_i_local_port_t *ilport,
ASSERT(mutex_owned(&stmf_state.stmf_lock));
lun_map_ent = stmf_get_ent_from_map(sm, luNbr);
- ASSERT(lun_map_ent && lun_map_ent->ent_lu == lu);
+	if (lun_map_ent == NULL) {
+		return;
+	}
+	ASSERT(lun_map_ent->ent_lu == lu);
ilu = (stmf_i_lu_t *)lu->lu_stmf_private;
@@ -464,7 +424,6 @@ stmf_remove_lu_from_session(stmf_i_local_port_t *ilport,
STMF_ITL_REASON_USER_REQUEST);
}
kmem_free((void *)lun_map_ent, sizeof (stmf_lun_map_ent_t));
- return (STMF_SUCCESS);
}
/*
@@ -473,7 +432,7 @@ stmf_remove_lu_from_session(stmf_i_local_port_t *ilport,
*/
void
stmf_update_sessions_per_ve(stmf_view_entry_t *ve,
- stmf_lu_t *lu, int action)
+ stmf_lu_t *lu, int action)
{
stmf_i_lu_t *ilu_tmp;
stmf_lu_t *lu_to_add;
@@ -519,8 +478,8 @@ stmf_update_sessions_per_ve(stmf_view_entry_t *ve,
continue;
/* This host belongs to the host group */
if (action == 0) { /* to remove */
- (void) stmf_remove_lu_from_session(ilport, iss,
- lu_to_add, ve->ve_lun);
+ stmf_remove_lu_from_session(iss, lu_to_add,
+ ve->ve_lun);
if (ilu_tmp->ilu_ref_cnt == 0) {
rw_exit(&ilport->ilport_lock);
return;
@@ -540,8 +499,8 @@ stmf_update_sessions_per_ve(stmf_view_entry_t *ve,
*/
void
stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss,
- stmf_lun_map_t *vemap)
+ stmf_i_scsi_session_t *iss,
+ stmf_lun_map_t *vemap)
{
stmf_lu_t *lu;
stmf_i_lu_t *ilu;
@@ -549,7 +508,6 @@ stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport,
uint32_t i;
ASSERT(mutex_owned(&stmf_state.stmf_lock));
-
for (i = 0; i < vemap->lm_nentries; i++) {
ve = (stmf_view_entry_t *)vemap->lm_plus[i];
if (!ve)
@@ -562,11 +520,13 @@ stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport,
}
}
}
-/* remove luns in view entry map from a session */
+/*
+ * remove luns in view entry map from a session
+ * iss_lockp held
+ */
void
-stmf_remove_lus_from_session_per_vemap(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss,
- stmf_lun_map_t *vemap)
+stmf_remove_lus_from_session_per_vemap(stmf_i_scsi_session_t *iss,
+ stmf_lun_map_t *vemap)
{
stmf_lu_t *lu;
stmf_i_lu_t *ilu;
@@ -582,15 +542,14 @@ stmf_remove_lus_from_session_per_vemap(stmf_i_local_port_t *ilport,
ilu = (stmf_i_lu_t *)ve->ve_luid->id_pt_to_object;
if (ilu && ilu->ilu_state == STMF_STATE_ONLINE) {
lu = ilu->ilu_lu;
- (void) stmf_remove_lu_from_session(ilport, iss, lu,
- ve->ve_lun);
+ stmf_remove_lu_from_session(iss, lu, ve->ve_lun);
}
}
}
stmf_id_data_t *
stmf_alloc_id(uint16_t id_size, uint16_t type, uint8_t *id_data,
- uint32_t additional_size)
+ uint32_t additional_size)
{
stmf_id_data_t *id;
int struct_size, total_size, real_id_size;
@@ -710,6 +669,7 @@ stmf_remove_id(stmf_id_list_t *idlist, stmf_id_data_t *id)
* is successfully added. ve_map is just another representation of the
 * view entries in a LU. Duplicating or merging a ve map does not
* affect any refcnts.
+ * stmf_state.stmf_lock held
*/
stmf_lun_map_t *
stmf_duplicate_ve_map(stmf_lun_map_t *src)
@@ -742,9 +702,13 @@ stmf_destroy_ve_map(stmf_lun_map_t *dst)
kmem_free(dst, sizeof (*dst));
}
+/*
+ * stmf_state.stmf_lock held. Operations are stmf global in nature and
+ * not session level.
+ */
int
stmf_merge_ve_map(stmf_lun_map_t *src, stmf_lun_map_t *dst,
- stmf_lun_map_t **pp_ret_map, stmf_merge_flags_t mf)
+ stmf_lun_map_t **pp_ret_map, stmf_merge_flags_t mf)
{
int i;
int nentries;
@@ -816,7 +780,7 @@ stmf_merge_ve_map(stmf_lun_map_t *src, stmf_lun_map_t *dst,
*/
stmf_status_t
stmf_add_hg(uint8_t *hg_name, uint16_t hg_name_size,
- int allow_special, uint32_t *err_detail)
+ int allow_special, uint32_t *err_detail)
{
stmf_id_data_t *id;
@@ -841,7 +805,7 @@ stmf_add_hg(uint8_t *hg_name, uint16_t hg_name_size,
/* add target group */
stmf_status_t
stmf_add_tg(uint8_t *tg_name, uint16_t tg_name_size,
- int allow_special, uint32_t *err_detail)
+ int allow_special, uint32_t *err_detail)
{
stmf_id_data_t *id;
@@ -920,8 +884,8 @@ stmf_add_ve_to_luid(stmf_id_data_t *luid, stmf_view_entry_t *ve)
/* stmf_lock is already held, err_detail may be assigned if error happens */
stmf_status_t
stmf_add_view_entry(stmf_id_data_t *hg, stmf_id_data_t *tg,
- uint8_t *lu_guid, uint32_t *ve_id, uint8_t *lun,
- stmf_view_entry_t **conflicting, uint32_t *err_detail)
+ uint8_t *lu_guid, uint32_t *ve_id, uint8_t *lun,
+ stmf_view_entry_t **conflicting, uint32_t *err_detail)
{
stmf_id_data_t *luid;
stmf_view_entry_t *ve;
@@ -1066,7 +1030,11 @@ add_ve_err_ret:
return (ret);
}
-stmf_status_t
+/*
+ * Protected by stmf_state.stmf_lock when working on the global LUN map,
+ * and by iss_lockp when working at the session level.
+ */
+static stmf_status_t
stmf_add_ent_to_map(stmf_lun_map_t *lm, void *ent, uint8_t *lun)
{
uint16_t n;
@@ -1098,7 +1066,11 @@ try_again_to_add:
}
-stmf_status_t
+/*
+ * iss_lockp held when working on a session.
+ * stmf_state.stmf_lock is held when working on the global views.
+ */
+static stmf_status_t
stmf_remove_ent_from_map(stmf_lun_map_t *lm, uint8_t *lun)
{
uint16_t n, i;
@@ -1135,6 +1107,9 @@ stmf_remove_ent_from_map(stmf_lun_map_t *lm, uint8_t *lun)
return (STMF_SUCCESS);
}
+/*
+ * stmf_state.stmf_lock held
+ */
uint16_t
stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun)
{
@@ -1158,6 +1133,10 @@ stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun)
return (luNbr);
}
+/*
+ * stmf_state.stmf_lock is held when working on global view map
+ * iss_lockp (RW_WRITER) is held when working on session map.
+ */
void *
stmf_get_ent_from_map(stmf_lun_map_t *sm, uint16_t lun_num)
{
@@ -1173,9 +1152,9 @@ stmf_get_ent_from_map(stmf_lun_map_t *sm, uint16_t lun_num)
int
stmf_add_ve(uint8_t *hgname, uint16_t hgname_size,
- uint8_t *tgname, uint16_t tgname_size,
- uint8_t *lu_guid, uint32_t *ve_id,
- uint8_t *luNbr, uint32_t *err_detail)
+ uint8_t *tgname, uint16_t tgname_size,
+ uint8_t *lu_guid, uint32_t *ve_id,
+ uint8_t *luNbr, uint32_t *err_detail)
{
stmf_id_data_t *hg;
stmf_id_data_t *tg;
@@ -1317,7 +1296,7 @@ stmf_remove_ve_by_id(uint8_t *guid, uint32_t veid, uint32_t *err_detail)
int
stmf_add_group(uint8_t *grpname, uint16_t grpname_size,
- stmf_id_type_t group_type, uint32_t *err_detail)
+ stmf_id_type_t group_type, uint32_t *err_detail)
{
stmf_status_t status;
@@ -1348,7 +1327,7 @@ stmf_add_group(uint8_t *grpname, uint16_t grpname_size,
*/
int
stmf_remove_group(uint8_t *grpname, uint16_t grpname_size,
- stmf_id_type_t group_type, uint32_t *err_detail)
+ stmf_id_type_t group_type, uint32_t *err_detail)
{
stmf_id_data_t *id;
stmf_id_data_t *idmemb;
@@ -1411,8 +1390,8 @@ stmf_remove_group(uint8_t *grpname, uint16_t grpname_size,
int
stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size,
- uint8_t *entry_ident, uint16_t entry_size,
- stmf_id_type_t entry_type, uint32_t *err_detail)
+ uint8_t *entry_ident, uint16_t entry_size,
+ stmf_id_type_t entry_type, uint32_t *err_detail)
{
stmf_id_data_t *id_grp, *id_alltgt;
stmf_id_data_t *id_member;
@@ -1492,6 +1471,7 @@ stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size,
stmf_id_data_t *tgid;
iss->iss_hg = (void *)id_grp;
tgid = ilport->ilport_tg;
+ rw_enter(iss->iss_lockp, RW_WRITER);
if (tgid) {
vemap = stmf_get_ve_map_per_ids(tgid, id_grp);
if (vemap)
@@ -1501,6 +1481,7 @@ stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size,
if (vemap_alltgt)
stmf_add_lus_to_session_per_vemap(ilport,
iss, vemap_alltgt);
+ rw_exit(iss->iss_lockp);
}
}
@@ -1509,8 +1490,8 @@ stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size,
int
stmf_remove_group_member(uint8_t *grpname, uint16_t grpname_size,
- uint8_t *entry_ident, uint16_t entry_size,
- stmf_id_type_t entry_type, uint32_t *err_detail)
+ uint8_t *entry_ident, uint16_t entry_size,
+ stmf_id_type_t entry_type, uint32_t *err_detail)
{
stmf_id_data_t *id_grp, *id_alltgt;
stmf_id_data_t *id_member;
@@ -1577,17 +1558,19 @@ stmf_remove_group_member(uint8_t *grpname, uint16_t grpname_size,
entry_ident, entry_size);
if (iss) {
stmf_id_data_t *tgid;
+ rw_enter(iss->iss_lockp, RW_WRITER);
iss->iss_hg = NULL;
tgid = ilport->ilport_tg;
if (tgid) {
vemap = stmf_get_ve_map_per_ids(tgid, id_grp);
if (vemap)
stmf_remove_lus_from_session_per_vemap(
- ilport, iss, vemap);
+ iss, vemap);
}
if (vemap_alltgt)
- stmf_remove_lus_from_session_per_vemap(ilport,
- iss, vemap_alltgt);
+ stmf_remove_lus_from_session_per_vemap(iss,
+ vemap_alltgt);
+ rw_exit(iss->iss_lockp);
}
}
@@ -1616,7 +1599,7 @@ stmf_targetident_to_ilport(uint8_t *target_ident, uint16_t ident_size)
stmf_i_scsi_session_t *
stmf_lookup_session_for_hostident(stmf_i_local_port_t *ilport,
- uint8_t *host_ident, uint16_t ident_size)
+ uint8_t *host_ident, uint16_t ident_size)
{
stmf_i_scsi_session_t *iss;
uint8_t *id;
@@ -1734,8 +1717,8 @@ stmf_validate_lun_view_entry(stmf_id_data_t *hg, stmf_id_data_t *tg,
int
stmf_validate_lun_ve(uint8_t *hgname, uint16_t hgname_size,
- uint8_t *tgname, uint16_t tgname_size,
- uint8_t *luNbr, uint32_t *err_detail)
+ uint8_t *tgname, uint16_t tgname_size,
+ uint8_t *luNbr, uint32_t *err_detail)
{
stmf_id_data_t *hg;
stmf_id_data_t *tg;
diff --git a/usr/src/uts/common/io/comstar/stmf/lun_map.h b/usr/src/uts/common/io/comstar/stmf/lun_map.h
index b9e8d8ce40..250db4997a 100644
--- a/usr/src/uts/common/io/comstar/stmf/lun_map.h
+++ b/usr/src/uts/common/io/comstar/stmf/lun_map.h
@@ -48,8 +48,6 @@ void stmf_view_init();
void stmf_view_clear_config();
stmf_status_t stmf_session_create_lun_map(stmf_i_local_port_t *ilport,
stmf_i_scsi_session_t *iss);
-stmf_status_t stmf_session_destroy_lun_map(stmf_i_local_port_t *ilport,
- stmf_i_scsi_session_t *iss);
stmf_xfer_data_t *stmf_session_prepare_report_lun_data(stmf_lun_map_t *sm);
void stmf_add_lu_to_active_sessions(stmf_lu_t *lu);
void stmf_session_lu_unmapall(stmf_lu_t *lu);
diff --git a/usr/src/uts/common/io/comstar/stmf/stmf.c b/usr/src/uts/common/io/comstar/stmf/stmf.c
index a79a27e8b3..14f5d4eecf 100644
--- a/usr/src/uts/common/io/comstar/stmf/stmf.c
+++ b/usr/src/uts/common/io/comstar/stmf/stmf.c
@@ -18,11 +18,12 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
@@ -124,7 +125,6 @@ void stmf_delete_all_ppds();
void stmf_trace_clear();
void stmf_worker_init();
stmf_status_t stmf_worker_fini();
-void stmf_worker_mgmt();
void stmf_worker_task(void *arg);
static void stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss);
static stmf_status_t stmf_ic_lu_reg(stmf_ic_reg_dereg_lun_msg_t *msg,
@@ -164,9 +164,14 @@ static void stmf_update_kstat_lu_q(scsi_task_t *, void());
static void stmf_update_kstat_lport_q(scsi_task_t *, void());
static void stmf_update_kstat_lu_io(scsi_task_t *, stmf_data_buf_t *);
static void stmf_update_kstat_lport_io(scsi_task_t *, stmf_data_buf_t *);
+static hrtime_t stmf_update_rport_timestamps(hrtime_t *start_tstamp,
+ hrtime_t *done_tstamp, stmf_i_scsi_task_t *itask);
static int stmf_irport_compare(const void *void_irport1,
const void *void_irport2);
+static void stmf_create_kstat_rport(stmf_i_remote_port_t *irport);
+static void stmf_destroy_kstat_rport(stmf_i_remote_port_t *irport);
+static int stmf_kstat_rport_update(kstat_t *ksp, int rw);
static stmf_i_remote_port_t *stmf_irport_create(scsi_devid_desc_t *rport_devid);
static void stmf_irport_destroy(stmf_i_remote_port_t *irport);
static stmf_i_remote_port_t *stmf_irport_register(
@@ -179,7 +184,7 @@ extern struct mod_ops mod_driverops;
/* =====[ Tunables ]===== */
/* Internal tracing */
-volatile int stmf_trace_on = 1;
+volatile int stmf_trace_on = 0;
volatile int stmf_trace_buf_size = (1 * 1024 * 1024);
/*
* The reason default task timeout is 75 is because we want the
@@ -192,14 +197,12 @@ volatile int stmf_default_task_timeout = 75;
*/
volatile int stmf_allow_modunload = 0;
-volatile int stmf_max_nworkers = 256;
-volatile int stmf_min_nworkers = 4;
-volatile int stmf_worker_scale_down_delay = 20;
+volatile int stmf_nworkers = 512;
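
The dynamic worker-pool scaling above is replaced by a fixed pool of stmf_nworkers
threads. As a global tunable it can presumably be overridden at boot through the
usual /etc/system module:variable syntax (illustrative value, not part of this
change):

	set stmf:stmf_nworkers = 256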
/* === [ Debugging and fault injection ] === */
#ifdef DEBUG
-volatile uint32_t stmf_drop_task_counter = 0;
-volatile uint32_t stmf_drop_buf_counter = 0;
+volatile int stmf_drop_task_counter = 0;
+volatile int stmf_drop_buf_counter = 0;
#endif
@@ -221,20 +224,13 @@ static enum {
STMF_WORKERS_ENABLING,
STMF_WORKERS_ENABLED
} stmf_workers_state = STMF_WORKERS_DISABLED;
-static int stmf_i_max_nworkers;
-static int stmf_i_min_nworkers;
-static int stmf_nworkers_cur; /* # of workers currently running */
-static int stmf_nworkers_needed; /* # of workers need to be running */
+static kmutex_t stmf_worker_sel_mx;
+volatile uint32_t stmf_nworkers_cur = 0; /* # of workers currently running */
static int stmf_worker_sel_counter = 0;
static uint32_t stmf_cur_ntasks = 0;
-static clock_t stmf_wm_last = 0;
-/*
- * This is equal to stmf_nworkers_cur while we are increasing # workers and
- * stmf_nworkers_needed while we are decreasing the worker count.
- */
+static clock_t stmf_wm_next = 0;
static int stmf_nworkers_accepting_cmds;
static stmf_worker_t *stmf_workers = NULL;
-static clock_t stmf_worker_mgmt_delay = 2;
static clock_t stmf_worker_scale_down_timer = 0;
static int stmf_worker_scale_down_qd = 0;
@@ -300,6 +296,7 @@ _init(void)
trace_buf_size = stmf_trace_buf_size;
trace_buf_curndx = 0;
mutex_init(&trace_buf_lock, NULL, MUTEX_DRIVER, 0);
+ mutex_init(&stmf_worker_sel_mx, NULL, MUTEX_ADAPTIVE, 0);
bzero(&stmf_state, sizeof (stmf_state_t));
/* STMF service is off by default */
stmf_state.stmf_service_running = 0;
@@ -370,6 +367,7 @@ _fini(void)
kmem_free(stmf_trace_buf, stmf_trace_buf_size);
mutex_destroy(&trace_buf_lock);
mutex_destroy(&stmf_state.stmf_lock);
+ mutex_destroy(&stmf_worker_sel_mx);
cv_destroy(&stmf_state.stmf_cv);
return (ret);
}
@@ -1655,10 +1653,13 @@ find_task_from_msgid(uint8_t *lu_id, stmf_ic_msgid_t task_msgid)
mutex_enter(&ilu->ilu_task_lock);
for (itask = ilu->ilu_tasks; itask != NULL;
itask = itask->itask_lu_next) {
+ mutex_enter(&itask->itask_mutex);
if (itask->itask_flags & (ITASK_IN_FREE_LIST |
ITASK_BEING_ABORTED)) {
+ mutex_exit(&itask->itask_mutex);
continue;
}
+ mutex_exit(&itask->itask_mutex);
if (itask->itask_proxy_msg_id == task_msgid) {
break;
}
@@ -1903,16 +1904,15 @@ stmf_proxy_scsi_cmd(scsi_task_t *task, stmf_data_buf_t *dbuf)
* we can recognize this on return since we won't be completing
* the proxied task in that case.
*/
+ mutex_enter(&itask->itask_mutex);
if (task->task_mgmt_function) {
itask->itask_proxy_msg_id |= MSG_ID_TM_BIT;
} else {
- uint32_t new, old;
- do {
- new = old = itask->itask_flags;
- if (new & ITASK_BEING_ABORTED)
- return (STMF_FAILURE);
- new |= ITASK_DEFAULT_HANDLING | ITASK_PROXY_TASK;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&itask->itask_mutex);
+ return (STMF_FAILURE);
+ }
+ itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_PROXY_TASK;
}
if (dbuf) {
ic_cmd_msg = ic_scsi_cmd_msg_alloc(itask->itask_proxy_msg_id,
@@ -1922,6 +1922,7 @@ stmf_proxy_scsi_cmd(scsi_task_t *task, stmf_data_buf_t *dbuf)
ic_cmd_msg = ic_scsi_cmd_msg_alloc(itask->itask_proxy_msg_id,
task, 0, NULL, itask->itask_proxy_msg_id);
}
+ mutex_exit(&itask->itask_mutex);
if (ic_cmd_msg) {
ic_ret = ic_tx_msg(ic_cmd_msg);
if (ic_ret == STMF_IC_MSG_SUCCESS) {
@@ -2525,7 +2526,8 @@ stmf_load_ppd_ioctl(stmf_ppioctl_data_t *ppi, uint64_t *ppi_token,
}
/* Free any existing lists and add this one to the ppd */
- nvlist_free(ppd->ppd_nv);
+ if (ppd->ppd_nv)
+ nvlist_free(ppd->ppd_nv);
ppd->ppd_nv = nv;
/* set the token for writes */
@@ -2597,7 +2599,8 @@ stmf_delete_ppd(stmf_pp_data_t *ppd)
return;
*pppd = ppd->ppd_next;
- nvlist_free(ppd->ppd_nv);
+ if (ppd->ppd_nv)
+ nvlist_free(ppd->ppd_nv);
kmem_free(ppd, ppd->ppd_alloc_size);
}
@@ -2706,6 +2709,8 @@ stmf_delete_all_ppds()
*/
#define STMF_KSTAT_LU_SZ (STMF_GUID_INPUT + 1 + 256)
#define STMF_KSTAT_TGT_SZ (256 * 2 + 16)
+#define STMF_KSTAT_RPORT_DATAMAX (sizeof (stmf_kstat_rport_info_t) / \
+ sizeof (kstat_named_t))
/*
* This array matches the Protocol Identifier in stmf_ioctl.h
@@ -2783,6 +2788,96 @@ stmf_update_kstat_lport_io(scsi_task_t *task, stmf_data_buf_t *dbuf)
}
static void
+stmf_update_kstat_rport_io(scsi_task_t *task, stmf_data_buf_t *dbuf)
+{
+ stmf_i_scsi_session_t *iss;
+ stmf_i_remote_port_t *irport;
+ kstat_io_t *kip;
+
+ iss = task->task_session->ss_stmf_private;
+ irport = iss->iss_irport;
+ if (irport->irport_kstat_io != NULL) {
+ kip = KSTAT_IO_PTR(irport->irport_kstat_io);
+ mutex_enter(irport->irport_kstat_io->ks_lock);
+ STMF_UPDATE_KSTAT_IO(kip, dbuf);
+ mutex_exit(irport->irport_kstat_io->ks_lock);
+ }
+}
+
+static void
+stmf_update_kstat_rport_estat(scsi_task_t *task)
+{
+ stmf_i_scsi_task_t *itask;
+ stmf_i_scsi_session_t *iss;
+ stmf_i_remote_port_t *irport;
+ stmf_kstat_rport_estat_t *ks_estat;
+ hrtime_t lat = 0;
+ uint32_t n = 0;
+
+ itask = task->task_stmf_private;
+ iss = task->task_session->ss_stmf_private;
+ irport = iss->iss_irport;
+
+ if (irport->irport_kstat_estat == NULL)
+ return;
+
+ ks_estat = (stmf_kstat_rport_estat_t *)KSTAT_NAMED_PTR(
+ irport->irport_kstat_estat);
+
+ mutex_enter(irport->irport_kstat_estat->ks_lock);
+
+ if (task->task_flags & TF_READ_DATA)
+ n = atomic_dec_32_nv(&irport->irport_nread_tasks);
+ else if (task->task_flags & TF_WRITE_DATA)
+ n = atomic_dec_32_nv(&irport->irport_nwrite_tasks);
+
+ if (itask->itask_read_xfer > 0) {
+ ks_estat->i_nread_tasks.value.ui64++;
+ lat = stmf_update_rport_timestamps(
+ &irport->irport_rdstart_timestamp,
+ &irport->irport_rddone_timestamp, itask);
+ if (n == 0)
+ ks_estat->i_rport_read_latency.value.ui64 += lat;
+ } else if ((itask->itask_write_xfer > 0) ||
+ (task->task_flags & TF_INITIAL_BURST)) {
+ ks_estat->i_nwrite_tasks.value.ui64++;
+ lat = stmf_update_rport_timestamps(
+ &irport->irport_wrstart_timestamp,
+ &irport->irport_wrdone_timestamp, itask);
+ if (n == 0)
+ ks_estat->i_rport_write_latency.value.ui64 += lat;
+ }
+
+ if (n == 0) {
+ if (task->task_flags & TF_READ_DATA) {
+ irport->irport_rdstart_timestamp = LLONG_MAX;
+ irport->irport_rddone_timestamp = 0;
+ } else if (task->task_flags & TF_WRITE_DATA) {
+ irport->irport_wrstart_timestamp = LLONG_MAX;
+ irport->irport_wrdone_timestamp = 0;
+ }
+ }
+
+ mutex_exit(irport->irport_kstat_estat->ks_lock);
+}
+
+static hrtime_t
+stmf_update_rport_timestamps(hrtime_t *start_tstamp, hrtime_t *done_tstamp,
+ stmf_i_scsi_task_t *itask)
+{
+ *start_tstamp = MIN(*start_tstamp, itask->itask_start_timestamp);
+ if ((*done_tstamp == 0) &&
+ (itask->itask_xfer_done_timestamp == 0)) {
+ *done_tstamp = *start_tstamp;
+ } else {
+ *done_tstamp = MAX(*done_tstamp,
+ itask->itask_xfer_done_timestamp);
+ }
+
+ return (*done_tstamp - *start_tstamp);
+}
+
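
A worked example for the window computation above, with hypothetical values: three
overlapping reads starting at t = 10, 12, 14 and completing at t = 20, 25, 30 (ns):

	/*
	 * irport_rdstart_timestamp = MIN(10, 12, 14) = 10
	 * irport_rddone_timestamp  = MAX(20, 25, 30) = 30
	 * window added at n == 0   = 30 - 10 = 20
	 */

so overlapping tasks are charged once as a single busy interval rather than having
their per-task latencies summed.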
+static void
stmf_update_kstat_lu_io(scsi_task_t *task, stmf_data_buf_t *dbuf)
{
stmf_i_lu_t *ilu;
@@ -3443,6 +3538,8 @@ stmf_irport_create(scsi_devid_desc_t *rport_devid)
sizeof (scsi_devid_desc_t) + rport_devid->ident_length - 1);
irport->irport_refcnt = 1;
mutex_init(&irport->irport_mutex, NULL, MUTEX_DEFAULT, NULL);
+ irport->irport_rdstart_timestamp = LLONG_MAX;
+ irport->irport_wrstart_timestamp = LLONG_MAX;
return (irport);
}
@@ -3450,12 +3547,132 @@ stmf_irport_create(scsi_devid_desc_t *rport_devid)
static void
stmf_irport_destroy(stmf_i_remote_port_t *irport)
{
+ stmf_destroy_kstat_rport(irport);
id_free(stmf_state.stmf_irport_inst_space, irport->irport_instance);
mutex_destroy(&irport->irport_mutex);
kmem_free(irport, sizeof (*irport) + sizeof (scsi_devid_desc_t) +
irport->irport_id->ident_length - 1);
}
+static void
+stmf_create_kstat_rport(stmf_i_remote_port_t *irport)
+{
+ scsi_devid_desc_t *id = irport->irport_id;
+ char ks_nm[KSTAT_STRLEN];
+ stmf_kstat_rport_info_t *ks_info;
+ stmf_kstat_rport_estat_t *ks_estat;
+ char *ident = NULL;
+
+ ks_info = kmem_zalloc(sizeof (*ks_info), KM_NOSLEEP);
+ if (ks_info == NULL)
+ goto err_out;
+
+ (void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_%"PRIxPTR"",
+ (uintptr_t)irport);
+ irport->irport_kstat_info = kstat_create(STMF_MODULE_NAME, 0,
+ ks_nm, "misc", KSTAT_TYPE_NAMED,
+ STMF_KSTAT_RPORT_DATAMAX - STMF_RPORT_INFO_LIMIT,
+ KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
+ if (irport->irport_kstat_info == NULL) {
+ kmem_free(ks_info, sizeof (*ks_info));
+ goto err_out;
+ }
+
+ irport->irport_kstat_info->ks_data = ks_info;
+ irport->irport_kstat_info->ks_private = irport;
+ irport->irport_kstat_info->ks_update = stmf_kstat_rport_update;
+ ident = kmem_alloc(id->ident_length + 1, KM_NOSLEEP);
+ if (ident == NULL) {
+ kstat_delete(irport->irport_kstat_info);
+ irport->irport_kstat_info = NULL;
+ kmem_free(ks_info, sizeof (*ks_info));
+ goto err_out;
+ }
+
+ (void) memcpy(ident, id->ident, id->ident_length);
+ ident[id->ident_length] = '\0';
+ kstat_named_init(&ks_info->i_rport_name, "name", KSTAT_DATA_STRING);
+ kstat_named_init(&ks_info->i_protocol, "protocol",
+ KSTAT_DATA_STRING);
+
+ kstat_named_setstr(&ks_info->i_rport_name, ident);
+ kstat_named_setstr(&ks_info->i_protocol,
+ protocol_ident[irport->irport_id->protocol_id]);
+ irport->irport_kstat_info->ks_lock = &irport->irport_mutex;
+ irport->irport_info_dirty = B_TRUE;
+ kstat_install(irport->irport_kstat_info);
+
+ (void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_io_%"PRIxPTR"",
+ (uintptr_t)irport);
+ irport->irport_kstat_io = kstat_create(STMF_MODULE_NAME, 0, ks_nm,
+ "io", KSTAT_TYPE_IO, 1, 0);
+ if (irport->irport_kstat_io == NULL)
+ goto err_out;
+
+ irport->irport_kstat_io->ks_lock = &irport->irport_mutex;
+ kstat_install(irport->irport_kstat_io);
+
+ (void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_st_%"PRIxPTR"",
+ (uintptr_t)irport);
+ irport->irport_kstat_estat = kstat_create(STMF_MODULE_NAME, 0, ks_nm,
+ "misc", KSTAT_TYPE_NAMED,
+ sizeof (*ks_estat) / sizeof (kstat_named_t), 0);
+ if (irport->irport_kstat_estat == NULL)
+ goto err_out;
+
+ ks_estat = (stmf_kstat_rport_estat_t *)KSTAT_NAMED_PTR(
+ irport->irport_kstat_estat);
+ kstat_named_init(&ks_estat->i_rport_read_latency,
+ "rlatency", KSTAT_DATA_UINT64);
+ kstat_named_init(&ks_estat->i_rport_write_latency,
+ "wlatency", KSTAT_DATA_UINT64);
+ kstat_named_init(&ks_estat->i_nread_tasks, "rntasks",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ks_estat->i_nwrite_tasks, "wntasks",
+ KSTAT_DATA_UINT64);
+ irport->irport_kstat_estat->ks_lock = &irport->irport_mutex;
+ kstat_install(irport->irport_kstat_estat);
+
+ return;
+
+err_out:
+	/* Clamp the copy so a long identifier cannot overrun ks_nm */
+	(void) memcpy(ks_nm, id->ident, MIN(KSTAT_STRLEN - 1,
+	    id->ident_length));
+	ks_nm[MIN(KSTAT_STRLEN - 1, id->ident_length)] = '\0';
+ cmn_err(CE_WARN, "STMF: remote port kstat creation failed: %s", ks_nm);
+}
+
+static void
+stmf_destroy_kstat_rport(stmf_i_remote_port_t *irport)
+{
+ if (irport->irport_kstat_io != NULL) {
+ kstat_delete(irport->irport_kstat_io);
+ }
+ if (irport->irport_kstat_estat != NULL) {
+ kstat_delete(irport->irport_kstat_estat);
+ }
+ if (irport->irport_kstat_info != NULL) {
+ stmf_kstat_rport_info_t *ks_info;
+ kstat_named_t *knp;
+ void *ptr;
+ int i;
+
+ ks_info = (stmf_kstat_rport_info_t *)KSTAT_NAMED_PTR(
+ irport->irport_kstat_info);
+ kstat_delete(irport->irport_kstat_info);
+ ptr = KSTAT_NAMED_STR_PTR(&ks_info->i_rport_name);
+ kmem_free(ptr, KSTAT_NAMED_STR_BUFLEN(&ks_info->i_rport_name));
+
+ for (i = 0, knp = ks_info->i_rport_uinfo;
+ i < STMF_RPORT_INFO_LIMIT; i++, knp++) {
+ ptr = KSTAT_NAMED_STR_PTR(knp);
+ if (ptr != NULL)
+ kmem_free(ptr, KSTAT_NAMED_STR_BUFLEN(knp));
+ }
+ kmem_free(ks_info, sizeof (*ks_info));
+ }
+}
+
static stmf_i_remote_port_t *
stmf_irport_register(scsi_devid_desc_t *rport_devid)
{
@@ -3478,6 +3695,7 @@ stmf_irport_register(scsi_devid_desc_t *rport_devid)
return (NULL);
}
+ stmf_create_kstat_rport(irport);
avl_add(&stmf_state.stmf_irportlist, irport);
mutex_exit(&stmf_state.stmf_lock);
@@ -3601,6 +3819,109 @@ stmf_register_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss)
return (STMF_SUCCESS);
}
+stmf_status_t
+stmf_add_rport_info(stmf_scsi_session_t *ss,
+ const char *prop_name, const char *prop_value)
+{
+ stmf_i_scsi_session_t *iss = ss->ss_stmf_private;
+ stmf_i_remote_port_t *irport = iss->iss_irport;
+ kstat_named_t *knp;
+ char *s;
+ int i;
+
+ s = strdup(prop_value);
+
+ mutex_enter(irport->irport_kstat_info->ks_lock);
+	/* Make sure the caller doesn't try to add an already existing property */
+ knp = KSTAT_NAMED_PTR(irport->irport_kstat_info);
+ for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) {
+ if (KSTAT_NAMED_STR_PTR(knp) == NULL)
+ break;
+
+ ASSERT(strcmp(knp->name, prop_name) != 0);
+ }
+
+ if (i == STMF_KSTAT_RPORT_DATAMAX) {
+ mutex_exit(irport->irport_kstat_info->ks_lock);
+ kmem_free(s, strlen(s) + 1);
+ return (STMF_FAILURE);
+ }
+
+ irport->irport_info_dirty = B_TRUE;
+ kstat_named_init(knp, prop_name, KSTAT_DATA_STRING);
+ kstat_named_setstr(knp, s);
+ mutex_exit(irport->irport_kstat_info->ks_lock);
+
+ return (STMF_SUCCESS);
+}
+
+void
+stmf_remove_rport_info(stmf_scsi_session_t *ss,
+ const char *prop_name)
+{
+ stmf_i_scsi_session_t *iss = ss->ss_stmf_private;
+ stmf_i_remote_port_t *irport = iss->iss_irport;
+ kstat_named_t *knp;
+ char *s;
+ int i;
+ uint32_t len;
+
+ mutex_enter(irport->irport_kstat_info->ks_lock);
+ knp = KSTAT_NAMED_PTR(irport->irport_kstat_info);
+ for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) {
+ if ((knp->name != NULL) && (strcmp(knp->name, prop_name) == 0))
+ break;
+ }
+
+ if (i == STMF_KSTAT_RPORT_DATAMAX) {
+ mutex_exit(irport->irport_kstat_info->ks_lock);
+ return;
+ }
+
+ s = KSTAT_NAMED_STR_PTR(knp);
+ len = KSTAT_NAMED_STR_BUFLEN(knp);
+
+ for (; i < STMF_KSTAT_RPORT_DATAMAX - 1; i++, knp++) {
+ kstat_named_init(knp, knp[1].name, KSTAT_DATA_STRING);
+ kstat_named_setstr(knp, KSTAT_NAMED_STR_PTR(&knp[1]));
+ }
+ kstat_named_init(knp, "", KSTAT_DATA_STRING);
+
+ irport->irport_info_dirty = B_TRUE;
+ mutex_exit(irport->irport_kstat_info->ks_lock);
+ kmem_free(s, len);
+}
+
+static int
+stmf_kstat_rport_update(kstat_t *ksp, int rw)
+{
+ stmf_i_remote_port_t *irport = ksp->ks_private;
+ kstat_named_t *knp;
+ uint_t ndata = 0;
+ size_t dsize = 0;
+ int i;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ if (!irport->irport_info_dirty)
+ return (0);
+
+ knp = KSTAT_NAMED_PTR(ksp);
+ for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) {
+ if (KSTAT_NAMED_STR_PTR(knp) == NULL)
+ break;
+ ndata++;
+ dsize += KSTAT_NAMED_STR_BUFLEN(knp);
+ }
+
+ ksp->ks_ndata = ndata;
+ ksp->ks_data_size = sizeof (kstat_named_t) * ndata + dsize;
+ irport->irport_info_dirty = B_FALSE;
+
+ return (0);
+}
+
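
A minimal usage sketch for the two interfaces above, assuming a port provider that
holds a registered session ss; the property name and value are illustrative only:

	/* Sketch: publish, and later retract, a property on the rport kstat. */
	if (stmf_add_rport_info(ss, "alias", "initiator-42") != STMF_SUCCESS)
		cmn_err(CE_NOTE, "rport info kstat is full");
	/* ... */
	stmf_remove_rport_info(ss, "alias");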
void
stmf_deregister_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss)
{
@@ -3610,6 +3931,10 @@ stmf_deregister_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss)
int found = 0;
stmf_ic_msg_t *ic_session_dereg;
stmf_status_t ic_ret = STMF_FAILURE;
+ stmf_lun_map_t *sm;
+ stmf_i_lu_t *ilu;
+ uint16_t n;
+ stmf_lun_map_ent_t *ent;
DTRACE_PROBE2(session__offline, stmf_local_port_t *, lport,
stmf_scsi_session_t *, ss);
@@ -3659,15 +3984,46 @@ try_dereg_ss_again:
ilport->ilport_nsessions--;
stmf_irport_deregister(iss->iss_irport);
- (void) stmf_session_destroy_lun_map(ilport, iss);
+ /*
+	 * Detach the map here to avoid conflicting with threads that
+	 * update the session's map while holding only stmf_lock.
+ */
+ sm = iss->iss_sm;
+ iss->iss_sm = NULL;
+ iss->iss_hg = NULL;
+
rw_exit(&ilport->ilport_lock);
- mutex_exit(&stmf_state.stmf_lock);
+
+ if (sm->lm_nentries) {
+ for (n = 0; n < sm->lm_nentries; n++) {
+ if ((ent = (stmf_lun_map_ent_t *)sm->lm_plus[n])
+ != NULL) {
+ if (ent->ent_itl_datap) {
+ stmf_do_itl_dereg(ent->ent_lu,
+ ent->ent_itl_datap,
+ STMF_ITL_REASON_IT_NEXUS_LOSS);
+ }
+ ilu = (stmf_i_lu_t *)
+ ent->ent_lu->lu_stmf_private;
+ atomic_dec_32(&ilu->ilu_ref_cnt);
+ kmem_free(sm->lm_plus[n],
+ sizeof (stmf_lun_map_ent_t));
+ }
+ }
+ kmem_free(sm->lm_plus,
+ sizeof (stmf_lun_map_ent_t *) * sm->lm_nentries);
+ }
+ kmem_free(sm, sizeof (*sm));
if (iss->iss_flags & ISS_NULL_TPTID) {
stmf_remote_port_free(ss->ss_rport);
}
+
+ mutex_exit(&stmf_state.stmf_lock);
}
+
+
stmf_i_scsi_session_t *
stmf_session_id_to_issptr(uint64_t session_id, int stay_locked)
{
@@ -3863,54 +4219,6 @@ dai_scan_done:
return (STMF_SUCCESS);
}
-stmf_status_t
-stmf_get_itl_handle(stmf_lu_t *lu, uint8_t *lun, stmf_scsi_session_t *ss,
- uint64_t session_id, void **itl_handle_retp)
-{
- stmf_i_scsi_session_t *iss;
- stmf_lun_map_ent_t *ent;
- stmf_lun_map_t *lm;
- stmf_status_t ret;
- int i;
- uint16_t n;
-
- if (ss == NULL) {
- iss = stmf_session_id_to_issptr(session_id, 1);
- if (iss == NULL)
- return (STMF_NOT_FOUND);
- } else {
- iss = (stmf_i_scsi_session_t *)ss->ss_stmf_private;
- rw_enter(iss->iss_lockp, RW_WRITER);
- }
-
- ent = NULL;
- if (lun == NULL) {
- lm = iss->iss_sm;
- for (i = 0; i < lm->lm_nentries; i++) {
- if (lm->lm_plus[i] == NULL)
- continue;
- ent = (stmf_lun_map_ent_t *)lm->lm_plus[i];
- if (ent->ent_lu == lu)
- break;
- }
- } else {
- n = ((uint16_t)lun[1] | (((uint16_t)(lun[0] & 0x3F)) << 8));
- ent = (stmf_lun_map_ent_t *)
- stmf_get_ent_from_map(iss->iss_sm, n);
- if (lu && (ent->ent_lu != lu))
- ent = NULL;
- }
- if (ent && ent->ent_itl_datap) {
- *itl_handle_retp = ent->ent_itl_datap->itl_handle;
- ret = STMF_SUCCESS;
- } else {
- ret = STMF_NOT_FOUND;
- }
-
- rw_exit(iss->iss_lockp);
- return (ret);
-}
-
stmf_data_buf_t *
stmf_alloc_dbuf(scsi_task_t *task, uint32_t size, uint32_t *pminsize,
uint32_t flags)
@@ -4044,13 +4352,29 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss,
} else {
lu = lun_map_ent->ent_lu;
}
+
ilu = lu->lu_stmf_private;
if (ilu->ilu_flags & ILU_RESET_ACTIVE) {
rw_exit(iss->iss_lockp);
return (NULL);
}
- ASSERT(lu == dlun0 || (ilu->ilu_state != STMF_STATE_OFFLINING &&
- ilu->ilu_state != STMF_STATE_OFFLINE));
+
+ /*
+	 * If the LUN is being offlined or is offline then only commands
+	 * that query the LUN are allowed.  These are handled in
+	 * stmf via the dlun0 vector.  It is possible that a race condition
+	 * will cause other commands to arrive while the LUN is in the
+ * process of being offlined. Check for those and just let the
+ * protocol stack handle the error.
+ */
+ if ((ilu->ilu_state == STMF_STATE_OFFLINING) ||
+ (ilu->ilu_state == STMF_STATE_OFFLINE)) {
+ if (lu != dlun0) {
+ rw_exit(iss->iss_lockp);
+ return (NULL);
+ }
+ }
+
do {
if (ilu->ilu_free_tasks == NULL) {
new_task = 1;
@@ -4098,15 +4422,6 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss,
return (NULL);
}
task->task_lu = lu;
- l = task->task_lun_no;
- l[0] = lun[0];
- l[1] = lun[1];
- l[2] = lun[2];
- l[3] = lun[3];
- l[4] = lun[4];
- l[5] = lun[5];
- l[6] = lun[6];
- l[7] = lun[7];
task->task_cdb = (uint8_t *)task->task_port_private;
if ((ulong_t)(task->task_cdb) & 7ul) {
task->task_cdb = (uint8_t *)(((ulong_t)
@@ -4115,7 +4430,25 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss,
itask = (stmf_i_scsi_task_t *)task->task_stmf_private;
itask->itask_cdb_buf_size = cdb_length;
mutex_init(&itask->itask_audit_mutex, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&itask->itask_mutex, NULL, MUTEX_DRIVER, NULL);
}
+
+ /*
+ * Since a LUN can be mapped as different LUN ids to different initiator
+	 * groups, we need to set the LUN id for a new task and reset it for
+	 * a reused task.
+ */
+ l = task->task_lun_no;
+ l[0] = lun[0];
+ l[1] = lun[1];
+ l[2] = lun[2];
+ l[3] = lun[3];
+ l[4] = lun[4];
+ l[5] = lun[5];
+ l[6] = lun[6];
+ l[7] = lun[7];
+
+ mutex_enter(&itask->itask_mutex);
task->task_session = ss;
task->task_lport = lport;
task->task_cdb_length = cdb_length_in;
@@ -4125,6 +4458,9 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss,
itask->itask_lport_read_time = itask->itask_lport_write_time = 0;
itask->itask_read_xfer = itask->itask_write_xfer = 0;
itask->itask_audit_index = 0;
+ bzero(&itask->itask_audit_records[0],
+ sizeof (stmf_task_audit_rec_t) * ITASK_TASK_AUDIT_DEPTH);
+ mutex_exit(&itask->itask_mutex);
if (new_task) {
if (lu->lu_task_alloc(task) != STMF_SUCCESS) {
@@ -4165,6 +4501,7 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss,
return (task);
}
+/* ARGSUSED */
static void
stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss)
{
@@ -4173,8 +4510,19 @@ stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss)
stmf_i_lu_t *ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private;
ASSERT(rw_lock_held(iss->iss_lockp));
+ ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0);
+ ASSERT((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0);
+ ASSERT((itask->itask_flags & ITASK_IN_TRANSITION) == 0);
+ ASSERT((itask->itask_flags & ITASK_KNOWN_TO_LU) == 0);
+ ASSERT(mutex_owned(&itask->itask_mutex));
+
itask->itask_flags = ITASK_IN_FREE_LIST;
+ itask->itask_ncmds = 0;
itask->itask_proxy_msg_id = 0;
+ atomic_dec_32(itask->itask_ilu_task_cntr);
+ itask->itask_worker_next = NULL;
+ mutex_exit(&itask->itask_mutex);
+
mutex_enter(&ilu->ilu_task_lock);
itask->itask_lu_free_next = ilu->ilu_free_tasks;
ilu->ilu_free_tasks = itask;
@@ -4182,7 +4530,6 @@ stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss)
if (ilu->ilu_ntasks == ilu->ilu_ntasks_free)
cv_signal(&ilu->ilu_offline_pending_cv);
mutex_exit(&ilu->ilu_task_lock);
- atomic_dec_32(itask->itask_ilu_task_cntr);
}
void
@@ -4259,7 +4606,40 @@ stmf_check_freetask()
}
}
-void
+/*
+ * Since this method is looking to find tasks that are stuck, lost, or senile
+ * it should be more willing to give up scanning during this time period. This
+ * is why mutex_tryenter is now used instead of the standard mutex_enter.
+ * There has been at least one case where the following occurred.
+ *
+ * 1) The iscsit_deferred() method is trying to register a session and
+ * needs the global lock which is held.
+ * 2) Another thread which holds the global lock is trying to deregister a
+ * session and needs the session lock.
+ * 3) A third thread is allocating a stmf task that has grabbed the session
+ * lock and is trying to grab the lun task lock.
+ * 4) There's a timeout thread that has the lun task lock and is trying to grab
+ * a specific task lock.
+ * 5) The thread that has the task lock is waiting for the ref count to go to
+ * zero.
+ * 6) There's a task that would drop the count to zero, but it's in the task
+ *    queue waiting to run and is stuck because the thread in #1 is currently
+ *    blocked.
+ *
+ * This method is number 4 in the above chain of events. Had this code
+ * originally used mutex_tryenter, the chain would have been broken and the
+ * system wouldn't have hung. That is why this method now uses
+ * mutex_tryenter.
+ */
+/* ---- Only one thread calls stmf_do_ilu_timeouts so no lock required ---- */
+typedef struct stmf_bailout_cnt {
+ int no_ilu_lock;
+ int no_task_lock;
+ int tasks_checked;
+} stmf_bailout_cnt_t;
+
+stmf_bailout_cnt_t stmf_bailout;
+
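
These bail-out counters can presumably be inspected on a live system with mdb
(illustrative invocation, assuming kernel symbols are available):

	# mdb -k
	> stmf_bailout::print stmf_bailout_cnt_t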
+static void
stmf_do_ilu_timeouts(stmf_i_lu_t *ilu)
{
clock_t l = ddi_get_lbolt();
@@ -4268,11 +4648,21 @@ stmf_do_ilu_timeouts(stmf_i_lu_t *ilu)
scsi_task_t *task;
uint32_t to;
- mutex_enter(&ilu->ilu_task_lock);
+ if (mutex_tryenter(&ilu->ilu_task_lock) == 0) {
+ stmf_bailout.no_ilu_lock++;
+ return;
+ }
+
for (itask = ilu->ilu_tasks; itask != NULL;
itask = itask->itask_lu_next) {
+ if (mutex_tryenter(&itask->itask_mutex) == 0) {
+ stmf_bailout.no_task_lock++;
+ continue;
+ }
+ stmf_bailout.tasks_checked++;
if (itask->itask_flags & (ITASK_IN_FREE_LIST |
ITASK_BEING_ABORTED)) {
+ mutex_exit(&itask->itask_mutex);
continue;
}
task = itask->itask_task;
@@ -4280,8 +4670,12 @@ stmf_do_ilu_timeouts(stmf_i_lu_t *ilu)
to = stmf_default_task_timeout;
else
to = task->task_timeout;
- if ((itask->itask_start_time + (to * ps)) > l)
+
+ if ((itask->itask_start_time + (to * ps)) > l) {
+ mutex_exit(&itask->itask_mutex);
continue;
+ }
+ mutex_exit(&itask->itask_mutex);
stmf_abort(STMF_QUEUE_TASK_ABORT, task,
STMF_TIMEOUT, NULL);
}
@@ -4336,11 +4730,14 @@ stmf_task_lu_killall(stmf_lu_t *lu, scsi_task_t *tm_task, stmf_status_t s)
stmf_i_scsi_task_t *itask;
mutex_enter(&ilu->ilu_task_lock);
-
for (itask = ilu->ilu_tasks; itask != NULL;
itask = itask->itask_lu_next) {
- if (itask->itask_flags & ITASK_IN_FREE_LIST)
+ mutex_enter(&itask->itask_mutex);
+ if (itask->itask_flags & ITASK_IN_FREE_LIST) {
+ mutex_exit(&itask->itask_mutex);
continue;
+ }
+ mutex_exit(&itask->itask_mutex);
if (itask->itask_task == tm_task)
continue;
stmf_abort(STMF_QUEUE_TASK_ABORT, itask->itask_task, s, NULL);
@@ -4396,9 +4793,12 @@ stmf_task_free(scsi_task_t *task)
task->task_stmf_private;
stmf_i_scsi_session_t *iss = (stmf_i_scsi_session_t *)
task->task_session->ss_stmf_private;
+ stmf_lu_t *lu = task->task_lu;
stmf_task_audit(itask, TE_TASK_FREE, CMD_OR_IOF_NA, NULL);
-
+ ASSERT(mutex_owned(&itask->itask_mutex));
+ if ((lu != NULL) && (lu->lu_task_done != NULL))
+ lu->lu_task_done(task);
stmf_free_task_bufs(itask, lport);
stmf_itl_task_done(itask);
DTRACE_PROBE2(stmf__task__end, scsi_task_t *, task,
@@ -4412,7 +4812,14 @@ stmf_task_free(scsi_task_t *task)
}
}
+ /*
+	 * To prevent a deadlock condition we must release the itask_mutex,
+	 * grab a reader lock on iss_lockp, and then reacquire the itask_mutex.
+ */
+ mutex_exit(&itask->itask_mutex);
rw_enter(iss->iss_lockp, RW_READER);
+ mutex_enter(&itask->itask_mutex);
+
lport->lport_task_free(task);
if (itask->itask_worker) {
atomic_dec_32(&stmf_cur_ntasks);
@@ -4433,9 +4840,9 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf)
task->task_stmf_private;
stmf_i_lu_t *ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private;
int nv;
- uint32_t old, new;
+ uint32_t new;
uint32_t ct;
- stmf_worker_t *w, *w1;
+ stmf_worker_t *w;
uint8_t tm;
if (task->task_max_nbufs > 4)
@@ -4445,44 +4852,28 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf)
ct = atomic_inc_32_nv(&stmf_cur_ntasks);
/* Select the next worker using round robin */
- nv = (int)atomic_inc_32_nv((uint32_t *)&stmf_worker_sel_counter);
- if (nv >= stmf_nworkers_accepting_cmds) {
- int s = nv;
- do {
- nv -= stmf_nworkers_accepting_cmds;
- } while (nv >= stmf_nworkers_accepting_cmds);
- if (nv < 0)
- nv = 0;
- /* Its ok if this cas fails */
- (void) atomic_cas_32((uint32_t *)&stmf_worker_sel_counter,
- s, nv);
- }
- w = &stmf_workers[nv];
+ mutex_enter(&stmf_worker_sel_mx);
+ stmf_worker_sel_counter++;
+ if (stmf_worker_sel_counter >= stmf_nworkers)
+ stmf_worker_sel_counter = 0;
+ nv = stmf_worker_sel_counter;
- /*
- * A worker can be pinned by interrupt. So select the next one
- * if it has lower load.
- */
- if ((nv + 1) >= stmf_nworkers_accepting_cmds) {
- w1 = stmf_workers;
- } else {
- w1 = &stmf_workers[nv + 1];
+ /* if the selected worker is not idle then bump to the next worker */
+ if (stmf_workers[nv].worker_queue_depth > 0) {
+ stmf_worker_sel_counter++;
+ if (stmf_worker_sel_counter >= stmf_nworkers)
+ stmf_worker_sel_counter = 0;
+ nv = stmf_worker_sel_counter;
}
- if (w1->worker_queue_depth < w->worker_queue_depth)
- w = w1;
+ mutex_exit(&stmf_worker_sel_mx);
+ w = &stmf_workers[nv];
+
+ mutex_enter(&itask->itask_mutex);
mutex_enter(&w->worker_lock);
- if (((w->worker_flags & STMF_WORKER_STARTED) == 0) ||
- (w->worker_flags & STMF_WORKER_TERMINATE)) {
- /*
- * Maybe we are in the middle of a change. Just go to
- * the 1st worker.
- */
- mutex_exit(&w->worker_lock);
- w = stmf_workers;
- mutex_enter(&w->worker_lock);
- }
+
itask->itask_worker = w;
+
/*
* Track max system load inside the worker as we already have the
* worker lock (no point implementing another lock). The service
@@ -4492,39 +4883,33 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf)
if (w->worker_max_sys_qdepth_pu < ct)
w->worker_max_sys_qdepth_pu = ct;
- do {
- old = new = itask->itask_flags;
- new |= ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE;
- if (task->task_mgmt_function) {
- tm = task->task_mgmt_function;
- if ((tm == TM_TARGET_RESET) ||
- (tm == TM_TARGET_COLD_RESET) ||
- (tm == TM_TARGET_WARM_RESET)) {
- new |= ITASK_DEFAULT_HANDLING;
- }
- } else if (task->task_cdb[0] == SCMD_REPORT_LUNS) {
+ new = itask->itask_flags;
+ new |= ITASK_KNOWN_TO_TGT_PORT;
+ if (task->task_mgmt_function) {
+ tm = task->task_mgmt_function;
+ if ((tm == TM_TARGET_RESET) ||
+ (tm == TM_TARGET_COLD_RESET) ||
+ (tm == TM_TARGET_WARM_RESET)) {
new |= ITASK_DEFAULT_HANDLING;
}
- new &= ~ITASK_IN_TRANSITION;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ } else if (task->task_cdb[0] == SCMD_REPORT_LUNS) {
+ new |= ITASK_DEFAULT_HANDLING;
+ }
+ new &= ~ITASK_IN_TRANSITION;
+ itask->itask_flags = new;
stmf_itl_task_start(itask);
- itask->itask_worker_next = NULL;
- if (w->worker_task_tail) {
- w->worker_task_tail->itask_worker_next = itask;
- } else {
- w->worker_task_head = itask;
- }
- w->worker_task_tail = itask;
- if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
- w->worker_max_qdepth_pu = w->worker_queue_depth;
- }
- /* Measure task waitq time */
- itask->itask_waitq_enter_timestamp = gethrtime();
- atomic_inc_32(&w->worker_ref_count);
itask->itask_cmd_stack[0] = ITASK_CMD_NEW_TASK;
itask->itask_ncmds = 1;
+
+ if ((task->task_flags & TF_INITIAL_BURST) &&
+ !(curthread->t_flag & T_INTR_THREAD)) {
+ stmf_update_kstat_lu_io(task, dbuf);
+ stmf_update_kstat_lport_io(task, dbuf);
+ stmf_update_kstat_rport_io(task, dbuf);
+ }
+
stmf_task_audit(itask, TE_TASK_START, CMD_OR_IOF_NA, dbuf);
if (dbuf) {
itask->itask_allocated_buf_map = 1;
@@ -4535,13 +4920,10 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf)
itask->itask_dbufs[0] = NULL;
}
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) {
- w->worker_signal_timestamp = gethrtime();
- DTRACE_PROBE2(worker__signal, stmf_worker_t *, w,
- scsi_task_t *, task);
- cv_signal(&w->worker_cv);
- }
+ STMF_ENQUEUE_ITASK(w, itask);
+
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
/*
* This can only happen if during stmf_task_alloc(), ILU_RESET_ACTIVE
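
STMF_ENQUEUE_ITASK() replaces the open-coded enqueue sequences deleted throughout
this file. Its definition is not visible in this hunk (it lands in stmf_impl.h); a
plausible reconstruction from the deleted lines, shown only as a sketch:

	#define	STMF_ENQUEUE_ITASK(w, i)	{			\
		ASSERT(mutex_owned(&(i)->itask_mutex));			\
		(i)->itask_worker_next = NULL;				\
		if ((w)->worker_task_tail)				\
			(w)->worker_task_tail->itask_worker_next = (i);	\
		else							\
			(w)->worker_task_head = (i);			\
		(w)->worker_task_tail = (i);				\
		if (++((w)->worker_queue_depth) >			\
		    (w)->worker_max_qdepth_pu)				\
			(w)->worker_max_qdepth_pu =			\
			    (w)->worker_queue_depth;			\
		(i)->itask_waitq_enter_timestamp = gethrtime();		\
		(i)->itask_flags |= ITASK_IN_WORKER_QUEUE;		\
		cv_signal(&(w)->worker_cv);				\
	}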
@@ -4597,26 +4979,30 @@ stmf_xfer_data(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t ioflags)
stmf_task_audit(itask, TE_XFER_START, ioflags, dbuf);
+ mutex_enter(&itask->itask_mutex);
if (ioflags & STMF_IOF_LU_DONE) {
- uint32_t new, old;
- do {
- new = old = itask->itask_flags;
- if (new & ITASK_BEING_ABORTED)
- return (STMF_ABORTED);
- new &= ~ITASK_KNOWN_TO_LU;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&itask->itask_mutex);
+ return (STMF_ABORTED);
+ }
+ itask->itask_flags &= ~ITASK_KNOWN_TO_LU;
}
- if (itask->itask_flags & ITASK_BEING_ABORTED)
+ if ((itask->itask_flags & ITASK_BEING_ABORTED) != 0) {
+ mutex_exit(&itask->itask_mutex);
return (STMF_ABORTED);
+ }
+ mutex_exit(&itask->itask_mutex);
+
#ifdef DEBUG
if (!(ioflags & STMF_IOF_STATS_ONLY) && stmf_drop_buf_counter > 0) {
- if (atomic_dec_32_nv(&stmf_drop_buf_counter) == 1)
+ if (atomic_dec_32_nv((uint32_t *)&stmf_drop_buf_counter) == 1)
return (STMF_SUCCESS);
}
#endif
stmf_update_kstat_lu_io(task, dbuf);
stmf_update_kstat_lport_io(task, dbuf);
+ stmf_update_kstat_rport_io(task, dbuf);
stmf_lport_xfer_start(itask, dbuf);
if (ioflags & STMF_IOF_STATS_ONLY) {
stmf_lport_xfer_done(itask, dbuf);
@@ -4646,7 +5032,7 @@ stmf_data_xfer_done(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t iof)
(stmf_i_scsi_task_t *)task->task_stmf_private;
stmf_i_local_port_t *ilport;
stmf_worker_t *w = itask->itask_worker;
- uint32_t new, old;
+ uint32_t new;
uint8_t update_queue_flags, free_it, queue_it;
stmf_lport_xfer_done(itask, dbuf);
@@ -4667,74 +5053,67 @@ stmf_data_xfer_done(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t iof)
return;
}
+ mutex_enter(&itask->itask_mutex);
mutex_enter(&w->worker_lock);
- do {
- new = old = itask->itask_flags;
- if (old & ITASK_BEING_ABORTED) {
- mutex_exit(&w->worker_lock);
- return;
- }
+ new = itask->itask_flags;
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
+ return;
+ }
+ free_it = 0;
+ if (iof & STMF_IOF_LPORT_DONE) {
+ new &= ~ITASK_KNOWN_TO_TGT_PORT;
+ task->task_completion_status = dbuf->db_xfer_status;
+ free_it = 1;
+ }
+ /*
+ * If the task is known to LU then queue it. But if
+ * it is already queued (multiple completions) then
+ * just update the buffer information by grabbing the
+ * worker lock. If the task is not known to LU,
+ * completed/aborted, then see if we need to
+ * free this task.
+ */
+ if (itask->itask_flags & ITASK_KNOWN_TO_LU) {
free_it = 0;
- if (iof & STMF_IOF_LPORT_DONE) {
- new &= ~ITASK_KNOWN_TO_TGT_PORT;
- task->task_completion_status = dbuf->db_xfer_status;
- free_it = 1;
- }
- /*
- * If the task is known to LU then queue it. But if
- * it is already queued (multiple completions) then
- * just update the buffer information by grabbing the
- * worker lock. If the task is not known to LU,
- * completed/aborted, then see if we need to
- * free this task.
- */
- if (old & ITASK_KNOWN_TO_LU) {
- free_it = 0;
- update_queue_flags = 1;
- if (old & ITASK_IN_WORKER_QUEUE) {
- queue_it = 0;
- } else {
- queue_it = 1;
- new |= ITASK_IN_WORKER_QUEUE;
- }
- } else {
- update_queue_flags = 0;
+ update_queue_flags = 1;
+ if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
queue_it = 0;
+ } else {
+ queue_it = 1;
}
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ } else {
+ update_queue_flags = 0;
+ queue_it = 0;
+ }
+ itask->itask_flags = new;
if (update_queue_flags) {
uint8_t cmd = (dbuf->db_handle << 5) | ITASK_CMD_DATA_XFER_DONE;
+ ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0);
ASSERT(itask->itask_ncmds < ITASK_MAX_NCMDS);
+
itask->itask_cmd_stack[itask->itask_ncmds++] = cmd;
if (queue_it) {
- itask->itask_worker_next = NULL;
- if (w->worker_task_tail) {
- w->worker_task_tail->itask_worker_next = itask;
- } else {
- w->worker_task_head = itask;
- }
- w->worker_task_tail = itask;
- /* Measure task waitq time */
- itask->itask_waitq_enter_timestamp = gethrtime();
- if (++(w->worker_queue_depth) >
- w->worker_max_qdepth_pu) {
- w->worker_max_qdepth_pu = w->worker_queue_depth;
- }
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
- cv_signal(&w->worker_cv);
+ STMF_ENQUEUE_ITASK(w, itask);
}
+ mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
+ return;
}
- mutex_exit(&w->worker_lock);
+ mutex_exit(&w->worker_lock);
if (free_it) {
if ((itask->itask_flags & (ITASK_KNOWN_TO_LU |
ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE |
ITASK_BEING_ABORTED)) == 0) {
stmf_task_free(task);
+ return;
}
}
+ mutex_exit(&itask->itask_mutex);
}
stmf_status_t
@@ -4747,22 +5126,25 @@ stmf_send_scsi_status(scsi_task_t *task, uint32_t ioflags)
stmf_task_audit(itask, TE_SEND_STATUS, ioflags, NULL);
+ mutex_enter(&itask->itask_mutex);
if (ioflags & STMF_IOF_LU_DONE) {
- uint32_t new, old;
- do {
- new = old = itask->itask_flags;
- if (new & ITASK_BEING_ABORTED)
- return (STMF_ABORTED);
- new &= ~ITASK_KNOWN_TO_LU;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&itask->itask_mutex);
+ return (STMF_ABORTED);
+ }
+ itask->itask_flags &= ~ITASK_KNOWN_TO_LU;
}
if (!(itask->itask_flags & ITASK_KNOWN_TO_TGT_PORT)) {
+ mutex_exit(&itask->itask_mutex);
return (STMF_SUCCESS);
}
- if (itask->itask_flags & ITASK_BEING_ABORTED)
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&itask->itask_mutex);
return (STMF_ABORTED);
+ }
+ mutex_exit(&itask->itask_mutex);
if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
task->task_status_ctrl = 0;
@@ -4790,66 +5172,57 @@ stmf_send_status_done(scsi_task_t *task, stmf_status_t s, uint32_t iof)
stmf_i_scsi_task_t *itask =
(stmf_i_scsi_task_t *)task->task_stmf_private;
stmf_worker_t *w = itask->itask_worker;
- uint32_t new, old;
+ uint32_t new;
uint8_t free_it, queue_it;
stmf_task_audit(itask, TE_SEND_STATUS_DONE, iof, NULL);
+ mutex_enter(&itask->itask_mutex);
mutex_enter(&w->worker_lock);
- do {
- new = old = itask->itask_flags;
- if (old & ITASK_BEING_ABORTED) {
- mutex_exit(&w->worker_lock);
- return;
- }
+ new = itask->itask_flags;
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
+ return;
+ }
+ free_it = 0;
+ if (iof & STMF_IOF_LPORT_DONE) {
+ new &= ~ITASK_KNOWN_TO_TGT_PORT;
+ free_it = 1;
+ }
+ /*
+ * If the task is known to LU then queue it. But if
+ * it is already queued (multiple completions) then
+ * just update the buffer information by grabbing the
+ * worker lock. If the task is not known to LU,
+ * completed/aborted, then see if we need to
+ * free this task.
+ */
+ if (itask->itask_flags & ITASK_KNOWN_TO_LU) {
free_it = 0;
- if (iof & STMF_IOF_LPORT_DONE) {
- new &= ~ITASK_KNOWN_TO_TGT_PORT;
- free_it = 1;
- }
- /*
- * If the task is known to LU then queue it. But if
- * it is already queued (multiple completions) then
- * just update the buffer information by grabbing the
- * worker lock. If the task is not known to LU,
- * completed/aborted, then see if we need to
- * free this task.
- */
- if (old & ITASK_KNOWN_TO_LU) {
- free_it = 0;
- queue_it = 1;
- if (old & ITASK_IN_WORKER_QUEUE) {
- cmn_err(CE_PANIC, "status completion received"
- " when task is already in worker queue "
- " task = %p", (void *)task);
- }
- new |= ITASK_IN_WORKER_QUEUE;
- } else {
- queue_it = 0;
+ queue_it = 1;
+ if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
+ cmn_err(CE_PANIC, "status completion received"
+ " when task is already in worker queue "
+ " task = %p", (void *)task);
}
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ } else {
+ queue_it = 0;
+ }
+ itask->itask_flags = new;
task->task_completion_status = s;
-
if (queue_it) {
ASSERT(itask->itask_ncmds < ITASK_MAX_NCMDS);
itask->itask_cmd_stack[itask->itask_ncmds++] =
ITASK_CMD_STATUS_DONE;
- itask->itask_worker_next = NULL;
- if (w->worker_task_tail) {
- w->worker_task_tail->itask_worker_next = itask;
- } else {
- w->worker_task_head = itask;
- }
- w->worker_task_tail = itask;
- /* Measure task waitq time */
- itask->itask_waitq_enter_timestamp = gethrtime();
- if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
- w->worker_max_qdepth_pu = w->worker_queue_depth;
- }
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
- cv_signal(&w->worker_cv);
+
+ STMF_ENQUEUE_ITASK(w, itask);
+ mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
+ return;
}
+
mutex_exit(&w->worker_lock);
if (free_it) {
@@ -4857,12 +5230,14 @@ stmf_send_status_done(scsi_task_t *task, stmf_status_t s, uint32_t iof)
ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE |
ITASK_BEING_ABORTED)) == 0) {
stmf_task_free(task);
+ return;
} else {
cmn_err(CE_PANIC, "LU is done with the task but LPORT "
" is not done, itask %p itask_flags %x",
(void *)itask, itask->itask_flags);
}
}
+ mutex_exit(&itask->itask_mutex);
}
void
@@ -4871,33 +5246,32 @@ stmf_task_lu_done(scsi_task_t *task)
stmf_i_scsi_task_t *itask =
(stmf_i_scsi_task_t *)task->task_stmf_private;
stmf_worker_t *w = itask->itask_worker;
- uint32_t new, old;
+ mutex_enter(&itask->itask_mutex);
mutex_enter(&w->worker_lock);
- do {
- new = old = itask->itask_flags;
- if (old & ITASK_BEING_ABORTED) {
- mutex_exit(&w->worker_lock);
- return;
- }
- if (old & ITASK_IN_WORKER_QUEUE) {
- cmn_err(CE_PANIC, "task_lu_done received"
- " when task is in worker queue "
- " task = %p", (void *)task);
- }
- new &= ~ITASK_KNOWN_TO_LU;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
+ return;
+ }
+ if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
+ cmn_err(CE_PANIC, "task_lu_done received"
+ " when task is in worker queue "
+ " task = %p", (void *)task);
+ }
+ itask->itask_flags &= ~ITASK_KNOWN_TO_LU;
mutex_exit(&w->worker_lock);
-
if ((itask->itask_flags & (ITASK_KNOWN_TO_LU |
ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE |
ITASK_BEING_ABORTED)) == 0) {
stmf_task_free(task);
+ return;
} else {
cmn_err(CE_PANIC, "stmf_lu_done should be the last stage but "
" the task is still not done, task = %p", (void *)task);
}
+ mutex_exit(&itask->itask_mutex);
}
void
@@ -4906,54 +5280,41 @@ stmf_queue_task_for_abort(scsi_task_t *task, stmf_status_t s)
stmf_i_scsi_task_t *itask =
(stmf_i_scsi_task_t *)task->task_stmf_private;
stmf_worker_t *w;
- uint32_t old, new;
stmf_task_audit(itask, TE_TASK_ABORT, CMD_OR_IOF_NA, NULL);
- do {
- old = new = itask->itask_flags;
- if ((old & ITASK_BEING_ABORTED) ||
- ((old & (ITASK_KNOWN_TO_TGT_PORT |
- ITASK_KNOWN_TO_LU)) == 0)) {
- return;
- }
- new |= ITASK_BEING_ABORTED;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ mutex_enter(&itask->itask_mutex);
+ if ((itask->itask_flags & ITASK_BEING_ABORTED) ||
+ ((itask->itask_flags & (ITASK_KNOWN_TO_TGT_PORT |
+ ITASK_KNOWN_TO_LU)) == 0)) {
+ mutex_exit(&itask->itask_mutex);
+ return;
+ }
+ itask->itask_flags |= ITASK_BEING_ABORTED;
task->task_completion_status = s;
- itask->itask_start_time = ddi_get_lbolt();
if (((w = itask->itask_worker) == NULL) ||
(itask->itask_flags & ITASK_IN_TRANSITION)) {
+ mutex_exit(&itask->itask_mutex);
return;
}
/* Queue it and get out */
- mutex_enter(&w->worker_lock);
if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) {
- mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return;
}
- atomic_or_32(&itask->itask_flags, ITASK_IN_WORKER_QUEUE);
- itask->itask_worker_next = NULL;
- if (w->worker_task_tail) {
- w->worker_task_tail->itask_worker_next = itask;
- } else {
- w->worker_task_head = itask;
- }
- w->worker_task_tail = itask;
- if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
- w->worker_max_qdepth_pu = w->worker_queue_depth;
- }
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
- cv_signal(&w->worker_cv);
+ mutex_enter(&w->worker_lock);
+ STMF_ENQUEUE_ITASK(w, itask);
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
}
void
stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg)
{
stmf_i_scsi_task_t *itask = NULL;
- uint32_t old, new, f, rf;
+ uint32_t f, rf;
DTRACE_PROBE2(scsi__task__abort, scsi_task_t *, task,
stmf_status_t, s);
@@ -4976,17 +5337,24 @@ stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg)
default:
return;
}
+
itask = (stmf_i_scsi_task_t *)task->task_stmf_private;
+ mutex_enter(&itask->itask_mutex);
f |= ITASK_BEING_ABORTED | rf;
- do {
- old = new = itask->itask_flags;
- if ((old & f) != f) {
- return;
- }
- new &= ~rf;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+
+ if ((itask->itask_flags & f) != f) {
+ mutex_exit(&itask->itask_mutex);
+ return;
+ }
+ itask->itask_flags &= ~rf;
+ mutex_exit(&itask->itask_mutex);
+
}
+/*
+ * NOTE: stmf_abort_task_offline will release and then reacquire the
+ * itask_mutex. This is required to prevent a lock order violation.
+ */
void
stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
{
@@ -4995,7 +5363,7 @@ stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
unsigned long long st;
stmf_task_audit(itask, TE_TASK_LU_ABORTED, iof, NULL);
-
+ ASSERT(mutex_owned(&itask->itask_mutex));
st = s; /* gcc fix */
if ((s != STMF_ABORT_SUCCESS) && (s != STMF_NOT_FOUND)) {
(void) snprintf(info, sizeof (info),
@@ -5015,16 +5383,19 @@ stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
stmf_abort_task_offline(task, 1, info);
}
+/*
+ * NOTE: stmf_abort_task_offline will release and then reacquire the
+ * itask_mutex. This is required to prevent a lock order violation.
+ */
void
stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
{
char info[STMF_CHANGE_INFO_LEN];
stmf_i_scsi_task_t *itask = TASK_TO_ITASK(task);
unsigned long long st;
- uint32_t old, new;
+ ASSERT(mutex_owned(&itask->itask_mutex));
stmf_task_audit(itask, TE_TASK_LPORT_ABORTED, iof, NULL);
-
st = s;
if ((s != STMF_ABORT_SUCCESS) && (s != STMF_NOT_FOUND)) {
(void) snprintf(info, sizeof (info),
@@ -5038,18 +5409,24 @@ stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof)
/*
* LPORT abort successfully
*/
- do {
- old = new = itask->itask_flags;
- if (!(old & ITASK_KNOWN_TO_TGT_PORT))
- return;
- new &= ~ITASK_KNOWN_TO_TGT_PORT;
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ atomic_and_32(&itask->itask_flags, ~ITASK_KNOWN_TO_TGT_PORT);
return;
}
stmf_abort_task_offline(task, 0, info);
}
+void
+stmf_task_lport_aborted_unlocked(scsi_task_t *task, stmf_status_t s,
+ uint32_t iof)
+{
+ stmf_i_scsi_task_t *itask = TASK_TO_ITASK(task);
+
+ mutex_enter(&itask->itask_mutex);
+ stmf_task_lport_aborted(task, s, iof);
+ mutex_exit(&itask->itask_mutex);
+}
+
stmf_status_t
stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout)
{
@@ -5058,15 +5435,18 @@ stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout)
stmf_worker_t *w = itask->itask_worker;
int i;
+ mutex_enter(&itask->itask_mutex);
ASSERT(itask->itask_flags & ITASK_KNOWN_TO_LU);
mutex_enter(&w->worker_lock);
if (itask->itask_ncmds >= ITASK_MAX_NCMDS) {
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return (STMF_BUSY);
}
for (i = 0; i < itask->itask_ncmds; i++) {
if (itask->itask_cmd_stack[i] == ITASK_CMD_POLL_LU) {
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return (STMF_SUCCESS);
}
}
@@ -5080,21 +5460,10 @@ stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout)
itask->itask_poll_timeout = ddi_get_lbolt() + t;
}
if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) {
- itask->itask_worker_next = NULL;
- if (w->worker_task_tail) {
- w->worker_task_tail->itask_worker_next = itask;
- } else {
- w->worker_task_head = itask;
- }
- w->worker_task_tail = itask;
- if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
- w->worker_max_qdepth_pu = w->worker_queue_depth;
- }
- atomic_or_32(&itask->itask_flags, ITASK_IN_WORKER_QUEUE);
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
- cv_signal(&w->worker_cv);
+ STMF_ENQUEUE_ITASK(w, itask);
}
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return (STMF_SUCCESS);
}
@@ -5106,15 +5475,18 @@ stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout)
stmf_worker_t *w = itask->itask_worker;
int i;
+ mutex_enter(&itask->itask_mutex);
ASSERT(itask->itask_flags & ITASK_KNOWN_TO_TGT_PORT);
mutex_enter(&w->worker_lock);
if (itask->itask_ncmds >= ITASK_MAX_NCMDS) {
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return (STMF_BUSY);
}
for (i = 0; i < itask->itask_ncmds; i++) {
if (itask->itask_cmd_stack[i] == ITASK_CMD_POLL_LPORT) {
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return (STMF_SUCCESS);
}
}
@@ -5128,20 +5500,10 @@ stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout)
itask->itask_poll_timeout = ddi_get_lbolt() + t;
}
if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) {
- itask->itask_worker_next = NULL;
- if (w->worker_task_tail) {
- w->worker_task_tail->itask_worker_next = itask;
- } else {
- w->worker_task_head = itask;
- }
- w->worker_task_tail = itask;
- if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) {
- w->worker_max_qdepth_pu = w->worker_queue_depth;
- }
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
- cv_signal(&w->worker_cv);
+ STMF_ENQUEUE_ITASK(w, itask);
}
mutex_exit(&w->worker_lock);
+ mutex_exit(&itask->itask_mutex);
return (STMF_SUCCESS);
}
@@ -5152,22 +5514,22 @@ stmf_do_task_abort(scsi_task_t *task)
stmf_lu_t *lu;
stmf_local_port_t *lport;
unsigned long long ret;
- uint32_t old, new;
+ uint32_t new = 0;
uint8_t call_lu_abort, call_port_abort;
char info[STMF_CHANGE_INFO_LEN];
lu = task->task_lu;
lport = task->task_lport;
- do {
- old = new = itask->itask_flags;
- if ((old & (ITASK_KNOWN_TO_LU | ITASK_LU_ABORT_CALLED)) ==
- ITASK_KNOWN_TO_LU) {
- new |= ITASK_LU_ABORT_CALLED;
- call_lu_abort = 1;
- } else {
- call_lu_abort = 0;
- }
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ mutex_enter(&itask->itask_mutex);
+ new = itask->itask_flags;
+ if ((itask->itask_flags & (ITASK_KNOWN_TO_LU |
+ ITASK_LU_ABORT_CALLED)) == ITASK_KNOWN_TO_LU) {
+ new |= ITASK_LU_ABORT_CALLED;
+ call_lu_abort = 1;
+ } else {
+ call_lu_abort = 0;
+ }
+ itask->itask_flags = new;
if (call_lu_abort) {
if ((itask->itask_flags & ITASK_DEFAULT_HANDLING) == 0) {
@@ -5195,16 +5557,22 @@ stmf_do_task_abort(scsi_task_t *task)
}
}
- do {
- old = new = itask->itask_flags;
- if ((old & (ITASK_KNOWN_TO_TGT_PORT |
- ITASK_TGT_PORT_ABORT_CALLED)) == ITASK_KNOWN_TO_TGT_PORT) {
- new |= ITASK_TGT_PORT_ABORT_CALLED;
- call_port_abort = 1;
- } else {
- call_port_abort = 0;
- }
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ /*
+ * NOTE: After the call to either stmf_abort_task_offline() or
+ * stmf_task_lu_abort() the itask_mutex was dropped and reacquired
+ * to avoid a deadlock situation with stmf_state.stmf_lock.
+ */
+
+ new = itask->itask_flags;
+ if ((itask->itask_flags & (ITASK_KNOWN_TO_TGT_PORT |
+ ITASK_TGT_PORT_ABORT_CALLED)) == ITASK_KNOWN_TO_TGT_PORT) {
+ new |= ITASK_TGT_PORT_ABORT_CALLED;
+ call_port_abort = 1;
+ } else {
+ call_port_abort = 0;
+ }
+ itask->itask_flags = new;
+
if (call_port_abort) {
ret = lport->lport_abort(lport, STMF_LPORT_ABORT_TASK, task, 0);
if ((ret == STMF_ABORT_SUCCESS) || (ret == STMF_NOT_FOUND)) {
@@ -5228,6 +5596,7 @@ stmf_do_task_abort(scsi_task_t *task)
stmf_abort_task_offline(itask->itask_task, 0, info);
}
}
+ mutex_exit(&itask->itask_mutex);
}
stmf_status_t
@@ -5559,7 +5928,7 @@ stmf_prepare_tpgs_data(uint8_t ilu_alua)
mutex_enter(&stmf_state.stmf_lock);
/* check if any ports are standby and create second group */
- for (ilport = stmf_state.stmf_ilportlist; ilport;
+ for (ilport = stmf_state.stmf_ilportlist; ilport != NULL;
ilport = ilport->ilport_next) {
if (ilport->ilport_standby == 1) {
nports_standby++;
@@ -5568,14 +5937,38 @@ stmf_prepare_tpgs_data(uint8_t ilu_alua)
}
}
- /* The spec only allows for 255 ports to be reported per group */
+ /*
+ * Section 6.25 REPORT TARGET PORT GROUPS
+ * The reply can contain multiple group descriptors. Each group is
+ * limited to 255 port identifiers, so we'll need to limit the amount
+ * of data returned. For FC ports there's a physical limitation in
+ * machines that makes reaching 255 ports very, very unlikely. For
+ * iSCSI, on the other hand, recent changes mean the port count could
+ * be as high as 4096 (current limit). Limiting the data returned
+ * for iSCSI isn't as bad as it sounds. This information is only
+ * important for ALUA, which isn't supported for iSCSI. iSCSI uses
+ * virtual IP addresses to deal with node failover in a cluster.
+ */
nports = min(nports, 255);
nports_standby = min(nports_standby, 255);
+
+ /*
+ * The first 4 bytes of the returned data are the length. The
+ * size of the Target Port Group header is 8 bytes. So, that's where
+ * the 12 comes from. Each port entry is 4 bytes in size.
+ */
sz = (nports * 4) + 12;
- if (nports_standby && ilu_alua) {
+ if (nports_standby != 0 && ilu_alua != 0) {
+ /* ---- Only add 8 bytes since it's just the Group header ---- */
sz += (nports_standby * 4) + 8;
}
- asz = sz + sizeof (*xd) - 4;
+
+ /*
+ * The stmf_xfer_data structure contains 4 bytes that will be
+ * part of the data buffer. So, subtract the 4 bytes from the space
+ * needed.
+ */
+ asz = sizeof (*xd) + sz - 4;
xd = (stmf_xfer_data_t *)kmem_zalloc(asz, KM_NOSLEEP);
if (xd == NULL) {
mutex_exit(&stmf_state.stmf_lock);
@@ -5586,8 +5979,11 @@ stmf_prepare_tpgs_data(uint8_t ilu_alua)
p = xd->buf;
+ /* ---- length values never include the field that holds the size ---- */
*((uint32_t *)p) = BE_32(sz - 4);
p += 4;
+
+ /* ---- Now fill out the first Target Group header ---- */
p[0] = 0x80; /* PREF */
p[1] = 5; /* AO_SUP, S_SUP */
if (stmf_state.stmf_alua_node == 1) {
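To make the sizing arithmetic above concrete: with, say, three active ports and two standby ports on an ALUA-enabled LU, the first group takes (3 * 4) + 12 = 24 bytes (the 4-byte length field, the 8-byte group header, and three 4-byte port entries), and the standby group adds (2 * 4) + 8 = 16 bytes (no second length field), so sz = 40 and asz = sizeof (stmf_xfer_data_t) + 40 - 4.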
@@ -5597,15 +5993,16 @@ stmf_prepare_tpgs_data(uint8_t ilu_alua)
}
p[7] = nports & 0xff;
p += 8;
- for (ilport = stmf_state.stmf_ilportlist; ilport;
+ for (ilport = stmf_state.stmf_ilportlist; ilport != NULL && nports != 0;
ilport = ilport->ilport_next) {
if (ilport->ilport_standby == 1) {
continue;
}
((uint16_t *)p)[1] = BE_16(ilport->ilport_rtpid);
p += 4;
+ nports--;
}
- if (nports_standby && ilu_alua) {
+ if (nports_standby != 0 && ilu_alua != 0) {
p[0] = 0x02; /* Non PREF, Standby */
p[1] = 5; /* AO_SUP, S_SUP */
if (stmf_state.stmf_alua_node == 1) {
@@ -5615,13 +6012,14 @@ stmf_prepare_tpgs_data(uint8_t ilu_alua)
}
p[7] = nports_standby & 0xff;
p += 8;
- for (ilport = stmf_state.stmf_ilportlist; ilport;
- ilport = ilport->ilport_next) {
+ for (ilport = stmf_state.stmf_ilportlist; ilport != NULL &&
+ nports_standby != 0; ilport = ilport->ilport_next) {
if (ilport->ilport_standby == 0) {
continue;
}
((uint16_t *)p)[1] = BE_16(ilport->ilport_rtpid);
p += 4;
+ nports_standby--;
}
}
@@ -5862,7 +6260,9 @@ stmf_scsilib_handle_report_tpgs(scsi_task_t *task, stmf_data_buf_t *dbuf)
stmf_xfer_data_t *xd;
uint32_t sz, minsz;
+ mutex_enter(&itask->itask_mutex);
itask->itask_flags |= ITASK_DEFAULT_HANDLING;
+
task->task_cmd_xfer_length =
((((uint32_t)task->task_cdb[6]) << 24) |
(((uint32_t)task->task_cdb[7]) << 16) |
@@ -5874,6 +6274,7 @@ stmf_scsilib_handle_report_tpgs(scsi_task_t *task, stmf_data_buf_t *dbuf)
task->task_expected_xfer_length =
task->task_cmd_xfer_length;
}
+ mutex_exit(&itask->itask_mutex);
if (task->task_cmd_xfer_length == 0) {
stmf_scsilib_send_status(task, STATUS_GOOD, 0);
@@ -5979,7 +6380,9 @@ stmf_handle_lun_reset(scsi_task_t *task)
* was responsible for setting the ILU_RESET_ACTIVE. In case this
* task itself gets aborted, we will clear ILU_RESET_ACTIVE.
*/
+ mutex_enter(&itask->itask_mutex);
itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_CAUSING_LU_RESET;
+ mutex_exit(&itask->itask_mutex);
/* Initiate abort on all commands on this LU except this one */
stmf_abort(STMF_QUEUE_ABORT_LU, task, STMF_ABORTED, task->task_lu);
@@ -6055,8 +6458,10 @@ stmf_handle_target_reset(scsi_task_t *task)
}
/* ok, start the damage */
+ mutex_enter(&itask->itask_mutex);
itask->itask_flags |= ITASK_DEFAULT_HANDLING |
ITASK_CAUSING_TARGET_RESET;
+ mutex_exit(&itask->itask_mutex);
for (i = 0; i < lm->lm_nentries; i++) {
if (lm->lm_plus[i] == NULL)
continue;
@@ -6114,34 +6519,46 @@ void
stmf_worker_init()
{
uint32_t i;
+ stmf_worker_t *w;
- /* Make local copy of global tunables */
- stmf_i_max_nworkers = stmf_max_nworkers;
- stmf_i_min_nworkers = stmf_min_nworkers;
+ /*
+ * Allow workers to be scaled down to a very low number for cases
+ * where the load is light. If the number of threads gets below
+ * 4, assume it is a mistake and force the threads back to a
+ * reasonable number. The low limit of 4 is simply legacy and
+ * may be too low.
+ */
ASSERT(stmf_workers == NULL);
- if (stmf_i_min_nworkers < 4) {
- stmf_i_min_nworkers = 4;
- }
- if (stmf_i_max_nworkers < stmf_i_min_nworkers) {
- stmf_i_max_nworkers = stmf_i_min_nworkers;
+ if (stmf_nworkers < 4) {
+ stmf_nworkers = 64;
}
+
stmf_workers = (stmf_worker_t *)kmem_zalloc(
- sizeof (stmf_worker_t) * stmf_i_max_nworkers, KM_SLEEP);
- for (i = 0; i < stmf_i_max_nworkers; i++) {
+ sizeof (stmf_worker_t) * stmf_nworkers, KM_SLEEP);
+ for (i = 0; i < stmf_nworkers; i++) {
stmf_worker_t *w = &stmf_workers[i];
mutex_init(&w->worker_lock, NULL, MUTEX_DRIVER, NULL);
cv_init(&w->worker_cv, NULL, CV_DRIVER, NULL);
}
- stmf_worker_mgmt_delay = drv_usectohz(20 * 1000);
stmf_workers_state = STMF_WORKERS_ENABLED;
- /* Workers will be started by stmf_worker_mgmt() */
+ /* Check if we are starting */
+ if (stmf_nworkers_cur < stmf_nworkers - 1) {
+ for (i = stmf_nworkers_cur; i < stmf_nworkers; i++) {
+ w = &stmf_workers[i];
+ w->worker_tid = thread_create(NULL, 0, stmf_worker_task,
+ (void *)&stmf_workers[i], 0, &p0, TS_RUN,
+ minclsyspri);
+ stmf_nworkers_accepting_cmds++;
+ }
+ return;
+ }
/* Let's wait for at least one worker to start */
while (stmf_nworkers_cur == 0)
delay(drv_usectohz(20 * 1000));
- stmf_worker_mgmt_delay = drv_usectohz(3 * 1000 * 1000);
}
stmf_status_t
@@ -6154,7 +6571,6 @@ stmf_worker_fini()
return (STMF_SUCCESS);
ASSERT(stmf_workers);
stmf_workers_state = STMF_WORKERS_DISABLED;
- stmf_worker_mgmt_delay = drv_usectohz(20 * 1000);
cv_signal(&stmf_state.stmf_cv);
sb = ddi_get_lbolt() + drv_usectohz(10 * 1000 * 1000);
@@ -6166,12 +6582,12 @@ stmf_worker_fini()
}
delay(drv_usectohz(100 * 1000));
}
- for (i = 0; i < stmf_i_max_nworkers; i++) {
+ for (i = 0; i < stmf_nworkers; i++) {
stmf_worker_t *w = &stmf_workers[i];
mutex_destroy(&w->worker_lock);
cv_destroy(&w->worker_cv);
}
- kmem_free(stmf_workers, sizeof (stmf_worker_t) * stmf_i_max_nworkers);
+ kmem_free(stmf_workers, sizeof (stmf_worker_t) * stmf_nworkers);
stmf_workers = NULL;
return (STMF_SUCCESS);
@@ -6188,7 +6604,6 @@ stmf_worker_task(void *arg)
stmf_lu_t *lu;
clock_t wait_timer = 0;
clock_t wait_ticks, wait_delta = 0;
- uint32_t old, new;
uint8_t curcmd;
uint8_t abort_free;
uint8_t wait_queue;
@@ -6200,7 +6615,9 @@ stmf_worker_task(void *arg)
DTRACE_PROBE1(worker__create, stmf_worker_t, w);
mutex_enter(&w->worker_lock);
w->worker_flags |= STMF_WORKER_STARTED | STMF_WORKER_ACTIVE;
-stmf_worker_loop:;
+ atomic_inc_32(&stmf_nworkers_cur);
+
+stmf_worker_loop:
if ((w->worker_ref_count == 0) &&
(w->worker_flags & STMF_WORKER_TERMINATE)) {
w->worker_flags &= ~(STMF_WORKER_STARTED |
@@ -6208,10 +6625,13 @@ stmf_worker_loop:;
w->worker_tid = NULL;
mutex_exit(&w->worker_lock);
DTRACE_PROBE1(worker__destroy, stmf_worker_t, w);
+ atomic_dec_32(&stmf_nworkers_cur);
thread_exit();
}
+
/* CONSTCOND */
while (1) {
+ /* worker lock is held at this point */
dec_qdepth = 0;
if (wait_timer && (ddi_get_lbolt() >= wait_timer)) {
wait_timer = 0;
@@ -6229,42 +6649,41 @@ stmf_worker_loop:;
NULL;
}
}
- if ((itask = w->worker_task_head) == NULL) {
+
+ STMF_DEQUEUE_ITASK(w, itask);
+ if (itask == NULL)
break;
- }
+
+ ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0);
task = itask->itask_task;
DTRACE_PROBE2(worker__active, stmf_worker_t, w,
scsi_task_t *, task);
- w->worker_task_head = itask->itask_worker_next;
- if (w->worker_task_head == NULL)
- w->worker_task_tail = NULL;
-
wait_queue = 0;
abort_free = 0;
+ mutex_exit(&w->worker_lock);
+ mutex_enter(&itask->itask_mutex);
+ mutex_enter(&w->worker_lock);
+
if (itask->itask_ncmds > 0) {
curcmd = itask->itask_cmd_stack[itask->itask_ncmds - 1];
} else {
ASSERT(itask->itask_flags & ITASK_BEING_ABORTED);
}
- do {
- old = itask->itask_flags;
- if (old & ITASK_BEING_ABORTED) {
- itask->itask_ncmds = 1;
- curcmd = itask->itask_cmd_stack[0] =
- ITASK_CMD_ABORT;
- goto out_itask_flag_loop;
- } else if ((curcmd & ITASK_CMD_MASK) ==
- ITASK_CMD_NEW_TASK) {
- /*
- * set ITASK_KSTAT_IN_RUNQ, this flag
- * will not reset until task completed
- */
- new = old | ITASK_KNOWN_TO_LU |
- ITASK_KSTAT_IN_RUNQ;
- } else {
- goto out_itask_flag_loop;
- }
- } while (atomic_cas_32(&itask->itask_flags, old, new) != old);
+ if (itask->itask_flags & ITASK_BEING_ABORTED) {
+ itask->itask_ncmds = 1;
+ curcmd = itask->itask_cmd_stack[0] =
+ ITASK_CMD_ABORT;
+ goto out_itask_flag_loop;
+ } else if ((curcmd & ITASK_CMD_MASK) == ITASK_CMD_NEW_TASK) {
+ /*
+ * set ITASK_KSTAT_IN_RUNQ, this flag
+ * will not reset until task completed
+ */
+ itask->itask_flags |= ITASK_KNOWN_TO_LU |
+ ITASK_KSTAT_IN_RUNQ;
+ } else {
+ goto out_itask_flag_loop;
+ }
out_itask_flag_loop:
@@ -6323,24 +6742,29 @@ out_itask_flag_loop:
lu = task->task_lu;
else
lu = dlun0;
+
dbuf = itask->itask_dbufs[ITASK_CMD_BUF_NDX(curcmd)];
mutex_exit(&w->worker_lock);
curcmd &= ITASK_CMD_MASK;
stmf_task_audit(itask, TE_PROCESS_CMD, curcmd, dbuf);
+ mutex_exit(&itask->itask_mutex);
+
switch (curcmd) {
case ITASK_CMD_NEW_TASK:
iss = (stmf_i_scsi_session_t *)
task->task_session->ss_stmf_private;
stmf_itl_lu_new_task(itask);
if (iss->iss_flags & ISS_LUN_INVENTORY_CHANGED) {
- if (stmf_handle_cmd_during_ic(itask))
+ if (stmf_handle_cmd_during_ic(itask)) {
break;
+ }
}
#ifdef DEBUG
if (stmf_drop_task_counter > 0) {
- if (atomic_dec_32_nv(&stmf_drop_task_counter)
- == 1)
+ if (atomic_dec_32_nv(
+ (uint32_t *)&stmf_drop_task_counter) == 1) {
break;
+ }
}
#endif
DTRACE_PROBE1(scsi__task__start, scsi_task_t *, task);
@@ -6354,6 +6778,7 @@ out_itask_flag_loop:
break;
case ITASK_CMD_ABORT:
if (abort_free) {
+ mutex_enter(&itask->itask_mutex);
stmf_task_free(task);
} else {
stmf_do_task_abort(task);
@@ -6372,6 +6797,7 @@ out_itask_flag_loop:
/* case ITASK_CMD_XFER_DATA: */
break;
}
+
mutex_enter(&w->worker_lock);
if (dec_qdepth) {
w->worker_queue_depth--;
@@ -6399,146 +6825,6 @@ out_itask_flag_loop:
goto stmf_worker_loop;
}
-void
-stmf_worker_mgmt()
-{
- int i;
- int workers_needed;
- uint32_t qd;
- clock_t tps, d = 0;
- uint32_t cur_max_ntasks = 0;
- stmf_worker_t *w;
-
- /* Check if we are trying to increase the # of threads */
- for (i = stmf_nworkers_cur; i < stmf_nworkers_needed; i++) {
- if (stmf_workers[i].worker_flags & STMF_WORKER_STARTED) {
- stmf_nworkers_cur++;
- stmf_nworkers_accepting_cmds++;
- } else {
- /* Wait for transition to complete */
- return;
- }
- }
- /* Check if we are trying to decrease the # of workers */
- for (i = (stmf_nworkers_cur - 1); i >= stmf_nworkers_needed; i--) {
- if ((stmf_workers[i].worker_flags & STMF_WORKER_STARTED) == 0) {
- stmf_nworkers_cur--;
- /*
- * stmf_nworkers_accepting_cmds has already been
- * updated by the request to reduce the # of workers.
- */
- } else {
- /* Wait for transition to complete */
- return;
- }
- }
- /* Check if we are being asked to quit */
- if (stmf_workers_state != STMF_WORKERS_ENABLED) {
- if (stmf_nworkers_cur) {
- workers_needed = 0;
- goto worker_mgmt_trigger_change;
- }
- return;
- }
- /* Check if we are starting */
- if (stmf_nworkers_cur < stmf_i_min_nworkers) {
- workers_needed = stmf_i_min_nworkers;
- goto worker_mgmt_trigger_change;
- }
-
- tps = drv_usectohz(1 * 1000 * 1000);
- if ((stmf_wm_last != 0) &&
- ((d = ddi_get_lbolt() - stmf_wm_last) > tps)) {
- qd = 0;
- for (i = 0; i < stmf_nworkers_accepting_cmds; i++) {
- qd += stmf_workers[i].worker_max_qdepth_pu;
- stmf_workers[i].worker_max_qdepth_pu = 0;
- if (stmf_workers[i].worker_max_sys_qdepth_pu >
- cur_max_ntasks) {
- cur_max_ntasks =
- stmf_workers[i].worker_max_sys_qdepth_pu;
- }
- stmf_workers[i].worker_max_sys_qdepth_pu = 0;
- }
- }
- stmf_wm_last = ddi_get_lbolt();
- if (d <= tps) {
- /* still ramping up */
- return;
- }
- /* max qdepth cannot be more than max tasks */
- if (qd > cur_max_ntasks)
- qd = cur_max_ntasks;
-
- /* See if we have more workers */
- if (qd < stmf_nworkers_accepting_cmds) {
- /*
- * Since we dont reduce the worker count right away, monitor
- * the highest load during the scale_down_delay.
- */
- if (qd > stmf_worker_scale_down_qd)
- stmf_worker_scale_down_qd = qd;
- if (stmf_worker_scale_down_timer == 0) {
- stmf_worker_scale_down_timer = ddi_get_lbolt() +
- drv_usectohz(stmf_worker_scale_down_delay *
- 1000 * 1000);
- return;
- }
- if (ddi_get_lbolt() < stmf_worker_scale_down_timer) {
- return;
- }
- /* Its time to reduce the workers */
- if (stmf_worker_scale_down_qd < stmf_i_min_nworkers)
- stmf_worker_scale_down_qd = stmf_i_min_nworkers;
- if (stmf_worker_scale_down_qd > stmf_i_max_nworkers)
- stmf_worker_scale_down_qd = stmf_i_max_nworkers;
- if (stmf_worker_scale_down_qd == stmf_nworkers_cur)
- return;
- workers_needed = stmf_worker_scale_down_qd;
- stmf_worker_scale_down_qd = 0;
- goto worker_mgmt_trigger_change;
- }
- stmf_worker_scale_down_qd = 0;
- stmf_worker_scale_down_timer = 0;
- if (qd > stmf_i_max_nworkers)
- qd = stmf_i_max_nworkers;
- if (qd < stmf_i_min_nworkers)
- qd = stmf_i_min_nworkers;
- if (qd == stmf_nworkers_cur)
- return;
- workers_needed = qd;
- goto worker_mgmt_trigger_change;
-
- /* NOTREACHED */
- return;
-
-worker_mgmt_trigger_change:
- ASSERT(workers_needed != stmf_nworkers_cur);
- if (workers_needed > stmf_nworkers_cur) {
- stmf_nworkers_needed = workers_needed;
- for (i = stmf_nworkers_cur; i < workers_needed; i++) {
- w = &stmf_workers[i];
- w->worker_tid = thread_create(NULL, 0, stmf_worker_task,
- (void *)&stmf_workers[i], 0, &p0, TS_RUN,
- minclsyspri);
- }
- return;
- }
- /* At this point we know that we are decreasing the # of workers */
- stmf_nworkers_accepting_cmds = workers_needed;
- stmf_nworkers_needed = workers_needed;
- /* Signal the workers that its time to quit */
- for (i = (stmf_nworkers_cur - 1); i >= stmf_nworkers_needed; i--) {
- w = &stmf_workers[i];
- ASSERT(w && (w->worker_flags & STMF_WORKER_STARTED));
- mutex_enter(&w->worker_lock);
- w->worker_flags |= STMF_WORKER_TERMINATE;
- if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0)
- cv_signal(&w->worker_cv);
- mutex_exit(&w->worker_lock);
- }
-}
-
/*
* Fills out a dbuf from stmf_xfer_data_t (contained in the db_lu_private).
* If all the data has been filled out, frees the xd and makes
@@ -6867,6 +7153,12 @@ stmf_dlun0_ctl(struct stmf_lu *lu, int cmd, void *arg)
cmn_err(CE_WARN, "stmf_dlun0_ctl called with cmd %x", cmd);
}
+/* ARGSUSED */
+void
+stmf_dlun0_task_done(struct scsi_task *task)
+{
+}
+
void
stmf_dlun_init()
{
@@ -6881,6 +7173,7 @@ stmf_dlun_init()
dlun0->lu_abort = stmf_dlun0_abort;
dlun0->lu_task_poll = stmf_dlun0_task_poll;
dlun0->lu_ctl = stmf_dlun0_ctl;
+ dlun0->lu_task_done = stmf_dlun0_task_done;
ilu = (stmf_i_lu_t *)dlun0->lu_stmf_private;
ilu->ilu_cur_task_cntr = &ilu->ilu_task_cntr1;
@@ -7144,17 +7437,28 @@ stmf_itl_task_start(stmf_i_scsi_task_t *itask)
stmf_itl_data_t *itl = itask->itask_itl_datap;
scsi_task_t *task = itask->itask_task;
stmf_i_lu_t *ilu;
+ stmf_i_scsi_session_t *iss =
+ itask->itask_task->task_session->ss_stmf_private;
+ stmf_i_remote_port_t *irport = iss->iss_irport;
if (itl == NULL || task->task_lu == dlun0)
return;
ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private;
itask->itask_start_timestamp = gethrtime();
+ itask->itask_xfer_done_timestamp = 0;
if (ilu->ilu_kstat_io != NULL) {
mutex_enter(ilu->ilu_kstat_io->ks_lock);
stmf_update_kstat_lu_q(itask->itask_task, kstat_waitq_enter);
mutex_exit(ilu->ilu_kstat_io->ks_lock);
}
+ if (irport->irport_kstat_estat != NULL) {
+ if (task->task_flags & TF_READ_DATA)
+ atomic_inc_32(&irport->irport_nread_tasks);
+ else if (task->task_flags & TF_WRITE_DATA)
+ atomic_inc_32(&irport->irport_nwrite_tasks);
+ }
+
stmf_update_kstat_lport_q(itask->itask_task, kstat_waitq_enter);
}
@@ -7193,6 +7497,8 @@ stmf_itl_task_done(stmf_i_scsi_task_t *itask)
if (ilu->ilu_kstat_io == NULL)
return;
+ stmf_update_kstat_rport_estat(task);
+
mutex_enter(ilu->ilu_kstat_io->ks_lock);
if (itask->itask_flags & ITASK_KSTAT_IN_RUNQ) {
@@ -7206,6 +7512,23 @@ stmf_itl_task_done(stmf_i_scsi_task_t *itask)
}
}
+void
+stmf_lu_xfer_done(scsi_task_t *task, boolean_t read, hrtime_t elapsed_time)
+{
+ stmf_i_scsi_task_t *itask = task->task_stmf_private;
+
+ if (task->task_lu == dlun0)
+ return;
+
+ if (read) {
+ atomic_add_64((uint64_t *)&itask->itask_lu_read_time,
+ elapsed_time);
+ } else {
+ atomic_add_64((uint64_t *)&itask->itask_lu_write_time,
+ elapsed_time);
+ }
+}
+
static void
stmf_lport_xfer_start(stmf_i_scsi_task_t *itask, stmf_data_buf_t *dbuf)
{
@@ -7233,7 +7556,9 @@ stmf_lport_xfer_done(stmf_i_scsi_task_t *itask, stmf_data_buf_t *dbuf)
xfer_size = (dbuf->db_xfer_status == STMF_SUCCESS) ?
dbuf->db_data_size : 0;
- elapsed_time = gethrtime() - dbuf->db_xfer_start_timestamp;
+ itask->itask_xfer_done_timestamp = gethrtime();
+ elapsed_time = itask->itask_xfer_done_timestamp -
+ dbuf->db_xfer_start_timestamp;
if (dbuf->db_flags & DB_DIRECTION_TO_RPORT) {
atomic_add_64((uint64_t *)&itask->itask_lport_read_time,
elapsed_time);
@@ -7257,7 +7582,8 @@ stmf_svc_init()
{
if (stmf_state.stmf_svc_flags & STMF_SVC_STARTED)
return;
- stmf_state.stmf_svc_tailp = &stmf_state.stmf_svc_active;
+ list_create(&stmf_state.stmf_svc_list, sizeof (stmf_svc_req_t),
+ offsetof(stmf_svc_req_t, svc_list_entry));
stmf_state.stmf_svc_taskq = ddi_taskq_create(0, "STMF_SVC_TASKQ", 1,
TASKQ_DEFAULTPRI, 0);
(void) ddi_taskq_dispatch(stmf_state.stmf_svc_taskq,
@@ -7286,6 +7612,7 @@ stmf_svc_fini()
if (i == 500)
return (STMF_BUSY);
+ list_destroy(&stmf_state.stmf_svc_list);
ddi_taskq_destroy(stmf_state.stmf_svc_taskq);
return (STMF_SUCCESS);
@@ -7311,7 +7638,7 @@ stmf_svc(void *arg)
stmf_state.stmf_svc_flags |= STMF_SVC_STARTED | STMF_SVC_ACTIVE;
while (!(stmf_state.stmf_svc_flags & STMF_SVC_TERMINATE)) {
- if (stmf_state.stmf_svc_active == NULL) {
+ if (list_is_empty(&stmf_state.stmf_svc_list)) {
stmf_svc_timeout(&clks);
continue;
}
@@ -7322,11 +7649,9 @@ stmf_svc(void *arg)
* so it should be safe to access it without holding the
* stmf state lock.
*/
- req = stmf_state.stmf_svc_active;
- stmf_state.stmf_svc_active = req->svc_next;
-
- if (stmf_state.stmf_svc_active == NULL)
- stmf_state.stmf_svc_tailp = &stmf_state.stmf_svc_active;
+ req = list_remove_head(&stmf_state.stmf_svc_list);
+ if (req == NULL)
+ continue;
switch (req->svc_cmd) {
case STMF_CMD_LPORT_ONLINE:
@@ -7398,7 +7723,7 @@ stmf_svc_timeout(struct stmf_svc_clocks *clks)
ddi_get_lbolt() + drv_usectohz(1*1000*1000);
}
- if (stmf_state.stmf_svc_active)
+ if (!list_is_empty(&stmf_state.stmf_svc_list))
return;
}
@@ -7423,17 +7748,10 @@ stmf_svc_timeout(struct stmf_svc_clocks *clks)
ddi_get_lbolt() + drv_usectohz(1*1000*1000);
}
- if (stmf_state.stmf_svc_active)
+ if (!list_is_empty(&stmf_state.stmf_svc_list))
return;
}
- /* Check if we need to run worker_mgmt */
- if (ddi_get_lbolt() > clks->worker_delay) {
- stmf_worker_mgmt();
- clks->worker_delay = ddi_get_lbolt() +
- stmf_worker_mgmt_delay;
- }
-
/* Check if any active session got its 1st LUN */
if (stmf_state.stmf_process_initial_luns) {
int stmf_level = 0;
@@ -7560,11 +7878,9 @@ stmf_svc_queue(int cmd, void *obj, stmf_state_change_info_t *info)
info->st_additional_info);
}
req->svc_req_alloc_size = s;
- req->svc_next = NULL;
mutex_enter(&stmf_state.stmf_lock);
- *stmf_state.stmf_svc_tailp = req;
- stmf_state.stmf_svc_tailp = &req->svc_next;
+ list_insert_tail(&stmf_state.stmf_svc_list, req);
if ((stmf_state.stmf_svc_flags & STMF_SVC_ACTIVE) == 0) {
cv_signal(&stmf_state.stmf_cv);
}
@@ -7574,29 +7890,15 @@ stmf_svc_queue(int cmd, void *obj, stmf_state_change_info_t *info)
static void
stmf_svc_kill_obj_requests(void *obj)
{
- stmf_svc_req_t *prev_req = NULL;
stmf_svc_req_t *next_req;
stmf_svc_req_t *req;
ASSERT(mutex_owned(&stmf_state.stmf_lock));
- for (req = stmf_state.stmf_svc_active; req != NULL; req = next_req) {
- next_req = req->svc_next;
-
+ for (req = list_head(&stmf_state.stmf_svc_list); req != NULL;
+ req = next_req) {
+ next_req = list_next(&stmf_state.stmf_svc_list, req);
if (req->svc_obj == obj) {
- if (prev_req != NULL)
- prev_req->svc_next = next_req;
- else
- stmf_state.stmf_svc_active = next_req;
-
- if (next_req == NULL)
- stmf_state.stmf_svc_tailp = (prev_req != NULL) ?
- &prev_req->svc_next :
- &stmf_state.stmf_svc_active;
-
+ list_remove(&stmf_state.stmf_svc_list, req);
kmem_free(req, req->svc_req_alloc_size);
- } else {
- prev_req = req;
}
}
}
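The queue manipulation above now goes through the kernel's generic list_t interface instead of the hand-rolled svc_next/svc_tailp pair. A minimal standalone sketch of that API, using a hypothetical example_req_t in place of stmf_svc_req_t:

#include <sys/list.h>
#include <sys/sysmacros.h>	/* offsetof */

/* Hypothetical request type; the embedded list_node_t mirrors svc_list_entry. */
typedef struct example_req {
	list_node_t	er_node;
	int		er_cmd;
} example_req_t;

/* Caller supplies an allocated req; shows create/insert/remove/destroy. */
static void
example_list_demo(example_req_t *req)
{
	list_t ql;

	list_create(&ql, sizeof (example_req_t),
	    offsetof(example_req_t, er_node));
	list_insert_tail(&ql, req);	/* enqueue at the tail */
	req = list_remove_head(&ql);	/* returns NULL when empty */
	list_destroy(&ql);		/* list must be empty here */
}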
@@ -7642,6 +7944,14 @@ stmf_trace_clear()
mutex_exit(&trace_buf_lock);
}
+/*
+ * NOTE: Due to lock order problems that are not possible to fix, this
+ * method drops and reacquires the itask_mutex around the call to stmf_ctl.
+ * Another possible workaround would be to use a dispatch queue and have
+ * the call to stmf_ctl run on another thread that's not holding the
+ * itask_mutex. The problem with that approach is that it's difficult to
+ * determine what impact an asynchronous change would have on the system state.
+ */
static void
stmf_abort_task_offline(scsi_task_t *task, int offline_lu, char *info)
{
@@ -7649,10 +7959,14 @@ stmf_abort_task_offline(scsi_task_t *task, int offline_lu, char *info)
void *ctl_private;
uint32_t ctl_cmd;
int msg = 0;
+ stmf_i_scsi_task_t *itask =
+ (stmf_i_scsi_task_t *)task->task_stmf_private;
stmf_trace("FROM STMF", "abort_task_offline called for %s: %s",
offline_lu ? "LU" : "LPORT", info ? info : "no additional info");
change_info.st_additional_info = info;
+ ASSERT(mutex_owned(&itask->itask_mutex));
+
if (offline_lu) {
change_info.st_rflags = STMF_RFLAG_RESET |
STMF_RFLAG_LU_ABORT;
@@ -7680,7 +7994,9 @@ stmf_abort_task_offline(scsi_task_t *task, int offline_lu, char *info)
offline_lu ? "LU" : "LPORT", info ? info :
"<no additional info>");
}
+ mutex_exit(&itask->itask_mutex);
(void) stmf_ctl(ctl_cmd, ctl_private, &change_info);
+ mutex_enter(&itask->itask_mutex);
}
static char
@@ -7737,7 +8053,7 @@ stmf_scsilib_tptid_validate(scsi_transport_id_t *tptid, uint32_t total_sz,
return (B_FALSE);
break;
- case PROTOCOL_iSCSI:
+ case PROTOCOL_iSCSI: /* CSTYLED */
{
iscsi_transport_id_t *iscsiid;
uint16_t adn_len, name_len;
@@ -7782,7 +8098,7 @@ stmf_scsilib_tptid_validate(scsi_transport_id_t *tptid, uint32_t total_sz,
case PROTOCOL_SAS:
case PROTOCOL_ADT:
case PROTOCOL_ATAPI:
- default:
+ default: /* CSTYLED */
{
stmf_dflt_scsi_tptid_t *dflttpd;
@@ -7810,7 +8126,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2)
switch (tpd1->protocol_id) {
- case PROTOCOL_iSCSI:
+ case PROTOCOL_iSCSI: /* CSTYLED */
{
iscsi_transport_id_t *iscsitpd1, *iscsitpd2;
uint16_t len;
@@ -7825,7 +8141,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2)
}
break;
- case PROTOCOL_SRP:
+ case PROTOCOL_SRP: /* CSTYLED */
{
scsi_srp_transport_id_t *srptpd1, *srptpd2;
@@ -7837,7 +8153,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2)
}
break;
- case PROTOCOL_FIBRE_CHANNEL:
+ case PROTOCOL_FIBRE_CHANNEL: /* CSTYLED */
{
scsi_fc_transport_id_t *fctpd1, *fctpd2;
@@ -7855,7 +8171,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2)
case PROTOCOL_SAS:
case PROTOCOL_ADT:
case PROTOCOL_ATAPI:
- default:
+ default: /* CSTYLED */
{
stmf_dflt_scsi_tptid_t *dflt1, *dflt2;
uint16_t len;
@@ -7988,3 +8304,74 @@ stmf_remote_port_free(stmf_remote_port_t *rpt)
*/
kmem_free(rpt, sizeof (stmf_remote_port_t) + rpt->rport_tptid_sz);
}
+
+stmf_lu_t *
+stmf_check_and_hold_lu(scsi_task_t *task, uint8_t *guid)
+{
+ stmf_i_scsi_session_t *iss;
+ stmf_lu_t *lu = NULL;
+ stmf_i_lu_t *ilu = NULL;
+ stmf_lun_map_t *sm;
+ stmf_lun_map_ent_t *lme;
+ int i;
+
+ iss = (stmf_i_scsi_session_t *)task->task_session->ss_stmf_private;
+ rw_enter(iss->iss_lockp, RW_READER);
+ sm = iss->iss_sm;
+
+ for (i = 0; i < sm->lm_nentries; i++) {
+ if (sm->lm_plus[i] == NULL)
+ continue;
+ lme = (stmf_lun_map_ent_t *)sm->lm_plus[i];
+ lu = lme->ent_lu;
+ if (bcmp(lu->lu_id->ident, guid, 16) == 0) {
+ break;
+ }
+ lu = NULL;
+ }
+
+ if (lu == NULL) {
+ goto hold_lu_done;
+ }
+
+ ilu = lu->lu_stmf_private;
+ mutex_enter(&ilu->ilu_task_lock);
+ ilu->ilu_additional_ref++;
+ mutex_exit(&ilu->ilu_task_lock);
+
+hold_lu_done:
+ rw_exit(iss->iss_lockp);
+ return (lu);
+}
+
+void
+stmf_release_lu(stmf_lu_t *lu)
+{
+ stmf_i_lu_t *ilu;
+
+ ilu = lu->lu_stmf_private;
+ ASSERT(ilu->ilu_additional_ref != 0);
+ mutex_enter(&ilu->ilu_task_lock);
+ ilu->ilu_additional_ref--;
+ mutex_exit(&ilu->ilu_task_lock);
+}
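A hedged sketch of the intended calling pattern for the two functions above (hypothetical function name; the in-tree consumer is the ATS/copy-manager work elsewhere in this change, which must pin a second LU while operating on it):

/* Hypothetical caller: pin a LU by GUID, use it, then drop the hold. */
static stmf_status_t
example_with_lu_held(scsi_task_t *task, uint8_t *guid)
{
	stmf_lu_t *lu;

	lu = stmf_check_and_hold_lu(task, guid);
	if (lu == NULL)
		return (STMF_NOT_FOUND);
	/* ... use lu; ilu_additional_ref keeps it from being freed ... */
	stmf_release_lu(lu);
	return (STMF_SUCCESS);
}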
+
+int
+stmf_is_task_being_aborted(scsi_task_t *task)
+{
+ stmf_i_scsi_task_t *itask;
+
+ itask = (stmf_i_scsi_task_t *)task->task_stmf_private;
+ if (itask->itask_flags & ITASK_BEING_ABORTED)
+ return (1);
+
+ return (0);
+}
+
+volatile boolean_t stmf_pgr_aptpl_always = B_FALSE;
+
+boolean_t
+stmf_is_pgr_aptpl_always()
+{
+ return (stmf_pgr_aptpl_always);
+}
diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h
index d477224863..b590726168 100644
--- a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h
@@ -20,6 +20,8 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
#ifndef _STMF_IMPL_H
@@ -89,7 +91,7 @@ typedef struct stmf_i_lu {
uint32_t ilu_ntasks; /* # of tasks in the ilu_task list */
uint32_t ilu_ntasks_free; /* # of tasks that are free */
uint32_t ilu_ntasks_min_free; /* # minimal free tasks */
- uint32_t rsvd1;
+ uint32_t ilu_additional_ref;
uint32_t ilu_proxy_registered;
uint64_t ilu_reg_msgid;
struct stmf_i_scsi_task *ilu_tasks;
@@ -164,6 +166,18 @@ typedef struct stmf_i_remote_port {
int irport_refcnt;
id_t irport_instance;
avl_node_t irport_ln;
+ /* number of active read tasks */
+ uint32_t irport_nread_tasks;
+ /* number of active write tasks */
+ uint32_t irport_nwrite_tasks;
+ hrtime_t irport_rdstart_timestamp;
+ hrtime_t irport_rddone_timestamp;
+ hrtime_t irport_wrstart_timestamp;
+ hrtime_t irport_wrdone_timestamp;
+ kstat_t *irport_kstat_info;
+ kstat_t *irport_kstat_io;
+ kstat_t *irport_kstat_estat; /* extended stats */
+ boolean_t irport_info_dirty;
} stmf_i_remote_port_t;
/*
@@ -237,6 +251,7 @@ typedef struct stmf_i_scsi_task {
scsi_task_t *itask_task;
uint32_t itask_alloc_size;
uint32_t itask_flags;
+ kmutex_t itask_mutex; /* protects flags and lists */
uint64_t itask_proxy_msg_id;
stmf_data_buf_t *itask_proxy_dbuf;
struct stmf_worker *itask_worker;
@@ -245,7 +260,6 @@ typedef struct stmf_i_scsi_task {
struct stmf_i_scsi_task *itask_lu_next;
struct stmf_i_scsi_task *itask_lu_prev;
struct stmf_i_scsi_task *itask_lu_free_next;
- struct stmf_i_scsi_task *itask_abort_next;
struct stmf_itl_data *itask_itl_datap;
clock_t itask_start_time; /* abort and normal */
/* For now we only support 4 parallel buffers. Should be enough. */
@@ -259,6 +273,7 @@ typedef struct stmf_i_scsi_task {
/* Task profile data */
hrtime_t itask_start_timestamp;
hrtime_t itask_done_timestamp;
+ hrtime_t itask_xfer_done_timestamp;
hrtime_t itask_waitq_enter_timestamp;
hrtime_t itask_waitq_time;
hrtime_t itask_lu_read_time;
@@ -275,6 +290,41 @@ typedef struct stmf_i_scsi_task {
#define ITASK_DEFAULT_ABORT_TIMEOUT 5
/*
+ * Common code to enqueue an itask onto the worker_task queue is placed
+ * in this macro to simplify future maintenance activity.
+ */
+#define STMF_ENQUEUE_ITASK(w, i) \
+ ASSERT((i->itask_flags & ITASK_IN_FREE_LIST) == 0); \
+ ASSERT(mutex_owned(&i->itask_mutex)); \
+ ASSERT(mutex_owned(&w->worker_lock)); \
+ i->itask_worker_next = NULL; \
+ if (w->worker_task_tail) { \
+ w->worker_task_tail->itask_worker_next = i; \
+ } else { \
+ w->worker_task_head = i; \
+ } \
+ w->worker_task_tail = i; \
+ if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { \
+ w->worker_max_qdepth_pu = w->worker_queue_depth; \
+ } \
+ atomic_inc_32(&w->worker_ref_count); \
+ atomic_or_32(&i->itask_flags, ITASK_IN_WORKER_QUEUE); \
+ i->itask_waitq_enter_timestamp = gethrtime(); \
+ if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) \
+ cv_signal(&w->worker_cv);
+
+#define STMF_DEQUEUE_ITASK(w, itask) \
+ ASSERT(mutex_owned(&w->worker_lock)); \
+ if ((itask = w->worker_task_head) != NULL) { \
+ w->worker_task_head = itask->itask_worker_next; \
+ if (w->worker_task_head == NULL) { \
+ w->worker_task_tail = NULL; \
+ } \
+ } else { \
+ w->worker_task_tail = NULL; \
+ }
+
+/*
* itask_flags
*/
#define ITASK_IN_FREE_LIST 0x0001
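The call sites converted in stmf.c above all follow the same shape; a minimal sketch (hypothetical function) of the locking contract the two macros assert, with braces around the multi-statement macro body:

/* Lock order is itask_mutex, then worker_lock; queue a task only once. */
static void
example_queue_itask(stmf_worker_t *w, stmf_i_scsi_task_t *itask)
{
	mutex_enter(&itask->itask_mutex);
	mutex_enter(&w->worker_lock);
	if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) {
		STMF_ENQUEUE_ITASK(w, itask);
	}
	mutex_exit(&w->worker_lock);
	mutex_exit(&itask->itask_mutex);
}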
diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_state.h b/usr/src/uts/common/io/comstar/stmf/stmf_state.h
index 2d7a81f93d..221c732887 100644
--- a/usr/src/uts/common/io/comstar/stmf/stmf_state.h
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_state.h
@@ -22,7 +22,7 @@
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright 2011, Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2016, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
#ifndef _STMF_STATE_H
@@ -62,8 +62,7 @@ typedef struct stmf_state {
uint32_t stmf_svc_flags;
stmf_i_lu_t *stmf_svc_ilu_draining;
stmf_i_lu_t *stmf_svc_ilu_timing;
- struct stmf_svc_req *stmf_svc_active;
- struct stmf_svc_req **stmf_svc_tailp;
+ list_t stmf_svc_list;
stmf_id_list_t stmf_hg_list;
stmf_id_list_t stmf_tg_list;
@@ -86,7 +85,7 @@ typedef struct stmf_state {
* different types of services) are added to the stmf_svc_thread.
*/
typedef struct stmf_svc_req {
- struct stmf_svc_req *svc_next;
+ list_node_t svc_list_entry;
int svc_req_alloc_size;
int svc_cmd;
void *svc_obj;
diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_stats.h b/usr/src/uts/common/io/comstar/stmf/stmf_stats.h
index 2247c1ffd7..3b4184e96a 100644
--- a/usr/src/uts/common/io/comstar/stmf/stmf_stats.h
+++ b/usr/src/uts/common/io/comstar/stmf/stmf_stats.h
@@ -29,6 +29,8 @@
extern "C" {
#endif
+#include <sys/portif.h>
+
typedef struct stmf_kstat_itl_info {
kstat_named_t i_rport_name;
kstat_named_t i_rport_alias;
@@ -58,6 +60,21 @@ typedef struct stmf_kstat_tgt_info {
kstat_named_t i_protocol;
} stmf_kstat_tgt_info_t;
+#define STMF_RPORT_INFO_LIMIT 8
+
+typedef struct stmf_kstat_rport_info {
+ kstat_named_t i_rport_name;
+ kstat_named_t i_protocol;
+ kstat_named_t i_rport_uinfo[STMF_RPORT_INFO_LIMIT];
+} stmf_kstat_rport_info_t;
+
+typedef struct stmf_kstat_rport_estat {
+ kstat_named_t i_rport_read_latency;
+ kstat_named_t i_rport_write_latency;
+ kstat_named_t i_nread_tasks;
+ kstat_named_t i_nwrite_tasks;
+} stmf_kstat_rport_estat_t;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/io/idm/idm.c b/usr/src/uts/common/io/idm/idm.c
index 877a0e6f20..1361b8dfb9 100644
--- a/usr/src/uts/common/io/idm/idm.c
+++ b/usr/src/uts/common/io/idm/idm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/cpuvar.h>
@@ -61,7 +62,7 @@ static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
-static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
+static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
@@ -1523,11 +1524,12 @@ idm_task_rele(idm_task_t *idt)
idm_refcnt_rele(&idt->idt_refcnt);
}
-void
+stmf_status_t
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
idm_task_t *task;
int idx;
+ stmf_status_t s = STMF_SUCCESS;
/*
* Passing NULL as the task indicates that all tasks
@@ -1549,7 +1551,7 @@ idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
(task->idt_state != TASK_COMPLETE) &&
(task->idt_ic == ic)) {
rw_exit(&idm.idm_taskid_table_lock);
- idm_task_abort_one(ic, task, abort_type);
+ s = idm_task_abort_one(ic, task, abort_type);
rw_enter(&idm.idm_taskid_table_lock, RW_READER);
} else
mutex_exit(&task->idt_mutex);
@@ -1557,8 +1559,9 @@ idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
rw_exit(&idm.idm_taskid_table_lock);
} else {
mutex_enter(&idt->idt_mutex);
- idm_task_abort_one(ic, idt, abort_type);
+ s = idm_task_abort_one(ic, idt, abort_type);
}
+ return (s);
}
static void
@@ -1589,9 +1592,11 @@ idm_task_abort_unref_cb(void *ref)
* Abort the idm task.
* Caller must hold the task mutex, which will be released before return
*/
-static void
+static stmf_status_t
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
+ stmf_status_t s = STMF_SUCCESS;
+
/* Caller must hold connection mutex */
ASSERT(mutex_owned(&idt->idt_mutex));
switch (idt->idt_state) {
@@ -1610,7 +1615,7 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
*/
idm_refcnt_async_wait_ref(&idt->idt_refcnt,
&idm_task_abort_unref_cb);
- return;
+ return (s);
case AT_INTERNAL_ABORT:
case AT_TASK_MGMT_ABORT:
idt->idt_state = TASK_ABORTING;
@@ -1624,7 +1629,7 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
*/
idm_refcnt_async_wait_ref(&idt->idt_refcnt,
&idm_task_abort_unref_cb);
- return;
+ return (s);
default:
ASSERT(0);
}
@@ -1664,7 +1669,7 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
*/
idm_refcnt_async_wait_ref(&idt->idt_refcnt,
&idm_task_abort_unref_cb);
- return;
+ return (s);
default:
ASSERT(0);
}
@@ -1683,17 +1688,15 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
}
break;
case TASK_COMPLETE:
- /*
- * In this case, let it go. The status has already been
- * sent (which may or may not get successfully transmitted)
- * and we don't want to end up in a race between completing
- * the status PDU and marking the task suspended.
- */
+ idm_refcnt_wait_ref(&idt->idt_refcnt);
+ s = STMF_ABORT_SUCCESS;
break;
default:
ASSERT(0);
}
mutex_exit(&idt->idt_mutex);
+
+ return (s);
}
static void
@@ -2267,6 +2270,29 @@ idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
mutex_exit(&refcnt->ir_mutex);
}
+/*
+ * Used to determine the status of the refcnt.
+ *
+ * if refcnt is 0, the return is 0
+ * if refcnt is negative, the return is -1
+ * if refcnt is > 0 with no waiters, the return is 1
+ * if refcnt is > 0 with waiters, the return is 2
+ */
+int
+idm_refcnt_is_held(idm_refcnt_t *refcnt)
+{
+ if (refcnt->ir_refcnt < 0)
+ return (-1);
+
+ if (refcnt->ir_refcnt == 0)
+ return (0);
+
+ if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0)
+ return (1);
+
+ return (2);
+}
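A small hedged example of interpreting the return value (hypothetical helper; not part of this change):

/*
 * Hypothetical helper: B_TRUE only when no references remain.
 * (-1 = underflow, 0 = idle, 1 = held, 2 = held with waiters.)
 */
static boolean_t
example_refcnt_idle(idm_refcnt_t *refcnt)
{
	return (idm_refcnt_is_held(refcnt) == 0 ? B_TRUE : B_FALSE);
}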
+
void
idm_conn_hold(idm_conn_t *ic)
{
diff --git a/usr/src/uts/common/io/idm/idm_conn_sm.c b/usr/src/uts/common/io/idm/idm_conn_sm.c
index 52e515a983..205b7b43d0 100644
--- a/usr/src/uts/common/io/idm/idm_conn_sm.c
+++ b/usr/src/uts/common/io/idm/idm_conn_sm.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/cpuvar.h>
@@ -1246,7 +1246,7 @@ idm_update_state(idm_conn_t *ic, idm_conn_state_t new_state,
}
/* Stop executing active tasks */
- idm_task_abort(ic, NULL, AT_INTERNAL_SUSPEND);
+ (void) idm_task_abort(ic, NULL, AT_INTERNAL_SUSPEND);
/* Start logout timer */
IDM_SM_TIMER_CHECK(ic);
@@ -1302,7 +1302,7 @@ idm_update_state(idm_conn_t *ic, idm_conn_state_t new_state,
}
/* Abort all tasks */
- idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
+ (void) idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
/*
* Handle terminal state actions on the global taskq so
diff --git a/usr/src/uts/common/io/idm/idm_so.c b/usr/src/uts/common/io/idm/idm_so.c
index 345483eeba..5c3bbd1cd9 100644
--- a/usr/src/uts/common/io/idm/idm_so.c
+++ b/usr/src/uts/common/io/idm/idm_so.c
@@ -24,6 +24,7 @@
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Joyent, Inc. All rights reserved.
*/
@@ -859,6 +860,11 @@ idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
/*
* Check actual AHS length against the amount available in the buffer
*/
+ if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) &&
+ (bhs->hlength != 0)) {
+ /* ---- hlength is only valid for a SCSI Request ---- */
+ return (IDM_STATUS_FAIL);
+ }
pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
(bhs->hlength * sizeof (uint32_t));
pdu->isp_datalen = n2h24(bhs->dlength);
@@ -868,7 +874,7 @@ idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
"idm_sorecvhdr: invalid data segment length");
return (IDM_STATUS_FAIL);
}
- if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
+ if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) {
/* Allocate a new header segment and change the callback */
new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c
index 38795beab5..3d98c3b80d 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c
@@ -21,6 +21,7 @@
/*
* Copyright 2000 by Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
*
* iSCSI Software Initiator
@@ -848,6 +849,7 @@ iscsi_tran_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt,
icmdp->cmd_sig = ISCSI_SIG_CMD;
icmdp->cmd_state = ISCSI_CMD_STATE_FREE;
icmdp->cmd_lun = ilp;
+ iscsi_lun_hold(ilp);
icmdp->cmd_type = ISCSI_CMD_TYPE_SCSI;
/* add the report lun addressing type on to the lun */
icmdp->cmd_un.scsi.lun = ilp->lun_addr_type << 14;
@@ -1091,6 +1093,7 @@ iscsi_tran_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
ASSERT(icmdp->cmd_sig == ISCSI_SIG_CMD);
ASSERT(icmdp->cmd_state == ISCSI_CMD_STATE_FREE);
+ iscsi_lun_rele(icmdp->cmd_lun);
mutex_destroy(&icmdp->cmd_mutex);
cv_destroy(&icmdp->cmd_completion);
scsi_hba_pkt_free(ap, pkt);
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
index 62b6487866..67e860b550 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
@@ -22,7 +22,7 @@
/*
* Copyright 2000 by Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014-2015 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _ISCSI_H
@@ -549,6 +549,8 @@ typedef struct iscsi_lun {
uchar_t lun_pid[ISCSI_INQ_PID_BUF_LEN]; /* Product ID */
uchar_t lun_type;
+ kmutex_t lun_mutex;
+ int lun_refcnt;
} iscsi_lun_t;
#define ISCSI_LUN_STATE_CLEAR 0 /* used to clear all states */
@@ -1328,8 +1330,9 @@ void iscsi_conn_update_state_locked(iscsi_conn_t *icp,
/* iscsi_lun.c */
iscsi_status_t iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num,
uint8_t lun_addr_type, struct scsi_inquiry *inq, char *guid);
-iscsi_status_t iscsi_lun_destroy(iscsi_hba_t *ihp,
- iscsi_lun_t *ilp);
+void iscsi_lun_hold(iscsi_lun_t *ilp);
+void iscsi_lun_rele(iscsi_lun_t *ilp);
+iscsi_status_t iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp);
void iscsi_lun_online(iscsi_hba_t *ihp,
iscsi_lun_t *ilp);
iscsi_status_t iscsi_lun_offline(iscsi_hba_t *ihp,
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c
index 3d9f59b9e2..287f2e35d2 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c
@@ -22,6 +22,7 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* iSCSI command interfaces
*/
@@ -796,8 +797,8 @@ iscsi_cmd_state_active(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg)
*/
ISCSI_CMD_SET_REASON_STAT(
t_icmdp, CMD_TIMEOUT, STAT_ABORTED);
- idm_task_abort(icp->conn_ic, t_icmdp->cmd_itp,
- AT_TASK_MGMT_ABORT);
+ (void) idm_task_abort(icp->conn_ic,
+ t_icmdp->cmd_itp, AT_TASK_MGMT_ABORT);
} else {
cv_broadcast(&t_icmdp->cmd_completion);
}
@@ -942,7 +943,7 @@ iscsi_cmd_state_active(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg)
*/
ISCSI_CMD_SET_REASON_STAT(t_icmdp,
CMD_TIMEOUT, STAT_TIMEOUT);
- idm_task_abort(icp->conn_ic,
+ (void) idm_task_abort(icp->conn_ic,
t_icmdp->cmd_itp,
AT_TASK_MGMT_ABORT);
} else {
@@ -1027,7 +1028,7 @@ iscsi_cmd_state_active(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg)
mutex_exit(&icp->conn_queue_idm_aborting.mutex);
ISCSI_CMD_SET_REASON_STAT(icmdp,
CMD_TRAN_ERR, icmdp->cmd_un.scsi.pkt_stat);
- idm_task_abort(icp->conn_ic, icmdp->cmd_itp,
+ (void) idm_task_abort(icp->conn_ic, icmdp->cmd_itp,
AT_TASK_MGMT_ABORT);
break;
@@ -1208,7 +1209,7 @@ iscsi_cmd_state_aborting(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg)
ISCSI_CMD_SET_REASON_STAT(icmdp,
CMD_TRAN_ERR, icmdp->cmd_un.scsi.pkt_stat);
- idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp,
+ (void) idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp,
AT_TASK_MGMT_ABORT);
break;
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c
index 365ca58528..354c343a7c 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c
@@ -21,7 +21,7 @@
/*
* Copyright 2000 by Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2011, 2015 Nexenta Systems, Inc. All rights reserved.
*
* iSCSI Pseudo HBA Driver
*/
@@ -3511,11 +3511,14 @@ iscsi_timeout_checks(iscsi_sess_t *isp)
icp = isp->sess_conn_list;
while (icp != NULL) {
- if (icp->conn_timeout == B_TRUE) {
+ mutex_enter(&icp->conn_state_mutex);
+ if ((icp->conn_timeout == B_TRUE) &&
+ (icp->conn_state_idm_connected == B_TRUE)) {
/* timeout on this connect detected */
idm_ini_conn_disconnect(icp->conn_ic);
icp->conn_timeout = B_FALSE;
}
+ mutex_exit(&icp->conn_state_mutex);
icp = icp->conn_next;
}
rw_exit(&isp->sess_conn_list_rwlock);
diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c
index b5e8b17fb2..2e707d9fcc 100644
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c
+++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c
@@ -21,12 +21,17 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- *
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc.
+ */
+
+/*
* iSCSI logical unit interfaces
*/
#include "iscsi.h"
-#include <sys/fs/dv_node.h> /* devfs_clean */
#include <sys/bootprops.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
@@ -123,6 +128,11 @@ iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type,
ilp->lun_addr = addr;
ilp->lun_type = inq->inq_dtype & DTYPE_MASK;
ilp->lun_oid = oid_tmp;
+ /*
+ * Setting refcnt to 1 is the first hold for the LUN structure.
+ */
+ ilp->lun_refcnt = 1;
+ mutex_init(&ilp->lun_mutex, NULL, MUTEX_DRIVER, NULL);
bcopy(inq->inq_vid, ilp->lun_vid, sizeof (inq->inq_vid));
bcopy(inq->inq_pid, ilp->lun_pid, sizeof (inq->inq_pid));
@@ -189,6 +199,7 @@ iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type,
kmem_free(ilp->lun_guid, ilp->lun_guid_size);
ilp->lun_guid = NULL;
}
+ mutex_destroy(&ilp->lun_mutex);
kmem_free(ilp, sizeof (iscsi_lun_t));
} else {
ilp->lun_state &= ISCSI_LUN_STATE_CLEAR;
@@ -215,6 +226,81 @@ iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type,
return (rtn);
}
+void
+iscsi_lun_hold(iscsi_lun_t *ilp)
+{
+ mutex_enter(&ilp->lun_mutex);
+ /*
+ * By design, lun_refcnt should never be zero when this routine
+ * is called. When the LUN is created, the refcnt is set to 1.
+ * If iscsi_lun_rele is called and the refcnt goes to zero, the
+ * structure will be freed, so this method must not be called
+ * afterwards.
+ */
+ ASSERT(ilp->lun_refcnt > 0);
+ ilp->lun_refcnt++;
+ mutex_exit(&ilp->lun_mutex);
+}
+
+void
+iscsi_lun_rele(iscsi_lun_t *ilp)
+{
+ ASSERT(ilp != NULL);
+
+ mutex_enter(&ilp->lun_mutex);
+ ASSERT(ilp->lun_refcnt > 0);
+ if (--ilp->lun_refcnt == 0) {
+ iscsi_sess_t *isp;
+
+ isp = ilp->lun_sess;
+ ASSERT(isp != NULL);
+
+ /* ---- release its memory ---- */
+ kmem_free(ilp->lun_addr, (strlen((char *)isp->sess_name) +
+ ADDR_EXT_SIZE + 1));
+
+ if (ilp->lun_guid != NULL) {
+ kmem_free(ilp->lun_guid, ilp->lun_guid_size);
+ }
+ mutex_destroy(&ilp->lun_mutex);
+ kmem_free(ilp, sizeof (iscsi_lun_t));
+ } else {
+ mutex_exit(&ilp->lun_mutex);
+ }
+}
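The hold/rele pair brackets each command's use of the LUN, matching the iscsi.c hunks earlier in this change where iscsi_tran_init_pkt() takes the hold and iscsi_tran_destroy_pkt() drops it. A minimal sketch of the pairing (hypothetical wrapper names):

/* Hypothetical wrappers mirroring the iscsi_tran_*_pkt() call sites. */
static void
example_cmd_bind_lun(iscsi_cmd_t *icmdp, iscsi_lun_t *ilp)
{
	icmdp->cmd_lun = ilp;
	iscsi_lun_hold(ilp);		/* command now pins the LUN */
}

static void
example_cmd_unbind_lun(iscsi_cmd_t *icmdp)
{
	iscsi_lun_rele(icmdp->cmd_lun);	/* last rele frees the LUN */
}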
+
+/*
+ * iscsi_lun_cmd_cancel -- as the name implies, cancel all commands for the lun
+ *
+ * This code is similar to the timeout function with a lot less checking of
+ * state before sending the ABORT event for commands on the pending queue.
+ *
+ * This function is only used by iscsi_lun_destroy().
+ */
+static void
+iscsi_lun_cmd_cancel(iscsi_lun_t *ilp)
+{
+ iscsi_sess_t *isp;
+ iscsi_cmd_t *icmdp, *nicmdp;
+
+ isp = ilp->lun_sess;
+ rw_enter(&isp->sess_state_rwlock, RW_READER);
+ mutex_enter(&isp->sess_queue_pending.mutex);
+ for (icmdp = isp->sess_queue_pending.head;
+ icmdp; icmdp = nicmdp) {
+ nicmdp = icmdp->cmd_next;
+
+ /*
+ * For commands on the pending queue, we can go straight
+ * to an abort request, which will free the command
+ * and call back to the complete function.
+ */
+ iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E4, isp);
+ }
+ mutex_exit(&isp->sess_queue_pending.mutex);
+ rw_exit(&isp->sess_state_rwlock);
+}
+
/*
* iscsi_lun_destroy - offline and remove lun
*
@@ -240,6 +326,9 @@ iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp)
isp = ilp->lun_sess;
ASSERT(isp != NULL);
+ /* flush all outstanding commands first */
+ iscsi_lun_cmd_cancel(ilp);
+
/* attempt to offline and free solaris node */
status = iscsi_lun_offline(ihp, ilp, B_TRUE);
@@ -269,16 +358,7 @@ iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp)
}
}
- /* release its memory */
- kmem_free(ilp->lun_addr, (strlen((char *)isp->sess_name) +
- ADDR_EXT_SIZE + 1));
- ilp->lun_addr = NULL;
- if (ilp->lun_guid != NULL) {
- kmem_free(ilp->lun_guid, ilp->lun_guid_size);
- ilp->lun_guid = NULL;
- }
- kmem_free(ilp, sizeof (iscsi_lun_t));
- ilp = NULL;
+ iscsi_lun_rele(ilp);
}
return (status);
@@ -641,56 +721,18 @@ iscsi_lun_offline(iscsi_hba_t *ihp, iscsi_lun_t *ilp, boolean_t lun_free)
{
iscsi_status_t status = ISCSI_STATUS_SUCCESS;
int circ = 0;
- dev_info_t *cdip, *pdip;
- char *devname = NULL;
+ dev_info_t *cdip;
char *pathname = NULL;
- int rval;
boolean_t offline = B_FALSE;
nvlist_t *attr_list = NULL;
ASSERT(ilp != NULL);
ASSERT((ilp->lun_pip != NULL) || (ilp->lun_dip != NULL));
- /*
- * Since we carry the logical units parent
- * lock across the offline call it will not
- * issue devfs_clean() and may fail with a
- * devi_ref count > 0.
- */
- if (ilp->lun_pip == NULL) {
+ if (ilp->lun_pip == NULL)
cdip = ilp->lun_dip;
- } else {
+ else
cdip = mdi_pi_get_client(ilp->lun_pip);
- }
-
- if ((cdip != NULL) &&
- (lun_free == B_TRUE) &&
- (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) {
- /*
- * Make sure node is attached otherwise
- * it won't have related cache nodes to
- * clean up. i_ddi_devi_attached is
- * similiar to i_ddi_node_state(cdip) >=
- * DS_ATTACHED. We should clean up only
- * when lun_free is set.
- */
- if (i_ddi_devi_attached(cdip)) {
-
- /* Get parent dip */
- pdip = ddi_get_parent(cdip);
-
- /* Get full devname */
- devname = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
- ndi_devi_enter(pdip, &circ);
- (void) ddi_deviname(cdip, devname);
- /* Release lock before devfs_clean() */
- ndi_devi_exit(pdip, circ);
-
- /* Clean cache */
- (void) devfs_clean(pdip, devname + 1, DV_CLEAN_FORCE);
- kmem_free(devname, MAXNAMELEN + 1);
- }
- }
if (cdip != NULL && ilp->lun_type == DTYPE_DIRECT) {
pathname = kmem_zalloc(MAXNAMELEN + 1, KM_SLEEP);
@@ -699,18 +741,9 @@ iscsi_lun_offline(iscsi_hba_t *ihp, iscsi_lun_t *ilp, boolean_t lun_free)
/* Attempt to offline the logical units */
if (ilp->lun_pip != NULL) {
-
/* virt/mdi */
ndi_devi_enter(scsi_vhci_dip, &circ);
- if ((lun_free == B_TRUE) &&
- (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) {
- rval = mdi_pi_offline(ilp->lun_pip,
- NDI_DEVI_REMOVE);
- } else {
- rval = mdi_pi_offline(ilp->lun_pip, 0);
- }
-
- if (rval == MDI_SUCCESS) {
+ if (mdi_pi_offline(ilp->lun_pip, 0) == MDI_SUCCESS) {
ilp->lun_state &= ISCSI_LUN_STATE_CLEAR;
ilp->lun_state |= ISCSI_LUN_STATE_OFFLINE;
if (lun_free == B_TRUE) {
@@ -728,18 +761,14 @@ iscsi_lun_offline(iscsi_hba_t *ihp, iscsi_lun_t *ilp, boolean_t lun_free)
ndi_devi_exit(scsi_vhci_dip, circ);
} else {
-
/* phys/ndi */
+ int flags = NDI_DEVFS_CLEAN;
+
ndi_devi_enter(ihp->hba_dip, &circ);
- if ((lun_free == B_TRUE) &&
- (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) {
- rval = ndi_devi_offline(
- ilp->lun_dip, NDI_DEVI_REMOVE);
- } else {
- rval = ndi_devi_offline(
- ilp->lun_dip, 0);
- }
- if (rval != NDI_SUCCESS) {
+ if (lun_free == B_TRUE &&
+ (ilp->lun_state & ISCSI_LUN_STATE_ONLINE))
+ flags |= NDI_DEVI_REMOVE;
+ if (ndi_devi_offline(ilp->lun_dip, flags) != NDI_SUCCESS) {
status = ISCSI_STATUS_BUSY;
if (lun_free == B_FALSE) {
ilp->lun_state |= ISCSI_LUN_STATE_INVALID;
diff --git a/usr/src/uts/common/netsmb/smb_dev.h b/usr/src/uts/common/netsmb/smb_dev.h
index 817d214b3e..ceb22371b7 100644
--- a/usr/src/uts/common/netsmb/smb_dev.h
+++ b/usr/src/uts/common/netsmb/smb_dev.h
@@ -126,7 +126,7 @@
#define SMB2_DIALECT_0302 0x0302
/* Maximum supported dialect (for ssn_maxver) */
-#define SMB2_DIALECT_MAX SMB2_DIALECT_0210
+#define SMB2_DIALECT_MAX SMB2_DIALECT_0302
/*
* Option flags in smbioc_oshare.ioc_opt
diff --git a/usr/src/uts/common/sys/idm/idm.h b/usr/src/uts/common/sys/idm/idm.h
index 0be365e42e..875a3fce4a 100644
--- a/usr/src/uts/common/sys/idm/idm.h
+++ b/usr/src/uts/common/sys/idm/idm.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,6 +32,8 @@
extern "C" {
#endif
+#include <sys/stmf_defines.h>
+
typedef enum {
IDM_STATUS_SUCCESS = 0,
IDM_STATUS_FAIL,
@@ -45,31 +48,29 @@ typedef enum {
IDM_STATUS_LOGIN_FAIL
} idm_status_t;
+#define IDM_CLIENT_NOTIFY_LIST() \
+ item(CN_UNDEFINED) \
+ item(CN_CONNECT_ACCEPT) /* Target only */ \
+ item(CN_LOGIN_FAIL) \
+ item(CN_READY_FOR_LOGIN) /* Initiator only */ \
+ item(CN_FFP_ENABLED) \
+ item(CN_FFP_DISABLED) \
+ item(CN_CONNECT_LOST) \
+ item(CN_CONNECT_DESTROY) \
+ item(CN_CONNECT_FAIL) \
+ item(CN_MAX)
typedef enum {
- CN_CONNECT_ACCEPT = 1, /* Target only */
- CN_LOGIN_FAIL,
- CN_READY_FOR_LOGIN, /* Initiator only */
- CN_FFP_ENABLED,
- CN_FFP_DISABLED,
- CN_CONNECT_LOST,
- CN_CONNECT_DESTROY,
- CN_CONNECT_FAIL,
- CN_MAX
+#define item(a) a,
+ IDM_CLIENT_NOTIFY_LIST()
+#undef item
} idm_client_notify_t;
#ifdef IDM_CN_NOTIFY_STRINGS
static const char *idm_cn_strings[CN_MAX + 1] = {
- "CN_UNDEFINED",
- "CN_CONNECT_ACCEPT",
- "CN_LOGIN_FAIL",
- "CN_READY_FOR_LOGIN",
- "CN_FFP_ENABLED",
- "CN_FFP_DISABLED",
- "CN_CONNECT_LOST",
- "CN_CONNECT_DESTROY",
- "CN_CONNECT_FAIL",
- "CN_MAX"
+#define item(a) #a,
+ IDM_CLIENT_NOTIFY_LIST()
+#undef item
};
#endif
@@ -85,27 +86,27 @@ typedef enum {
AT_TASK_MGMT_ABORT
} idm_abort_type_t;
+#define IDM_TASK_STATE_LIST() \
+ item(TASK_IDLE) \
+ item(TASK_ACTIVE) \
+ item(TASK_SUSPENDING) \
+ item(TASK_SUSPENDED) \
+ item(TASK_ABORTING) \
+ item(TASK_ABORTED) \
+ item(TASK_COMPLETE) \
+ item(TASK_MAX_STATE)
+
typedef enum {
- TASK_IDLE,
- TASK_ACTIVE,
- TASK_SUSPENDING,
- TASK_SUSPENDED,
- TASK_ABORTING,
- TASK_ABORTED,
- TASK_COMPLETE,
- TASK_MAX_STATE
+#define item(a) a,
+ IDM_TASK_STATE_LIST()
+#undef item
} idm_task_state_t;
#ifdef IDM_TASK_SM_STRINGS
static const char *idm_ts_name[TASK_MAX_STATE+1] = {
- "TASK_IDLE",
- "TASK_ACTIVE",
- "TASK_SUSPENDING",
- "TASK_SUSPENDED",
- "TASK_ABORTING",
- "TASK_ABORTED",
- "TASK_COMPLETE",
- "TASK_MAX_STATE"
+#define item(a) #a,
+ IDM_TASK_STATE_LIST()
+#undef item
};
#endif
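The item() construct used above is the classic X-macro pattern: a single list expands once into the enum and once into the matching string table, so the two cannot drift out of sync. A self-contained sketch with a hypothetical COLOR list:

#define COLOR_LIST() \
	item(COLOR_RED) \
	item(COLOR_GREEN) \
	item(COLOR_MAX)

typedef enum {
#define item(a) a,
	COLOR_LIST()
#undef item
} color_t;

static const char *color_name[COLOR_MAX + 1] = {
#define item(a) #a,
	COLOR_LIST()
#undef item
};
/* color_name[COLOR_GREEN] == "COLOR_GREEN"; new items update both places. */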
@@ -422,12 +423,12 @@ idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type);
extern boolean_t idm_pattern_checking;
-#define IDM_BUFPAT_SET(CHK_BUF) \
+#define IDM_BUFPAT_SET(CHK_BUF) \
if (idm_pattern_checking && (CHK_BUF)->idb_bufalloc) { \
idm_bufpat_set(CHK_BUF); \
}
-#define IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE) \
+#define IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE) \
if (idm_pattern_checking) { \
(void) idm_bufpat_check(CHK_BUF, CHK_LEN, CHK_TYPE); \
}
@@ -441,7 +442,7 @@ idm_task_alloc(idm_conn_t *ic);
void
idm_task_start(idm_task_t *idt, uintptr_t handle);
-void
+stmf_status_t
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type);
void
@@ -524,6 +525,8 @@ idm_refcnt_wait_ref(idm_refcnt_t *refcnt);
void
idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func);
+int
+idm_refcnt_is_held(idm_refcnt_t *refcnt);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/idm/idm_conn_sm.h b/usr/src/uts/common/sys/idm/idm_conn_sm.h
index 38a7526368..8b9056fb56 100644
--- a/usr/src/uts/common/sys/idm/idm_conn_sm.h
+++ b/usr/src/uts/common/sys/idm/idm_conn_sm.h
@@ -65,158 +65,106 @@ extern "C" {
#define IDM_LOGOUT_SECONDS 20
#define IDM_CLEANUP_SECONDS 0
+#define IDM_CONN_EVENT_LIST() \
+ item(CE_UNDEFINED) \
+ /* Initiator events */ \
+ item(CE_CONNECT_REQ) \
+ item(CE_CONNECT_FAIL) \
+ item(CE_CONNECT_SUCCESS) \
+ item(CE_LOGIN_SND) \
+ item(CE_LOGIN_SUCCESS_RCV) \
+ item(CE_LOGIN_FAIL_RCV) \
+ item(CE_LOGOUT_THIS_CONN_SND) \
+ item(CE_LOGOUT_OTHER_CONN_SND) \
+ item(CE_LOGOUT_SESSION_SND) \
+ item(CE_LOGOUT_SUCCESS_RCV) \
+ item(CE_LOGOUT_FAIL_RCV) \
+ item(CE_ASYNC_LOGOUT_RCV) \
+ item(CE_ASYNC_DROP_CONN_RCV) \
+ item(CE_ASYNC_DROP_ALL_CONN_RCV) \
+ /* Target events */ \
+ item(CE_CONNECT_ACCEPT) \
+ item(CE_CONNECT_REJECT) \
+ item(CE_LOGIN_RCV) \
+ item(CE_LOGIN_TIMEOUT) \
+ item(CE_LOGIN_SUCCESS_SND) \
+ item(CE_LOGIN_FAIL_SND) \
+ item(CE_LOGIN_FAIL_SND_DONE) \
+ item(CE_LOGOUT_THIS_CONN_RCV) \
+ item(CE_LOGOUT_OTHER_CONN_RCV) \
+ item(CE_LOGOUT_SESSION_RCV) \
+ item(CE_LOGOUT_SUCCESS_SND) \
+ item(CE_LOGOUT_SUCCESS_SND_DONE) \
+ item(CE_LOGOUT_FAIL_SND) \
+ item(CE_LOGOUT_FAIL_SND_DONE) \
+ item(CE_CLEANUP_TIMEOUT) \
+ item(CE_ASYNC_LOGOUT_SND) \
+ item(CE_ASYNC_DROP_CONN_SND) \
+ item(CE_ASYNC_DROP_ALL_CONN_SND) \
+ item(CE_LOGOUT_TIMEOUT) \
+ /* Common events */ \
+ item(CE_TRANSPORT_FAIL) \
+ item(CE_MISC_TX) \
+ item(CE_TX_PROTOCOL_ERROR) \
+ item(CE_MISC_RX) \
+ item(CE_RX_PROTOCOL_ERROR) \
+ item(CE_LOGOUT_SESSION_SUCCESS) \
+ item(CE_CONN_REINSTATE) \
+ item(CE_CONN_REINSTATE_SUCCESS) \
+ item(CE_CONN_REINSTATE_FAIL) \
+ item(CE_ENABLE_DM_SUCCESS) \
+ item(CE_ENABLE_DM_FAIL) \
+ /* Add new events above CE_MAX_EVENT */ \
+ item(CE_MAX_EVENT)
+
/* Update idm_ce_name table whenever connection events are modified */
typedef enum {
- CE_UNDEFINED = 0,
-
- /* Initiator events */
- CE_CONNECT_REQ,
- CE_CONNECT_FAIL,
- CE_CONNECT_SUCCESS,
- CE_LOGIN_SND,
- CE_LOGIN_SUCCESS_RCV,
- CE_LOGIN_FAIL_RCV,
- CE_LOGOUT_THIS_CONN_SND,
- CE_LOGOUT_OTHER_CONN_SND,
- CE_LOGOUT_SESSION_SND,
- CE_LOGOUT_SUCCESS_RCV,
- CE_LOGOUT_FAIL_RCV,
- CE_ASYNC_LOGOUT_RCV,
- CE_ASYNC_DROP_CONN_RCV,
- CE_ASYNC_DROP_ALL_CONN_RCV,
-
- /* Target events */
- CE_CONNECT_ACCEPT,
- CE_CONNECT_REJECT,
- CE_LOGIN_RCV,
- CE_LOGIN_TIMEOUT,
- CE_LOGIN_SUCCESS_SND,
- CE_LOGIN_FAIL_SND,
- CE_LOGIN_FAIL_SND_DONE,
- CE_LOGOUT_THIS_CONN_RCV,
- CE_LOGOUT_OTHER_CONN_RCV,
- CE_LOGOUT_SESSION_RCV,
- CE_LOGOUT_SUCCESS_SND,
- CE_LOGOUT_SUCCESS_SND_DONE,
- CE_LOGOUT_FAIL_SND,
- CE_LOGOUT_FAIL_SND_DONE,
- CE_CLEANUP_TIMEOUT,
- CE_ASYNC_LOGOUT_SND,
- CE_ASYNC_DROP_CONN_SND,
- CE_ASYNC_DROP_ALL_CONN_SND,
- CE_LOGOUT_TIMEOUT,
-
- /* Common events */
- CE_TRANSPORT_FAIL,
- CE_MISC_TX,
- CE_TX_PROTOCOL_ERROR,
- CE_MISC_RX,
- CE_RX_PROTOCOL_ERROR,
- CE_LOGOUT_SESSION_SUCCESS,
- CE_CONN_REINSTATE,
- CE_CONN_REINSTATE_SUCCESS,
- CE_CONN_REINSTATE_FAIL,
- CE_ENABLE_DM_SUCCESS,
- CE_ENABLE_DM_FAIL,
-
- /* Add new events above CE_MAX_EVENT */
- CE_MAX_EVENT
+#define item(a) a,
+ IDM_CONN_EVENT_LIST()
+#undef item
} idm_conn_event_t;
#ifdef IDM_CONN_SM_STRINGS
/* An array of event text values, for use in logging events */
static const char *idm_ce_name[CE_MAX_EVENT+1] = {
- "CE_UNDEFINED",
- "CE_CONNECT_REQ",
- "CE_CONNECT_FAIL",
- "CE_CONNECT_SUCCESS",
- "CE_LOGIN_SND",
- "CE_LOGIN_SUCCESS_RCV",
- "CE_LOGIN_FAIL_RCV",
- "CE_LOGOUT_THIS_CONN_SND",
- "CE_LOGOUT_OTHER_CONN_SND",
- "CE_LOGOUT_SESSION_SND",
- "CE_LOGOUT_SUCCESS_RCV",
- "CE_LOGOUT_FAIL_RCV",
- "CE_ASYNC_LOGOUT_RCV",
- "CE_ASYNC_DROP_CONN_RCV",
- "CE_ASYNC_DROP_ALL_CONN_RCV",
- "CE_CONNECT_ACCEPT",
- "CE_CONNECT_REJECT",
- "CE_LOGIN_RCV",
- "CE_LOGIN_TIMEOUT",
- "CE_LOGIN_SUCCESS_SND",
- "CE_LOGIN_FAIL_SND",
- "CE_LOGIN_FAIL_SND_DONE",
- "CE_LOGOUT_THIS_CONN_RCV",
- "CE_LOGOUT_OTHER_CONN_RCV",
- "CE_LOGOUT_SESSION_RCV",
- "CE_LOGOUT_SUCCESS_SND",
- "CE_LOGOUT_SUCCESS_SND_DONE",
- "CE_LOGOUT_FAIL_SND",
- "CE_LOGOUT_FAIL_SND_DONE",
- "CE_CLEANUP_TIMEOUT",
- "CE_ASYNC_LOGOUT_SND",
- "CE_ASYNC_DROP_CONN_SND",
- "CE_ASYNC_DROP_ALL_CONN_SND",
- "CE_LOGOUT_TIMEOUT",
- "CE_TRANSPORT_FAIL",
- "CE_MISC_TX",
- "CE_TX_PROTOCOL_ERROR",
- "CE_MISC_RX",
- "CE_RX_PROTOCOL_ERROR",
- "CE_LOGOUT_SESSION_SUCCESS",
- "CE_CONN_REINSTATE",
- "CE_CONN_REINSTATE_SUCCESS",
- "CE_CONN_REINSTATE_FAIL",
- "CE_ENABLE_DM_SUCCESS",
- "CE_ENABLE_DM_FAIL",
- "CE_MAX_EVENT"
+#define item(a) #a,
+ IDM_CONN_EVENT_LIST()
+#undef item
};
#endif
+#define CONN_STATE_LIST() \
+ item(CS_S0_UNDEFINED) \
+ item(CS_S1_FREE) \
+ item(CS_S2_XPT_WAIT) \
+ item(CS_S3_XPT_UP) \
+ item(CS_S4_IN_LOGIN) \
+ item(CS_S5_LOGGED_IN) \
+ item(CS_S6_IN_LOGOUT) \
+ item(CS_S7_LOGOUT_REQ) \
+ item(CS_S8_CLEANUP) \
+ item(CS_S9_INIT_ERROR) \
+ item(CS_S10_IN_CLEANUP) \
+ item(CS_S11_COMPLETE) \
+ item(CS_S12_ENABLE_DM) \
+ item(CS_S9A_REJECTED) \
+ item(CS_S9B_WAIT_SND_DONE) \
+ /* Add new connection states above CS_MAX_STATE */ \
+ item(CS_MAX_STATE)
+
/* Update idm_cs_name table whenever connection states are modified */
typedef enum {
- CS_S0_UNDEFINED = 0,
-
- CS_S1_FREE,
- CS_S2_XPT_WAIT,
- CS_S3_XPT_UP,
- CS_S4_IN_LOGIN,
- CS_S5_LOGGED_IN,
- CS_S6_IN_LOGOUT,
- CS_S7_LOGOUT_REQ,
- CS_S8_CLEANUP,
- CS_S9_INIT_ERROR,
- CS_S10_IN_CLEANUP,
- CS_S11_COMPLETE,
- CS_S12_ENABLE_DM,
- CS_S9A_REJECTED,
- CS_S9B_WAIT_SND_DONE,
-
- /* Add new connection states above CS_MAX_STATE */
- CS_MAX_STATE
+#define item(a) a,
+ CONN_STATE_LIST()
+#undef item
} idm_conn_state_t;
#ifdef IDM_CONN_SM_STRINGS
/* An array of state text values, for use in logging state transitions */
static const char *idm_cs_name[CS_MAX_STATE+1] = {
- "CS_S0_UNDEFINED",
- "CS_S1_FREE",
- "CS_S2_XPT_WAIT",
- "CS_S3_XPT_UP",
- "CS_S4_IN_LOGIN",
- "CS_S5_LOGGED_IN",
- "CS_S6_IN_LOGOUT",
- "CS_S7_LOGOUT_REQ",
- "CS_S8_CLEANUP",
- "CS_S9_INIT_ERROR",
- "CS_S10_IN_CLEANUP",
- "CS_S11_COMPLETE",
- "CS_S12_ENABLE_DM",
- "CS_S9A_REJECTED",
- "CS_S9B_WAIT_SND_DONE",
- "CS_MAX_STATE"
+#define item(a) #a,
+ CONN_STATE_LIST()
+#undef item
};
#endif
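With this change idm_ce_name and idm_cs_name (and idm_ts_name in idm.h above) are generated from the same X-macro lists as the enums they describe, so the two can no longer drift apart. The technique in isolation, as a standalone sketch (toy COLOR_LIST, not driver code):

#include <stdio.h>

#define	COLOR_LIST() \
	item(COLOR_RED) \
	item(COLOR_GREEN) \
	item(COLOR_MAX)

typedef enum {
#define	item(a) a,		/* expands to: COLOR_RED, COLOR_GREEN, ... */
	COLOR_LIST()
#undef	item
} color_t;

static const char *color_name[COLOR_MAX + 1] = {
#define	item(a) #a,	/* expands to: "COLOR_RED", "COLOR_GREEN", ... */
	COLOR_LIST()
#undef	item
};

int
main(void)
{
	(void) printf("%d -> %s\n", COLOR_GREEN, color_name[COLOR_GREEN]);
	return (0);
}

Adding an entry to the list now updates the enum and the name table in one place.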
diff --git a/usr/src/uts/common/sys/idm/idm_impl.h b/usr/src/uts/common/sys/idm/idm_impl.h
index 87d3674ad1..346611719d 100644
--- a/usr/src/uts/common/sys/idm/idm_impl.h
+++ b/usr/src/uts/common/sys/idm/idm_impl.h
@@ -22,7 +22,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014-2015 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _IDM_IMPL_H_
@@ -155,7 +155,7 @@ typedef struct idm_conn_s {
void *ic_handle;
idm_refcnt_t ic_refcnt;
idm_svc_t *ic_svc_binding; /* Target conn. only */
- idm_sockaddr_t ic_ini_dst_addr;
+ idm_sockaddr_t ic_ini_dst_addr;
struct sockaddr_storage ic_laddr; /* conn local address */
struct sockaddr_storage ic_raddr; /* conn remote address */
@@ -321,7 +321,7 @@ typedef enum {
BP_CHECK_ASSERT
} idm_bufpat_check_type_t;
-#define BUFPAT_MATCH(bc_bufpat, bc_idb) \
+#define BUFPAT_MATCH(bc_bufpat, bc_idb) \
((bufpat->bufpat_idb == bc_idb) && \
(bufpat->bufpat_bufmagic == IDM_BUF_MAGIC))
@@ -409,9 +409,19 @@ typedef struct {
#define OSD_EXT_CDB_AHSLEN (200 - 15)
#define BIDI_AHS_LENGTH 5
+/*
+ * Additional Header Segment (AHS)
+ * An AHS is only valid for SCSI requests and carries SCSI CDB information
+ * that does not fit in the standard 16-byte CDB area of the PDU; in
+ * practice this applies mainly to OSD device commands.
+ *
+ * IDM_SORX_CACHE_AHSLEN is the amount of memory preallocated, in bytes.
+ * When used in the header, the AHS length is stored as a count of 4-byte
+ * words, so IDM_SORX_WIRE_AHSLEN is IDM_SORX_CACHE_AHSLEN expressed in words.
+ */
#define IDM_SORX_CACHE_AHSLEN \
- (((OSD_EXT_CDB_AHSLEN + 3) + \
- (BIDI_AHS_LENGTH + 3)) / sizeof (uint32_t))
+ ((OSD_EXT_CDB_AHSLEN + 3) + (BIDI_AHS_LENGTH + 3))
+#define IDM_SORX_WIRE_AHSLEN (IDM_SORX_CACHE_AHSLEN / sizeof (uint32_t))
#define IDM_SORX_CACHE_HDRLEN (sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN)
/*
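The old IDM_SORX_CACHE_AHSLEN divided the byte total by sizeof (uint32_t), yielding a word count (49) where the preallocation sizing in IDM_SORX_CACHE_HDRLEN evidently wanted bytes; the split above keeps bytes and words as separate macros. The arithmetic, checked at compile time in a standalone sketch mirroring the definitions (C11 static_assert):

#include <assert.h>
#include <stdint.h>

/* Mirrors of the definitions above, for illustration only. */
#define	OSD_EXT_CDB_AHSLEN	(200 - 15)
#define	BIDI_AHS_LENGTH		5
#define	IDM_SORX_CACHE_AHSLEN \
	((OSD_EXT_CDB_AHSLEN + 3) + (BIDI_AHS_LENGTH + 3))
#define	IDM_SORX_WIRE_AHSLEN	(IDM_SORX_CACHE_AHSLEN / sizeof (uint32_t))

static_assert(IDM_SORX_CACHE_AHSLEN == 196, "AHS cache length, in bytes");
static_assert(IDM_SORX_WIRE_AHSLEN == 49, "AHS wire length, in 4-byte words");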
diff --git a/usr/src/uts/common/sys/lpif.h b/usr/src/uts/common/sys/lpif.h
index 7b04a22f7b..168916f02e 100644
--- a/usr/src/uts/common/sys/lpif.h
+++ b/usr/src/uts/common/sys/lpif.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _LPIF_H
#define _LPIF_H
@@ -76,6 +77,7 @@ typedef struct stmf_lu {
uint32_t lu_proxy_reg_arg_len;
void (*lu_dbuf_free)(struct scsi_task *task,
struct stmf_data_buf *dbuf);
+ void (*lu_task_done)(struct scsi_task *task);
} stmf_lu_t;
/*
@@ -84,6 +86,7 @@ typedef struct stmf_lu {
#define STMF_LU_ABORT_TASK 1
#define STMF_LU_RESET_STATE 2
#define STMF_LU_ITL_HANDLE_REMOVED 3
+#define STMF_LU_SET_ABORT 4
/*
* Asymmetric access state
@@ -132,6 +135,9 @@ stmf_status_t stmf_deregister_lu(stmf_lu_t *lup);
stmf_status_t stmf_set_lu_access(stmf_lu_t *lup, uint8_t access_state);
stmf_status_t stmf_proxy_scsi_cmd(scsi_task_t *, stmf_data_buf_t *dbuf);
int stmf_is_standby_port(scsi_task_t *);
+void stmf_lu_xfer_done(struct scsi_task *task, boolean_t read,
+ hrtime_t elapsed_time);
+boolean_t stmf_is_pgr_aptpl_always(void);
#ifdef __cplusplus
}
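lu_task_done gives an LU provider a per-task completion hook alongside the existing lu_dbuf_free, and STMF_LU_SET_ABORT adds a new operation code next to STMF_LU_ABORT_TASK. A minimal sketch of a provider wiring up the new hook (provider names are hypothetical):

/* Hypothetical LU provider using the new lu_task_done field. */
static void
my_lu_task_done(struct scsi_task *task)
{
	/* Per-task accounting or cleanup once STMF is done with it. */
}

static void
my_lu_setup(stmf_lu_t *lu)
{
	lu->lu_task_done = my_lu_task_done;	/* new in this change */
}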
diff --git a/usr/src/uts/common/sys/portif.h b/usr/src/uts/common/sys/portif.h
index 861a983315..792abd94b7 100644
--- a/usr/src/uts/common/sys/portif.h
+++ b/usr/src/uts/common/sys/portif.h
@@ -138,6 +138,10 @@ stmf_status_t stmf_register_local_port(stmf_local_port_t *lportp);
stmf_status_t stmf_deregister_local_port(stmf_local_port_t *lport);
stmf_status_t stmf_register_scsi_session(stmf_local_port_t *lport,
stmf_scsi_session_t *ss);
+stmf_status_t stmf_add_rport_info(stmf_scsi_session_t *ss,
+ const char *prop_name, const char *prop_value);
+void stmf_remove_rport_info(stmf_scsi_session_t *ss,
+ const char *prop_name);
void stmf_deregister_scsi_session(stmf_local_port_t *lport,
stmf_scsi_session_t *ss);
void stmf_set_port_standby(stmf_local_port_t *lport, uint16_t rtpid);
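stmf_add_rport_info()/stmf_remove_rport_info() let a port provider attach name/value properties to a session's remote port. A minimal usage sketch under the prototypes above (the property name and value are invented):

/* Hypothetical: publish a remote-port property when a session appears. */
static void
my_session_online(stmf_scsi_session_t *ss)
{
	if (stmf_add_rport_info(ss, "initiator-alias", "host-a") !=
	    STMF_SUCCESS) {
		/* Non-fatal: the session works without the extra info. */
	}
}

static void
my_session_offline(stmf_scsi_session_t *ss)
{
	stmf_remove_rport_info(ss, "initiator-alias");
}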
diff --git a/usr/src/uts/common/sys/scsi/generic/commands.h b/usr/src/uts/common/sys/scsi/generic/commands.h
index e15a80a800..ed45141bcd 100644
--- a/usr/src/uts/common/sys/scsi/generic/commands.h
+++ b/usr/src/uts/common/sys/scsi/generic/commands.h
@@ -329,6 +329,7 @@ extern "C" {
*/
#define SCMD_GROUP4 0x80
#define SCMD_EXTENDED_COPY 0x83
+#define SCMD_RECV_COPY_RESULTS 0x84
#define SCMD_VERIFY_G4 0x8f
/*
@@ -357,6 +358,7 @@ extern "C" {
*/
#define SCMD_WRITE_FILE_MARK_G4 0x80
#define SCMD_READ_REVERSE_G4 0x81
+#define SCMD_COMPARE_AND_WRITE 0x89
#define SCMD_READ_ATTRIBUTE 0x8c
#define SCMD_WRITE_ATTRIBUTE 0x8d
#define SCMD_SPACE_G4 0x91
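SCMD_COMPARE_AND_WRITE (0x89) is the 16-byte ATS opcode serviced by the new ats_copy_mgr code in this change; SCMD_RECV_COPY_RESULTS (0x84) pairs with EXTENDED COPY. Per SBC-3, COMPARE AND WRITE carries an 8-byte starting LBA in bytes 2-9 and a one-byte NUMBER OF LOGICAL BLOCKS in byte 13; an illustrative decode:

#include <stdint.h>

/* Illustrative SBC-3 COMPARE AND WRITE (0x89) CDB field extraction. */
static void
decode_compare_and_write(const uint8_t cdb[16], uint64_t *lba, uint8_t *nblks)
{
	uint64_t v = 0;
	int i;

	for (i = 2; i <= 9; i++)	/* bytes 2-9: LBA, big-endian */
		v = (v << 8) | cdb[i];
	*lba = v;
	*nblks = cdb[13];	/* byte 13: NUMBER OF LOGICAL BLOCKS */
}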
diff --git a/usr/src/uts/common/sys/scsi/generic/mode.h b/usr/src/uts/common/sys/scsi/generic/mode.h
index f1b8880cce..b2e1d925e3 100644
--- a/usr/src/uts/common/sys/scsi/generic/mode.h
+++ b/usr/src/uts/common/sys/scsi/generic/mode.h
@@ -191,6 +191,8 @@ struct mode_page {
*/
#define MODEPAGE_DISCO_RECO 0x02
+#define MODEPAGE_FORMAT 0x03
+#define MODEPAGE_GEOMETRY 0x04
#define MODEPAGE_CACHING 0x08
#define MODEPAGE_PDEVICE 0x09
#define MODEPAGE_CTRL_MODE 0x0A
diff --git a/usr/src/uts/common/sys/scsi/scsi_names.h b/usr/src/uts/common/sys/scsi/scsi_names.h
index 349020d58a..bd2150ae9c 100644
--- a/usr/src/uts/common/sys/scsi/scsi_names.h
+++ b/usr/src/uts/common/sys/scsi/scsi_names.h
@@ -35,13 +35,13 @@ extern "C" {
#define SNS_NAA_32 32
#define SNS_WWN_16 16
-/*
- * Maximum number of bytes needed to store SCSI Name Strings in UTF-8 format,
- * assuming that (per RFC3629) one UTF-8 character can take up to 4 bytes.
- */
-#define SNS_EUI_U8_LEN_MAX (SNS_EUI_16 * 4)
-#define SNS_IQN_U8_LEN_MAX (SNS_IQN_223 * 4)
-#define SNS_NAA_U8_LEN_MAX (SNS_NAA_32 * 4)
+#define SNS_EUI_LEN_MAX (sizeof (SNS_EUI) + SNS_EUI_16)
+#define SNS_IQN_LEN_MAX SNS_IQN_223
+#define SNS_MAC_LEN_MAX (sizeof (SNS_MAC) + SNS_MAC_12)
+#define SNS_NAA_LEN_MAX (sizeof (SNS_NAA) + SNS_NAA_32)
+#define SNS_WWN_LEN_MAX (sizeof (SNS_WWN) + SNS_WWN_16)
+
+#define SNS_LEN_MAX SNS_IQN_LEN_MAX
#ifdef __cplusplus
}
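The rewritten limits budget for the prefixed string form of each name: sizeof (prefix) counts the prefix characters plus one, which covers the '.' separator (or a trailing NUL), and SNS_LEN_MAX is the IQN limit because iqn names (223 bytes including the "iqn." prefix) are the longest. A sketch assuming SNS_EUI expands to the string literal "eui"; that value, like the stand-in macros below, is an assumption, since the real definitions sit earlier in this header:

#include <stdio.h>
#include <string.h>

/* Stand-ins for definitions earlier in scsi_names.h (assumed values). */
#define	SNS_EUI		"eui"
#define	SNS_EUI_16	16
#define	SNS_EUI_LEN_MAX	(sizeof (SNS_EUI) + SNS_EUI_16)

int
main(void)
{
	char buf[SNS_EUI_LEN_MAX + 1];	/* 4 + 16 + 1: "eui." + 16 hex + NUL */

	(void) snprintf(buf, sizeof (buf), "%s.%016llx", SNS_EUI,
	    0x0123456789abcdefULL);
	(void) printf("%s uses %zu bytes\n", buf, strlen(buf) + 1);
	return (0);
}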
diff --git a/usr/src/uts/common/sys/stmf.h b/usr/src/uts/common/sys/stmf.h
index ad95cd0b15..76f378b226 100644
--- a/usr/src/uts/common/sys/stmf.h
+++ b/usr/src/uts/common/sys/stmf.h
@@ -18,10 +18,13 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
+
#ifndef _STMF_H
#define _STMF_H
@@ -205,6 +208,11 @@ typedef struct scsi_task {
#define TASK_AF_ACCEPT_LU_DBUF 0x08
/*
+ * Indicating a PPPT task
+ */
+#define TASK_AF_PPPT_TASK 0x10
+
+/*
* scsi_task_t extension identifiers
*/
#define STMF_TASK_EXT_NONE 0
@@ -379,7 +387,8 @@ void stmf_send_status_done(scsi_task_t *task, stmf_status_t s, uint32_t iof);
void stmf_task_lu_done(scsi_task_t *task);
void stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg);
void stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof);
-void stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof);
+void stmf_task_lport_aborted_unlocked(scsi_task_t *task, stmf_status_t s,
+ uint32_t iof);
stmf_status_t stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout);
stmf_status_t stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout);
stmf_status_t stmf_ctl(int cmd, void *obj, void *arg);
@@ -416,6 +425,9 @@ boolean_t stmf_scsilib_tptid_compare(struct scsi_transport_id *,
struct scsi_transport_id *);
struct stmf_remote_port *stmf_remote_port_alloc(uint16_t);
void stmf_remote_port_free(struct stmf_remote_port *);
+struct stmf_lu *stmf_check_and_hold_lu(struct scsi_task *task, uint8_t *guid);
+void stmf_release_lu(struct stmf_lu *lu);
+int stmf_is_task_being_aborted(struct scsi_task *task);
#ifdef __cplusplus
}
#endif
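stmf_check_and_hold_lu()/stmf_release_lu() let a caller pin an LU by GUID for the life of an operation, and stmf_is_task_being_aborted() lets long-running work bail out early. A hedged usage sketch; the worker function and control flow are hypothetical:

/* Hypothetical caller pinning an LU by GUID around a long operation. */
static int do_one_chunk(scsi_task_t *task);	/* stand-in worker */

static stmf_status_t
my_long_op(scsi_task_t *task, uint8_t *guid)
{
	struct stmf_lu *lu;

	if ((lu = stmf_check_and_hold_lu(task, guid)) == NULL)
		return (STMF_FAILURE);

	while (do_one_chunk(task) != 0) {
		if (stmf_is_task_being_aborted(task))
			break;	/* stop promptly once an abort is underway */
	}

	stmf_release_lu(lu);
	return (STMF_SUCCESS);
}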
diff --git a/usr/src/uts/common/sys/stmf_defines.h b/usr/src/uts/common/sys/stmf_defines.h
index bb9e442677..0fbab9455f 100644
--- a/usr/src/uts/common/sys/stmf_defines.h
+++ b/usr/src/uts/common/sys/stmf_defines.h
@@ -123,7 +123,7 @@ typedef uint64_t stmf_status_t;
#define STMF_SAA_INVALID_FIELD_IN_PARAM_LIST 0x052600
#define STMF_SAA_INVALID_RELEASE_OF_PR 0x052604
#define STMF_SAA_MEDIUM_REMOVAL_PREVENTED 0x055302
-#define STMF_SAA_INSUFFICIENT_REG_RESOURCES 0x055504
+#define STMF_SAA_INSUFFICIENT_REG_RESRCS 0x055504
#define STMF_SAA_POR 0x062900
#define STMF_SAA_MODE_PARAMETERS_CHANGED 0x062A01
#define STMF_SAA_ASYMMETRIC_ACCESS_CHANGED 0x062A06
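The rename keeps the value 0x055504, which packs sense key 0x05 (ILLEGAL REQUEST), ASC 0x55, ASCQ 0x04 (INSUFFICIENT REGISTRATION RESOURCES) into one word, matching the key/ASC/ASCQ layout of the neighboring STMF_SAA_* codes. A small standalone decode of that packing:

#include <stdint.h>
#include <stdio.h>

#define	STMF_SAA_INSUFFICIENT_REG_RESRCS	0x055504

int
main(void)
{
	uint32_t saa = STMF_SAA_INSUFFICIENT_REG_RESRCS;
	uint8_t key = (saa >> 16) & 0xff;	/* 0x05: ILLEGAL REQUEST */
	uint8_t asc = (saa >> 8) & 0xff;	/* 0x55 */
	uint8_t ascq = saa & 0xff;		/* 0x04 */

	(void) printf("key=%02x asc=%02x ascq=%02x\n", key, asc, ascq);
	return (0);
}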