Diffstat (limited to 'usr/src')
51 files changed, 4025 insertions, 1554 deletions
diff --git a/usr/src/lib/libiscsit/common/libiscsit.h b/usr/src/lib/libiscsit/common/libiscsit.h index 704c790edc..6666b4613e 100644 --- a/usr/src/lib/libiscsit/common/libiscsit.h +++ b/usr/src/lib/libiscsit/common/libiscsit.h @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ #ifndef _LIBISCSIT_H @@ -40,7 +40,7 @@ extern "C" { #endif -#define MAX_TARGETS 255 /* maximum targets that may be created */ +#define MAX_TARGETS 4095 /* maximum targets that may be created */ #define MAX_TPGT 256 #define CFG_TPGTLIST "tpgt-list" diff --git a/usr/src/lib/libsmbfs/smb/ctx.c b/usr/src/lib/libsmbfs/smb/ctx.c index 3aa67fd5f5..64122e3416 100644 --- a/usr/src/lib/libsmbfs/smb/ctx.c +++ b/usr/src/lib/libsmbfs/smb/ctx.c @@ -1411,10 +1411,16 @@ smb_cf_minauth_from_str(char *str) return (-1); } - +/* + * SMB 2.1 is the oldest SMB2 dialect implemented (we skipped SMB 2.002), + * so if we see a protocol value of just "2", assume they meant 2.1 + */ static struct nv smbver_table[] = { + { "3.02", SMB2_DIALECT_0302 }, + { "3.0", SMB2_DIALECT_0300 }, { "2.1", SMB2_DIALECT_0210 }, + { "2", SMB2_DIALECT_0210 }, { "1", 1 }, { NULL, 0 } }; diff --git a/usr/src/lib/libsmbfs/smb/rcfile.c b/usr/src/lib/libsmbfs/smb/rcfile.c index d7ee2d15af..da96b13c34 100644 --- a/usr/src/lib/libsmbfs/smb/rcfile.c +++ b/usr/src/lib/libsmbfs/smb/rcfile.c @@ -492,8 +492,7 @@ rc_parse(struct rcfile *rcp) if (home_nsmbrc != 0 && ( strcmp(buf, "nbns") == 0 || strcmp(buf, "nbns_enable") == 0 || - strcmp(buf, "nbns_broadcast") == 0 || - strcmp(buf, "signing") == 0)) { + strcmp(buf, "nbns_broadcast") == 0)) { fprintf(stderr, dgettext(TEXT_DOMAIN, "option %s may not be set " "in user .nsmbrc file\n"), buf); diff --git a/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c b/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c index 24bb8fccbc..44c6666313 100644 --- a/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c +++ b/usr/src/lib/smbclnt/libfknsmb/common/fksmb_sign_pkcs.c @@ -23,22 +23,46 @@ #include <stdlib.h> #include <strings.h> +#include <sys/cmn_err.h> #include <netsmb/smb_signing.h> #include <security/cryptoki.h> #include <security/pkcs11.h> /* + * Common function to see if a mech is available. + */ +static int +find_mech(smb_sign_mech_t *mech, ulong_t mid) +{ + CK_SESSION_HANDLE hdl; + CK_RV rv; + + rv = SUNW_C_GetMechSession(mid, &hdl); + if (rv != CKR_OK) { + cmn_err(CE_NOTE, "PKCS#11: no mech 0x%x", + (unsigned int)mid); + return (-1); + } + (void) C_CloseSession(hdl); + + mech->mechanism = mid; + mech->pParameter = NULL; + mech->ulParameterLen = 0; + return (0); +} + +/* * SMB1 signing helpers: * (getmech, init, update, final) */ +/* + * Find out if we have this mech. + */ int smb_md5_getmech(smb_sign_mech_t *mech) { - mech->mechanism = CKM_MD5; - mech->pParameter = NULL; - mech->ulParameterLen = 0; - return (0); + return (find_mech(mech, CKM_MD5)); } /* @@ -93,13 +117,13 @@ smb_md5_final(smb_sign_ctx_t ctx, uint8_t *digest16) * (getmech, init, update, final) */ +/* + * Find out if we have this mech. + */ int smb2_hmac_getmech(smb_sign_mech_t *mech) { - mech->mechanism = CKM_SHA256_HMAC; - mech->pParameter = NULL; - mech->ulParameterLen = 0; - return (0); + return (find_mech(mech, CKM_SHA256_HMAC)); } /* @@ -161,3 +185,79 @@ smb2_hmac_final(smb_sign_ctx_t ctx, uint8_t *digest16) return (rv == CKR_OK ?
0 : -1); } + +/* + * SMB3 signing helpers: + * (getmech, init, update, final) + */ + +/* + * Find out if we have this mech. + */ +int +smb3_cmac_getmech(smb_sign_mech_t *mech) +{ + return (find_mech(mech, CKM_AES_CMAC)); +} + +/* + * Start PKCS#11 session, load the key. + */ +int +smb3_cmac_init(smb_sign_ctx_t *ctxp, smb_sign_mech_t *mech, + uint8_t *key, size_t key_len) +{ + CK_OBJECT_HANDLE hkey = 0; + CK_RV rv; + + rv = SUNW_C_GetMechSession(mech->mechanism, ctxp); + if (rv != CKR_OK) + return (-1); + + rv = SUNW_C_KeyToObject(*ctxp, mech->mechanism, + key, key_len, &hkey); + if (rv != CKR_OK) { + (void) C_CloseSession(*ctxp); + return (-1); + } + + rv = C_SignInit(*ctxp, mech, hkey); + (void) C_DestroyObject(*ctxp, hkey); + if (rv != CKR_OK) { + (void) C_CloseSession(*ctxp); + return (-1); + } + + return (0); +} + +/* + * Digest one segment + */ +int +smb3_cmac_update(smb_sign_ctx_t ctx, uint8_t *in, size_t len) +{ + CK_RV rv; + + rv = C_SignUpdate(ctx, in, len); + if (rv != CKR_OK) + (void) C_CloseSession(ctx); + + return (rv == CKR_OK ? 0 : -1); +} + +/* + * Note, the SMB2 signature is just the AES CMAC digest. + * (both are 16 bytes long) + */ +int +smb3_cmac_final(smb_sign_ctx_t ctx, uint8_t *digest) +{ + CK_ULONG len = SMB2_SIG_SIZE; + CK_RV rv; + + rv = C_SignFinal(ctx, digest, &len); + (void) C_CloseSession(ctx); + + return (rv == CKR_OK ? 0 : -1); +} diff --git a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c index e018c6eb0c..f0eb05093c 100644 --- a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c +++ b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_sign_pkcs.c @@ -215,13 +215,19 @@ smb3_cmac_init(smb_sign_ctx_t *ctxp, smb_crypto_mech_t *mech, rv = SUNW_C_KeyToObject(*ctxp, mech->mechanism, key, key_len, &hkey); - if (rv != CKR_OK) + if (rv != CKR_OK) { + (void) C_CloseSession(*ctxp); return (-1); + } rv = C_SignInit(*ctxp, mech, hkey); (void) C_DestroyObject(*ctxp, hkey); + if (rv != CKR_OK) { + (void) C_CloseSession(*ctxp); + return (-1); + } - return (rv == CKR_OK ? 0 : -1); + return (0); } /* diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 78894e23f2..2b614cd7cd 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -944,7 +944,7 @@ PPPT_OBJS += alua_ic_if.o pppt.o pppt_msg.o pppt_tgt.o STMF_OBJS += lun_map.o stmf.o -STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o +STMF_SBD_OBJS += sbd.o sbd_scsi.o sbd_pgr.o sbd_zvol.o ats_copy_mgr.o SYSMSG_OBJS += sysmsg.o diff --git a/usr/src/uts/common/fs/doorfs/door_sys.c b/usr/src/uts/common/fs/doorfs/door_sys.c index f2c22ec249..68a7a11d82 100644 --- a/usr/src/uts/common/fs/doorfs/door_sys.c +++ b/usr/src/uts/common/fs/doorfs/door_sys.c @@ -81,7 +81,7 @@ size_t door_max_arg = 16 * 1024; * door_upcall. Need to guard against a process returning huge amounts * of data and getting the kernel stuck in kmem_alloc. 
*/ -size_t door_max_upcall_reply = 1024 * 1024; +size_t door_max_upcall_reply = 4 * 1024 * 1024; /* * Maximum number of descriptors allowed to be passed in a single @@ -2725,7 +2725,7 @@ door_translate_out(void) */ static int door_results(kthread_t *caller, caddr_t data_ptr, size_t data_size, - door_desc_t *desc_ptr, uint_t desc_num) + door_desc_t *desc_ptr, uint_t desc_num) { door_client_t *ct = DOOR_CLIENT(caller->t_door); door_upcall_t *dup = ct->d_upcall; diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c b/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c index 2be033a8fc..4235c94a06 100644 --- a/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c +++ b/usr/src/uts/common/fs/smbclnt/netsmb/nsmb_sign_kcf.c @@ -29,23 +29,34 @@ #include <netsmb/smb_signing.h> /* - * SMB1 signing helpers: - * (getmech, init, update, final) + * Common function to see if a mech is available. */ - -int -smb_md5_getmech(smb_sign_mech_t *mech) +static int +find_mech(smb_sign_mech_t *mech, crypto_mech_name_t name) { crypto_mech_type_t t; - t = crypto_mech2id(SUN_CKM_MD5); - if (t == CRYPTO_MECH_INVALID) + t = crypto_mech2id(name); + if (t == CRYPTO_MECH_INVALID) { + cmn_err(CE_NOTE, "nsmb: no kcf mech: %s", name); return (-1); + } mech->cm_type = t; return (0); } /* + * SMB1 signing helpers: + * (getmech, init, update, final) + */ + +int +smb_md5_getmech(smb_sign_mech_t *mech) +{ + return (find_mech(mech, SUN_CKM_MD5)); +} + +/* * Start the KCF session, load the key */ int @@ -75,7 +86,12 @@ smb_md5_update(smb_sign_ctx_t ctx, void *buf, size_t len) rv = crypto_digest_update(ctx, &data, 0); - return (rv == CRYPTO_SUCCESS ? 0 : -1); + if (rv != CRYPTO_SUCCESS) { + crypto_cancel_ctx(ctx); + return (-1); + } + + return (0); } /* @@ -106,13 +122,7 @@ smb_md5_final(smb_sign_ctx_t ctx, uint8_t *digest16) int smb2_hmac_getmech(smb_sign_mech_t *mech) { - crypto_mech_type_t t; - - t = crypto_mech2id(SUN_CKM_SHA256_HMAC); - if (t == CRYPTO_MECH_INVALID) - return (-1); - mech->cm_type = t; - return (0); + return (find_mech(mech, SUN_CKM_SHA256_HMAC)); } /* @@ -152,7 +162,12 @@ smb2_hmac_update(smb_sign_ctx_t ctx, uint8_t *in, size_t len) rv = crypto_mac_update(ctx, &data, 0); - return (rv == CRYPTO_SUCCESS ? 0 : -1); + if (rv != CRYPTO_SUCCESS) { + crypto_cancel_ctx(ctx); + return (-1); + } + + return (0); } /* @@ -178,3 +193,80 @@ smb2_hmac_final(smb_sign_ctx_t ctx, uint8_t *digest16) return (rv == CRYPTO_SUCCESS ? 0 : -1); } + +/* + * SMB3 signing helpers: + * (getmech, init, update, final) + */ + +int +smb3_cmac_getmech(smb_sign_mech_t *mech) +{ + return (find_mech(mech, SUN_CKM_AES_CMAC)); +} + +/* + * Start the KCF session, load the key + */ +int +smb3_cmac_init(smb_sign_ctx_t *ctxp, smb_sign_mech_t *mech, + uint8_t *key, size_t key_len) +{ + crypto_key_t ckey; + int rv; + + bzero(&ckey, sizeof (ckey)); + ckey.ck_format = CRYPTO_KEY_RAW; + ckey.ck_data = key; + ckey.ck_length = key_len * 8; /* in bits */ + + rv = crypto_mac_init(mech, &ckey, NULL, ctxp, NULL); + + return (rv == CRYPTO_SUCCESS ? 0 : -1); +} + +/* + * Digest one segment + */ +int +smb3_cmac_update(smb_sign_ctx_t ctx, uint8_t *in, size_t len) +{ + crypto_data_t data; + int rv; + + bzero(&data, sizeof (data)); + data.cd_format = CRYPTO_DATA_RAW; + data.cd_length = len; + data.cd_raw.iov_base = (void *)in; + data.cd_raw.iov_len = len; + + rv = crypto_mac_update(ctx, &data, 0); + + if (rv != CRYPTO_SUCCESS) { + crypto_cancel_ctx(ctx); + return (-1); + } + + return (0); +} + +/* + * Note, the SMB2 signature is just the AES CMAC digest. 
+ * (both are 16 bytes long) + */ +int +smb3_cmac_final(smb_sign_ctx_t ctx, uint8_t *digest16) +{ + crypto_data_t out; + int rv; + + bzero(&out, sizeof (out)); + out.cd_format = CRYPTO_DATA_RAW; + out.cd_length = SMB2_SIG_SIZE; + out.cd_raw.iov_len = SMB2_SIG_SIZE; + out.cd_raw.iov_base = (void *)digest16; + + rv = crypto_mac_final(ctx, &out, 0); + + return (rv == CRYPTO_SUCCESS ? 0 : -1); +} diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c index 46bf28c370..f7f20bdb5a 100644 --- a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c +++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_sign.c @@ -54,10 +54,45 @@ #define SMB2_SIG_OFF 48 #define SMB2_SIG_LEN 16 +typedef struct smb_mac_ops { + int (*mac_init)(smb_sign_ctx_t *, smb_sign_mech_t *, + uint8_t *, size_t); + int (*mac_update)(smb_sign_ctx_t, uint8_t *, size_t); + int (*mac_final)(smb_sign_ctx_t, uint8_t *); +} smb_mac_ops_t; + +static smb_mac_ops_t +smb2_sign_ops = { + smb2_hmac_init, + smb2_hmac_update, + smb2_hmac_final +}; + +static struct smb_mac_ops +smb3_sign_ops = { + smb3_cmac_init, + smb3_cmac_update, + smb3_cmac_final +}; + +/* + * Input to KDF for SigningKey. + * See comment for smb3_do_kdf for content. + */ +static uint8_t sign_kdf_input[29] = { + 0, 0, 0, 1, 'S', 'M', 'B', '2', + 'A', 'E', 'S', 'C', 'M', 'A', 'C', 0, + 0, 'S', 'm', 'b', 'S', 'i', 'g', 'n', + 0, 0, 0, 0, 0x80 }; + +int smb3_do_kdf(void *outbuf, size_t outbuf_len, + void *input, size_t input_len, + uint8_t *key, uint32_t key_len); + /* * smb2_sign_init * - * Get the mechanism info and initilize SMB2 signing. + * Get the mechanism info and initialize SMB2 or SMB3 signing. */ int smb2_sign_init(smb_vc_t *vcp) @@ -68,31 +103,103 @@ ASSERT(vcp->vc_ssnkey != NULL); ASSERT(vcp->vc_mackey == NULL); - rc = smb2_hmac_getmech(&vcp->vc_signmech); - if (rc != 0) { - cmn_err(CE_NOTE, "smb2 can't get signing mechanism"); + if (SMB_DIALECT(vcp) < SMB2_DIALECT_0300) + rc = smb2_hmac_getmech(&vcp->vc_signmech); + else + rc = smb3_cmac_getmech(&vcp->vc_signmech); + if (rc != 0) return (EAUTH); - } /* * Convert the session key to the MAC key. * * For SMB2, the signing key is just the first 16 bytes * of the session key (truncated or padded with zeros). - * [MS-SMB2] 3.2.5.3.1 - * - * SMB3 would do KDF here. + * For SMB3, the signing key is a "KDF" hash of the + * session key. [MS-SMB2] 3.2.5.3.1 */ vcp->vc_mackeylen = SMB2_SIG_LEN; vcp->vc_mackey = kmem_zalloc(vcp->vc_mackeylen, KM_SLEEP); + if (SMB_DIALECT(vcp) < SMB2_DIALECT_0300) { - copysize = vcp->vc_ssnkeylen; - if (copysize > vcp->vc_mackeylen) - copysize = vcp->vc_mackeylen; - bcopy(vcp->vc_ssnkey, vcp->vc_mackey, copysize); + copysize = vcp->vc_ssnkeylen; + if (copysize > vcp->vc_mackeylen) + copysize = vcp->vc_mackeylen; + bcopy(vcp->vc_ssnkey, vcp->vc_mackey, copysize); + + vcp->vc_sign_ops = &smb2_sign_ops; + } else { + rc = smb3_do_kdf(vcp->vc_mackey, vcp->vc_mackeylen, + sign_kdf_input, sizeof (sign_kdf_input), + vcp->vc_ssnkey, vcp->vc_ssnkeylen); + if (rc != 0) + return (EAUTH); + vcp->vc_sign_ops = &smb3_sign_ops; + } return (0); }
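The 29-byte sign_kdf_input[] above is the fixed message for one round of the [NIST SP800-108] counter-mode KDF: i (4 bytes, big-endian, = 1), the label "SMB2AESCMAC" with its terminating NUL (12 bytes), a zero separator byte, the context "SmbSign" with its NUL (8 bytes), and L = 0x00000080 (128 output bits). A hypothetical helper that assembles the same layout for any label/context pair (illustrative only, not part of this change):

	static size_t
	kdf_input(uint8_t *buf, const char *label, size_t label_len,
	    const char *context, size_t context_len, uint32_t bits)
	{
		uint8_t *p = buf;

		/* i = 1, big-endian */
		*p++ = 0; *p++ = 0; *p++ = 0; *p++ = 1;
		bcopy(label, p, label_len);	/* length includes the NUL */
		p += label_len;
		*p++ = 0;			/* separator */
		bcopy(context, p, context_len);	/* length includes the NUL */
		p += context_len;
		/* L in bits, big-endian */
		*p++ = (bits >> 24) & 0xff;
		*p++ = (bits >> 16) & 0xff;
		*p++ = (bits >> 8) & 0xff;
		*p++ = bits & 0xff;
		return ((size_t)(p - buf));
	}

With label "SMB2AESCMAC" (12 bytes with NUL), context "SmbSign" (8 bytes with NUL), and bits = 128, this reproduces sign_kdf_input[] byte for byte.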
+/* + * Derive SMB3 key as described in [MS-SMB2] 3.1.4.2 + * and [NIST SP800-108] + * + * r = 32, L = 128, PRF = HMAC-SHA256, key = (session key) + * + * Note that these describe pre-3.1.1 inputs. + * + * Session.SigningKey for binding a session: + * - Session.SessionKey as K1 + * - label = SMB2AESCMAC (size 12) + * - context = SmbSign (size 8) + * Channel.SigningKey for all other requests + * - if SMB2_SESSION_FLAG_BINDING, GSS key (in Session.SessionKey?) as K1; + * - otherwise, Session.SessionKey as K1 + * - label = SMB2AESCMAC (size 12) + * - context = SmbSign (size 8) + * Session.ApplicationKey for ... (not sure what yet) + * - Session.SessionKey as K1 + * - label = SMB2APP (size 8) + * - context = SmbRpc (size 7) + * Session.EncryptionKey for encrypting server messages + * - Session.SessionKey as K1 + * - label = "SMB2AESCCM" (size 11) + * - context = "ServerOut" (size 10) + * Session.DecryptionKey for decrypting client requests + * - Session.SessionKey as K1 + * - label = "SMB2AESCCM" (size 11) + * - context = "ServerIn " (size 10) (Note the space) + */ +int +smb3_do_kdf(void *outbuf, size_t outbuf_len, + void *input, size_t input_len, + uint8_t *key, uint32_t key_len) +{ + uint8_t digest32[SHA256_DIGEST_LENGTH]; + smb_sign_mech_t mech; + smb_sign_ctx_t hctx = 0; + int rc; + + bzero(&mech, sizeof (mech)); + if ((rc = smb2_hmac_getmech(&mech)) != 0) + return (rc); + + /* Limit the SessionKey input to its maximum size (16 bytes) */ + if (key_len > SMB2_SIG_SIZE) + key_len = SMB2_SIG_SIZE; + rc = smb2_hmac_init(&hctx, &mech, key, key_len); + if (rc != 0) + return (rc); + + if ((rc = smb2_hmac_update(hctx, input, input_len)) != 0) + return (rc); + + if ((rc = smb2_hmac_final(hctx, digest32)) != 0) + return (rc); + + /* Output is first 16 bytes of digest. */ + bcopy(digest32, outbuf, outbuf_len); + return (0); +} /* * Compute MAC signature of packet data, using the stored MAC key. @@ -109,14 +216,17 @@ smb2_compute_MAC(struct smb_vc *vcp, mblk_t *mp, uchar_t *signature) { uint8_t tmp_hdr[SMB2_HDR_SIZE]; smb_sign_ctx_t ctx = 0; + smb_mac_ops_t *ops; mblk_t *m = mp; int size; int rc; if (vcp->vc_mackey == NULL) return (-1); + if ((ops = vcp->vc_sign_ops) == NULL) + return (-1); - rc = smb2_hmac_init(&ctx, &vcp->vc_signmech, + rc = ops->mac_init(&ctx, &vcp->vc_signmech, vcp->vc_mackey, vcp->vc_mackeylen); if (rc != 0) return (rc); @@ -131,7 +241,7 @@ size = SMB2_HDRLEN; bcopy(m->b_rptr, tmp_hdr, size); bzero(tmp_hdr + SMB2_SIG_OFF, SMB2_SIG_LEN); - rc = smb2_hmac_update(ctx, tmp_hdr, size); + rc = ops->mac_update(ctx, tmp_hdr, size); if (rc != 0) return (rc); @@ -140,7 +250,7 @@ * the data just after the SMB2 header.
*/ size = MBLKL(m) - SMB2_HDRLEN; - rc = smb2_hmac_update(ctx, m->b_rptr + SMB2_HDRLEN, size); + rc = ops->mac_update(ctx, m->b_rptr + SMB2_HDRLEN, size); if (rc != 0) return (rc); m = m->b_cont; @@ -149,13 +259,13 @@ smb2_compute_MAC(struct smb_vc *vcp, mblk_t *mp, uchar_t *signature) while (m != NULL) { size = MBLKL(m); if (size > 0) { - rc = smb2_hmac_update(ctx, m->b_rptr, size); + rc = ops->mac_update(ctx, m->b_rptr, size); if (rc != 0) return (rc); } m = m->b_cont; } - rc = smb2_hmac_final(ctx, signature); + rc = ops->mac_final(ctx, signature); return (rc); } diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c index c3df18faa9..52d8661b7a 100644 --- a/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c +++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb2_smb.c @@ -50,9 +50,15 @@ #include <netsmb/smb_rq.h> #include <netsmb/smb2_rq.h> -#define NDIALECTS 1 -static const uint16_t smb2_dialects[1] = { - SMB2_DIALECT_0210 +/* + * Supported dialects. Keep sorted by number because of how the + * vc_maxver check below may truncate this list. + */ +#define NDIALECTS 3 +static const uint16_t smb2_dialects[NDIALECTS] = { + SMB2_DIALECT_0210, + SMB2_DIALECT_0300, + SMB2_DIALECT_0302, }; /* Optional capabilities we advertise (none yet). */ @@ -113,13 +119,13 @@ smb2_parse_smb1nego_resp(struct smb_rq *rqp) md_get_uint16le(mdp, &sp->sv2_security_mode); /* Get Dialect. */ - error = md_get_uint16le(mdp, &sp->sv2_dialect); + error = md_get_uint16le(mdp, &sp->sv_proto); if (error != 0) return (error); /* What dialect did we get? */ - if (sp->sv2_dialect != SMB2_DIALECT_02ff) { - SMBERROR("Unknown dialect 0x%x\n", sp->sv2_dialect); + if (sp->sv_proto != SMB2_DIALECT_02ff) { + SMBERROR("Unknown dialect 0x%x\n", sp->sv_proto); return (EINVAL); } /* Set our (internal) SMB1 dialect also. */ @@ -148,6 +154,7 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred) struct smb_rq *rqp = NULL; struct mbchain *mbp = NULL; struct mdchain *mdp = NULL; + uint16_t *ndialects_p; uint16_t ndialects = NDIALECTS; boolean_t will_sign = B_FALSE; uint16_t length = 0; @@ -174,15 +181,18 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred) */ smb_rq_getrequest(rqp, &mbp); mb_put_uint16le(mbp, 36); /* Struct Size */ - mb_put_uint16le(mbp, ndialects); /* Dialect Count */ + ndialects_p = mb_reserve(mbp, 2); /* Dialect Count */ mb_put_uint16le(mbp, security_mode); mb_put_uint16le(mbp, 0); /* Reserved */ mb_put_uint32le(mbp, smb2_clnt_caps); mb_put_mem(mbp, vcp->vc_cl_guid, 16, MB_MSYSTEM); mb_put_uint64le(mbp, 0); /* Start Time */ for (i = 0; i < ndialects; i++) { /* Dialects */ + if (smb2_dialects[i] > vcp->vc_maxver) + break; mb_put_uint16le(mbp, smb2_dialects[i]); } + *ndialects_p = htoles(i); /* * Do the OTW call. 
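Because smb2_dialects[] is kept sorted ascending, the loop above can stop at the first entry newer than vc_maxver and patch the reserved Dialect Count field with however many dialects were actually emitted. The same truncation logic in isolation, as a sketch using a plain array instead of the driver's mbchain (illustrative only, not part of the change):

	static uint16_t
	pick_dialects(uint16_t *out, uint16_t maxver)
	{
		static const uint16_t dialects[] = {
			SMB2_DIALECT_0210,
			SMB2_DIALECT_0300,
			SMB2_DIALECT_0302,
		};
		uint16_t i;

		for (i = 0; i < sizeof (dialects) / sizeof (dialects[0]); i++) {
			if (dialects[i] > maxver)
				break;	/* sorted, so nothing later can fit */
			out[i] = dialects[i];
		}
		return (i);
	}

A vc_maxver of SMB2_DIALECT_0210 thus offers one dialect; anything at or above SMB2_DIALECT_0302 offers all three.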
@@ -209,7 +219,7 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred) } md_get_uint16le(mdp, &sp->sv2_security_mode); - md_get_uint16le(mdp, &sp->sv2_dialect); + md_get_uint16le(mdp, &sp->sv_proto); /* dialect */ md_get_uint16le(mdp, NULL); /* reserved */ md_get_mem(mdp, sp->sv2_guid, 16, MB_MSYSTEM); md_get_uint32le(mdp, &sp->sv2_capabilities); @@ -254,7 +264,7 @@ smb2_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred) } wk->wk_u_auth_rlen = sec_buf_len; err = md_get_mem(mdp, wk->wk_u_auth_rbuf.lp_ptr, - sec_buf_len, MB_MUSER); + sec_buf_len, MB_MUSER); if (err) { goto errout; } @@ -472,7 +482,7 @@ smb2_smb_ssnsetup(struct smb_vc *vcp, struct smb_cred *scred) } wk->wk_u_auth_rlen = sec_buf_len; err = md_get_mem(mdp, wk->wk_u_auth_rbuf.lp_ptr, - sec_buf_len, MB_MUSER); + sec_buf_len, MB_MUSER); if (err != 0) { ret = err; goto out; @@ -813,7 +823,7 @@ smb2_smb_ntcreate( mb_put_uint16le(mbp, StructSize); mb_put_uint8(mbp, 0); /* Security flags */ mb_put_uint8(mbp, SMB2_OPLOCK_LEVEL_NONE); /* Oplock level */ - mb_put_uint32le(mbp, impersonate); /* Impersonation Level */ + mb_put_uint32le(mbp, impersonate); /* Impersonation Level */ mb_put_uint64le(mbp, cr_flags); mb_put_uint64le(mbp, 0); /* Reserved */ mb_put_uint32le(mbp, req_acc); diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h b/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h index d0a8a1dca0..b88e32d781 100644 --- a/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h +++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb_conn.h @@ -70,7 +70,7 @@ typedef struct smb_cred { #define SMBV_EXT_SEC 0x0080 /* conn to use extended security */ #define SMBV_SIGNING 0x0100 /* negotiated signing */ #define SMBV_SMB2 0x0200 /* VC using SMB 2 or 3 */ -#define SMBV_HAS_FILEIDS 0x0400 /* Use File IDs for hash and inode numbers */ +#define SMBV_HAS_FILEIDS 0x0400 /* Use File IDs for hash and inums */ #define SMBV_NO_WRITE_THRU 0x0800 /* Can't use ... */ /* @@ -165,7 +165,7 @@ enum smbco_level { * SMB1 Negotiated protocol parameters */ struct smb_sopt { - int16_t sv_proto; /* protocol dialect */ + uint16_t sv_proto; /* protocol dialect */ uchar_t sv_sm; /* security mode */ int16_t sv_tz; /* offset in min relative to UTC */ uint16_t sv_maxmux; /* max number of outstanding rq's */ @@ -178,13 +178,12 @@ struct smb_sopt { /* SMB2+ fields */ uint32_t sv2_sessflags; /* final session setup reply flags */ - uint16_t sv2_dialect; /* dialect (non zero for SMB 2/3 */ uint32_t sv2_capabilities; /* capabilities */ uint32_t sv2_maxtransact; /* max transact size */ uint32_t sv2_maxread; /* max read size */ uint32_t sv2_maxwrite; /* max write size */ - uint8_t sv2_guid[16]; /* GUID */ uint16_t sv2_security_mode; /* security mode */ + uint8_t sv2_guid[16]; /* GUID */ }; typedef struct smb_sopt smb_sopt_t; @@ -226,6 +225,7 @@ typedef struct smb_vc { uint8_t *vc_mackey; /* MAC key buffer */ uint8_t *vc_ssnkey; /* session key buffer */ smb_sign_mech_t vc_signmech; + struct smb_mac_ops *vc_sign_ops; struct smb_tran_desc *vc_tdesc; /* transport ops. 
vector */ void *vc_tdata; /* transport control block */ diff --git a/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h b/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h index e1799e3383..81217d5182 100644 --- a/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h +++ b/usr/src/uts/common/fs/smbclnt/netsmb/smb_signing.h @@ -48,6 +48,8 @@ typedef CK_MECHANISM smb_sign_mech_t; typedef CK_SESSION_HANDLE smb_sign_ctx_t; #endif /* _KERNEL */ +struct smb_mac_ops; + /* * SMB signing routines used in smb_signing.c */ @@ -66,6 +68,11 @@ int smb2_hmac_init(smb_sign_ctx_t *, smb_sign_mech_t *, uint8_t *, size_t); int smb2_hmac_update(smb_sign_ctx_t, uint8_t *, size_t); int smb2_hmac_final(smb_sign_ctx_t, uint8_t *); +int smb3_cmac_getmech(smb_sign_mech_t *); +int smb3_cmac_init(smb_sign_ctx_t *, smb_sign_mech_t *, uint8_t *, size_t); +int smb3_cmac_update(smb_sign_ctx_t, uint8_t *, size_t); +int smb3_cmac_final(smb_sign_ctx_t, uint8_t *); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c new file mode 100644 index 0000000000..6623012fc8 --- /dev/null +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.c @@ -0,0 +1,944 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Nexenta Systems, Inc. All rights reserved. + */ + +#include <sys/conf.h> +#include <sys/file.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/modctl.h> +#include <sys/scsi/scsi.h> +#include <sys/scsi/impl/scsi_reset_notify.h> +#include <sys/scsi/generic/mode.h> +#include <sys/disp.h> +#include <sys/byteorder.h> +#include <sys/atomic.h> +#include <sys/sdt.h> +#include <sys/dkio.h> + +#include <sys/dmu.h> +#include <sys/txg.h> +#include <sys/refcount.h> +#include <sys/zvol.h> + +#include <sys/stmf.h> +#include <sys/lpif.h> +#include <sys/portif.h> +#include <sys/stmf_ioctl.h> +#include <sys/stmf_sbd_ioctl.h> + +#include "stmf_sbd.h" +#include "sbd_impl.h" + +/* ATS tuning parameters */ +#define OVERLAP_OFF 0 +#define OVERLAP_LOW 1 +#define OVERLAP_MEDIUM 2 +#define OVERLAP_HIGH 3 +uint8_t ats_overlap_check = OVERLAP_LOW; /* check for rw overlap with ATS */ + +uint8_t HardwareAcceleratedLocking = 1; /* 0 for disabled */ +uint8_t HardwareAcceleratedMove = 1; +uint64_t sbd_list_length = 0; + +#define SBD_ATS_MAX_NBLKS 32 +/* ATS routines. */ +uint8_t +sbd_ats_max_nblks(void) +{ + if (HardwareAcceleratedLocking == 0) + return (0); + return (SBD_ATS_MAX_NBLKS); +} + +#define is_overlapping(start1, len1, start2, len2) \ + ((start2) > (start1) ? ((start2) - (start1)) < (len1) : \ + ((start1) - (start2)) < (len2)) + +/*ARGSUSED*/ +static sbd_status_t +sbd_ats_do_handling_before_io(scsi_task_t *task, struct sbd_lu *sl, + uint64_t lba, uint64_t count, uint32_t flags) +{ + sbd_status_t ret = SBD_SUCCESS; + ats_state_t *ats_state, *ats_state_ret; + sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private; + uint8_t cdb0 = task->task_cdb[0]; + + if (scmd == NULL) + return (SBD_SUCCESS); + + if (HardwareAcceleratedLocking == 0) + return (SBD_SUCCESS); + /* + * if ATS overlap checking is disabled just return. 
The check + * is not done in the function that removes items from the list, + * which allows this value to be changed at runtime. If it is turned on + * at runtime the remove will just start taking items off the list. + * If it is turned off at runtime the list is still cleaned up. + */ + if (ats_overlap_check == OVERLAP_OFF) + return (SBD_SUCCESS); + + /* overlap checking for compare and write only */ + if (ats_overlap_check == OVERLAP_LOW) { + if (cdb0 != SCMD_COMPARE_AND_WRITE) + return (SBD_SUCCESS); + } + + /* overlap checking for compare and write and write only */ + if (ats_overlap_check == OVERLAP_MEDIUM) { + if ((cdb0 != SCMD_COMPARE_AND_WRITE) && (cdb0 != SCMD_WRITE)) + return (SBD_SUCCESS); + } + + mutex_enter(&sl->sl_lock); + /* + * if the list is empty then just add the element to the list and + * return success. There is no overlap. This is done for every + * read, write or compare and write. + */ + if (list_is_empty(&sl->sl_ats_io_list)) { + goto done; + } + + /* + * There are inflight operations. As a result the list must be scanned + * and if there are any overlaps then SBD_BUSY should be returned. + * + * Duplicate reads and writes are allowed and kept on the list + * since there is no reason that overlapping IO operations should + * be delayed. + * + * A command that conflicts with a running compare and write will + * be rescheduled and rerun. This is handled by stmf_task_poll_lu. + * There is a possibility that a command can be starved and still + * return busy, which is valid in the SCSI protocol. + */ + + for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL; + ats_state = list_next(&sl->sl_ats_io_list, ats_state)) { + + if (is_overlapping(ats_state->as_cur_ats_lba, + ats_state->as_cur_ats_len, lba, count) == 0) + continue; + + /* if the task is already listed just return */ + if (task == ats_state->as_cur_ats_task) { + cmn_err(CE_WARN, "sbd_ats_handling_before_io: " + "task %p already on list", (void *) task); + ret = SBD_SUCCESS; + goto exit; + } + /* + * if the current or listed command is a compare and write, + * any overlap is an error + */ + + if ((cdb0 == SCMD_COMPARE_AND_WRITE) || + (ats_state->as_cmd == SCMD_COMPARE_AND_WRITE)) { + ret = SBD_BUSY; + goto exit; + } + } +done: + ats_state_ret = + (ats_state_t *)kmem_zalloc(sizeof (ats_state_t), KM_SLEEP); + ats_state_ret->as_cur_ats_lba = lba; + ats_state_ret->as_cur_ats_len = count; + ats_state_ret->as_cmd = cdb0; + ats_state_ret->as_cur_ats_task = task; + if (list_is_empty(&sl->sl_ats_io_list)) { + list_insert_head(&sl->sl_ats_io_list, ats_state_ret); + } else { + list_insert_tail(&sl->sl_ats_io_list, ats_state_ret); + } + scmd->flags |= SBD_SCSI_CMD_ATS_RELATED; + scmd->ats_state = ats_state; + sbd_list_length++; + mutex_exit(&sl->sl_lock); + return (SBD_SUCCESS); + +exit: + mutex_exit(&sl->sl_lock); + return (ret); +} + +sbd_status_t +sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl, + uint64_t lba, uint64_t count) +{ + return (sbd_ats_do_handling_before_io(task, sl, lba, count, 0)); +} + +void +sbd_ats_remove_by_task(scsi_task_t *task) +{ + ats_state_t *ats_state; + sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; + sbd_cmd_t *scmd = task->task_lu_private; + + if (scmd == NULL) + return; + /* + * Scan the list and take the task off of the list. It is possible + * that the call is made in a situation where the task is not + * listed. That is a valid but unlikely case. If it happens + * just fall through and return.
The list removal is done by + * task, not LBA range, and a task cannot be active for more than + * one command, so there is never an issue about removing the + * wrong element. + */ + mutex_enter(&sl->sl_lock); + if (list_is_empty(&sl->sl_ats_io_list)) { + mutex_exit(&sl->sl_lock); + return; + } + + for (ats_state = list_head(&sl->sl_ats_io_list); ats_state != NULL; + ats_state = list_next(&sl->sl_ats_io_list, ats_state)) { + + if (ats_state->as_cur_ats_task == task) { + list_remove(&sl->sl_ats_io_list, ats_state); + kmem_free(ats_state, sizeof (ats_state_t)); + scmd->flags &= ~SBD_SCSI_CMD_ATS_RELATED; + scmd->ats_state = NULL; + sbd_list_length--; + break; + } + } + mutex_exit(&sl->sl_lock); +} + +static sbd_status_t +sbd_compare_and_write(struct scsi_task *task, sbd_cmd_t *scmd, + uint32_t *ret_off) +{ + sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; + uint8_t *buf; + sbd_status_t ret; + uint64_t addr; + uint32_t len, i; + + addr = READ_SCSI64(&task->task_cdb[2], uint64_t); + len = (uint32_t)task->task_cdb[13]; + + addr <<= sl->sl_data_blocksize_shift; + len <<= sl->sl_data_blocksize_shift; + buf = kmem_alloc(len, KM_SLEEP); + ret = sbd_data_read(sl, task, addr, (uint64_t)len, buf); + if (ret != SBD_SUCCESS) { + goto compare_and_write_done; + } + /* + * Can't use bcmp here. We need mismatch offset. + */ + for (i = 0; i < len; i++) { + if (buf[i] != scmd->trans_data[i]) + break; + } + if (i != len) { + *ret_off = i; + ret = SBD_COMPARE_FAILED; + goto compare_and_write_done; + } + + ret = sbd_data_write(sl, task, addr, (uint64_t)len, + scmd->trans_data + len); + +compare_and_write_done: + kmem_free(buf, len); + return (ret); +} + +static void +sbd_send_miscompare_status(struct scsi_task *task, uint32_t miscompare_off) +{ + uint8_t sd[18]; + + task->task_scsi_status = STATUS_CHECK; + bzero(sd, 18); + sd[0] = 0xF0; + sd[2] = 0xe; + SCSI_WRITE32(&sd[3], miscompare_off); + sd[7] = 10; + sd[12] = 0x1D; + task->task_sense_data = sd; + task->task_sense_length = 18; + (void) stmf_send_scsi_status(task, STMF_IOF_LU_DONE); +}
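The bare numbers in sbd_send_miscompare_status() are all fixed-format sense data fields from SPC. An annotated equivalent of the same 18 bytes (a readability sketch, not a replacement for the code above; the 0x0E sense key is MISCOMPARE):

	static void
	build_miscompare_sense(uint8_t *sd, uint32_t miscompare_off)
	{
		bzero(sd, 18);
		sd[0] = 0xF0;	/* VALID bit + current error, fixed format */
		sd[2] = 0x0E;	/* sense key: MISCOMPARE */
		SCSI_WRITE32(&sd[3], miscompare_off);	/* INFORMATION field */
		sd[7] = 10;	/* additional sense length (bytes 8-17) */
		sd[12] = 0x1D;	/* ASC: MISCOMPARE DURING VERIFY OPERATION */
	}

Reporting the first mismatching byte offset in the INFORMATION field is what lets an ATS initiator distinguish a lost compare-and-write race from an I/O error.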
+ +static void +sbd_ats_release_resources(struct scsi_task *task) +{ + sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private; + + /* + * a few basic checks here to be sure that there are not multiple + * calls going on. If scmd is null just return. This is very + * unlikely, but could happen if the task is freed by an abort. + * If nbufs is invalid warn but ignore the error. Last, if the + * trans_data is either null or the length is zero just blow + * off the operation and leak the memory buffer. + */ + if (scmd == NULL) + return; + + if (scmd->nbufs == 0xFF) + cmn_err(CE_WARN, "%s invalid buffer count %x", __func__, + scmd->nbufs); + + if ((scmd->trans_data != NULL) && (scmd->trans_data_len != 0)) + kmem_free(scmd->trans_data, scmd->trans_data_len); + + scmd->trans_data = NULL; /* force panic later if re-entered */ + scmd->trans_data_len = 0; + scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA; +} + +void +sbd_handle_ats_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, + struct stmf_data_buf *dbuf, uint8_t dbuf_reusable) +{ + uint64_t laddr; + uint32_t buflen, iolen, miscompare_off; + int ndx; + sbd_status_t ret; + + if (ATOMIC8_GET(scmd->nbufs) > 0) { + atomic_dec_8(&scmd->nbufs); + } + + if (dbuf->db_xfer_status != STMF_SUCCESS) { + sbd_ats_remove_by_task(task); + sbd_ats_release_resources(task); + stmf_abort(STMF_QUEUE_TASK_ABORT, task, + dbuf->db_xfer_status, NULL); + return; + } + + if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + goto ATS_XFER_DONE; + } + + /* if state is confused drop the command */ + if ((scmd->trans_data == NULL) || + ((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) || + ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0)) { + scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + return; + } + + if (ATOMIC32_GET(scmd->len) != 0) { + /* + * Initiate the next port xfer to occur in parallel + * with writing this buf. A side effect of sbd_do_ats_xfer is + * it may set scmd_len to 0. This means all the data + * transfers have been started, not that they are done. + */ + sbd_do_ats_xfer(task, scmd, NULL, 0); + } + + /* + * move the most recent data transfer to the temporary buffer + * used for the compare and write function. + */ + laddr = dbuf->db_relative_offset; + for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) && + (ndx < dbuf->db_sglist_length); ndx++) { + iolen = min(dbuf->db_data_size - buflen, + dbuf->db_sglist[ndx].seg_length); + if (iolen == 0) + break; + bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr], + iolen); + buflen += iolen; + laddr += (uint64_t)iolen; + } + task->task_nbytes_transferred += buflen; + +ATS_XFER_DONE: + if (ATOMIC32_GET(scmd->len) == 0 || + scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + stmf_free_dbuf(task, dbuf); + /* + * if this is not the last buffer to be transferred then exit + * and wait for the next buffer. Once nbufs is 0 then all the + * data has arrived and the compare can be done.
+ */ + if (ATOMIC8_GET(scmd->nbufs) > 0) + return; + scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + sbd_ats_remove_by_task(task); + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_WRITE_ERROR); + } else { + ret = sbd_compare_and_write(task, scmd, + &miscompare_off); + sbd_ats_remove_by_task(task); + sbd_ats_release_resources(task); + if (ret != SBD_SUCCESS) { + if (ret != SBD_COMPARE_FAILED) { + stmf_scsilib_send_status(task, + STATUS_CHECK, STMF_SAA_WRITE_ERROR); + } else { + sbd_send_miscompare_status(task, + miscompare_off); + } + } else { + stmf_scsilib_send_status(task, STATUS_GOOD, 0); + } + } + return; + } + sbd_do_ats_xfer(task, scmd, dbuf, dbuf_reusable); +} + +void +sbd_do_ats_xfer(struct scsi_task *task, sbd_cmd_t *scmd, + struct stmf_data_buf *dbuf, uint8_t dbuf_reusable) +{ + uint32_t len; + + if (ATOMIC32_GET(scmd->len) == 0) { + if (dbuf != NULL) { + stmf_free_dbuf(task, dbuf); + } + return; + } + + if ((dbuf != NULL) && + ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) { + /* free current dbuf and allocate a new one */ + stmf_free_dbuf(task, dbuf); + dbuf = NULL; + } + if (dbuf == NULL) { + uint32_t maxsize, minsize, old_minsize; + + maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 : + ATOMIC32_GET(scmd->len); + minsize = maxsize >> 2; + do { + old_minsize = minsize; + dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0); + } while ((dbuf == NULL) && (old_minsize > minsize) && + (minsize >= 512)); + if (dbuf == NULL) { + if (ATOMIC8_GET(scmd->nbufs) == 0) { + sbd_ats_remove_by_task(task); + sbd_ats_release_resources(task); + stmf_abort(STMF_QUEUE_TASK_ABORT, task, + STMF_ALLOC_FAILURE, NULL); + } + return; + } + } + + len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size : + ATOMIC32_GET(scmd->len); + + dbuf->db_relative_offset = scmd->current_ro; + dbuf->db_data_size = len; + dbuf->db_flags = DB_DIRECTION_FROM_RPORT; + (void) stmf_xfer_data(task, dbuf, 0); + /* + * scmd->nbufs is the number of outstanding transfers + * scmd->len is the number of bytes that are remaining for requests + */ + atomic_inc_8(&scmd->nbufs); + atomic_add_32(&scmd->len, -len); + scmd->current_ro += len; +} + +void +sbd_handle_ats(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) +{ + sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; + uint64_t addr, len; + sbd_cmd_t *scmd; + stmf_data_buf_t *dbuf; + uint8_t do_immediate_data = 0; + /* int ret; */ + + if (HardwareAcceleratedLocking == 0) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_OPCODE); + return; + } + + task->task_cmd_xfer_length = 0; + if (task->task_additional_flags & + TASK_AF_NO_EXPECTED_XFER_LENGTH) { + task->task_expected_xfer_length = 0; + } + if (sl->sl_flags & SL_WRITE_PROTECTED) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_WRITE_PROTECTED); + return; + } + addr = READ_SCSI64(&task->task_cdb[2], uint64_t); + len = (uint64_t)task->task_cdb[13]; + + if ((task->task_cdb[1]) || (len > SBD_ATS_MAX_NBLKS)) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_FIELD_IN_CDB); + return; + } + if (len == 0) { + stmf_scsilib_send_status(task, STATUS_GOOD, 0); + return; + } + + /* + * This can be called again. It will return the same handle again.
+ */ + if (sbd_ats_handling_before_io(task, sl, addr, len) != SBD_SUCCESS) { + if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) { + stmf_scsilib_send_status(task, STATUS_BUSY, 0); + } + return; + } + + addr <<= sl->sl_data_blocksize_shift; + len <<= sl->sl_data_blocksize_shift; + + task->task_cmd_xfer_length = len << 1; /* actual amt of data is 2x */ + if (task->task_additional_flags & + TASK_AF_NO_EXPECTED_XFER_LENGTH) { + task->task_expected_xfer_length = task->task_cmd_xfer_length; + } + if ((addr + len) > sl->sl_lu_size) { + sbd_ats_remove_by_task(task); + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_LBA_OUT_OF_RANGE); + return; + } + + len <<= 1; + + if (len != task->task_expected_xfer_length) { + sbd_ats_remove_by_task(task); + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_FIELD_IN_CDB); + return; + } + + if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) { + if (initial_dbuf->db_data_size > len) { + if (initial_dbuf->db_data_size > + task->task_expected_xfer_length) { + /* protocol error */ + sbd_ats_remove_by_task(task); + stmf_abort(STMF_QUEUE_TASK_ABORT, task, + STMF_INVALID_ARG, NULL); + return; + } + ASSERT(len <= 0xFFFFFFFFull); + initial_dbuf->db_data_size = (uint32_t)len; + } + do_immediate_data = 1; + } + dbuf = initial_dbuf; + + if (task->task_lu_private) { + scmd = (sbd_cmd_t *)task->task_lu_private; + } else { + scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP); + task->task_lu_private = scmd; + } + + /* We don't set the ATS_RELATED flag here */ + scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA; + scmd->cmd_type = SBD_CMD_SCSI_WRITE; + scmd->nbufs = 0; + ASSERT(len <= 0xFFFFFFFFull); + scmd->len = (uint32_t)len; + scmd->trans_data_len = (uint32_t)len; + scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP); + scmd->current_ro = 0; + + if (do_immediate_data) { + /* + * Account for data passed in this write command + */ + (void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY); + atomic_add_32(&scmd->len, -dbuf->db_data_size); + scmd->current_ro += dbuf->db_data_size; + dbuf->db_xfer_status = STMF_SUCCESS; + sbd_handle_ats_xfer_completion(task, scmd, dbuf, 0); + } else { + sbd_do_ats_xfer(task, scmd, dbuf, 0); + } +} + +/* + * SCSI Copy Manager + * + * SCSI copy manager is the state machine which implements + * SCSI extended copy functionality (SPC). There is one + * cpmgr instance per extended copy command. + * + * Exported block-copy functions: + * cpmgr_create() - Creates the state machine. + * cpmgr_destroy() - Cleans up a completed cpmgr. + * cpmgr_run() - Performs time bound copy. + * cpmgr_abort() - Aborts a cpmgr (if not already completed). + * cpmgr_done() - Tests if the copy is done. + */ + +static void cpmgr_completion_cleanup(cpmgr_t *cm); +int sbd_check_reservation_conflict(sbd_lu_t *sl, scsi_task_t *task); + +static uint8_t sbd_recv_copy_results_op_params[] = { + 0, 0, 0, 42, 1, 0, 0, 0, + 0, 2, 0, 1, 0, 0, 0xFF, 0xFF, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0xFF, 0xFF, 0, 9, 0, 0, 0, 0, 0, + 2, 2, 0xE4 +}; + +cpmgr_handle_t +cpmgr_create(scsi_task_t *task, uint8_t *params) +{ + cpmgr_t *cm = NULL; + uint8_t *p; + uint32_t plist_len; + uint32_t dbl; + int i; + uint16_t tdlen; + uint16_t n; + + cm = kmem_zalloc(sizeof (*cm), KM_NOSLEEP); + if (cm == NULL) + return (CPMGR_INVALID_HANDLE); + + cm->cm_task = task; + p = task->task_cdb; + plist_len = READ_SCSI32(&p[10], uint32_t); + + /* + * Assume CM_COMPLETE in case of error. Otherwise we will change + * this to CM_COPYING.
+ */ + cm->cm_state = CM_COMPLETE; + + if (plist_len == 0) { + cm->cm_status = 0; + goto cpmgr_create_done; + } + + if (plist_len < CPMGR_PARAM_HDR_LEN) { + cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR; + goto cpmgr_create_done; + } else if ((params[0] != 0) || ((params[1] & 0x18) != 0x18)) { + /* + * Current implementation does not allow the use + * of the list ID field. + */ + cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST; + goto cpmgr_create_done; + } + /* No inline data either */ + if (*((uint32_t *)(&params[12])) != 0) { + cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST; + goto cpmgr_create_done; + } + + tdlen = READ_SCSI16(&params[2], uint16_t); + if ((tdlen == 0) || (tdlen % CPMGR_TARGET_DESCRIPTOR_SIZE) || + (plist_len < (CPMGR_PARAM_HDR_LEN + tdlen))) { + cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR; + goto cpmgr_create_done; + } + cm->cm_td_count = tdlen / CPMGR_TARGET_DESCRIPTOR_SIZE; + if (cm->cm_td_count > CPMGR_MAX_TARGET_DESCRIPTORS) { + cm->cm_status = CPMGR_TOO_MANY_TARGET_DESCRIPTORS; + goto cpmgr_create_done; + } + if (plist_len != (CPMGR_PARAM_HDR_LEN + tdlen + + CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE)) { + cm->cm_status = CPMGR_PARAM_LIST_LEN_ERROR; + goto cpmgr_create_done; + } + for (i = 0; i < cm->cm_td_count; i++) { + p = params + CPMGR_PARAM_HDR_LEN; + p += i * CPMGR_TARGET_DESCRIPTOR_SIZE; + if ((p[0] != CPMGR_IDENT_TARGET_DESCRIPTOR) || + ((p[5] & 0x30) != 0) || (p[7] != 16)) { + cm->cm_status = CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR; + goto cpmgr_create_done; + } + /* + * stmf should be able to find this LU and lock it. Also + * make sure that it is indeed an sbd lu. + */ + if (((cm->cm_tds[i].td_lu = + stmf_check_and_hold_lu(task, &p[8])) == NULL) || + (!sbd_is_valid_lu(cm->cm_tds[i].td_lu))) { + cm->cm_status = CPMGR_COPY_TARGET_NOT_REACHABLE; + goto cpmgr_create_done; + } + dbl = p[29]; + dbl <<= 8; + dbl |= p[30]; + dbl <<= 8; + dbl |= p[31]; + cm->cm_tds[i].td_disk_block_len = dbl; + cm->cm_tds[i].td_lbasize_shift = + sbd_get_lbasize_shift(cm->cm_tds[i].td_lu); + } + /* p now points to segment descriptor */ + p += CPMGR_TARGET_DESCRIPTOR_SIZE; + + if (p[0] != CPMGR_B2B_SEGMENT_DESCRIPTOR) { + cm->cm_status = CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR; + goto cpmgr_create_done; + } + n = READ_SCSI16(&p[2], uint16_t); + if (n != (CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE - 4)) { + cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST; + goto cpmgr_create_done; + } + + n = READ_SCSI16(&p[4], uint16_t); + if (n >= cm->cm_td_count) { + cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST; + goto cpmgr_create_done; + } + cm->cm_src_td_ndx = n; + + n = READ_SCSI16(&p[6], uint16_t); + if (n >= cm->cm_td_count) { + cm->cm_status = CPMGR_INVALID_FIELD_IN_PARAM_LIST; + goto cpmgr_create_done; + } + cm->cm_dst_td_ndx = n; + + cm->cm_copy_size = READ_SCSI16(&p[10], uint64_t); + cm->cm_copy_size *= (uint64_t)(cm->cm_tds[(p[1] & 2) ? + cm->cm_dst_td_ndx : cm->cm_src_td_ndx].td_disk_block_len); + cm->cm_src_offset = (READ_SCSI64(&p[12], uint64_t)) << + cm->cm_tds[cm->cm_src_td_ndx].td_lbasize_shift; + cm->cm_dst_offset = (READ_SCSI64(&p[20], uint64_t)) << + cm->cm_tds[cm->cm_dst_td_ndx].td_lbasize_shift; + + /* Allocate the xfer buffer. */ + cm->cm_xfer_buf = kmem_alloc(CPMGR_XFER_BUF_SIZE, KM_NOSLEEP); + if (cm->cm_xfer_buf == NULL) { + cm->cm_status = CPMGR_INSUFFICIENT_RESOURCES; + goto cpmgr_create_done; + } + + /* + * No need to check block limits. cpmgr_run() will + * take care of that.
+ */ + + /* All checks passed */ + cm->cm_state = CM_COPYING; + +cpmgr_create_done: + if (cm->cm_state == CM_COMPLETE) { + cpmgr_completion_cleanup(cm); + } + return (cm); +} + +void +cpmgr_destroy(cpmgr_handle_t h) +{ + cpmgr_t *cm = (cpmgr_t *)h; + + ASSERT(cm->cm_state == CM_COMPLETE); + kmem_free(cm, sizeof (*cm)); +} + +static void +cpmgr_completion_cleanup(cpmgr_t *cm) +{ + int i; + + for (i = 0; i < cm->cm_td_count; i++) { + if (cm->cm_tds[i].td_lu) { + stmf_release_lu(cm->cm_tds[i].td_lu); + cm->cm_tds[i].td_lu = NULL; + } + } + if (cm->cm_xfer_buf) { + kmem_free(cm->cm_xfer_buf, CPMGR_XFER_BUF_SIZE); + cm->cm_xfer_buf = NULL; + } +} + +void +cpmgr_run(cpmgr_t *cm, clock_t preemption_point) +{ + stmf_lu_t *lu; + sbd_lu_t *src_slu, *dst_slu; + uint64_t xfer_size, start, end; + sbd_status_t ret; + + /* + * XXX: Handle reservations and read-only LU here. + */ + ASSERT(cm->cm_state == CM_COPYING); + lu = cm->cm_tds[cm->cm_src_td_ndx].td_lu; + src_slu = (sbd_lu_t *)lu->lu_provider_private; + if (sbd_check_reservation_conflict(src_slu, cm->cm_task)) { + cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT); + return; + } + + lu = cm->cm_tds[cm->cm_dst_td_ndx].td_lu; + dst_slu = (sbd_lu_t *)lu->lu_provider_private; + if (sbd_check_reservation_conflict(dst_slu, cm->cm_task)) { + cpmgr_abort(cm, CPMGR_RESERVATION_CONFLICT); + return; + } + if (dst_slu->sl_flags & SL_WRITE_PROTECTED) { + cpmgr_abort(cm, STMF_SAA_WRITE_PROTECTED); + return; + } + + while (cm->cm_size_done < cm->cm_copy_size) { + xfer_size = ((cm->cm_copy_size - cm->cm_size_done) > + CPMGR_XFER_BUF_SIZE) ? CPMGR_XFER_BUF_SIZE : + (cm->cm_copy_size - cm->cm_size_done); + start = cm->cm_src_offset + cm->cm_size_done; + ret = sbd_data_read(src_slu, cm->cm_task, start, xfer_size, + cm->cm_xfer_buf); + if (ret != SBD_SUCCESS) { + if (ret == SBD_IO_PAST_EOF) { + cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE); + } else { + cpmgr_abort(cm, + CPMGR_THIRD_PARTY_DEVICE_FAILURE); + } + break; + } + end = cm->cm_dst_offset + cm->cm_size_done; + ret = sbd_data_write(dst_slu, cm->cm_task, end, xfer_size, + cm->cm_xfer_buf); + if (ret != SBD_SUCCESS) { + if (ret == SBD_IO_PAST_EOF) { + cpmgr_abort(cm, CPMGR_LBA_OUT_OF_RANGE); + } else { + cpmgr_abort(cm, + CPMGR_THIRD_PARTY_DEVICE_FAILURE); + } + break; + } + cm->cm_size_done += xfer_size; + if (ddi_get_lbolt() >= preemption_point) + break; + } + if (cm->cm_size_done == cm->cm_copy_size) { + cm->cm_state = CM_COMPLETE; + cm->cm_status = 0; + cpmgr_completion_cleanup(cm); + } +} + +void +cpmgr_abort(cpmgr_t *cm, uint32_t s) +{ + if (cm->cm_state == CM_COPYING) { + cm->cm_state = CM_COMPLETE; + cm->cm_status = s; + cpmgr_completion_cleanup(cm); + } +} + +void +sbd_handle_xcopy(scsi_task_t *task, stmf_data_buf_t *dbuf) +{ + uint32_t cmd_xfer_len; + + if (HardwareAcceleratedMove == 0) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_OPCODE); + return; + } + + cmd_xfer_len = READ_SCSI32(&task->task_cdb[10], uint32_t); + + if (cmd_xfer_len == 0) { + task->task_cmd_xfer_length = 0; + if (task->task_additional_flags & + TASK_AF_NO_EXPECTED_XFER_LENGTH) { + task->task_expected_xfer_length = 0; + } + stmf_scsilib_send_status(task, STATUS_GOOD, 0); + return; + } + + sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len); +}
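For reference, the smallest parameter list cpmgr_create() accepts is 16 + 2*32 + 28 = 108 bytes: the SPC header, two identification target descriptors (type 0xE4), and one block-to-block segment descriptor (type 2). A hypothetical builder whose offsets mirror the checks above (a sketch assuming 512-byte blocks; devid is the 16-byte LU designator that stmf_check_and_hold_lu() resolves, and none of this is part of the patch):

	static size_t
	build_b2b_xcopy_params(uint8_t *p, const uint8_t *src_devid,
	    const uint8_t *dst_devid, uint64_t src_lba, uint64_t dst_lba,
	    uint16_t nblks)
	{
		uint8_t *td, *seg;
		int i;

		bzero(p, 108);
		p[1] = 0x18;		/* STR | NRCR; list ID stays zero */
		p[3] = 64;		/* target descriptor list length */
		p[11] = 28;		/* segment descriptor list length */
		/* p[12..15] stay zero: no inline data */

		for (i = 0; i < 2; i++) {
			td = p + 16 + (i * 32);
			td[0] = 0xE4;	/* identification descriptor */
			td[7] = 16;	/* designator length */
			bcopy(i == 0 ? src_devid : dst_devid, &td[8], 16);
			td[30] = 2;	/* disk block length 0x000200 = 512 */
		}

		seg = p + 80;		/* block -> block segment descriptor */
		seg[0] = 2;		/* type: B2B */
		seg[3] = 24;		/* descriptor length - 4 */
		seg[7] = 1;		/* src index 0, dst index 1 */
		seg[10] = (nblks >> 8) & 0xff;
		seg[11] = nblks & 0xff;
		for (i = 0; i < 8; i++) {	/* LBAs, big-endian */
			seg[12 + i] = (src_lba >> (56 - 8 * i)) & 0xff;
			seg[20 + i] = (dst_lba >> (56 - 8 * i)) & 0xff;
		}
		return (108);
	}

The EXTENDED COPY CDB's parameter list length (bytes 10-13, read by sbd_handle_xcopy() above) would then be 108.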
+ +void +sbd_handle_xcopy_xfer(scsi_task_t *task, uint8_t *buf) +{ + cpmgr_handle_t h; + uint32_t s; + clock_t tic, end; + + /* + * No need to pass buf size. It's taken from the cdb. + */ + h = cpmgr_create(task, buf); + if (h == CPMGR_INVALID_HANDLE) { + stmf_scsilib_send_status(task, STATUS_CHECK, + CPMGR_INSUFFICIENT_RESOURCES); + return; + } + tic = drv_usectohz(1000000); + end = ddi_get_lbolt() + (CPMGR_DEFAULT_TIMEOUT * tic); + while (!cpmgr_done(h)) { + if (stmf_is_task_being_aborted(task) || (ddi_get_lbolt() > end)) + cpmgr_abort(h, CPMGR_THIRD_PARTY_DEVICE_FAILURE); + else + cpmgr_run(h, ddi_get_lbolt() + tic); + } + s = cpmgr_status(h); + if (s) { + if (s == CPMGR_RESERVATION_CONFLICT) { + stmf_scsilib_send_status(task, + STATUS_RESERVATION_CONFLICT, 0); + } else { + stmf_scsilib_send_status(task, STATUS_CHECK, s); + } + } else { + stmf_scsilib_send_status(task, STATUS_GOOD, 0); + } + cpmgr_destroy(h); +} + +void +sbd_handle_recv_copy_results(struct scsi_task *task, + struct stmf_data_buf *initial_dbuf) +{ + uint32_t cdb_len; + + cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t); + if ((task->task_cdb[1] & 0x1F) != 3) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_FIELD_IN_CDB); + return; + } + sbd_handle_short_read_transfers(task, initial_dbuf, + sbd_recv_copy_results_op_params, cdb_len, + sizeof (sbd_recv_copy_results_op_params)); +} diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h new file mode 100644 index 0000000000..549b17aacf --- /dev/null +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/ats_copy_mgr.h @@ -0,0 +1,157 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + */ + +#ifndef _ATS_COPY_MGR_H +#define _ATS_COPY_MGR_H + +#ifdef __cplusplus extern "C" { #endif + +/* ATS structures and functions. */ + +typedef struct ats_state_s { + /* + * We actually don't allow I/O which conflicts with current ats. + * The conflicting_rw_count is for those I/Os which are currently + * running and are potentially conflicting. + */ + list_node_t as_next; + uint8_t as_cmd; + uint32_t as_conflicting_rw_count; + uint32_t as_non_conflicting_rw_count; + uint32_t as_ats_gen_ndx; + uint32_t as_cur_ats_handle; + uint64_t as_cur_ats_lba; + uint64_t as_cur_ats_lba_end; + uint64_t as_cur_ats_len; /* in nblks */ + struct scsi_task *as_cur_ats_task; +} ats_state_t; + +/* Since we're technically part of stmf_sbd.h, use some defines here.
*/ +#define sl_conflicting_rw_count sl_ats_state.as_conflicting_rw_count +#define sl_non_conflicting_rw_count sl_ats_state.as_non_conflicting_rw_count +#define sl_ats_gen_ndx sl_ats_state.as_ats_gen_ndx +#define sl_cur_ats_handle sl_ats_state.as_cur_ats_handle +#define sl_cur_ats_lba sl_ats_state.as_cur_ats_lba +#define sl_cur_ats_len sl_ats_state.as_cur_ats_len +#define sl_cur_ats_task sl_ats_state.as_cur_ats_task + +struct sbd_cmd; +struct sbd_lu; + +void sbd_handle_ats_xfer_completion(struct scsi_task *, struct sbd_cmd *, + struct stmf_data_buf *, uint8_t); +void sbd_do_ats_xfer(struct scsi_task *, struct sbd_cmd *, + struct stmf_data_buf *, uint8_t); +void sbd_handle_ats(scsi_task_t *, struct stmf_data_buf *); +void sbd_handle_recv_copy_results(struct scsi_task *, struct stmf_data_buf *); +void sbd_free_ats_handle(struct scsi_task *, struct sbd_cmd *); +void sbd_handle_ats(scsi_task_t *, struct stmf_data_buf *); +uint8_t sbd_ats_max_nblks(void); +void sbd_ats_remove_by_task(scsi_task_t *); +sbd_status_t sbd_ats_handling_before_io(scsi_task_t *task, struct sbd_lu *sl, + uint64_t lba, uint64_t count); + +/* Block-copy structures and functions. */ + +struct scsi_task; +typedef void *cpmgr_handle_t; + +#define CPMGR_INVALID_HANDLE ((cpmgr_handle_t)NULL) + +#define CPMGR_DEFAULT_TIMEOUT 30 + +#define CPMGR_PARAM_HDR_LEN 16 +#define CPMGR_IDENT_TARGET_DESCRIPTOR 0xE4 +#define CPMGR_MAX_TARGET_DESCRIPTORS 2 +#define CPMGR_TARGET_DESCRIPTOR_SIZE 32 + +#define CPMGR_B2B_SEGMENT_DESCRIPTOR 2 +#define CPMGR_MAX_SEGMENT_DESCRIPTORS 1 +#define CPMGR_B2B_SEGMENT_DESCRIPTOR_SIZE 28 + +/* + * SCSI errors before copy starts. + */ +#define CPMGR_PARAM_LIST_LEN_ERROR 0x051A00 +#define CPMGR_INVALID_FIELD_IN_PARAM_LIST 0x052600 +#define CPMGR_TOO_MANY_TARGET_DESCRIPTORS 0x052606 +#define CPMGR_UNSUPPORTED_TARGET_DESCRIPTOR 0x052607 +#define CPMGR_TOO_MANY_SEGMENT_DESCRIPTORS 0x052608 +#define CPMGR_UNSUPPORTED_SEGMENT_DESCRIPTOR 0x052609 +#define CPMGR_COPY_TARGET_NOT_REACHABLE 0x050D02 +#define CPMGR_INSUFFICIENT_RESOURCES 0x0B5503 + +/* + * SCSI errors after copy has started. + */ +#define CPMGR_LBA_OUT_OF_RANGE 0x0A2100 +#define CPMGR_THIRD_PARTY_DEVICE_FAILURE 0x0A0D01 + +/* + * SCSI errors which don't result in STATUS_CHECK. + * Use an invalid sense key to mark these. + */ +#define CPMGR_RESERVATION_CONFLICT 0xF00001 + +typedef enum cm_state { + CM_STARTING = 0, + CM_COPYING, + CM_COMPLETE +} cm_state_t; + +#define CPMGR_XFER_BUF_SIZE (128 * 1024) + +typedef struct cm_target_desc { + stmf_lu_t *td_lu; + uint32_t td_disk_block_len; + uint8_t td_lbasize_shift; +} cm_target_desc_t; + +/* + * Current implementation supports 2 target descriptors (identification type) + * for src and dst and one segment descriptor (block -> block).
+ */ +typedef struct cpmgr { + cm_target_desc_t cm_tds[CPMGR_MAX_TARGET_DESCRIPTORS]; + uint8_t cm_td_count; + uint16_t cm_src_td_ndx; + uint16_t cm_dst_td_ndx; + cm_state_t cm_state; + uint32_t cm_status; + uint64_t cm_src_offset; + uint64_t cm_dst_offset; + uint64_t cm_copy_size; + uint64_t cm_size_done; + void *cm_xfer_buf; + scsi_task_t *cm_task; +} cpmgr_t; + +#define cpmgr_done(cm) (((cpmgr_t *)(cm))->cm_state == CM_COMPLETE) +#define cpmgr_status(cm) (((cpmgr_t *)(cm))->cm_status) + +cpmgr_handle_t cpmgr_create(struct scsi_task *task, uint8_t *params); +void cpmgr_destroy(cpmgr_handle_t h); +void cpmgr_run(cpmgr_t *cm, clock_t preemption_point); +void cpmgr_abort(cpmgr_t *cm, uint32_t s); +void sbd_handle_xcopy_xfer(scsi_task_t *, uint8_t *); +void sbd_handle_xcopy(scsi_task_t *, stmf_data_buf_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ATS_COPY_MGR_H */ diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c index 2d1f443dda..1b66e80a22 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd.c @@ -25,8 +25,8 @@ * Copyright (c) 2013 by Delphix. All rights reserved. */ -#include <sys/sysmacros.h> #include <sys/conf.h> +#include <sys/list.h> #include <sys/file.h> #include <sys/ddi.h> #include <sys/sunddi.h> @@ -56,6 +56,10 @@ extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl); extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl); extern void sbd_pgr_reset(sbd_lu_t *sl); +extern int HardwareAcceleratedLocking; +extern int HardwareAcceleratedInit; +extern int HardwareAcceleratedMove; +extern uint8_t sbd_unmap_enable; static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result); @@ -109,6 +113,7 @@ static sbd_lu_t *sbd_lu_list = NULL; static kmutex_t sbd_lock; static dev_info_t *sbd_dip; static uint32_t sbd_lu_count = 0; +uint8_t sbd_enable_unmap_sync = 0; /* Global property settings for the logical unit */ char sbd_vendor_id[] = "SUN "; @@ -155,7 +160,7 @@ static struct dev_ops sbd_ops = { NULL /* power */ }; -#define SBD_NAME "COMSTAR SBD" +#define SBD_NAME "COMSTAR SBD+ " static struct modldrv modldrv = { &mod_driverops, @@ -194,6 +199,14 @@ _init(void) } mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL); rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL); + + if (HardwareAcceleratedLocking == 0) + cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled"); + if (HardwareAcceleratedMove == 0) + cmn_err(CE_NOTE, "HardwareAcceleratedMove Disabled"); + if (HardwareAcceleratedInit == 0) + cmn_err(CE_NOTE, "HardwareAcceleratedInit Disabled"); + return (0); } @@ -272,6 +285,8 @@ sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { + char *prop; + switch (cmd) { case DDI_ATTACH: sbd_dip = dip; @@ -281,6 +296,23 @@ sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) break; } ddi_report_dev(dip); + + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) { + (void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, ""); + ddi_prop_free(prop); + } + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) { + (void) snprintf(sbd_product_id, 17, "%s%16s", prop, ""); + ddi_prop_free(prop); + } + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) { + (void) snprintf(sbd_revision, 5, "%s%4s", prop, ""); + ddi_prop_free(prop); + } + return 
(DDI_SUCCESS); } @@ -1396,7 +1428,10 @@ sbd_write_lu_info(sbd_lu_t *sl) static void do_unmap_setup(sbd_lu_t *sl) { - ASSERT((sl->sl_flags & SL_UNMAP_ENABLED) == 0); + if (sbd_unmap_enable == 0) { + sl->sl_flags &= ~(SL_UNMAP_ENABLED); + return; + } if ((sl->sl_flags & SL_ZFS_META) == 0) return; /* No UNMAP for you. */ @@ -1441,8 +1476,10 @@ sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret) lu->lu_send_status_done = sbd_send_status_done; lu->lu_task_free = sbd_task_free; lu->lu_abort = sbd_abort; + lu->lu_task_poll = sbd_task_poll; lu->lu_dbuf_free = sbd_dbuf_free; lu->lu_ctl = sbd_ctl; + lu->lu_task_done = sbd_ats_remove_by_task; lu->lu_info = sbd_info; sl->sl_state = STMF_STATE_OFFLINE; @@ -1455,6 +1492,12 @@ sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret) return (EIO); } + /* + * setup the ATS (compare and write) lists to handle multiple + * ATS commands simultaneously + */ + list_create(&sl->sl_ats_io_list, sizeof (ats_state_t), + offsetof(ats_state_t, as_next)); *err_ret = 0; return (0); } @@ -1561,6 +1604,7 @@ odf_over_open: sl->sl_lu_size = vattr.va_size; } } + if (sl->sl_lu_size < SBD_MIN_LU_SIZE) { *err_ret = SBD_RET_FILE_SIZE_ERROR; ret = EINVAL; @@ -1837,7 +1881,7 @@ sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz, sl->sl_flags |= SL_WRITE_PROTECTED; } if (slu->slu_blksize_valid) { - if (!ISP2(slu->slu_blksize) || + if ((slu->slu_blksize & (slu->slu_blksize - 1)) || (slu->slu_blksize > (32 * 1024)) || (slu->slu_blksize == 0)) { *err_ret = SBD_RET_INVALID_BLKSIZE; @@ -2997,8 +3041,10 @@ sbd_status_t sbd_data_read(sbd_lu_t *sl, struct scsi_task *task, uint64_t offset, uint64_t size, uint8_t *buf) { - int ret; + int ret, ioflag = 0; long resid; + hrtime_t xfer_start; + uint8_t op = task->task_cdb[0]; if ((offset + size) > sl->sl_lu_size) { return (SBD_IO_PAST_EOF); @@ -3006,6 +3052,16 @@ sbd_data_read(sbd_lu_t *sl, struct scsi_task *task, offset += sl->sl_data_offset; + /* + * Check to see if the command is READ(10), READ(12), or READ(16). + * If it is then check for bit 3 being set to indicate if Forced + * Unit Access is being requested. If so, the FSYNC flag will be set + * on the read. 
+ */ + if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) || + (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) { + ioflag = FSYNC; + } if ((offset + size) > sl->sl_data_readable_size) { uint64_t store_end; if (offset > sl->sl_data_readable_size) { @@ -3017,6 +3073,7 @@ sbd_data_read(sbd_lu_t *sl, struct scsi_task *task, size = store_end; } + xfer_start = gethrtime(); DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl, uint8_t *, buf, uint64_t, size, uint64_t, offset, scsi_task_t *, task); @@ -3032,11 +3089,14 @@ sbd_data_read(sbd_lu_t *sl, struct scsi_task *task, rw_exit(&sl->sl_access_state_lock); return (SBD_FAILURE); } + ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size, - (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(), + (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(), &resid); rw_exit(&sl->sl_access_state_lock); + stmf_lu_xfer_done(task, B_TRUE /* read */, + (gethrtime() - xfer_start)); DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl, uint8_t *, buf, uint64_t, size, uint64_t, offset, int, ret, scsi_task_t *, task); @@ -3059,6 +3119,9 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task, long resid; sbd_status_t sret = SBD_SUCCESS; int ioflag; + hrtime_t xfer_start; + uint8_t op = task->task_cdb[0]; + boolean_t fua_bit = B_FALSE; if ((offset + size) > sl->sl_lu_size) { return (SBD_IO_PAST_EOF); @@ -3066,13 +3129,24 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task, offset += sl->sl_data_offset; - if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) && - (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) { + /* + * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16). + * If it is then check for bit 3 being set to indicate if Forced + * Unit Access is being requested. If so, the FSYNC flag will be set + * on the write. 
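For reference, the shape of the timed backing-store call this function builds up to, pulled out as a sketch: ioflag is either 0 or FSYNC, a non-zero return or a non-zero residual count marks a failed or short transfer, and the gethrtime() bracket feeds the new per-LU statistics:

	hrtime_t xfer_start = gethrtime();
	long resid;
	int err;

	err = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
	    (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
	    &resid);
	stmf_lu_xfer_done(task, B_TRUE /* read */, gethrtime() - xfer_start);
	if (err != 0 || resid != 0)
		return (SBD_FAILURE);	/* error or short transfer */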
+ */ + if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) || + (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) { + fua_bit = B_TRUE; + } + if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) && + (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) { ioflag = FSYNC; } else { ioflag = 0; } + xfer_start = gethrtime(); DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl, uint8_t *, buf, uint64_t, size, uint64_t, offset, scsi_task_t *, task); @@ -3093,6 +3167,8 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task, &resid); rw_exit(&sl->sl_access_state_lock); + stmf_lu_xfer_done(task, B_FALSE /* write */, + (gethrtime() - xfer_start)); DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl, uint8_t *, buf, uint64_t, size, uint64_t, offset, int, ret, scsi_task_t *, task); @@ -3103,7 +3179,6 @@ sbd_data_write(sbd_lu_t *sl, struct scsi_task *task, sret = sbd_flush_data_cache(sl, 1); } over_sl_data_write: - if ((ret || resid) || (sret != SBD_SUCCESS)) { return (SBD_FAILURE); } else if ((offset + size) > sl->sl_data_readable_size) { @@ -3639,7 +3714,8 @@ again: } } out: - nvlist_free(nv); + if (nv != NULL) + nvlist_free(nv); kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); kmem_free(zc, sizeof (zfs_cmd_t)); (void) ldi_close(zfs_lh, FREAD|FWRITE, kcred); @@ -3696,7 +3772,7 @@ int sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl) { vnode_t *vp; - int unused; + int unused, ret; /* Nothing to do */ if (dfl->dfl_num_exts == 0) @@ -3717,6 +3793,29 @@ sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl) return (EIO); } - return (VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred, - &unused, NULL)); + ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred, + &unused, NULL); + + return (ret); +} + +/* + * Check if this lu belongs to sbd or some other lu + * provider. A simple check for one of the module + * entry points is sufficient. + */ +int +sbd_is_valid_lu(stmf_lu_t *lu) +{ + if (lu->lu_new_task == sbd_new_task) + return (1); + return (0); +} + +uint8_t +sbd_get_lbasize_shift(stmf_lu_t *lu) +{ + sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private; + + return (sl->sl_data_blocksize_shift); } diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h index e3d3b2eb6f..8b9ec3646e 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_impl.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SBD_IMPL_H @@ -36,6 +36,11 @@ struct modify_lu_cmd; struct sbd_lu_attr; struct sbd_it_data; +#define ATOMIC8_GET(val) ( \ + (atomic_add_8_nv(&(val), 0))) +#define ATOMIC32_GET(val) ( \ + (atomic_add_32_nv(&(val), 0))) + /* * sms endianess */ @@ -206,16 +211,36 @@ typedef struct sbd_cmd { uint32_t len; /* len left */ uint32_t current_ro; /* running relative offset */ uint8_t *trans_data; /* Any transient data */ + ats_state_t *ats_state; + uint32_t rsvd; } sbd_cmd_t; /* * flags for sbd_cmd + * + * SBD_SCSI_CMD_ACTIVE means that a command is running. This is the time + * between the function sbd_new_task is called and either the command + * completion is sent (stmf_scsilib_send_status) or an abort is + * issued + * + * SBD_SCSI_CMD_ABORT_REQUESTED is when a command is being aborted. It may + * be set prior to the task being dispatched or anywhere in the process + * of the command. 
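The ATOMIC8_GET/ATOMIC32_GET macros defined in sbd_impl.h above implement an atomic read by atomically adding zero: atomic_add_8_nv() returns the (unchanged) result with the same visibility guarantees as the atomic writers, where a plain load could be stale or hoisted by the compiler. The idiom in use, as a sketch:

	/* writers elsewhere use atomic_inc_8()/atomic_add_32() */
	uint8_t nbufs_now;

	nbufs_now = ATOMIC8_GET(scmd->nbufs);	/* atomic_add_8_nv(&..., 0) */
	if (nbufs_now == 0 && ATOMIC32_GET(scmd->len) == 0) {
		/* nothing outstanding and nothing left to queue */
	}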
+ * + * SBD_SCSI_CMD_XFER_FAIL is set when a command data buffer transfer was + * errored. Usually it leads to an abort. + * + * SBD_SCSI_CMD_SYNC_WRITE synchronous write being done. + * + * SBD_SCSI_CMD_TRANS_DATA means that a buffer has been allocated to + * be used for the transfer of data. */ #define SBD_SCSI_CMD_ACTIVE 0x01 #define SBD_SCSI_CMD_ABORT_REQUESTED 0x02 #define SBD_SCSI_CMD_XFER_FAIL 0x04 #define SBD_SCSI_CMD_SYNC_WRITE 0x08 #define SBD_SCSI_CMD_TRANS_DATA 0x10 +#define SBD_SCSI_CMD_ATS_RELATED 0x20 /* * cmd types @@ -269,7 +294,7 @@ typedef struct sbd_create_standby_lu { typedef struct sbd_zvol_io { uint64_t zvio_offset; /* offset into volume */ int zvio_flags; /* flags */ - void *zvio_dbp; /* array of dmu buffers */ + void *zvio_dbp; /* array of dmu buffers */ void *zvio_abp; /* array of arc buffers */ uio_t *zvio_uio; /* for copy operations */ } sbd_zvol_io_t; @@ -300,10 +325,13 @@ void sbd_send_status_done(struct scsi_task *task); void sbd_task_free(struct scsi_task *task); stmf_status_t sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags); +void sbd_task_poll(struct scsi_task *task); void sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf); void sbd_ctl(struct stmf_lu *lu, int cmd, void *arg); stmf_status_t sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf, uint32_t *bufsizep); +uint8_t sbd_get_lbasize_shift(stmf_lu_t *lu); +int sbd_is_valid_lu(stmf_lu_t *lu); #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c index b8e74375e2..1edd7b4706 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ #include <sys/atomic.h> @@ -39,7 +40,7 @@ #define MAX_PGR_PARAM_LIST_LENGTH (256 * 1024) -int sbd_pgr_reservation_conflict(scsi_task_t *); +int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl); void sbd_pgr_reset(sbd_lu_t *); void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *); void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *); @@ -82,6 +83,7 @@ static void sbd_pgr_do_release(sbd_lu_t *, sbd_it_data_t *, uint8_t); static void sbd_pgr_do_reserve(sbd_pgr_t *, sbd_pgr_key_t *, sbd_it_data_t *it, stmf_scsi_session_t *, scsi_cdb_prout_t *); +static boolean_t sbd_pgr_should_save(sbd_lu_t *); extern sbd_status_t sbd_write_meta_section(sbd_lu_t *, sm_section_hdr_t *); extern sbd_status_t sbd_read_meta_section(sbd_lu_t *, sm_section_hdr_t **, uint16_t); @@ -195,7 +197,7 @@ extern char sbd_ctoi(char c); /* actions for PERSISTENT RESERVE OUT command */ \ (((cdb[0]) == SCMD_PERSISTENT_RESERVE_OUT) && ( \ (((cdb[1]) & 0x1F) == PR_OUT_REGISTER_AND_IGNORE_EXISTING_KEY) || \ - (((cdb[1]) & 0x1F) == PR_OUT_REGISTER))) || \ + (((cdb[1]) & 0x1F) == PR_OUT_REGISTER))) || \ /* ----------------------- */ \ /* SBC-3 (rev 17) Table 3 */ \ /* ----------------------- */ \ @@ -280,7 +282,7 @@ sbd_status_t sbd_pgr_meta_init(sbd_lu_t *slu) { sbd_pgr_info_t *spi = NULL; - uint32_t sz; + uint32_t sz; sbd_status_t ret; sz = sizeof (sbd_pgr_info_t); @@ -295,6 +297,22 @@ sbd_pgr_meta_init(sbd_lu_t *slu) return (ret); } +/* + * Evaluate common cases where a PERSISTENT RESERVE OUT CDB handler should call + * sbd_pgr_meta_write(). 
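Every PERSISTENT RESERVE OUT service-action handler in this patch converges on the same persist-or-fail tail, shown once here as a sketch:

	if (sbd_pgr_should_save(slu) == B_TRUE) {
		if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) {
			stmf_scsilib_send_status(task, STATUS_CHECK,
			    STMF_SAA_INSUFFICIENT_REG_RESRCS);
			return;
		}
	}
	stmf_scsilib_send_status(task, STATUS_GOOD, 0);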
+ */ +static boolean_t +sbd_pgr_should_save(sbd_lu_t *slu) +{ + sbd_pgr_t *pgr = slu->sl_pgr; + + if (stmf_is_pgr_aptpl_always() == B_TRUE || + (pgr->pgr_flags & (SBD_PGR_APTPL))) + return (B_TRUE); + else + return (B_FALSE); +} + sbd_status_t sbd_pgr_meta_load(sbd_lu_t *slu) { @@ -323,7 +341,12 @@ sbd_pgr_meta_load(sbd_lu_t *slu) } pgr->pgr_flags = spi->pgr_flags; - if (pgr->pgr_flags & SBD_PGR_APTPL) { + /* + * We reload APTPL reservations when: + * 1. Global override is enabled + * 2. APTPL was explicitly asserted in the PERSISTENT RESERVE OUT CDB + */ + if (stmf_is_pgr_aptpl_always() || (pgr->pgr_flags & SBD_PGR_APTPL)) { pgr->pgr_rsv_type = spi->pgr_rsv_type; pgr->pgr_rsv_scope = spi->pgr_rsv_scope; } else { @@ -436,7 +459,7 @@ sbd_pgr_meta_write(sbd_lu_t *slu) /* Calculate total pgr meta section size needed */ sz = sizeof (sbd_pgr_info_t); - if (pgr->pgr_flags & SBD_PGR_APTPL) { + if ((pgr->pgr_flags & SBD_PGR_APTPL) || stmf_is_pgr_aptpl_always()) { key = pgr->pgr_keylist; while (key != NULL) { sz = ALIGNED_TO_8BYTE_BOUNDARY(sz + @@ -458,7 +481,7 @@ sbd_pgr_meta_write(sbd_lu_t *slu) spi->pgr_sms_header.sms_id = SMS_ID_PGR_INFO; spi->pgr_sms_header.sms_data_order = SMS_DATA_ORDER; - if (pgr->pgr_flags & SBD_PGR_APTPL) { + if ((pgr->pgr_flags & SBD_PGR_APTPL) || stmf_is_pgr_aptpl_always()) { uint8_t *ptr; key = pgr->pgr_keylist; sz = sizeof (sbd_pgr_info_t); @@ -484,8 +507,12 @@ sbd_pgr_meta_write(sbd_lu_t *slu) key = key->pgr_key_next; } } - + rw_downgrade(&pgr->pgr_lock); ret = sbd_write_meta_section(slu, (sm_section_hdr_t *)spi); + if (!rw_tryupgrade(&pgr->pgr_lock)) { + rw_exit(&pgr->pgr_lock); + rw_enter(&pgr->pgr_lock, RW_WRITER); + } kmem_free(spi, totalsz); if (ret != SBD_SUCCESS) { sbd_pgr_key_t *tmp_list; @@ -513,7 +540,7 @@ sbd_pgr_meta_write(sbd_lu_t *slu) static sbd_pgr_key_t * sbd_pgr_key_alloc(scsi_devid_desc_t *lptid, scsi_transport_id_t *rptid, - int16_t lpt_len, int16_t rpt_len) + int16_t lpt_len, int16_t rpt_len) { sbd_pgr_key_t *key; @@ -578,7 +605,8 @@ sbd_pgr_reset(sbd_lu_t *slu) sbd_pgr_t *pgr = slu->sl_pgr; rw_enter(&pgr->pgr_lock, RW_WRITER); - if (!(pgr->pgr_flags & SBD_PGR_APTPL)) { + if (!(pgr->pgr_flags & SBD_PGR_APTPL) && + stmf_is_pgr_aptpl_always() == B_FALSE) { sbd_pgr_keylist_dealloc(slu); pgr->pgr_PRgeneration = 0; pgr->pgr_rsvholder = NULL; @@ -630,7 +658,7 @@ sbd_pgr_remove_key(sbd_lu_t *slu, sbd_pgr_key_t *key) */ static uint32_t sbd_pgr_remove_keys(sbd_lu_t *slu, sbd_it_data_t *my_it, sbd_pgr_key_t *my_key, - uint64_t svc_key, boolean_t match) + uint64_t svc_key, boolean_t match) { sbd_pgr_t *pgr = slu->sl_pgr; sbd_it_data_t *it; @@ -708,7 +736,7 @@ sbd_pgr_set_pgr_check_flag(sbd_lu_t *slu, boolean_t registered) static boolean_t sbd_pgr_key_compare(sbd_pgr_key_t *key, scsi_devid_desc_t *lpt, - stmf_remote_port_t *rpt) + stmf_remote_port_t *rpt) { scsi_devid_desc_t *id; @@ -732,7 +760,7 @@ sbd_pgr_key_compare(sbd_pgr_key_t *key, scsi_devid_desc_t *lpt, sbd_pgr_key_t * sbd_pgr_key_registered(sbd_pgr_t *pgr, scsi_devid_desc_t *lpt, - stmf_remote_port_t *rpt) + stmf_remote_port_t *rpt) { sbd_pgr_key_t *key; @@ -812,9 +840,8 @@ sbd_pgr_initialize_it(scsi_task_t *task, sbd_it_data_t *it) * Check for any PGR Reservation conflict. 
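The lock dance added to sbd_pgr_meta_write() deserves a note: the thread enters holding pgr_lock as WRITER, downgrades to READER so reservation checks can proceed during the slow metadata write, then attempts a non-blocking upgrade; when the upgrade races with another writer it must fully drop and re-take the lock, so callers have to tolerate PGR state changing in that window. The pattern in isolation:

	rw_downgrade(&pgr->pgr_lock);	/* WRITER -> READER, never blocks */
	ret = sbd_write_meta_section(slu, (sm_section_hdr_t *)spi);
	if (!rw_tryupgrade(&pgr->pgr_lock)) {
		rw_exit(&pgr->pgr_lock);		/* lost the race ... */
		rw_enter(&pgr->pgr_lock, RW_WRITER);	/* ... wait for WRITER */
	}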
return 0 if access allowed */ int -sbd_pgr_reservation_conflict(scsi_task_t *task) +sbd_pgr_reservation_conflict(scsi_task_t *task, sbd_lu_t *slu) { - sbd_lu_t *slu = (sbd_lu_t *)task->task_lu->lu_provider_private; sbd_pgr_t *pgr = slu->sl_pgr; sbd_it_data_t *it = (sbd_it_data_t *)task->task_lu_itl_handle; @@ -1129,7 +1156,7 @@ sbd_pgr_in_read_reservation(scsi_task_t *task, stmf_data_buf_t *initial_dbuf) static void sbd_pgr_in_report_capabilities(scsi_task_t *task, - stmf_data_buf_t *initial_dbuf) + stmf_data_buf_t *initial_dbuf) { sbd_lu_t *slu = (sbd_lu_t *)task->task_lu->lu_provider_private; sbd_pgr_t *pgr = slu->sl_pgr; @@ -1167,14 +1194,14 @@ sbd_pgr_in_report_capabilities(scsi_task_t *task, static void sbd_pgr_in_read_full_status(scsi_task_t *task, - stmf_data_buf_t *initial_dbuf) + stmf_data_buf_t *initial_dbuf) { sbd_lu_t *slu = (sbd_lu_t *)task->task_lu->lu_provider_private; sbd_pgr_t *pgr = slu->sl_pgr; sbd_pgr_key_t *key; - scsi_prin_status_t *sts; + scsi_prin_status_t *sts; scsi_prin_full_status_t *buf; - uint32_t i, buf_size, cdb_len; + uint32_t i, buf_size, cdb_len; uint8_t *offset; ASSERT(task->task_cdb[0] == SCMD_PERSISTENT_RESERVE_IN); @@ -1203,7 +1230,7 @@ sbd_pgr_in_read_full_status(scsi_task_t *task, if ((pgr->pgr_flags & SBD_PGR_RSVD_ALL_REGISTRANTS) || (pgr->pgr_rsvholder && pgr->pgr_rsvholder == key)) { sts->r_holder = 1; - sts->type = pgr->pgr_rsv_type; + sts->type = pgr->pgr_rsv_type; sts->scope = pgr->pgr_rsv_scope; } @@ -1386,7 +1413,7 @@ sbd_pgr_out_register(scsi_task_t *task, stmf_data_buf_t *dbuf) sbd_pgr_reg_done: - if (pgr->pgr_flags & SBD_PGR_APTPL || plist->aptpl) { + if (plist->aptpl || (sbd_pgr_should_save(slu) == B_TRUE)) { if (plist->aptpl) PGR_SET_FLAG(pgr->pgr_flags, SBD_PGR_APTPL); else @@ -1394,7 +1421,7 @@ sbd_pgr_reg_done: if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) { stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_INSUFFICIENT_REG_RESOURCES); + STMF_SAA_INSUFFICIENT_REG_RESRCS); return; } } @@ -1405,7 +1432,7 @@ sbd_pgr_reg_done: static sbd_pgr_key_t * sbd_pgr_do_register(sbd_lu_t *slu, sbd_it_data_t *it, scsi_devid_desc_t *lpt, - stmf_remote_port_t *rpt, uint8_t keyflag, uint64_t svc_key) + stmf_remote_port_t *rpt, uint8_t keyflag, uint64_t svc_key) { sbd_pgr_t *pgr = slu->sl_pgr; sbd_pgr_key_t *key; @@ -1491,10 +1518,10 @@ sbd_pgr_out_reserve(scsi_task_t *task) /* In case there is no reservation exist */ } else { sbd_pgr_do_reserve(pgr, key, it, ses, pr_out); - if (pgr->pgr_flags & SBD_PGR_APTPL) { + if (sbd_pgr_should_save(slu) == B_TRUE) { if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) { stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_INSUFFICIENT_REG_RESOURCES); + STMF_SAA_INSUFFICIENT_REG_RESRCS); return; } } @@ -1505,7 +1532,7 @@ sbd_pgr_out_reserve(scsi_task_t *task) static void sbd_pgr_do_reserve(sbd_pgr_t *pgr, sbd_pgr_key_t *key, sbd_it_data_t *it, - stmf_scsi_session_t *ses, scsi_cdb_prout_t *pr_out) + stmf_scsi_session_t *ses, scsi_cdb_prout_t *pr_out) { scsi_devid_desc_t *lpt; uint16_t lpt_len; @@ -1548,6 +1575,11 @@ sbd_pgr_out_release(scsi_task_t *task) ASSERT(key); + /* + * XXX this does not honor APTPL + * (i.e., changes made to a formerly-persistent reservation are not + * updated here!!!) 
+ */ if (SBD_PGR_RSVD(pgr)) { if (pgr->pgr_flags & SBD_PGR_RSVD_ALL_REGISTRANTS || pgr->pgr_rsvholder == key) { @@ -1559,6 +1591,27 @@ sbd_pgr_out_release(scsi_task_t *task) } sbd_pgr_do_release(slu, it, SBD_UA_RESERVATIONS_RELEASED); + + /* + * XXX T10 SPC-3 5.6.10.2 says nothing about what to + * do in the event of a failure updating the + * PGR nvram store for a reservation associated with + * an APTPL-enabled (see SPC-3 5.6.4.1) I_T + * registration during a RELEASE service action. + * + * Technically, the CDB completed successfully, as per + * the spec, but at some point we may need to enter + * a recovery mode on the initiator(s) if we power cycle + * the target at the wrong instant... + */ + if (sbd_pgr_should_save(slu) == B_TRUE) { + if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) { + stmf_scsilib_send_status(task, + STATUS_CHECK, + STMF_SAA_INSUFFICIENT_REG_RESRCS); + return; + } + } } } stmf_scsilib_send_status(task, STATUS_GOOD, 0); @@ -1602,10 +1655,10 @@ sbd_pgr_out_clear(scsi_task_t *task) mutex_exit(&slu->sl_lock); sbd_pgr_keylist_dealloc(slu); sbd_pgr_set_ua_conditions(slu, it, SBD_UA_RESERVATIONS_PREEMPTED); - if (pgr->pgr_flags & SBD_PGR_APTPL) { + if (sbd_pgr_should_save(slu) == B_TRUE) { if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) { stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_INSUFFICIENT_REG_RESOURCES); + STMF_SAA_INSUFFICIENT_REG_RESRCS); return; } } @@ -1717,10 +1770,10 @@ sbd_pgr_out_preempt(scsi_task_t *task, stmf_data_buf_t *dbuf) } } - if (pgr->pgr_flags & SBD_PGR_APTPL) { + if (sbd_pgr_should_save(slu) == B_TRUE) { if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) { stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_INSUFFICIENT_REG_RESOURCES); + STMF_SAA_INSUFFICIENT_REG_RESRCS); return; } } @@ -1831,8 +1884,8 @@ sbd_pgr_out_register_and_move(scsi_task_t *task, stmf_data_buf_t *dbuf) } - /* Write to disk if currenty aptpl is set or given task is setting it */ - if (pgr->pgr_flags & SBD_PGR_APTPL || plist->aptpl) { + /* Write to disk if aptpl is currently set or this task is setting it */ + if (plist->aptpl || (sbd_pgr_should_save(slu) == B_TRUE)) { if (plist->aptpl) PGR_SET_FLAG(pgr->pgr_flags, SBD_PGR_APTPL); else @@ -1840,7 +1893,7 @@ sbd_pgr_out_register_and_move(scsi_task_t *task, stmf_data_buf_t *dbuf) if (sbd_pgr_meta_write(slu) != SBD_SUCCESS) { stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_INSUFFICIENT_REG_RESOURCES); + STMF_SAA_INSUFFICIENT_REG_RESRCS); return; } } @@ -1850,7 +1903,8 @@ sbd_pgr_out_register_and_move(scsi_task_t *task, stmf_data_buf_t *dbuf) } void -sbd_pgr_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *my_it) { +sbd_pgr_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *my_it) +{ sbd_it_data_t *it; rw_enter(&sl->sl_pgr->pgr_lock, RW_WRITER); diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c index a2e6fe74fa..8c6cac96c9 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_scsi.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. @@ -87,6 +88,21 @@ (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0)))) /* End of SCSI2_CONFLICT_FREE_CMDS */ +uint8_t HardwareAcceleratedInit = 1; +uint8_t sbd_unmap_enable = 1; /* allow unmap by default */ + +/* + * An /etc/system tunable which specifies the maximum number of LBAs supported + * in a single UNMAP operation. 
Default is 0x002000 blocks or 4MB in size. + */ +int stmf_sbd_unmap_max_nblks = 0x002000; + +/* + * An /etc/system tunable which indicates if READ ops can run on the standby + * path or return an error. + */ +int stmf_standby_fail_reads = 0; + stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu); static void sbd_handle_sync_cache(struct scsi_task *task, struct stmf_data_buf *initial_dbuf); @@ -94,8 +110,6 @@ void sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, struct stmf_data_buf *dbuf); void sbd_handle_short_write_xfer_completion(scsi_task_t *task, stmf_data_buf_t *dbuf); -void sbd_handle_short_write_transfers(scsi_task_t *task, - stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size); void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen); void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf); @@ -106,7 +120,7 @@ static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf); extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *); -extern int sbd_pgr_reservation_conflict(scsi_task_t *); +extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl); extern void sbd_pgr_reset(sbd_lu_t *); extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *); extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *); @@ -144,7 +158,8 @@ sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, (task->task_cmd_xfer_length < (32 * 1024))) ? 2 : task->task_max_nbufs; - len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : scmd->len; + len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? + dbuf->db_buf_size : ATOMIC32_GET(scmd->len); laddr = scmd->addr + scmd->current_ro; for (buflen = 0, ndx = 0; (buflen < len) && @@ -168,12 +183,14 @@ sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, dbuf->db_data_size = buflen; dbuf->db_flags = DB_DIRECTION_TO_RPORT; (void) stmf_xfer_data(task, dbuf, 0); - scmd->len -= buflen; + atomic_add_32(&scmd->len, -buflen); scmd->current_ro += buflen; - if (scmd->len && (scmd->nbufs < bufs_to_take)) { + if (ATOMIC32_GET(scmd->len) && + (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) { uint32_t maxsize, minsize, old_minsize; - maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len; + maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 : + ATOMIC32_GET(scmd->len); minsize = maxsize >> 2; do { /* @@ -188,7 +205,7 @@ sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, if (dbuf == NULL) { return; } - scmd->nbufs++; + atomic_inc_8(&scmd->nbufs); sbd_do_read_xfer(task, scmd, dbuf); } } @@ -219,6 +236,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) uint_t nblks; uint64_t blksize = sl->sl_blksize; size_t db_private_sz; + hrtime_t xfer_start; uintptr_t pad; ASSERT(rw_read_held(&sl->sl_access_state_lock)); @@ -260,14 +278,15 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) first_len = 0; } - while (scmd->len && scmd->nbufs < task->task_max_nbufs) { + while (ATOMIC32_GET(scmd->len) && + ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) { - xfer_len = MIN(max_len, scmd->len); + xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len)); if (first_len) { xfer_len = MIN(xfer_len, first_len); first_len = 0; } - if (scmd->len == xfer_len) { + if (ATOMIC32_GET(scmd->len) == xfer_len) { final_xfer = 1; } else { /* @@ -334,12 +353,15 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) * Accounting for start of read. 
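These switches are plain module globals, so they can be pinned from /etc/system. A sketch, assuming the stmf_sbd module name matches this driver; the values are examples, not recommendations:

	* allow 16384 blocks per UNMAP (default 0x2000, 4 MB of 512 B blocks)
	set stmf_sbd:stmf_sbd_unmap_max_nblks = 0x4000
	* fail READs arriving on the standby path instead of servicing them
	set stmf_sbd:stmf_standby_fail_reads = 1
	* disable WRITE SAME support (see the _init() notices in sbd.c)
	set stmf_sbd:HardwareAcceleratedInit = 0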
* Note there is no buffer address for the probe yet. */ + xfer_start = gethrtime(); DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl, uint8_t *, NULL, uint64_t, xfer_len, uint64_t, offset, scsi_task_t *, task); ret = sbd_zvol_alloc_read_bufs(sl, dbuf); + stmf_lu_xfer_done(task, B_TRUE /* read */, + (gethrtime() - xfer_start)); DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl, uint8_t *, NULL, uint64_t, xfer_len, uint64_t, offset, int, ret, scsi_task_t *, task); @@ -349,9 +371,10 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) * Read failure from the backend. */ stmf_free(dbuf); - if (scmd->nbufs == 0) { + if (ATOMIC8_GET(scmd->nbufs) == 0) { /* nothing queued, just finish */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_READ_ERROR); rw_exit(&sl->sl_access_state_lock); @@ -362,7 +385,6 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) return; } - /* * Allow PP to do setup */ @@ -376,7 +398,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) */ sbd_zvol_rele_read_bufs(sl, dbuf); stmf_free(dbuf); - if (scmd->nbufs > 0) { + if (ATOMIC8_GET(scmd->nbufs) > 0) { /* completion of previous dbuf will retry */ return; } @@ -384,6 +406,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) * Done with this command. */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if (first_xfer) stmf_scsilib_send_status(task, STATUS_QFULL, 0); else @@ -395,7 +418,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) /* * dbuf is now queued on task */ - scmd->nbufs++; + atomic_inc_8(&scmd->nbufs); /* XXX leave this in for FW? */ DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task, @@ -417,8 +440,8 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) sbd_zvol_rele_read_bufs(sl, dbuf); stmf_teardown_dbuf(task, dbuf); stmf_free(dbuf); - scmd->nbufs--; - if (scmd->nbufs > 0) { + atomic_dec_8(&scmd->nbufs); + if (ATOMIC8_GET(scmd->nbufs) > 0) { /* completion of previous dbuf will retry */ return; } @@ -427,6 +450,7 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) */ rw_exit(&sl->sl_access_state_lock); scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if (first_xfer) stmf_scsilib_send_status(task, STATUS_QFULL, 0); else @@ -438,13 +462,14 @@ sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) * Completion from task_done will cleanup */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); return; } /* * Update the xfer progress. 
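The converted bookkeeping obeys one invariant: scmd->len holds bytes not yet handed to the port, scmd->nbufs holds dbufs still outstanding, and the command may finish only when both reach zero. atomic_add_32() takes a signed delta, so the subtraction is done by adding the negated chunk size. Sketch:

	atomic_inc_8(&scmd->nbufs);		/* dbuf queued to the port */
	atomic_add_32(&scmd->len, -xfer_len);	/* bytes remaining -= xfer_len */

	/* ... later, in the transport completion path ... */
	atomic_dec_8(&scmd->nbufs);
	if (ATOMIC8_GET(scmd->nbufs) == 0 && ATOMIC32_GET(scmd->len) == 0) {
		/* last completion: safe to send status and release state */
	}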
*/ ASSERT(scmd->len >= xfer_len); - scmd->len -= xfer_len; + atomic_add_32(&scmd->len, -xfer_len); scmd->current_ro += xfer_len; } } @@ -459,12 +484,14 @@ sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, return; } task->task_nbytes_transferred += dbuf->db_data_size; - if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + if (ATOMIC32_GET(scmd->len) == 0 || + scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { stmf_free_dbuf(task, dbuf); - scmd->nbufs--; - if (scmd->nbufs) + atomic_dec_8(&scmd->nbufs); + if (ATOMIC8_GET(scmd->nbufs)) return; /* wait for all buffers to complete */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_READ_ERROR); @@ -477,7 +504,8 @@ sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, uint32_t maxsize, minsize, old_minsize; stmf_free_dbuf(task, dbuf); - maxsize = (scmd->len > (128*1024)) ? 128*1024 : scmd->len; + maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? + 128 * 1024 : ATOMIC32_GET(scmd->len); minsize = maxsize >> 2; do { old_minsize = minsize; @@ -485,8 +513,8 @@ sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, } while ((dbuf == NULL) && (old_minsize > minsize) && (minsize >= 512)); if (dbuf == NULL) { - scmd->nbufs --; - if (scmd->nbufs == 0) { + atomic_dec_8(&scmd->nbufs); + if (ATOMIC8_GET(scmd->nbufs) == 0) { stmf_abort(STMF_QUEUE_TASK_ABORT, task, STMF_ALLOC_FAILURE, NULL); } @@ -514,7 +542,7 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, ASSERT(dbuf->db_lu_private); ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ); - scmd->nbufs--; /* account for this dbuf */ + atomic_dec_8(&scmd->nbufs); /* account for this dbuf */ /* * Release the DMU resources. */ @@ -535,7 +563,8 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) || (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) || (xfer_status != STMF_SUCCESS)); - if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) { + if ((ATOMIC8_GET(scmd->nbufs) == 0) && + (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) { /* all DMU state has been released */ rw_exit(&sl->sl_access_state_lock); } @@ -549,7 +578,8 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, * This chunk completed successfully */ task->task_nbytes_transferred += data_size; - if (scmd->nbufs == 0 && scmd->len == 0) { + if (ATOMIC8_GET(scmd->nbufs) == 0 && + ATOMIC32_GET(scmd->len) == 0) { /* * This command completed successfully * @@ -557,6 +587,7 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, * completion will occur. Tell stmf we are done. */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); stmf_task_lu_done(task); return; } @@ -575,8 +606,9 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, * and wait for the last completion to send the status check. */ if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { - if (scmd->nbufs == 0) { + if (ATOMIC8_GET(scmd->nbufs) == 0) { scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_READ_ERROR); } @@ -586,7 +618,14 @@ sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, * Must have been a failure on current dbuf */ ASSERT(xfer_status != STMF_SUCCESS); + + /* + * Actually this is a bug. 
stmf abort should have reset the + * active flag, but since it's been there for some time, + * I won't change it. + */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL); } } @@ -601,6 +640,7 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, int scmd_err, scmd_xfer_done; stmf_status_t xfer_status = dbuf->db_xfer_status; uint32_t data_size = dbuf->db_data_size; + hrtime_t xfer_start; ASSERT(zvio); @@ -610,16 +650,18 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, */ stmf_teardown_dbuf(task, dbuf); - scmd->nbufs--; /* account for this dbuf */ + atomic_dec_8(&scmd->nbufs); /* account for this dbuf */ /* * All data was queued and this is the last completion, * but there could still be an error. */ - scmd_xfer_done = (scmd->len == 0 && scmd->nbufs == 0); + scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 && + (ATOMIC8_GET(scmd->nbufs) == 0)); scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) || (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) || (xfer_status != STMF_SUCCESS)); + xfer_start = gethrtime(); DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl, uint8_t *, NULL, uint64_t, data_size, uint64_t, zvio->zvio_offset, scsi_task_t *, task); @@ -637,6 +679,8 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, ret = sbd_zvol_rele_write_bufs(sl, dbuf); } + stmf_lu_xfer_done(task, B_FALSE /* write */, + (gethrtime() - xfer_start)); DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl, uint8_t *, NULL, uint64_t, data_size, uint64_t, zvio->zvio_offset, int, ret, scsi_task_t *, task); @@ -656,7 +700,8 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, * transferred or an error encountered, then no more dbufs * will be queued. 
*/ - if (scmd->nbufs == 0 && (scmd->len == 0 || scmd_err)) { + if ((ATOMIC8_GET(scmd->nbufs) == 0) && + (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) { /* all DMU state has been released */ rw_exit(&sl->sl_access_state_lock); } @@ -670,6 +715,7 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, if (scmd_xfer_done) { /* This command completed successfully */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) && (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) { stmf_scsilib_send_status(task, STATUS_CHECK, @@ -690,8 +736,9 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, */ if (scmd->flags & SBD_SCSI_CMD_ACTIVE) { if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { - if (scmd->nbufs == 0) { + if (ATOMIC8_GET(scmd->nbufs) == 0) { scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_WRITE_ERROR); } @@ -701,6 +748,7 @@ sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, return; } scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); ASSERT(xfer_status != STMF_SUCCESS); stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL); } @@ -726,6 +774,7 @@ sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf, struct iovec *iov, *tiov, iov1[8]; uint32_t len, resid; int ret, i, iovcnt, flags; + hrtime_t xfer_start; boolean_t is_read; ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE); @@ -763,6 +812,7 @@ sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf, uio.uio_resid = (uint64_t)len; uio.uio_llimit = RLIM64_INFINITY; + xfer_start = gethrtime(); if (is_read == B_TRUE) { uio.uio_fmode = FREAD; uio.uio_extflg = UIO_COPY_CACHED; @@ -791,6 +841,8 @@ sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf, uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret, scsi_task_t *, task); } + /* finalize accounting */ + stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start)); if (iov != &iov1[0]) kmem_free(iov, iovcnt * sizeof (*iov)); @@ -805,13 +857,26 @@ void sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) { uint64_t lba, laddr; + uint64_t blkcount; uint32_t len; uint8_t op = task->task_cdb[0]; sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; sbd_cmd_t *scmd; stmf_data_buf_t *dbuf; int fast_path; + boolean_t fua_bit = B_FALSE; + /* + * Check to see if the command is READ(10), READ(12), or READ(16). + * If it is then check for bit 3 being set to indicate if Forced + * Unit Access is being requested. If so, we'll bypass the use of + * DMA buffers to simplify support of this feature. 
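sbd_copy_rdwr() drives the zvol through a uio laid over the dbuf's scatter-gather list. A single-segment sketch of the setup; the field names follow the illumos uio_t, and the exact set the function fills is abbreviated by this extract:

	struct iovec	iov;
	uio_t		uio;

	iov.iov_base = (void *)dbuf->db_sglist[0].seg_addr;
	iov.iov_len = dbuf->db_sglist[0].seg_length;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = (offset_t)laddr;	/* byte offset into the LU */
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_resid = (uint64_t)iov.iov_len;
	uio.uio_llimit = RLIM64_INFINITY;
	uio.uio_fmode = FREAD;			/* FWRITE on the write side */
	uio.uio_extflg = UIO_COPY_CACHED;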
+ */ + if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) || + (op == SCMD_READ_G5)) && + (task->task_cdb[1] & BIT_3)) { + fua_bit = B_TRUE; + } if (op == SCMD_READ) { lba = READ_SCSI21(&task->task_cdb[1], uint64_t); len = (uint32_t)task->task_cdb[4]; @@ -835,6 +900,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) } laddr = lba << sl->sl_data_blocksize_shift; + blkcount = len; len <<= sl->sl_data_blocksize_shift; if ((laddr + (uint64_t)len) > sl->sl_lu_size) { @@ -861,6 +927,13 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) return; } + if (sbd_ats_handling_before_io(task, sl, lba, blkcount) != + SBD_SUCCESS) { + if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) { + stmf_scsilib_send_status(task, STATUS_BUSY, 0); + } + return; + } /* * Determine if this read can directly use DMU buffers. */ @@ -868,8 +941,8 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) initial_dbuf == NULL && /* No PP buffer passed in */ sl->sl_flags & SL_CALL_ZVOL && /* zvol backing store */ (task->task_additional_flags & - TASK_AF_ACCEPT_LU_DBUF)) /* PP allows it */ - { + TASK_AF_ACCEPT_LU_DBUF) && /* PP allows it */ + !fua_bit) { /* * Reduced copy path */ @@ -884,6 +957,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) rw_enter(&sl->sl_access_state_lock, RW_READER); if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) { rw_exit(&sl->sl_access_state_lock); + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_READ_ERROR); return; @@ -904,6 +978,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) SBD_CMD_SCSI_READ, 0); /* done with the backend */ rw_exit(&sl->sl_access_state_lock); + sbd_ats_remove_by_task(task); if (ret != 0) { /* backend error */ stmf_scsilib_send_status(task, STATUS_CHECK, @@ -936,13 +1011,12 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) /* * Setup scmd to track read progress. */ - scmd->flags = SBD_SCSI_CMD_ACTIVE; + scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED; scmd->cmd_type = SBD_CMD_SCSI_READ; scmd->nbufs = 0; scmd->addr = laddr; scmd->len = len; scmd->current_ro = 0; - /* * Kick-off the read. 
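READ, WRITE, and WRITE SAME now all gate on the ATS (COMPARE AND WRITE) range list before touching the backing store. The recurring pattern: try to reserve the LBA range, and if it overlaps an in-flight compare-and-write, arm a 10-tick LU poll, falling back to SCSI BUSY when even that fails:

	if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
	    SBD_SUCCESS) {
		if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
			stmf_scsilib_send_status(task, STATUS_BUSY, 0);
		return;
	}
	/* from here on, every exit path must call sbd_ats_remove_by_task() */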
*/ @@ -962,6 +1036,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) } while ((initial_dbuf == NULL) && (old_minsize > minsize) && (minsize >= 512)); if (initial_dbuf == NULL) { + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_QFULL, 0); return; } @@ -985,6 +1060,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_READ_ERROR); } + sbd_ats_remove_by_task(task); return; } @@ -994,7 +1070,7 @@ sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP); task->task_lu_private = scmd; } - scmd->flags = SBD_SCSI_CMD_ACTIVE; + scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED; scmd->cmd_type = SBD_CMD_SCSI_READ; scmd->nbufs = 1; scmd->addr = laddr; @@ -1011,7 +1087,7 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, uint32_t len; int bufs_to_take; - if (scmd->len == 0) { + if (ATOMIC32_GET(scmd->len) == 0) { goto DO_WRITE_XFER_DONE; } @@ -1026,14 +1102,14 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, stmf_free_dbuf(task, dbuf); dbuf = NULL; } - if (scmd->nbufs >= bufs_to_take) { + if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) { goto DO_WRITE_XFER_DONE; } if (dbuf == NULL) { uint32_t maxsize, minsize, old_minsize; - maxsize = (scmd->len > (128*1024)) ? 128*1024 : - scmd->len; + maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 : + ATOMIC32_GET(scmd->len); minsize = maxsize >> 2; do { old_minsize = minsize; @@ -1041,7 +1117,7 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, } while ((dbuf == NULL) && (old_minsize > minsize) && (minsize >= 512)); if (dbuf == NULL) { - if (scmd->nbufs == 0) { + if (ATOMIC8_GET(scmd->nbufs) == 0) { stmf_abort(STMF_QUEUE_TASK_ABORT, task, STMF_ALLOC_FAILURE, NULL); } @@ -1049,18 +1125,20 @@ sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, } } - len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : - scmd->len; + len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size : + ATOMIC32_GET(scmd->len); dbuf->db_relative_offset = scmd->current_ro; dbuf->db_data_size = len; dbuf->db_flags = DB_DIRECTION_FROM_RPORT; (void) stmf_xfer_data(task, dbuf, 0); - scmd->nbufs++; /* outstanding port xfers and bufs used */ - scmd->len -= len; + /* outstanding port xfers and bufs used */ + atomic_inc_8(&scmd->nbufs); + atomic_add_32(&scmd->len, -len); scmd->current_ro += len; - if ((scmd->len != 0) && (scmd->nbufs < bufs_to_take)) { + if ((ATOMIC32_GET(scmd->len) != 0) && + (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) { sbd_do_write_xfer(task, scmd, NULL, 0); } return; @@ -1126,14 +1204,14 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) } - while (scmd->len && scmd->nbufs < task->task_max_nbufs) { - - xfer_len = MIN(max_len, scmd->len); + while (ATOMIC32_GET(scmd->len) && + ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) { + xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len)); if (first_len) { xfer_len = MIN(xfer_len, first_len); first_len = 0; } - if (xfer_len < scmd->len) { + if (xfer_len < ATOMIC32_GET(scmd->len)) { /* * Attempt to end xfer on a block boundary. 
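The dbuf allocation retry seen on each of these paths is the same shrinking-window loop: request up to 128 KB, let the allocator lower the floor it can satisfy, and stop once the floor stops moving or would drop below 512 bytes. A sketch, using stmf_alloc_dbuf() as STMF LU providers do (the call itself is elided by this extract's formatting):

	uint32_t maxsize, minsize, old_minsize;

	maxsize = (ATOMIC32_GET(scmd->len) > (128 * 1024)) ? 128 * 1024 :
	    ATOMIC32_GET(scmd->len);
	minsize = maxsize >> 2;
	do {
		old_minsize = minsize;
		dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
	} while ((dbuf == NULL) && (old_minsize > minsize) &&
	    (minsize >= 512));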
* The only way this does not happen is if the @@ -1197,10 +1275,11 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) */ stmf_free(dbuf); scmd->flags |= SBD_SCSI_CMD_XFER_FAIL; - if (scmd->nbufs == 0) { + if (ATOMIC8_GET(scmd->nbufs) == 0) { /* * Nothing queued, so no completions coming */ + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_WRITE_ERROR); rw_exit(&sl->sl_access_state_lock); @@ -1224,7 +1303,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) */ sbd_zvol_rele_write_bufs_abort(sl, dbuf); stmf_free(dbuf); - if (scmd->nbufs > 0) { + if (ATOMIC8_GET(scmd->nbufs) > 0) { /* completion of previous dbuf will retry */ return; } @@ -1232,6 +1311,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) * Done with this command. */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if (first_xfer) stmf_scsilib_send_status(task, STATUS_QFULL, 0); else @@ -1244,7 +1324,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) /* * dbuf is now queued on task */ - scmd->nbufs++; + atomic_inc_8(&scmd->nbufs); xstat = stmf_xfer_data(task, dbuf, 0); switch (xstat) { @@ -1258,8 +1338,8 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) sbd_zvol_rele_write_bufs_abort(sl, dbuf); stmf_teardown_dbuf(task, dbuf); stmf_free(dbuf); - scmd->nbufs--; - if (scmd->nbufs > 0) { + atomic_dec_8(&scmd->nbufs); + if (ATOMIC8_GET(scmd->nbufs) > 0) { /* completion of previous dbuf will retry */ return; } @@ -1267,6 +1347,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) * Done with this command. */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if (first_xfer) stmf_scsilib_send_status(task, STATUS_QFULL, 0); else @@ -1284,7 +1365,7 @@ sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer) /* * Update the xfer progress. */ - scmd->len -= xfer_len; + atomic_add_32(&scmd->len, -xfer_len); scmd->current_ro += xfer_len; } } @@ -1297,17 +1378,20 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, uint64_t laddr; uint32_t buflen, iolen; int ndx; + uint8_t op = task->task_cdb[0]; + boolean_t fua_bit = B_FALSE; - if (scmd->nbufs > 0) { + if (ATOMIC8_GET(scmd->nbufs) > 0) { /* * Decrement the count to indicate the port xfer * into the dbuf has completed even though the buf is * still in use here in the LU provider. */ - scmd->nbufs--; + atomic_dec_8(&scmd->nbufs); } if (dbuf->db_xfer_status != STMF_SUCCESS) { + sbd_ats_remove_by_task(task); stmf_abort(STMF_QUEUE_TASK_ABORT, task, dbuf->db_xfer_status, NULL); return; @@ -1317,7 +1401,7 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, goto WRITE_XFER_DONE; } - if (scmd->len != 0) { + if (ATOMIC32_GET(scmd->len) != 0) { /* * Initiate the next port xfer to occur in parallel * with writing this buf. @@ -1325,6 +1409,16 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, sbd_do_write_xfer(task, scmd, NULL, 0); } + /* + * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16). + * If it is then check for bit 3 being set to indicate if Forced + * Unit Access is being requested. If so, we'll bypass the direct + * call and handle it in sbd_data_write(). 
+ */ + if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) || + (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) { + fua_bit = B_TRUE; + } laddr = scmd->addr + dbuf->db_relative_offset; /* @@ -1334,13 +1428,17 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, */ if (sl->sl_flags & SL_CALL_ZVOL && (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) && - (sbd_zcopy & (4|1))) { + (sbd_zcopy & (4|1)) && !fua_bit) { int commit; - commit = (scmd->len == 0 && scmd->nbufs == 0); - if (sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE, + commit = (ATOMIC32_GET(scmd->len) == 0 && + ATOMIC8_GET(scmd->nbufs) == 0); + rw_enter(&sl->sl_access_state_lock, RW_READER); + if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 || + sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE, commit) != STMF_SUCCESS) scmd->flags |= SBD_SCSI_CMD_XFER_FAIL; + rw_exit(&sl->sl_access_state_lock); buflen = dbuf->db_data_size; } else { for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) && @@ -1360,11 +1458,13 @@ sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, } task->task_nbytes_transferred += buflen; WRITE_XFER_DONE: - if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + if (ATOMIC32_GET(scmd->len) == 0 || + scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { stmf_free_dbuf(task, dbuf); - if (scmd->nbufs) + if (ATOMIC8_GET(scmd->nbufs)) return; /* wait for all buffers to complete */ scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + sbd_ats_remove_by_task(task); if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_WRITE_ERROR); @@ -1430,13 +1530,25 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; sbd_cmd_t *scmd; stmf_data_buf_t *dbuf; + uint64_t blkcount; uint8_t sync_wr_flag = 0; + boolean_t fua_bit = B_FALSE; if (sl->sl_flags & SL_WRITE_PROTECTED) { stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_WRITE_PROTECTED); return; } + /* + * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16). + * If it is then check for bit 3 being set to indicate if Forced + * Unit Access is being requested. If so, we'll bypass the fast path + * code to simplify support of this feature. 
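The zvol fast paths now hold sl_access_state_lock as READER around the copy and re-check SL_MEDIA_LOADED under it, so the LU cannot be torn down mid-transfer. The read-side variant of the guard, as a sketch:

	rw_enter(&sl->sl_access_state_lock, RW_READER);
	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
		rw_exit(&sl->sl_access_state_lock);
		sbd_ats_remove_by_task(task);	/* drop any ATS range held */
		stmf_scsilib_send_status(task, STATUS_CHECK,
		    STMF_SAA_READ_ERROR);
		return;
	}
	/* ... transfer runs under the READER hold ... */
	rw_exit(&sl->sl_access_state_lock);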
+ */ + if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) || + (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) { + fua_bit = B_TRUE; + } if (op == SCMD_WRITE) { lba = READ_SCSI21(&task->task_cdb[1], uint64_t); len = (uint32_t)task->task_cdb[4]; @@ -1472,6 +1584,7 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) } laddr = lba << sl->sl_data_blocksize_shift; + blkcount = len; len <<= sl->sl_data_blocksize_shift; if ((laddr + (uint64_t)len) > sl->sl_lu_size) { @@ -1493,12 +1606,21 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) return; } + if (sbd_ats_handling_before_io(task, sl, lba, blkcount) != + SBD_SUCCESS) { + if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) { + stmf_scsilib_send_status(task, STATUS_BUSY, 0); + } + return; + } + if (sbd_zcopy & (4|1) && /* Debug switch */ initial_dbuf == NULL && /* No PP buf passed in */ sl->sl_flags & SL_CALL_ZVOL && /* zvol backing store */ (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) && /* PP allows it */ - sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize)) { + sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) && + !fua_bit) { /* * XXX Note that disallowing initial_dbuf will eliminate @@ -1510,6 +1632,7 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) rw_enter(&sl->sl_access_state_lock, RW_READER); if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) { rw_exit(&sl->sl_access_state_lock); + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_READ_ERROR); return; @@ -1524,7 +1647,8 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) KM_SLEEP); task->task_lu_private = scmd; } - scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag; + scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED | + sync_wr_flag; scmd->cmd_type = SBD_CMD_SCSI_WRITE; scmd->nbufs = 0; scmd->addr = laddr; @@ -1555,7 +1679,8 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP); task->task_lu_private = scmd; } - scmd->flags = SBD_SCSI_CMD_ACTIVE | sync_wr_flag; + scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED | + sync_wr_flag; scmd->cmd_type = SBD_CMD_SCSI_WRITE; scmd->nbufs = 0; scmd->addr = laddr; @@ -1567,7 +1692,7 @@ sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) * Account for data passed in this write command */ (void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY); - scmd->len -= dbuf->db_data_size; + atomic_add_32(&scmd->len, -dbuf->db_data_size); scmd->current_ro += dbuf->db_data_size; dbuf->db_xfer_status = STMF_SUCCESS; sbd_handle_write_xfer_completion(task, scmd, dbuf, 0); @@ -1750,6 +1875,9 @@ sbd_handle_short_write_xfer_completion(scsi_task_t *task, sbd_handle_unmap_xfer(task, dbuf->db_sglist[0].seg_addr, dbuf->db_data_size); break; + case SCMD_EXTENDED_COPY: + sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr); + break; case SCMD_PERSISTENT_RESERVE_OUT: if (sl->sl_access_state == SBD_LU_STANDBY) { st_ret = stmf_proxy_scsi_cmd(task, dbuf); @@ -1844,7 +1972,7 @@ sbd_handle_mode_sense(struct scsi_task *task, uint8_t *cdb; uint32_t ncyl; uint8_t nsectors, nheads; - uint8_t page, ctrl, header_size, pc_valid; + uint8_t page, ctrl, header_size; uint16_t nbytes; uint8_t *p; uint64_t s = sl->sl_lu_size; @@ -1855,25 +1983,21 @@ sbd_handle_mode_sense(struct scsi_task *task, cdb = &task->task_cdb[0]; page = cdb[2] & 0x3F; ctrl = (cdb[2] >> 6) & 3; - cmd_size = (cdb[0] == 
SCMD_MODE_SENSE) ? cdb[4] : - READ_SCSI16(&cdb[7], uint32_t); if (cdb[0] == SCMD_MODE_SENSE) { + cmd_size = cdb[4]; header_size = 4; dev_spec_param_offset = 2; } else { + cmd_size = READ_SCSI16(&cdb[7], uint32_t); header_size = 8; dev_spec_param_offset = 3; } /* Now validate the command */ - if ((cdb[2] == 0) || (page == MODEPAGE_ALLPAGES) || (page == 0x08) || - (page == 0x0A) || (page == 0x03) || (page == 0x04)) { - pc_valid = 1; - } else { - pc_valid = 0; - } - if ((cmd_size < header_size) || (pc_valid == 0)) { + if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) && + (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) && + (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) { stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_FIELD_IN_CDB); return; @@ -1891,7 +2015,7 @@ sbd_handle_mode_sense(struct scsi_task *task, nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift; sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl); - if ((page == 0x03) || (page == MODEPAGE_ALLPAGES)) { + if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) { p[n] = 0x03; p[n+1] = 0x16; if (ctrl != 1) { @@ -1902,7 +2026,7 @@ sbd_handle_mode_sense(struct scsi_task *task, } n += 24; } - if ((page == 0x04) || (page == MODEPAGE_ALLPAGES)) { + if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) { p[n] = 0x04; p[n + 1] = 0x16; if (ctrl != 1) { @@ -1977,11 +2101,11 @@ sbd_handle_mode_sense(struct scsi_task *task, * of bytes in the length field, so adjust the count. * Byte count minus header length field size. */ - buf[0] = (n - 1) & 0xff; + buf[0] = (n - header_size) & 0xff; } else { /* Byte count minus header length field size. */ - buf[1] = (n - 2) & 0xff; - buf[0] = ((n - 2) >> 8) & 0xff; + buf[1] = (n - header_size) & 0xff; + buf[0] = ((n - header_size) >> 8) & 0xff; } sbd_handle_short_read_transfers(task, initial_dbuf, buf, @@ -2302,6 +2426,21 @@ sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd) } static void +sbd_write_same_release_resources(struct scsi_task *task) +{ + sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private; + + if (scmd->nbufs == 0XFF) + cmn_err(CE_WARN, "%s invalid buffer count %x", + __func__, scmd->nbufs); + if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL)) + kmem_free(scmd->trans_data, scmd->trans_data_len); + scmd->trans_data = NULL; + scmd->trans_data_len = 0; + scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA; +} + +static void sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable) { @@ -2309,7 +2448,13 @@ sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, uint32_t buflen, iolen; int ndx, ret; + if (ATOMIC8_GET(scmd->nbufs) > 0) { + atomic_dec_8(&scmd->nbufs); + } + if (dbuf->db_xfer_status != STMF_SUCCESS) { + sbd_write_same_release_resources(task); + sbd_ats_remove_by_task(task); stmf_abort(STMF_QUEUE_TASK_ABORT, task, dbuf->db_xfer_status, NULL); return; @@ -2319,7 +2464,16 @@ sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, goto write_same_xfer_done; } - if (scmd->len != 0) { + /* if this is a unnessary callback just return */ + if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) || + ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) || + (scmd->trans_data == NULL)) { + sbd_ats_remove_by_task(task); + scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; + return; + } + + if (ATOMIC32_GET(scmd->len) != 0) { /* * Initiate the next port xfer to occur in parallel * with writing this buf. 
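sbd_write_same_release_resources() becomes the single teardown point for the WRITE SAME staging buffer. A sketch of the lifecycle it closes out; the allocation side is not visible in this hunk, so the kmem_alloc() line is an assumption about the setup path:

	/* setup (assumed, not in this hunk): stage the block pattern */
	scmd->trans_data_len = (uint32_t)len;
	scmd->trans_data = kmem_alloc(scmd->trans_data_len, KM_SLEEP);
	scmd->flags |= SBD_SCSI_CMD_TRANS_DATA;

	/* every terminal path (success, error, abort) then funnels into */
	sbd_write_same_release_resources(task);	/* frees, NULLs trans_data */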
@@ -2343,14 +2497,21 @@ sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd, task->task_nbytes_transferred += buflen; write_same_xfer_done: - if (scmd->len == 0 || scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + if (ATOMIC32_GET(scmd->len) == 0 || + scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { stmf_free_dbuf(task, dbuf); + if (ATOMIC8_GET(scmd->nbufs) > 0) + return; scmd->flags &= ~SBD_SCSI_CMD_ACTIVE; if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) { + sbd_ats_remove_by_task(task); + sbd_write_same_release_resources(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_WRITE_ERROR); } else { ret = sbd_write_same_data(task, scmd); + sbd_ats_remove_by_task(task); + sbd_write_same_release_resources(task); if (ret != SBD_SUCCESS) { stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_WRITE_ERROR); @@ -2358,14 +2519,6 @@ write_same_xfer_done: stmf_scsilib_send_status(task, STATUS_GOOD, 0); } } - /* - * Only way we should get here is via handle_write_same(), - * and that should make the following assertion always pass. - */ - ASSERT((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) && - scmd->trans_data != NULL); - kmem_free(scmd->trans_data, scmd->trans_data_len); - scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA; return; } sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable); @@ -2377,7 +2530,7 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd, { uint32_t len; - if (scmd->len == 0) { + if (ATOMIC32_GET(scmd->len) == 0) { if (dbuf != NULL) stmf_free_dbuf(task, dbuf); return; @@ -2392,8 +2545,8 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd, if (dbuf == NULL) { uint32_t maxsize, minsize, old_minsize; - maxsize = (scmd->len > (128*1024)) ? 128*1024 : - scmd->len; + maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 : + ATOMIC32_GET(scmd->len); minsize = maxsize >> 2; do { old_minsize = minsize; @@ -2401,7 +2554,9 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd, } while ((dbuf == NULL) && (old_minsize > minsize) && (minsize >= 512)); if (dbuf == NULL) { - if (scmd->nbufs == 0) { + sbd_ats_remove_by_task(task); + sbd_write_same_release_resources(task); + if (ATOMIC8_GET(scmd->nbufs) == 0) { stmf_abort(STMF_QUEUE_TASK_ABORT, task, STMF_ALLOC_FAILURE, NULL); } @@ -2409,15 +2564,16 @@ sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd, } } - len = scmd->len > dbuf->db_buf_size ? dbuf->db_buf_size : - scmd->len; + len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? 
dbuf->db_buf_size : + ATOMIC32_GET(scmd->len); dbuf->db_relative_offset = scmd->current_ro; dbuf->db_data_size = len; dbuf->db_flags = DB_DIRECTION_FROM_RPORT; (void) stmf_xfer_data(task, dbuf, 0); - scmd->nbufs++; /* outstanding port xfers and bufs used */ - scmd->len -= len; + /* outstanding port xfers and bufs used */ + atomic_inc_8(&scmd->nbufs); + atomic_add_32(&scmd->len, -len); scmd->current_ro += len; } @@ -2431,6 +2587,12 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) uint8_t unmap; uint8_t do_immediate_data = 0; + if (HardwareAcceleratedInit == 0) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_OPCODE); + return; + } + task->task_cmd_xfer_length = 0; if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) { @@ -2447,11 +2609,13 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) return; } unmap = task->task_cdb[1] & 0x08; + if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) { stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_FIELD_IN_CDB); return; } + if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) { addr = READ_SCSI32(&task->task_cdb[2], uint64_t); len = READ_SCSI16(&task->task_cdb[7], uint64_t); @@ -2459,11 +2623,20 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) addr = READ_SCSI64(&task->task_cdb[2], uint64_t); len = READ_SCSI32(&task->task_cdb[10], uint64_t); } + if (len == 0) { stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_FIELD_IN_CDB); return; } + + if (sbd_ats_handling_before_io(task, sl, addr, len) != + SBD_SUCCESS) { + if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) + stmf_scsilib_send_status(task, STATUS_BUSY, 0); + return; + } + addr <<= sl->sl_data_blocksize_shift; len <<= sl->sl_data_blocksize_shift; @@ -2492,6 +2665,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) task->task_expected_xfer_length = task->task_cmd_xfer_length; } if ((addr + len) > sl->sl_lu_size) { + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_LBA_OUT_OF_RANGE); return; @@ -2502,6 +2676,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) /* Some basic checks */ if ((len == 0) || (len != task->task_expected_xfer_length)) { + sbd_ats_remove_by_task(task); stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_FIELD_IN_CDB); return; @@ -2513,6 +2688,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) if (initial_dbuf->db_data_size > task->task_expected_xfer_length) { /* protocol error */ + sbd_ats_remove_by_task(task); stmf_abort(STMF_QUEUE_TASK_ABORT, task, STMF_INVALID_ARG, NULL); return; @@ -2529,7 +2705,8 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP); task->task_lu_private = scmd; } - scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA; + scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA | + SBD_SCSI_CMD_ATS_RELATED; scmd->cmd_type = SBD_CMD_SCSI_WRITE; scmd->nbufs = 0; scmd->len = (uint32_t)len; @@ -2542,7 +2719,7 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) * Account for data passed in this write command */ (void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY); - scmd->len -= dbuf->db_data_size; + atomic_add_32(&scmd->len, -dbuf->db_data_size); scmd->current_ro += dbuf->db_data_size; dbuf->db_xfer_status = STMF_SUCCESS; sbd_handle_write_same_xfer_completion(task, 
scmd, dbuf, 0); @@ -2554,8 +2731,20 @@ sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf) static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf) { + sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; uint32_t cmd_xfer_len; + if (sbd_unmap_enable == 0) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_INVALID_OPCODE); + return; + } + + if (sl->sl_flags & SL_WRITE_PROTECTED) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_WRITE_PROTECTED); + return; + } cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t); if (task->task_cdb[1] & 1) { @@ -2605,14 +2794,26 @@ sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen) dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP); dfl->dfl_num_exts = num_desc; + /* + * This should use ATS locking but that was disabled by the + * changes to ZFS to take advantage of TRIM in SSDs. + * + * Since the entire list is passed to ZFS in one call, ATS + * locking is not done. This may be detectable, and if it is, + * the entire list needs to be locked and then, after the + * unmap completes, the entire list must be unlocked. + */ for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) { addr = READ_SCSI64(p, uint64_t); - addr <<= sl->sl_data_blocksize_shift; len = READ_SCSI32(p+8, uint64_t); + addr <<= sl->sl_data_blocksize_shift; len <<= sl->sl_data_blocksize_shift; + /* Prepare a list of extents to unmap */ dfl->dfl_exts[i].dfle_start = addr; dfl->dfl_exts[i].dfle_length = len; + + /* release the overlap */ } ASSERT(i == dfl->dfl_num_exts); @@ -2695,6 +2896,7 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT; inq->inq_cmdque = 1; + inq->inq_3pc = 1; if (sl->sl_flags & SL_VID_VALID) { bcopy(sl->sl_vendor_id, inq->inq_vid, 8); @@ -2793,13 +2995,15 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) switch (cdbp[2]) { case 0x00: - page_length = 4 + (mgmt_url_size ? 1 : 0); + page_length = 5 + (mgmt_url_size ?
1 : 0); + if (sl->sl_flags & SL_UNMAP_ENABLED) - page_length += 2; + page_length += 1; p[0] = byte0; p[3] = page_length; /* Supported VPD pages in ascending order */ + /* CSTYLED */ { uint8_t i = 5; @@ -2808,8 +3012,8 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) if (mgmt_url_size != 0) p[i++] = 0x85; p[i++] = 0x86; + p[i++] = 0xb0; if (sl->sl_flags & SL_UNMAP_ENABLED) { - p[i++] = 0xb0; p[i++] = 0xb2; } } @@ -2842,7 +3046,7 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_FIELD_IN_CDB); goto err_done; - } + } /* CSTYLED */ { uint16_t idx, newidx, sz, url_size; char *url; @@ -2903,17 +3107,23 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) break; case 0xb0: - if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) { - stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_INVALID_FIELD_IN_CDB); - goto err_done; - } page_length = 0x3c; p[0] = byte0; p[1] = 0xb0; p[3] = page_length; - p[20] = p[21] = p[22] = p[23] = 0xFF; - p[24] = p[25] = p[26] = p[27] = 0xFF; + p[4] = 1; + p[5] = sbd_ats_max_nblks(); + if ((sl->sl_flags & SL_UNMAP_ENABLED) && sbd_unmap_enable) { + p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff; + p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff; + p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff; + p[23] = stmf_sbd_unmap_max_nblks & 0xff; + + p[24] = 0; + p[25] = 0; + p[26] = 0; + p[27] = 0xFF; + } xfer_size = page_length + 4; break; @@ -2935,7 +3145,7 @@ sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) exp++; } p[4] = exp; - p[5] = 0xc0; + p[5] = 0xc0; /* Logical provisioning UNMAP and WRITE SAME */ xfer_size = page_length + 4; break; @@ -2956,7 +3166,7 @@ stmf_status_t sbd_task_alloc(struct scsi_task *task) { if ((task->task_lu_private = - kmem_alloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) { + kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) { sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private; scmd->flags = 0; return (STMF_SUCCESS); @@ -3022,8 +3232,40 @@ sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it) mutex_exit(&sl->sl_lock); } +/* + * Given a LU and a task, check if the task is causing a reservation + * conflict. Returns 1 in case of conflict, 0 otherwise. + * Note that the LU might not be the same LU as in the task but the + * caller makes sure that the LU can be accessed. + */ +int +sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task) +{ + sbd_it_data_t *it; + it = task->task_lu_itl_handle; + ASSERT(it); + if (sl->sl_access_state == SBD_LU_ACTIVE) { + if (SBD_PGR_RSVD(sl->sl_pgr)) { + if (sbd_pgr_reservation_conflict(task, sl)) { + return (1); + } + } else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) && + ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) { + if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) { + return (1); + } + } + } + return (0); +} + +/* + * Keep in mind that sbd_new_task can be called multiple times for the same + * task because we call stmf_task_poll_lu, resulting in a call to + * sbd_task_poll().
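
The compare-and-write (ATS) plumbing visible in these hunks follows a try-then-poll shape: sbd_ats_handling_before_io() attempts to reserve the LBA range, and on contention the task is handed back to the framework with stmf_task_poll_lu() so the command is retried later instead of blocking a worker. A minimal sketch of that shape, assuming hypothetical my_try_lock_range() and my_send_busy() helpers in place of the real sbd internals:

	/*
	 * Sketch only, not the sbd implementation. my_try_lock_range()
	 * and my_send_busy() are hypothetical stand-ins.
	 */
	static void
	my_locked_io(scsi_task_t *task, uint64_t addr, uint64_t len)
	{
		if (my_try_lock_range(task, addr, len) != 0) {
			/* Contended: ask the framework to re-poll (10, as above). */
			if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
				my_send_busy(task);
			return;	/* sbd_new_task() will run again for this task */
		}
		/* ... perform the I/O, then release the range ... */
	}
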
+ */ void sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) { @@ -3087,18 +3329,85 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY || sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) { stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_LU_NO_ACCESS_UNAVAIL); + STMF_SAA_LU_NO_ACCESS_TRANSITION); return; } - /* Checking ua conditions as per SAM3R14 5.3.2 specified order */ - if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) { + cdb0 = task->task_cdb[0]; + cdb1 = task->task_cdb[1]; + /* + * Special case for different versions of Windows. + * 1) Windows 2012 and VMware will fail to discover LUs if a READ + * operation sent down the standby path returns an error. By default + * stmf_standby_fail_reads will be set to 0. + * 2) Windows 2008 R2 has a severe performance problem if READ ops + * aren't rejected on the standby path. 2008 sends commands + * down the standby path which then must be proxied over to the + * active node and back. + */ + if ((sl->sl_access_state == SBD_LU_STANDBY) && + stmf_standby_fail_reads && + (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 || + cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_LU_NO_ACCESS_STANDBY); + return; + } + + /* + * Don't go further if cmd is unsupported in standby mode + */ + if (sl->sl_access_state == SBD_LU_STANDBY) { + if (cdb0 != SCMD_INQUIRY && + cdb0 != SCMD_MODE_SENSE && + cdb0 != SCMD_MODE_SENSE_G1 && + cdb0 != SCMD_MODE_SELECT && + cdb0 != SCMD_MODE_SELECT_G1 && + cdb0 != SCMD_RESERVE && + cdb0 != SCMD_RELEASE && + cdb0 != SCMD_PERSISTENT_RESERVE_OUT && + cdb0 != SCMD_PERSISTENT_RESERVE_IN && + cdb0 != SCMD_REQUEST_SENSE && + cdb0 != SCMD_READ_CAPACITY && + cdb0 != SCMD_TEST_UNIT_READY && + cdb0 != SCMD_START_STOP && + cdb0 != SCMD_READ && + cdb0 != SCMD_READ_G1 && + cdb0 != SCMD_READ_G4 && + cdb0 != SCMD_READ_G5 && + !(cdb0 == SCMD_SVC_ACTION_IN_G4 && + cdb1 == SSVC_ACTION_READ_CAPACITY_G4) && + !(cdb0 == SCMD_MAINTENANCE_IN && + (cdb1 & 0x1F) == 0x05) && + !(cdb0 == SCMD_MAINTENANCE_IN && + (cdb1 & 0x1F) == 0x0A)) { + stmf_scsilib_send_status(task, STATUS_CHECK, + STMF_SAA_LU_NO_ACCESS_STANDBY); + return; + } + } + + /* + * Checking ua conditions as per SAM3R14 5.3.2 specified order. During + * MPIO/ALUA failover, cmds come in through local ports and the proxy + * port provider (i.e. pppt); we want to report unit attention to + * only local cmds since initiators (Windows MPIO/DSM) would continue + * sending I/O to the target that reported unit attention.
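
The reworked unit-attention gate below reduces to a three-part predicate. A compact restatement (my_should_report_ua() is a hypothetical helper, not part of sbd.c):

	/*
	 * Hypothetical helper restating the UA gate used below: UA is
	 * reported only for local, non-INQUIRY commands with a UA pending.
	 */
	static boolean_t
	my_should_report_ua(scsi_task_t *task, sbd_it_data_t *it)
	{
		if (it->sbd_it_ua_conditions == 0)
			return (B_FALSE);		/* nothing pending */
		if (task->task_additional_flags & TASK_AF_PPPT_TASK)
			return (B_FALSE);		/* proxied via pppt */
		if (task->task_cdb[0] == SCMD_INQUIRY)
			return (B_FALSE);		/* INQUIRY never reports UA */
		return (B_TRUE);
	}
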
+ */ + if ((it->sbd_it_ua_conditions) && + !(task->task_additional_flags & TASK_AF_PPPT_TASK) && + (task->task_cdb[0] != SCMD_INQUIRY)) { uint32_t saa = 0; mutex_enter(&sl->sl_lock); if (it->sbd_it_ua_conditions & SBD_UA_POR) { it->sbd_it_ua_conditions &= ~SBD_UA_POR; saa = STMF_SAA_POR; + } else if (it->sbd_it_ua_conditions & + SBD_UA_ASYMMETRIC_ACCESS_CHANGED) { + it->sbd_it_ua_conditions &= + ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED; + saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED; } mutex_exit(&sl->sl_lock); if (saa) { @@ -3108,21 +3417,10 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) } /* Reservation conflict checks */ - if (sl->sl_access_state == SBD_LU_ACTIVE) { - if (SBD_PGR_RSVD(sl->sl_pgr)) { - if (sbd_pgr_reservation_conflict(task)) { - stmf_scsilib_send_status(task, - STATUS_RESERVATION_CONFLICT, 0); - return; - } - } else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) && - ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) { - if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) { - stmf_scsilib_send_status(task, - STATUS_RESERVATION_CONFLICT, 0); - return; - } - } + if (sbd_check_reservation_conflict(sl, task)) { + stmf_scsilib_send_status(task, + STATUS_RESERVATION_CONFLICT, 0); + return; } /* Rest of the ua condition checks */ @@ -3147,9 +3445,9 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) saa = STMF_SAA_MODE_PARAMETERS_CHANGED; } else if (it->sbd_it_ua_conditions & SBD_UA_ASYMMETRIC_ACCESS_CHANGED) { - it->sbd_it_ua_conditions &= - ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED; - saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED; + saa = 0; + } else if (it->sbd_it_ua_conditions & SBD_UA_POR) { + saa = 0; } else if (it->sbd_it_ua_conditions & SBD_UA_ACCESS_STATE_TRANSITION) { it->sbd_it_ua_conditions &= ~SBD_UA_ACCESS_STATE_TRANSITION; @@ -3166,38 +3464,7 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) } } - cdb0 = task->task_cdb[0]; - cdb1 = task->task_cdb[1]; - if (sl->sl_access_state == SBD_LU_STANDBY) { - if (cdb0 != SCMD_INQUIRY && - cdb0 != SCMD_MODE_SENSE && - cdb0 != SCMD_MODE_SENSE_G1 && - cdb0 != SCMD_MODE_SELECT && - cdb0 != SCMD_MODE_SELECT_G1 && - cdb0 != SCMD_RESERVE && - cdb0 != SCMD_RELEASE && - cdb0 != SCMD_PERSISTENT_RESERVE_OUT && - cdb0 != SCMD_PERSISTENT_RESERVE_IN && - cdb0 != SCMD_REQUEST_SENSE && - cdb0 != SCMD_READ_CAPACITY && - cdb0 != SCMD_TEST_UNIT_READY && - cdb0 != SCMD_START_STOP && - cdb0 != SCMD_READ && - cdb0 != SCMD_READ_G1 && - cdb0 != SCMD_READ_G4 && - cdb0 != SCMD_READ_G5 && - !(cdb0 == SCMD_SVC_ACTION_IN_G4 && - cdb1 == SSVC_ACTION_READ_CAPACITY_G4) && - !(cdb0 == SCMD_MAINTENANCE_IN && - (cdb1 & 0x1F) == 0x05) && - !(cdb0 == SCMD_MAINTENANCE_IN && - (cdb1 & 0x1F) == 0x0A)) { - stmf_scsilib_send_status(task, STATUS_CHECK, - STMF_SAA_LU_NO_ACCESS_STANDBY); - return; - } - /* * is this a short write?
* if so, we'll need to wait until we have the buffer @@ -3375,6 +3642,21 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) return; } + if (cdb0 == SCMD_COMPARE_AND_WRITE) { + sbd_handle_ats(task, initial_dbuf); + return; + } + + if (cdb0 == SCMD_EXTENDED_COPY) { + sbd_handle_xcopy(task, initial_dbuf); + return; + } + + if (cdb0 == SCMD_RECV_COPY_RESULTS) { + sbd_handle_recv_copy_results(task, initial_dbuf); + return; + } + if (cdb0 == SCMD_TEST_UNIT_READY) { /* Test unit ready */ task->task_cmd_xfer_length = 0; stmf_scsilib_send_status(task, STATUS_GOOD, 0); @@ -3441,7 +3723,6 @@ sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf) sbd_handle_write(task, initial_dbuf); return; } - stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE); } @@ -3481,12 +3762,18 @@ sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf) break; case (SBD_CMD_SCSI_WRITE): - if ((task->task_cdb[0] == SCMD_WRITE_SAME_G1) || - (task->task_cdb[0] == SCMD_WRITE_SAME_G4)) { + switch (task->task_cdb[0]) { + case SCMD_WRITE_SAME_G1: + case SCMD_WRITE_SAME_G4: sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 1); - } else { + break; + case SCMD_COMPARE_AND_WRITE: + sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1); + break; + default: sbd_handle_write_xfer_completion(task, scmd, dbuf, 1); + /* FALLTHRU */ } break; @@ -3552,6 +3839,7 @@ sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags) ASSERT(abort_cmd == STMF_LU_ABORT_TASK); task = (scsi_task_t *)arg; + sbd_ats_remove_by_task(task); if (task->task_lu_private) { sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private; @@ -3569,6 +3857,15 @@ sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags) return (STMF_NOT_FOUND); } +void +sbd_task_poll(struct scsi_task *task) +{ + stmf_data_buf_t *initial_dbuf; + + initial_dbuf = stmf_handle_to_buf(task, 0); + sbd_new_task(task, initial_dbuf); +} + /* * This function is called during task clean-up if the * DB_LU_FLAG is set on the dbuf. 
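
The dbuf-free path below relies on an atomic last-reference idiom: scmd->nbufs is decremented with atomic_dec_8_nv(), which returns the new value, so exactly one completing buffer observes zero and drops sl_access_state_lock. A minimal sketch of the idiom (my_buf_done() is illustrative only):

	/*
	 * Sketch of the last-reference release used in sbd_dbuf_free():
	 * the thread that decrements the count to zero releases the lock.
	 */
	static void
	my_buf_done(sbd_cmd_t *scmd, krwlock_t *lockp)
	{
		if (atomic_dec_8_nv(&scmd->nbufs) == 0)
			rw_exit(lockp);		/* last outstanding buffer */
	}
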
This should only be called for @@ -3581,7 +3878,7 @@ sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf) sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private; ASSERT(dbuf->db_lu_private); - ASSERT(scmd && scmd->nbufs > 0); + ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0); ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0); ASSERT(dbuf->db_flags & DB_LU_DATA_BUF); ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF); @@ -3595,7 +3892,7 @@ sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf) cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p", scmd->cmd_type, (void *)task); } - if (--scmd->nbufs == 0) + if (atomic_dec_8_nv(&scmd->nbufs) == 0) rw_exit(&sl->sl_access_state_lock); stmf_teardown_dbuf(task, dbuf); stmf_free(dbuf); @@ -3692,13 +3989,20 @@ sbd_status_t sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done) { int r = 0; - int ret; + sbd_status_t ret = SBD_SUCCESS; + rw_enter(&sl->sl_access_state_lock, RW_READER); + if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) { + ret = SBD_FILEIO_FAILURE; + goto flush_fail; + } if (fsync_done) goto over_fsync; if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) { - if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) - return (SBD_FAILURE); + if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) { + ret = SBD_FAILURE; + goto flush_fail; + } } over_fsync: if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) && @@ -3709,12 +4013,14 @@ over_fsync: mutex_enter(&sl->sl_lock); sl->sl_flags |= SL_NO_DATA_DKIOFLUSH; mutex_exit(&sl->sl_lock); - } else if (ret != 0) { - return (SBD_FAILURE); + } else { + ret = (ret != 0) ? SBD_FAILURE : SBD_SUCCESS; } } +flush_fail: + rw_exit(&sl->sl_access_state_lock); - return (SBD_SUCCESS); + return (ret); } /* ARGSUSED */ diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h b/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h index f3ab44f8f4..09c672d16b 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/stmf_sbd.h @@ -34,6 +34,7 @@ extern "C" { #endif typedef stmf_status_t sbd_status_t; +#include "ats_copy_mgr.h" extern char sbd_vendor_id[]; extern char sbd_product_id[]; extern char sbd_revision[]; @@ -56,6 +57,7 @@ extern krwlock_t sbd_global_prop_lock; #define SBD_FILEIO_FAILURE (SBD_FAILURE | STMF_FSC(7)) #define SBD_IO_PAST_EOF (SBD_FAILURE | STMF_FSC(8)) #define SBD_BUSY (SBD_FAILURE | STMF_FSC(9)) +#define SBD_COMPARE_FAILED (SBD_FAILURE | STMF_FSC(10)) #define SHARED_META_DATA_SIZE 65536 #define SBD_META_OFFSET 4096 @@ -246,6 +248,7 @@ typedef struct sbd_lu { struct sbd_it_data *sl_it_list; struct sbd_pgr *sl_pgr; uint64_t sl_rs_owner_session_id; + list_t sl_ats_io_list; } sbd_lu_t; /* @@ -304,6 +307,11 @@ sbd_status_t sbd_wcd_set(int wcd, sbd_lu_t *sl); void sbd_wcd_get(int *wcd, sbd_lu_t *sl); int sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl); +void sbd_handle_short_write_transfers(scsi_task_t *, stmf_data_buf_t *, + uint32_t); +void sbd_handle_short_read_transfers(scsi_task_t *, stmf_data_buf_t *, + uint8_t *, uint32_t, uint32_t); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/io/comstar/port/fct/fct.c b/usr/src/uts/common/io/comstar/port/fct/fct.c index cdb8a00aa3..9f20636a27 100644 --- a/usr/src/uts/common/io/comstar/port/fct/fct.c +++ b/usr/src/uts/common/io/comstar/port/fct/fct.c @@ -113,6 +113,14 @@ static int max_cached_ncmds = FCT_MAX_CACHED_CMDS; static fct_i_local_port_t *fct_iport_list = NULL; static kmutex_t fct_global_mutex; uint32_t fct_rscn_options =
RSCN_OPTION_VERIFY; +/* + * This is to keep fibre channel from hanging if syseventd is + * not working correctly and the queue fills. The tunable lets + * the user force event logging to always happen (DDI_SLEEP); + * by default events are posted without blocking and may be + * dropped if the queue is full. + */ +static uint8_t fct_force_log = 0; /* use DDI_SLEEP on ddi_log_sysevent */ + int _init(void) { @@ -1612,7 +1620,8 @@ fct_local_port_cleanup_done(fct_i_local_port_t *iport) fct_cmd_t * fct_scsi_task_alloc(fct_local_port_t *port, uint16_t rp_handle, - uint32_t rportid, uint8_t *lun, uint16_t cdb_length, uint16_t task_ext) + uint32_t rportid, uint8_t *lun, uint16_t cdb_length, + uint16_t task_ext) { fct_cmd_t *cmd; fct_i_cmd_t *icmd; @@ -2829,8 +2838,8 @@ fct_cmd_fca_aborted(fct_cmd_t *cmd, fct_status_t s, uint32_t ioflags) /* For non FCP Rest of the work is done by the terminator */ /* For FCP stuff just call stmf */ if (cmd->cmd_type == FCT_CMD_FCP_XCHG) { - stmf_task_lport_aborted((scsi_task_t *)cmd->cmd_specific, - s, STMF_IOF_LPORT_DONE); + stmf_task_lport_aborted_unlocked( + (scsi_task_t *)cmd->cmd_specific, s, STMF_IOF_LPORT_DONE); } } @@ -3432,6 +3441,7 @@ fct_log_local_port_event(fct_local_port_t *port, char *subclass) { nvlist_t *attr_list; int port_instance; + int rc, sleep = DDI_SLEEP; if (!fct_dip) return; @@ -3452,8 +3462,15 @@ fct_log_local_port_event(fct_local_port_t *port, char *subclass) goto error; } - (void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC, - subclass, attr_list, NULL, DDI_SLEEP); + if (fct_force_log == 0) { + sleep = DDI_NOSLEEP; + } + rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC, + subclass, attr_list, NULL, sleep); + if (rc != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s: event dropped", __func__); + goto error; + } nvlist_free(attr_list); return; @@ -3471,6 +3488,7 @@ fct_log_remote_port_event(fct_local_port_t *port, char *subclass, { nvlist_t *attr_list; int port_instance; + int rc, sleep = DDI_SLEEP; if (!fct_dip) return; @@ -3501,8 +3519,15 @@ fct_log_remote_port_event(fct_local_port_t *port, char *subclass, goto error; } - (void) ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC, - subclass, attr_list, NULL, DDI_SLEEP); + if (fct_force_log == 0) { + sleep = DDI_NOSLEEP; + } + rc = ddi_log_sysevent(fct_dip, DDI_VENDOR_SUNW, EC_SUNFC, + subclass, attr_list, NULL, sleep); + if (rc != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s: queue full event lost", __func__); + goto error; + } nvlist_free(attr_list); return; diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c index 3fd198a135..1a3483f189 100644 --- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c +++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright 2014, 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2017, Joyent, Inc. All rights reserved. */ @@ -564,7 +564,8 @@ cleanup: it_config_free_cmn(cfg); if (cfg_pnvlist) kmem_free(cfg_pnvlist, setcfg.set_cfg_pnvlist_len); - nvlist_free(cfg_nvlist); + if (cfg_nvlist) + nvlist_free(cfg_nvlist); /* * Now that the reconfig is complete set our state back to @@ -992,7 +993,7 @@ iscsit_task_aborted(idm_task_t *idt, idm_status_t status) * STMF_ABORTED, the code actually looks for * STMF_ABORT_SUCCESS.
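
The fct.c hunks above replace the unconditional DDI_SLEEP sysevent post with a tunable-controlled one, so a stuck syseventd queue drops events instead of hanging the port. A condensed sketch of the pattern (my_log_event() is hypothetical; the real functions also build the nvlist and run a shared error path):

	static void
	my_log_event(dev_info_t *dip, char *subclass, nvlist_t *attrs)
	{
		/* Block only when the fct_force_log tunable is set. */
		int sleep = (fct_force_log == 0) ? DDI_NOSLEEP : DDI_SLEEP;

		if (ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_SUNFC,
		    subclass, attrs, NULL, sleep) != DDI_SUCCESS)
			cmn_err(CE_WARN, "%s: event dropped", __func__);
	}
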
*/ - stmf_task_lport_aborted(itask->it_stmf_task, + stmf_task_lport_aborted_unlocked(itask->it_stmf_task, STMF_ABORT_SUCCESS, STMF_IOF_LPORT_DONE); return; } else { @@ -1224,6 +1225,7 @@ iscsit_conn_accept(idm_conn_t *ic) mutex_init(&ict->ict_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&ict->ict_statsn_mutex, NULL, MUTEX_DRIVER, NULL); idm_refcnt_init(&ict->ict_refcnt, ict); + idm_refcnt_init(&ict->ict_dispatch_refcnt, ict); /* * Initialize login state machine @@ -1369,6 +1371,9 @@ iscsit_conn_lost(idm_conn_t *ic) * Make sure there aren't any PDU's transitioning from the receive * handler to the dispatch taskq. */ + if (idm_refcnt_is_held(&ict->ict_dispatch_refcnt) < 0) { + cmn_err(CE_WARN, "Possible hang in iscsit_conn_lost"); + } idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt); return (IDM_STATUS_SUCCESS); @@ -1385,13 +1390,10 @@ iscsit_conn_destroy(idm_conn_t *ic) /* Generate session state machine event */ if (ict->ict_sess != NULL) { - /* - * Session state machine will call iscsit_conn_destroy_done() - * when it has removed references to this connection. - */ iscsit_sess_sm_event(ict->ict_sess, SE_CONN_FAIL, ict); } + idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt); idm_refcnt_wait_ref(&ict->ict_refcnt); /* * The session state machine does not need to post @@ -1407,6 +1409,7 @@ iscsit_conn_destroy(idm_conn_t *ic) iscsit_text_cmd_fini(ict); mutex_destroy(&ict->ict_mutex); + idm_refcnt_destroy(&ict->ict_dispatch_refcnt); idm_refcnt_destroy(&ict->ict_refcnt); kmem_free(ict, sizeof (*ict)); @@ -1888,20 +1891,8 @@ iscsit_abort(stmf_local_port_t *lport, int abort_cmd, void *arg, uint32_t flags) * Call IDM to abort the task. Due to a variety of * circumstances the task may already be in the process of * aborting. - * We'll let IDM worry about rationalizing all that except - * for one particular instance. If the state of the task - * is TASK_COMPLETE, we need to indicate to the framework - * that we are in fact done. This typically happens with - * framework-initiated task management type requests - * (e.g. abort task). 
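
The new ict_dispatch_refcnt above closes a teardown race: PDUs travelling from the receive handler to the dispatch taskq hold a reference, and connection teardown waits for those references to drain. The sequence, sketched with the idm refcnt primitives (the call sites shown are illustrative, not the exact ones):

	/* Receive side: pin the connection per queued PDU. */
	idm_refcnt_hold(&ict->ict_dispatch_refcnt);
	/* ... dispatch taskq worker processes the PDU, then: */
	idm_refcnt_rele(&ict->ict_dispatch_refcnt);

	/* Teardown side: block until every queued PDU has drained. */
	idm_refcnt_wait_ref(&ict->ict_dispatch_refcnt);
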
*/ - if (idt->idt_state == TASK_COMPLETE) { - idm_refcnt_wait_ref(&idt->idt_refcnt); - return (STMF_ABORT_SUCCESS); - } else { - idm_task_abort(idt->idt_ic, idt, AT_TASK_MGMT_ABORT); - return (STMF_SUCCESS); - } + return (idm_task_abort(idt->idt_ic, idt, AT_TASK_MGMT_ABORT)); } /*NOTREACHED*/ @@ -1962,6 +1953,21 @@ iscsit_op_scsi_cmd(idm_conn_t *ic, idm_pdu_t *rx_pdu) iscsit_process_pdu_in_queue(ict->ict_sess); } +static int +iscsit_validate_idm_pdu(idm_pdu_t *rx_pdu) +{ + iscsi_scsi_cmd_hdr_t *iscsi_scsi = + (iscsi_scsi_cmd_hdr_t *)rx_pdu->isp_hdr; + + if ((iscsi_scsi->scb[0] == SCMD_READ) || + (iscsi_scsi->scb[0] == SCMD_READ_G1) || + (iscsi_scsi->scb[0] == SCMD_READ_G4)) { + if (iscsi_scsi->flags & ISCSI_FLAG_CMD_WRITE) + return (IDM_STATUS_FAIL); + } + return (IDM_STATUS_SUCCESS); +} + /* * ISCSI protocol */ @@ -1979,6 +1985,15 @@ iscsit_post_scsi_cmd(idm_conn_t *ic, idm_pdu_t *rx_pdu) uint16_t addl_cdb_len = 0; ict = ic->ic_handle; + if (iscsit_validate_idm_pdu(rx_pdu) != IDM_STATUS_SUCCESS) { + /* Finish processing request */ + iscsit_set_cmdsn(ict, rx_pdu); + + iscsit_send_direct_scsi_resp(ict, rx_pdu, + ISCSI_STATUS_CMD_COMPLETED, STATUS_CHECK); + idm_pdu_complete(rx_pdu, IDM_STATUS_PROTOCOL_ERROR); + return; + } itask = iscsit_task_alloc(ict); if (itask == NULL) { diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h index 4718e98e77..9e4d84f722 100644 --- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h +++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit.h @@ -56,9 +56,6 @@ #define ISCSI_MAX_TSIH 0xffff #define ISCSI_UNSPEC_TSIH 0 -/* Max targets per system */ -#define ISCSIT_MAX_TARGETS 1024 - #define ISCSIT_MAX_WINDOW 1024 #define ISCSIT_RXPDU_QUEUE_LEN 2048 @@ -124,74 +121,63 @@ typedef struct { } iscsit_portal_t; +#define TGT_STATE_LIST() \ + item(TS_UNDEFINED) \ + item(TS_CREATED) \ + item(TS_ONLINING) \ + item(TS_ONLINE) \ + item(TS_STMF_ONLINE) \ + item(TS_DELETING_NEED_OFFLINE) \ + item(TS_OFFLINING) \ + item(TS_OFFLINE) \ + item(TS_STMF_OFFLINE) \ + item(TS_DELETING_STMF_DEREG) \ + item(TS_DELETING_STMF_DEREG_FAIL) \ + item(TS_DELETING) \ + item(TS_MAX_STATE) + /* Target states and events, update iscsit_ts_name table whenever modified */ typedef enum { - TS_UNDEFINED = 0, - TS_CREATED, - TS_ONLINING, - TS_ONLINE, - TS_STMF_ONLINE, - TS_DELETING_NEED_OFFLINE, - TS_OFFLINING, - TS_OFFLINE, - TS_STMF_OFFLINE, - TS_DELETING_STMF_DEREG, - TS_DELETING_STMF_DEREG_FAIL, - TS_DELETING, - TS_MAX_STATE +#define item(a) a, + TGT_STATE_LIST() +#undef item } iscsit_tgt_state_t; #ifdef ISCSIT_TGT_SM_STRINGS -static const char *iscsit_ts_name[TS_MAX_STATE+1] = { - "TS_UNDEFINED", - "TS_CREATED", - "TS_ONLINING", - "TS_ONLINE", - "TS_STMF_ONLINE", - "TS_DELETING_NEED_OFFLINE", - "TS_OFFLINING", - "TS_OFFLINE", - "TS_STMF_OFFLINE", - "TS_DELETING_STMF_DEREG", - "TS_DELETING_STMF_DEREG_FAIL", - "TS_DELETING", - "TS_MAX_STATE" +static const char *iscsit_ts_name[TS_MAX_STATE + 1] = { +#define item(a) #a, + TGT_STATE_LIST() +#undef item }; #endif +#define TGT_EVENT_LIST() \ + item(TE_UNDEFINED) \ + item(TE_STMF_ONLINE_REQ) \ + item(TE_ONLINE_SUCCESS) \ + item(TE_ONLINE_FAIL) \ + item(TE_STMF_ONLINE_COMPLETE_ACK) \ + item(TE_STMF_OFFLINE_REQ) \ + item(TE_OFFLINE_COMPLETE) \ + item(TE_STMF_OFFLINE_COMPLETE_ACK) \ + item(TE_DELETE) \ + item(TE_STMF_DEREG_SUCCESS) \ + item(TE_STMF_DEREG_FAIL) \ + item(TE_STMF_DEREG_RETRY) \ + item(TE_WAIT_REF_COMPLETE) \ + item(TE_MAX_EVENT) + typedef enum { - TE_UNDEFINED = 0, - 
TE_STMF_ONLINE_REQ, - TE_ONLINE_SUCCESS, - TE_ONLINE_FAIL, - TE_STMF_ONLINE_COMPLETE_ACK, - TE_STMF_OFFLINE_REQ, - TE_OFFLINE_COMPLETE, - TE_STMF_OFFLINE_COMPLETE_ACK, - TE_DELETE, - TE_STMF_DEREG_SUCCESS, - TE_STMF_DEREG_FAIL, - TE_STMF_DEREG_RETRY, - TE_WAIT_REF_COMPLETE, - TE_MAX_EVENT +#define item(a) a, + TGT_EVENT_LIST() +#undef item } iscsit_tgt_event_t; #ifdef ISCSIT_TGT_SM_STRINGS -static const char *iscsit_te_name[TE_MAX_EVENT+1] = { - "TE_UNDEFINED", - "TE_STMF_ONLINE_REQ", - "TE_ONLINE_SUCCESS", - "TE_ONLINE_FAIL", - "TE_STMF_ONLINE_COMPLETE_ACK", - "TE_STMF_OFFLINE_REQ", - "TE_OFFLINE_COMPLETE", - "TE_STMF_OFFLINE_COMPLETE_ACK", - "TE_DELETE", - "TE_STMF_DEREG_SUCCESS", - "TE_STMF_DEREG_FAIL", - "TE_STMF_DEREG_RETRY", - "TE_WAIT_REF_COMPLETE", - "TE_MAX_EVENT" +static const char *iscsit_te_name[TE_MAX_EVENT + 1] = { +#define item(a) #a, + TGT_EVENT_LIST() +#undef item }; #endif @@ -238,7 +224,7 @@ typedef struct conn_auth { uint8_t ca_ini_chapsecret[iscsitAuthStringMaxLength]; int ca_ini_chapsecretlen; - /* RADIUS authentication information */ + /* RADIUS authentication information */ boolean_t ca_use_radius; struct sockaddr_storage ca_radius_server; uint8_t ca_radius_secret[iscsitAuthStringMaxLength]; @@ -284,67 +270,62 @@ typedef struct conn_auth { struct iscsit_conn_s; +/* Add new session states above SS_MAX_STATE */ +#define SESSION_STATE_LIST() \ + item(SS_UNDEFINED) \ + item(SS_Q1_FREE) \ + item(SS_Q2_ACTIVE) \ + item(SS_Q3_LOGGED_IN) \ + item(SS_Q4_FAILED) \ + item(SS_Q5_CONTINUE) \ + item(SS_Q6_DONE) \ + item(SS_Q7_ERROR) \ + item(SS_MAX_STATE) + /* Update iscsit_ss_name table whenever session states are modified */ typedef enum { - SS_UNDEFINED = 0, - SS_Q1_FREE, - SS_Q2_ACTIVE, - SS_Q3_LOGGED_IN, - SS_Q4_FAILED, - SS_Q5_CONTINUE, - SS_Q6_DONE, - SS_Q7_ERROR, - /* Add new session states above SS_MAX_STATE */ - SS_MAX_STATE +#define item(a) a, + SESSION_STATE_LIST() +#undef item } iscsit_session_state_t; #ifdef ISCSIT_SESS_SM_STRINGS /* An array of state text values, for use in logging state transitions */ -static const char *iscsit_ss_name[SS_MAX_STATE+1] = { - "SS_UNDEFINED", - "SS_Q1_FREE", - "SS_Q2_ACTIVE", - "SS_Q3_LOGGED_IN", - "SS_Q4_FAILED", - "SS_Q5_CONTINUE", - "SS_Q6_DONE", - "SS_Q7_ERROR", - "SS_MAX_STATE" +static const char *iscsit_ss_name[SS_MAX_STATE + 1] = { +#define item(a) #a, + SESSION_STATE_LIST() +#undef item }; #endif +/* Add new events above SE_MAX_EVENT */ +#define SESSION_EVENT_LIST() \ + item(SE_UNDEFINED) \ + item(SE_CONN_IN_LOGIN) /* From login state machine */ \ + item(SE_CONN_LOGGED_IN) /* FFP enabled client notification */ \ + item(SE_CONN_FFP_FAIL) /* FFP disabled client notification */ \ + item(SE_CONN_FFP_DISABLE) /* FFP disabled client notification */ \ + item(SE_CONN_FAIL) /* Conn destroy client notification */ \ + item(SE_SESSION_CLOSE) /* FFP disabled client notification */ \ + item(SE_SESSION_REINSTATE) /* From login state machine */ \ + item(SE_SESSION_TIMEOUT) /* Internal */ \ + item(SE_SESSION_CONTINUE) /* From login state machine */ \ + item(SE_SESSION_CONTINUE_FAIL) /* From login state machine? 
*/ \ + item(SE_MAX_EVENT) + /* Update iscsit_se_name table whenever session events are modified */ typedef enum { - SE_UNDEFINED = 0, - SE_CONN_IN_LOGIN, /* From login state machine */ - SE_CONN_LOGGED_IN, /* FFP enabled client notification */ - SE_CONN_FFP_FAIL, /* FFP disabled client notification */ - SE_CONN_FFP_DISABLE, /* FFP disabled client notification */ - SE_CONN_FAIL, /* Conn destroy client notification */ - SE_SESSION_CLOSE, /* FFP disabled client notification */ - SE_SESSION_REINSTATE, /* From login state machine */ - SE_SESSION_TIMEOUT, /* Internal */ - SE_SESSION_CONTINUE, /* From login state machine */ - SE_SESSION_CONTINUE_FAIL, /* From login state machine? */ - /* Add new events above SE_MAX_EVENT */ - SE_MAX_EVENT +#define item(a) a, + SESSION_EVENT_LIST() +#undef item } iscsit_session_event_t; #ifdef ISCSIT_SESS_SM_STRINGS /* An array of event text values, for use in logging events */ -static const char *iscsit_se_name[SE_MAX_EVENT+1] = { - "SE_UNDEFINED", - "SE_CONN_IN_LOGIN", - "SE_CONN_LOGGED_IN", - "SE_CONN_FFP_FAIL", - "SE_CONN_FFP_DISABLE", - "SE_CONN_FAIL", - "SE_SESSION_CLOSE", - "SE_SESSION_REINSTATE", - "SE_SESSION_TIMEOUT", - "SE_SESSION_CONTINUE", - "SE_SESSION_CONTINUE_FAIL", - "SE_MAX_EVENT" +static const char *iscsit_se_name[SE_MAX_EVENT + 1] = { +#define item(a) #a, + SESSION_EVENT_LIST() +#undef item }; #endif @@ -389,61 +370,59 @@ typedef struct { iscsit_cbuf_t *ist_rxpdu_queue; } iscsit_sess_t; +/* Add new login states above ILS_MAX_STATE */ +#define LOGIN_STATE_LIST() \ + item(ILS_UNDEFINED) \ + item(ILS_LOGIN_INIT) \ + item(ILS_LOGIN_WAITING) /* Waiting for more login PDU's */ \ + item(ILS_LOGIN_PROCESSING) /* Processing login request */ \ + item(ILS_LOGIN_RESPONDING) /* Sending login response */ \ + item(ILS_LOGIN_RESPONDED) /* Sent login response (no trans. to FFP) */ \ + item(ILS_LOGIN_FFP) /* Sending last login PDU for final response */ \ + item(ILS_LOGIN_DONE) /* Last login PDU sent (so we can free it) */ \ + item(ILS_LOGIN_ERROR) /* Login error, login failed */ \ + item(ILS_MAX_STATE) + /* Update iscsit_ils_name table whenever login states are modified */ typedef enum { - ILS_UNDEFINED = 0, - ILS_LOGIN_INIT, - ILS_LOGIN_WAITING, /* Waiting for more login PDU's */ - ILS_LOGIN_PROCESSING, /* Processing login request */ - ILS_LOGIN_RESPONDING, /* Sending login response */ - ILS_LOGIN_RESPONDED, /* Sent login response (no trans. 
to FFP) */ - ILS_LOGIN_FFP, /* Sending last login PDU for final response */ - ILS_LOGIN_DONE, /* Last login PDU sent (so we can free it) */ - ILS_LOGIN_ERROR, /* Login error, login failed */ - /* Add new login states above ILS_MAX_STATE */ - ILS_MAX_STATE +#define item(a) a, + LOGIN_STATE_LIST() +#undef item } iscsit_login_state_t; #ifdef ISCSIT_LOGIN_SM_STRINGS -/* An array of login state text values, for use in logging login progress */ -static const char *iscsit_ils_name[ILS_MAX_STATE+1] = { - "ILS_UNDEFINED", - "ILS_LOGIN_INIT", - "ILS_LOGIN_WAITING", - "ILS_LOGIN_PROCESSING", - "ILS_LOGIN_RESPONDING", - "ILS_LOGIN_RESPONDED", - "ILS_LOGIN_FFP", - "ILS_LOGIN_DONE", - "ILS_LOGIN_ERROR", - "ILS_MAX_STATE" +/* An array of login state text values, for use in logging login progress */ +static const char *iscsit_ils_name[ILS_MAX_STATE + 1] = { +#define item(a) #a, + LOGIN_STATE_LIST() +#undef item }; #endif +/* Add new login events above ILE_MAX_EVENT */ +#define LOGIN_EVENT_LIST() \ + item(ILE_UNDEFINED) \ + item(ILE_LOGIN_RCV) \ + item(ILE_LOGIN_RESP_READY) \ + item(ILE_LOGIN_FFP) \ + item(ILE_LOGIN_RESP_COMPLETE) \ + item(ILE_LOGIN_ERROR) \ + item(ILE_LOGIN_CONN_ERROR) \ + item(ILE_MAX_EVENT) + /* Update iscsit_ile_name table whenever login events are modified */ typedef enum { - ILE_UNDEFINED = 0, - ILE_LOGIN_RCV, - ILE_LOGIN_RESP_READY, - ILE_LOGIN_FFP, - ILE_LOGIN_RESP_COMPLETE, - ILE_LOGIN_ERROR, - ILE_LOGIN_CONN_ERROR, - /* Add new login events above ILE_MAX_EVENT */ - ILE_MAX_EVENT +#define item(a) a, + LOGIN_EVENT_LIST() +#undef item } iscsit_login_event_t; #ifdef ISCSIT_LOGIN_SM_STRINGS -/* An array of login event text values, for use in logging login events */ -static const char *iscsit_ile_name[ILE_MAX_EVENT+1] = { - "ILE_UNDEFINED", - "ILE_LOGIN_RCV", - "ILE_LOGIN_RESP_READY", - "ILE_LOGIN_FFP", - "ILE_LOGIN_RESP_COMPLETE", - "ILE_LOGIN_ERROR", - "ILE_LOGIN_CONN_ERROR", - "ILE_MAX_EVENT" +/* An array of login event text values, for use in logging login events */ +static const char *iscsit_ile_name[ILE_MAX_EVENT + 1] = { +#define item(a) #a, + LOGIN_EVENT_LIST() +#undef item }; #endif @@ -466,8 +445,8 @@ typedef struct { } iscsit_op_params_t; typedef struct { - iscsit_login_state_t icl_login_state; - iscsit_login_state_t icl_login_last_state; + iscsit_login_state_t icl_login_state; + iscsit_login_state_t icl_login_last_state; sm_audit_buf_t icl_state_audit; boolean_t icl_busy; boolean_t icl_login_complete; @@ -565,17 +544,22 @@ typedef struct iscsit_isns_cfg { list_t isns_svrs; } iscsit_isns_cfg_t; +#define SERVICE_ENABLED_LIST() \ + item(ISE_UNDEFINED) \ + item(ISE_DETACHED) \ + item(ISE_DISABLED) \ + item(ISE_ENABLING) \ + item(ISE_ENABLED) \ + item(ISE_BUSY) \ + item(ISE_DISABLING) + /* * State values for the iscsit service */ typedef enum { - ISE_UNDEFINED = 0, - ISE_DETACHED, - ISE_DISABLED, - ISE_ENABLING, - ISE_ENABLED, - ISE_BUSY, - ISE_DISABLING +#define item(a) a, + SERVICE_ENABLED_LIST() +#undef item } iscsit_service_enabled_t; @@ -682,9 +666,6 @@ idm_status_t iscsit_conn_reinstate(iscsit_conn_t *existing_ict, iscsit_conn_t *ict); void -iscsit_conn_destroy_done(iscsit_conn_t *ict); - -void iscsit_conn_set_auth(iscsit_conn_t *ict); void diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c index 32b1ae3b09..4571fc55de 100644 --- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c +++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_login.c @@ -46,6 +46,7 @@ #include <sys/portif.h>
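
The header rewrites above all apply one technique: each state or event table becomes an item() X-macro list, expanded once with item(a) a, to build the enum and once with item(a) #a, to build the matching string table, so the two can no longer drift apart when entries are added. A standalone miniature of the pattern:

	#define COLOR_LIST() \
		item(COLOR_RED) \
		item(COLOR_GREEN) \
		item(COLOR_BLUE) \
		item(COLOR_MAX)

	typedef enum {
	#define item(a) a,
		COLOR_LIST()
	#undef item
	} color_t;

	/* Same list expanded again: color_name[COLOR_RED] == "COLOR_RED". */
	static const char *color_name[COLOR_MAX + 1] = {
	#define item(a) #a,
		COLOR_LIST()
	#undef item
	};
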
#include <sys/idm/idm.h> #include <sys/idm/idm_text.h> +#include <sys/idm/idm_so.h> #define ISCSIT_LOGIN_SM_STRINGS #include "iscsit.h" @@ -733,7 +734,37 @@ login_sm_new_state(iscsit_conn_t *ict, login_event_ctx_t *ctx, lsm->icl_login_state = new_state; mutex_exit(&lsm->icl_mutex); - switch (lsm->icl_login_state) { + /* + * A tale of caution here. The use of new_state instead of + * lsm->icl_login_state (which had been used originally) is + * deliberate. Since the icl_mutex is dropped under the right + * circumstances, the login state can change between setting the state + * and examining the state to proceed. No big surprise, since the lock + * was being used in the first place to prevent just that type of + * change. + * + * There has been a case where network errors occurred while a client + * was attempting to reinstate the connection, causing multiple + * login packets to arrive in the state machine. Those multiple + * packets were processed incorrectly, leaving the reference + * count on the connection one higher than it should be, and + * from then on the connection can't close correctly, causing a hang. + * + * Upon examination of the core it was found that the connection + * audit data had calls looking like: + * login_sm_event_dispatch + * login_sm_processing + * login_sm_new_state + * That call sequence means the new state was/is ILS_LOGIN_ERROR, + * yet the audit trail continues with a call to + * login_sm_send_next_response, + * which could only occur if icl_login_state had changed. Had the + * design of COMSTAR taken this into account, the code would + * originally have held the icl_mutex across the state + * processing. Lock order and calls which sleep prevent that + * from being possible. The next best solution is to use the local + * variable which holds the state.
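
The fix this comment describes is the classic snapshot-under-lock idiom: publish the new state while holding icl_mutex, then dispatch on the local snapshot, never on the shared field, which may already have moved on. Reduced to its essentials (my_lsm_t, MY_LOGIN_ERROR, and my_handle_error() are hypothetical names):

	static void
	my_set_and_dispatch(my_lsm_t *lsm, int new_state)
	{
		mutex_enter(&lsm->icl_mutex);
		lsm->icl_login_state = new_state;
		mutex_exit(&lsm->icl_mutex);

		/*
		 * Dispatch on the snapshot; lsm->icl_login_state may have
		 * been changed by another thread once the mutex dropped.
		 */
		switch (new_state) {
		case MY_LOGIN_ERROR:
			my_handle_error(lsm);
			break;
		default:
			break;
		}
	}
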
+ */ + switch (new_state) { case ILS_LOGIN_WAITING: /* Do nothing, waiting for more login PDU's */ break; @@ -1749,6 +1780,8 @@ login_sm_session_register(iscsit_conn_t *ict) stmf_scsi_session_t *ss; iscsi_transport_id_t *iscsi_tptid; uint16_t ident_len, adn_len, tptid_sz; + char prop_buf[KSTAT_STRLEN + 1]; + char peer_buf[IDM_SA_NTOP_BUFSIZ]; /* * Hold target mutex until we have finished registering with STMF @@ -1809,6 +1842,11 @@ login_sm_session_register(iscsit_conn_t *ict) ss->ss_port_private = ict->ict_sess; ict->ict_sess->ist_stmf_sess = ss; mutex_exit(&ist->ist_tgt->target_mutex); + (void) snprintf(prop_buf, sizeof (prop_buf), "peername_%"PRIxPTR"", + (uintptr_t)ict->ict_sess); + (void) idm_sa_ntop(&ict->ict_ic->ic_raddr, peer_buf, + sizeof (peer_buf)); + (void) stmf_add_rport_info(ss, prop_buf, peer_buf); return (IDM_STATUS_SUCCESS); } @@ -2727,15 +2765,15 @@ iscsit_fold_name(char *name, size_t *buflen) /* Check for one of the supported name types */ if (strncasecmp(name, SNS_EUI ".", strlen(SNS_EUI) + 1) == 0) { sns = SNS_EUI; - *buflen = SNS_EUI_U8_LEN_MAX + 1; + *buflen = SNS_EUI_LEN_MAX + 1; flag |= U8_TEXTPREP_TOUPPER; } else if (strncasecmp(name, SNS_IQN ".", strlen(SNS_IQN) + 1) == 0) { sns = SNS_IQN; - *buflen = SNS_IQN_U8_LEN_MAX + 1; + *buflen = SNS_IQN_LEN_MAX + 1; flag |= U8_TEXTPREP_TOLOWER; } else if (strncasecmp(name, SNS_NAA ".", strlen(SNS_NAA) + 1) == 0) { sns = SNS_NAA; - *buflen = SNS_NAA_U8_LEN_MAX + 1; + *buflen = SNS_NAA_LEN_MAX + 1; flag |= U8_TEXTPREP_TOUPPER; } else { return (NULL); @@ -2744,7 +2782,7 @@ iscsit_fold_name(char *name, size_t *buflen) ret = kmem_zalloc(*buflen, KM_SLEEP); coff = strlen(sns); inlen = strlen(name) - coff; - outlen = *buflen - coff; + outlen = *buflen - coff - 1; /* Fold the case and normalize string */ if (u8_textprep_str(name + coff, &inlen, ret + coff, &outlen, flag, diff --git a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c index d35a1b327b..c58ff4186c 100644 --- a/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c +++ b/usr/src/uts/common/io/comstar/port/iscsit/iscsit_sess.c @@ -37,6 +37,7 @@ #include <sys/strsubr.h> #include <sys/note.h> #include <sys/sdt.h> +#include <sys/kstat.h> #include <sys/stmf.h> #include <sys/stmf_ioctl.h> @@ -54,7 +55,7 @@ typedef struct { static void sess_sm_event_locked(iscsit_sess_t *ist, iscsit_session_event_t event, -iscsit_conn_t *ict); + iscsit_conn_t *ict); static void sess_sm_event_dispatch(iscsit_sess_t *ist, sess_event_ctx_t *ctx); @@ -211,6 +212,7 @@ iscsit_sess_unref(void *ist_void) { iscsit_sess_t *ist = ist_void; stmf_scsi_session_t *iss; + char prop_buf[KSTAT_STRLEN + 1]; /* * State machine has run to completion, destroy session @@ -225,6 +227,9 @@ iscsit_sess_unref(void *ist_void) ASSERT(ist->ist_conn_count == 0); iss = ist->ist_stmf_sess; if (iss != NULL) { + (void) snprintf(prop_buf, sizeof (prop_buf), + "peername_%"PRIxPTR"", (uintptr_t)ist); + stmf_remove_rport_info(iss, prop_buf); stmf_deregister_scsi_session(ist->ist_lport, iss); kmem_free(iss->ss_rport_id, sizeof (scsi_devid_desc_t) + strlen(ist->ist_initiator_name) + 1); @@ -397,7 +402,7 @@ iscsit_sess_avl_compare(const void *void_sess1, const void *void_sess2) { const iscsit_sess_t *sess1 = void_sess1; const iscsit_sess_t *sess2 = void_sess2; - int result; + int result; /* * Sort by initiator name, then ISID then portal group tag diff --git a/usr/src/uts/common/io/comstar/port/pppt/pppt.h b/usr/src/uts/common/io/comstar/port/pppt/pppt.h index 
260eb94a8c..4d584c0ab7 100644 --- a/usr/src/uts/common/io/comstar/port/pppt/pppt.h +++ b/usr/src/uts/common/io/comstar/port/pppt/pppt.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, Nexenta Systems, Inc. All rights reserved. + * Copyright 2013, 2015 Nexenta Systems, Inc. All rights reserved. */ #ifndef _PPPT_H #define _PPPT_H @@ -49,74 +49,63 @@ typedef enum { #define PPPT_MODNAME "pppt" +#define TGT_STATE_LIST() \ + item(TS_UNDEFINED) \ + item(TS_CREATED) \ + item(TS_ONLINING) \ + item(TS_ONLINE) \ + item(TS_STMF_ONLINE) \ + item(TS_DELETING_NEED_OFFLINE) \ + item(TS_OFFLINING) \ + item(TS_OFFLINE) \ + item(TS_STMF_OFFLINE) \ + item(TS_DELETING_STMF_DEREG) \ + item(TS_DELETING_STMF_DEREG_FAIL) \ + item(TS_DELETING) \ + item(TS_MAX_STATE) + /* Target states and events, update pppt_ts_name table whenever modified */ typedef enum { - TS_UNDEFINED = 0, - TS_CREATED, - TS_ONLINING, - TS_ONLINE, - TS_STMF_ONLINE, - TS_DELETING_NEED_OFFLINE, - TS_OFFLINING, - TS_OFFLINE, - TS_STMF_OFFLINE, - TS_DELETING_STMF_DEREG, - TS_DELETING_STMF_DEREG_FAIL, - TS_DELETING, - TS_MAX_STATE +#define item(a) a, + TGT_STATE_LIST() +#undef item } pppt_tgt_state_t; #ifdef PPPT_TGT_SM_STRINGS -static const char *pppt_ts_name[TS_MAX_STATE+1] = { - "TS_UNDEFINED", - "TS_CREATED", - "TS_ONLINING", - "TS_ONLINE", - "TS_STMF_ONLINE", - "TS_DELETING_NEED_OFFLINE", - "TS_OFFLINING", - "TS_OFFLINE", - "TS_STMF_OFFLINE", - "TS_DELETING_STMF_DEREG", - "TS_DELETING_STMF_DEREG_FAIL", - "TS_DELETING", - "TS_MAX_STATE" +static const char *pppt_ts_name[TS_MAX_STATE + 1] = { +#define item(a) #a, + TGT_STATE_LIST() +#undef item }; #endif +#define TGT_EVENT_LIST() \ + item(TE_UNDEFINED) \ + item(TE_STMF_ONLINE_REQ) \ + item(TE_ONLINE_SUCCESS) \ + item(TE_ONLINE_FAIL) \ + item(TE_STMF_ONLINE_COMPLETE_ACK) \ + item(TE_STMF_OFFLINE_REQ) \ + item(TE_OFFLINE_COMPLETE) \ + item(TE_STMF_OFFLINE_COMPLETE_ACK) \ + item(TE_DELETE) \ + item(TE_STMF_DEREG_SUCCESS) \ + item(TE_STMF_DEREG_FAIL) \ + item(TE_STMF_DEREG_RETRY) \ + item(TE_WAIT_REF_COMPLETE) /* XXX */ \ + item(TE_MAX_EVENT) + typedef enum { - TE_UNDEFINED = 0, - TE_STMF_ONLINE_REQ, - TE_ONLINE_SUCCESS, - TE_ONLINE_FAIL, - TE_STMF_ONLINE_COMPLETE_ACK, - TE_STMF_OFFLINE_REQ, - TE_OFFLINE_COMPLETE, - TE_STMF_OFFLINE_COMPLETE_ACK, - TE_DELETE, - TE_STMF_DEREG_SUCCESS, - TE_STMF_DEREG_FAIL, - TE_STMF_DEREG_RETRY, - TE_WAIT_REF_COMPLETE, /* XXX */ - TE_MAX_EVENT +#define item(a) a, + TGT_EVENT_LIST() +#undef item } pppt_tgt_event_t; #ifdef PPPT_TGT_SM_STRINGS -static const char *pppt_te_name[TE_MAX_EVENT+1] = { - "TE_UNDEFINED", - "TE_STMF_ONLINE_REQ", - "TE_ONLINE_SUCCESS", - "TE_ONLINE_FAIL", - "TE_STMF_ONLINE_COMPLETE_ACK", - "TE_STMF_OFFLINE_REQ", - "TE_OFFLINE_COMPLETE", - "TE_STMF_OFFLINE_COMPLETE_ACK", - "TE_DELETE", - "TE_STMF_DEREG_SUCCESS", - "TE_STMF_DEREG_FAIL", - "TE_STMF_DEREG_RETRY", - "TE_WAIT_REF_COMPLETE", - "TE_MAX_EVENT" +static const char *pppt_te_name[TE_MAX_EVENT + 1] = { +#define item(a) #a, + TGT_EVENT_LIST() +#undef item }; #endif diff --git a/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c b/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c index 7d7f571ec0..fa930ec682 100644 --- a/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c +++ b/usr/src/uts/common/io/comstar/port/pppt/pppt_msg.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, Nexenta Systems, Inc. All rights reserved. 
+ * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #include <sys/cpuvar.h> @@ -331,7 +331,7 @@ pppt_msg_scsi_cmd(stmf_ic_msg_t *msg) (void) pppt_task_hold(ptask); task->task_port_private = ptask; task->task_flags = scmd->icsc_task_flags; - task->task_additional_flags = 0; + task->task_additional_flags = TASK_AF_PPPT_TASK; task->task_priority = 0; /* diff --git a/usr/src/uts/common/io/comstar/stmf/lun_map.c b/usr/src/uts/common/io/comstar/stmf/lun_map.c index c709a2688a..41d5f7e9c8 100644 --- a/usr/src/uts/common/io/comstar/stmf/lun_map.c +++ b/usr/src/uts/common/io/comstar/stmf/lun_map.c @@ -47,8 +47,9 @@ void stmf_update_sessions_per_ve(stmf_view_entry_t *ve, void stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport, stmf_i_scsi_session_t *iss, stmf_lun_map_t *vemap); stmf_id_data_t *stmf_lookup_group_for_host(uint8_t *ident, uint16_t ident_size); -stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *sm, void *ent, uint8_t *lun); -stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *sm, uint8_t *lun); +static stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *sm, void *ent, + uint8_t *lun); +static stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *sm, uint8_t *lun); uint16_t stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun); stmf_status_t stmf_add_tg(uint8_t *tg_name, uint16_t tg_name_size, int allow_special, uint32_t *err_detail); @@ -179,10 +180,11 @@ stmf_view_clear_config() /* * Create luns map for session based on the view + * iss_lockp is held */ stmf_status_t stmf_session_create_lun_map(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss) + stmf_i_scsi_session_t *iss) { stmf_id_data_t *tg; stmf_id_data_t *hg; @@ -236,52 +238,8 @@ stmf_session_create_lun_map(stmf_i_local_port_t *ilport, } /* - * destroy lun map for session - */ -/* ARGSUSED */ -stmf_status_t -stmf_session_destroy_lun_map(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss) -{ - stmf_lun_map_t *sm; - stmf_i_lu_t *ilu; - uint16_t n; - stmf_lun_map_ent_t *ent; - - ASSERT(mutex_owned(&stmf_state.stmf_lock)); - /* - * to avoid conflict with updating session's map, - * which only grab stmf_lock - */ - sm = iss->iss_sm; - iss->iss_sm = NULL; - iss->iss_hg = NULL; - if (sm->lm_nentries) { - for (n = 0; n < sm->lm_nentries; n++) { - if ((ent = (stmf_lun_map_ent_t *)sm->lm_plus[n]) - != NULL) { - if (ent->ent_itl_datap) { - stmf_do_itl_dereg(ent->ent_lu, - ent->ent_itl_datap, - STMF_ITL_REASON_IT_NEXUS_LOSS); - } - ilu = (stmf_i_lu_t *) - ent->ent_lu->lu_stmf_private; - atomic_dec_32(&ilu->ilu_ref_cnt); - kmem_free(sm->lm_plus[n], - sizeof (stmf_lun_map_ent_t)); - } - } - kmem_free(sm->lm_plus, - sizeof (stmf_lun_map_ent_t *) * sm->lm_nentries); - } - - kmem_free(sm, sizeof (*sm)); - return (STMF_SUCCESS); -} - -/* * Expects the session lock to be held. 
+ * iss_lockp is held */ stmf_xfer_data_t * stmf_session_prepare_report_lun_data(stmf_lun_map_t *sm) @@ -390,12 +348,13 @@ stmf_session_lu_unmapall(stmf_lu_t *lu) } /* * add lu to a session, stmf_lock is already held + * iss_lockp/ilport_lock already held */ -stmf_status_t +static stmf_status_t stmf_add_lu_to_session(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss, - stmf_lu_t *lu, - uint8_t *lu_nbr) + stmf_i_scsi_session_t *iss, + stmf_lu_t *lu, + uint8_t *lu_nbr) { stmf_lun_map_t *sm = iss->iss_sm; stmf_status_t ret; @@ -434,13 +393,11 @@ stmf_add_lu_to_session(stmf_i_local_port_t *ilport, /* * remove lu from a session, stmf_lock is already held + * iss_lockp held */ -/* ARGSUSED */ -stmf_status_t -stmf_remove_lu_from_session(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss, - stmf_lu_t *lu, - uint8_t *lu_nbr) +static void +stmf_remove_lu_from_session(stmf_i_scsi_session_t *iss, + stmf_lu_t *lu, uint8_t *lu_nbr) { stmf_status_t ret; stmf_i_lu_t *ilu; @@ -451,7 +408,10 @@ stmf_remove_lu_from_session(stmf_i_local_port_t *ilport, ASSERT(mutex_owned(&stmf_state.stmf_lock)); lun_map_ent = stmf_get_ent_from_map(sm, luNbr); - ASSERT(lun_map_ent && lun_map_ent->ent_lu == lu); + if (lun_map_ent == NULL) { + return; + } + ASSERT(lun_map_ent->ent_lu == lu); ilu = (stmf_i_lu_t *)lu->lu_stmf_private; @@ -464,7 +424,6 @@ stmf_remove_lu_from_session(stmf_i_local_port_t *ilport, STMF_ITL_REASON_USER_REQUEST); } kmem_free((void *)lun_map_ent, sizeof (stmf_lun_map_ent_t)); - return (STMF_SUCCESS); } /* @@ -473,7 +432,7 @@ stmf_remove_lu_from_session(stmf_i_local_port_t *ilport, */ void stmf_update_sessions_per_ve(stmf_view_entry_t *ve, - stmf_lu_t *lu, int action) + stmf_lu_t *lu, int action) { stmf_i_lu_t *ilu_tmp; stmf_lu_t *lu_to_add; @@ -519,8 +478,8 @@ stmf_update_sessions_per_ve(stmf_view_entry_t *ve, continue; /* This host belongs to the host group */ if (action == 0) { /* to remove */ - (void) stmf_remove_lu_from_session(ilport, iss, - lu_to_add, ve->ve_lun); + stmf_remove_lu_from_session(iss, lu_to_add, + ve->ve_lun); if (ilu_tmp->ilu_ref_cnt == 0) { rw_exit(&ilport->ilport_lock); return; @@ -540,8 +499,8 @@ stmf_update_sessions_per_ve(stmf_view_entry_t *ve, */ void stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss, - stmf_lun_map_t *vemap) + stmf_i_scsi_session_t *iss, + stmf_lun_map_t *vemap) { stmf_lu_t *lu; stmf_i_lu_t *ilu; @@ -549,7 +508,6 @@ stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport, uint32_t i; ASSERT(mutex_owned(&stmf_state.stmf_lock)); - for (i = 0; i < vemap->lm_nentries; i++) { ve = (stmf_view_entry_t *)vemap->lm_plus[i]; if (!ve) @@ -562,11 +520,13 @@ stmf_add_lus_to_session_per_vemap(stmf_i_local_port_t *ilport, } } } -/* remove luns in view entry map from a session */ +/* + * remove luns in view entry map from a session + * iss_lockp held + */ void -stmf_remove_lus_from_session_per_vemap(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss, - stmf_lun_map_t *vemap) +stmf_remove_lus_from_session_per_vemap(stmf_i_scsi_session_t *iss, + stmf_lun_map_t *vemap) { stmf_lu_t *lu; stmf_i_lu_t *ilu; @@ -582,15 +542,14 @@ stmf_remove_lus_from_session_per_vemap(stmf_i_local_port_t *ilport, ilu = (stmf_i_lu_t *)ve->ve_luid->id_pt_to_object; if (ilu && ilu->ilu_state == STMF_STATE_ONLINE) { lu = ilu->ilu_lu; - (void) stmf_remove_lu_from_session(ilport, iss, lu, - ve->ve_lun); + stmf_remove_lu_from_session(iss, lu, ve->ve_lun); } } } stmf_id_data_t * stmf_alloc_id(uint16_t id_size, uint16_t type, uint8_t
*id_data, - uint32_t additional_size) + uint32_t additional_size) { stmf_id_data_t *id; int struct_size, total_size, real_id_size; @@ -710,6 +669,7 @@ stmf_remove_id(stmf_id_list_t *idlist, stmf_id_data_t *id) * is successfully added. ve_map is just another representation of the * view entries in a LU. Duplicating or merging a ve map does not * affect any refcnts. + * stmf_state.stmf_lock held */ stmf_lun_map_t * stmf_duplicate_ve_map(stmf_lun_map_t *src) @@ -742,9 +702,13 @@ stmf_destroy_ve_map(stmf_lun_map_t *dst) kmem_free(dst, sizeof (*dst)); } +/* + * stmf_state.stmf_lock held. Operations are stmf global in nature and + * not session level. + */ int stmf_merge_ve_map(stmf_lun_map_t *src, stmf_lun_map_t *dst, - stmf_lun_map_t **pp_ret_map, stmf_merge_flags_t mf) + stmf_lun_map_t **pp_ret_map, stmf_merge_flags_t mf) { int i; int nentries; @@ -816,7 +780,7 @@ stmf_merge_ve_map(stmf_lun_map_t *src, stmf_lun_map_t *dst, */ stmf_status_t stmf_add_hg(uint8_t *hg_name, uint16_t hg_name_size, - int allow_special, uint32_t *err_detail) + int allow_special, uint32_t *err_detail) { stmf_id_data_t *id; @@ -841,7 +805,7 @@ stmf_add_hg(uint8_t *hg_name, uint16_t hg_name_size, /* add target group */ stmf_status_t stmf_add_tg(uint8_t *tg_name, uint16_t tg_name_size, - int allow_special, uint32_t *err_detail) + int allow_special, uint32_t *err_detail) { stmf_id_data_t *id; @@ -920,8 +884,8 @@ stmf_add_ve_to_luid(stmf_id_data_t *luid, stmf_view_entry_t *ve) /* stmf_lock is already held, err_detail may be assigned if error happens */ stmf_status_t stmf_add_view_entry(stmf_id_data_t *hg, stmf_id_data_t *tg, - uint8_t *lu_guid, uint32_t *ve_id, uint8_t *lun, - stmf_view_entry_t **conflicting, uint32_t *err_detail) + uint8_t *lu_guid, uint32_t *ve_id, uint8_t *lun, + stmf_view_entry_t **conflicting, uint32_t *err_detail) { stmf_id_data_t *luid; stmf_view_entry_t *ve; @@ -1066,7 +1030,11 @@ add_ve_err_ret: return (ret); } -stmf_status_t +/* + * protected by stmf_state.stmf_lock when working on global lun map. + * iss_lockp when working at the session level. + */ +static stmf_status_t stmf_add_ent_to_map(stmf_lun_map_t *lm, void *ent, uint8_t *lun) { uint16_t n; @@ -1098,7 +1066,11 @@ try_again_to_add: } -stmf_status_t +/* + * iss_lockp held when working on a session. + * stmf_state.stmf_lock is held when working on the global views. + */ +static stmf_status_t stmf_remove_ent_from_map(stmf_lun_map_t *lm, uint8_t *lun) { uint16_t n, i; @@ -1135,6 +1107,9 @@ stmf_remove_ent_from_map(stmf_lun_map_t *lm, uint8_t *lun) return (STMF_SUCCESS); } +/* + * stmf_state.stmf_lock held + */ uint16_t stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun) { @@ -1158,6 +1133,10 @@ stmf_get_next_free_lun(stmf_lun_map_t *sm, uint8_t *lun) return (luNbr); } +/* + * stmf_state.stmf_lock is held when working on global view map + * iss_lockp (RW_WRITER) is held when working on session map.
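
The lock annotations added throughout lun_map.c describe a two-level scheme: stmf_state.stmf_lock guards the global view maps, while each session's map is additionally guarded by iss_lockp taken as writer, which is why the group-membership paths below now bracket their session-map edits with rw_enter()/rw_exit(). A sketch of the discipline, assuming a hypothetical my_sync_session_map() wrapper:

	static void
	my_sync_session_map(stmf_i_local_port_t *ilport,
	    stmf_i_scsi_session_t *iss, stmf_lun_map_t *vemap)
	{
		/* Global config lock must already be held by the caller. */
		ASSERT(mutex_owned(&stmf_state.stmf_lock));

		rw_enter(iss->iss_lockp, RW_WRITER);	/* per-session map lock */
		stmf_add_lus_to_session_per_vemap(ilport, iss, vemap);
		rw_exit(iss->iss_lockp);
	}
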
+ */ void * stmf_get_ent_from_map(stmf_lun_map_t *sm, uint16_t lun_num) { @@ -1173,9 +1152,9 @@ stmf_get_ent_from_map(stmf_lun_map_t *sm, uint16_t lun_num) int stmf_add_ve(uint8_t *hgname, uint16_t hgname_size, - uint8_t *tgname, uint16_t tgname_size, - uint8_t *lu_guid, uint32_t *ve_id, - uint8_t *luNbr, uint32_t *err_detail) + uint8_t *tgname, uint16_t tgname_size, + uint8_t *lu_guid, uint32_t *ve_id, + uint8_t *luNbr, uint32_t *err_detail) { stmf_id_data_t *hg; stmf_id_data_t *tg; @@ -1317,7 +1296,7 @@ stmf_remove_ve_by_id(uint8_t *guid, uint32_t veid, uint32_t *err_detail) int stmf_add_group(uint8_t *grpname, uint16_t grpname_size, - stmf_id_type_t group_type, uint32_t *err_detail) + stmf_id_type_t group_type, uint32_t *err_detail) { stmf_status_t status; @@ -1348,7 +1327,7 @@ stmf_add_group(uint8_t *grpname, uint16_t grpname_size, */ int stmf_remove_group(uint8_t *grpname, uint16_t grpname_size, - stmf_id_type_t group_type, uint32_t *err_detail) + stmf_id_type_t group_type, uint32_t *err_detail) { stmf_id_data_t *id; stmf_id_data_t *idmemb; @@ -1411,8 +1390,8 @@ stmf_remove_group(uint8_t *grpname, uint16_t grpname_size, int stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size, - uint8_t *entry_ident, uint16_t entry_size, - stmf_id_type_t entry_type, uint32_t *err_detail) + uint8_t *entry_ident, uint16_t entry_size, + stmf_id_type_t entry_type, uint32_t *err_detail) { stmf_id_data_t *id_grp, *id_alltgt; stmf_id_data_t *id_member; @@ -1492,6 +1471,7 @@ stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size, stmf_id_data_t *tgid; iss->iss_hg = (void *)id_grp; tgid = ilport->ilport_tg; + rw_enter(iss->iss_lockp, RW_WRITER); if (tgid) { vemap = stmf_get_ve_map_per_ids(tgid, id_grp); if (vemap) @@ -1501,6 +1481,7 @@ stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size, if (vemap_alltgt) stmf_add_lus_to_session_per_vemap(ilport, iss, vemap_alltgt); + rw_exit(iss->iss_lockp); } } @@ -1509,8 +1490,8 @@ stmf_add_group_member(uint8_t *grpname, uint16_t grpname_size, int stmf_remove_group_member(uint8_t *grpname, uint16_t grpname_size, - uint8_t *entry_ident, uint16_t entry_size, - stmf_id_type_t entry_type, uint32_t *err_detail) + uint8_t *entry_ident, uint16_t entry_size, + stmf_id_type_t entry_type, uint32_t *err_detail) { stmf_id_data_t *id_grp, *id_alltgt; stmf_id_data_t *id_member; @@ -1577,17 +1558,19 @@ stmf_remove_group_member(uint8_t *grpname, uint16_t grpname_size, entry_ident, entry_size); if (iss) { stmf_id_data_t *tgid; + rw_enter(iss->iss_lockp, RW_WRITER); iss->iss_hg = NULL; tgid = ilport->ilport_tg; if (tgid) { vemap = stmf_get_ve_map_per_ids(tgid, id_grp); if (vemap) stmf_remove_lus_from_session_per_vemap( - ilport, iss, vemap); + iss, vemap); } if (vemap_alltgt) - stmf_remove_lus_from_session_per_vemap(ilport, - iss, vemap_alltgt); + stmf_remove_lus_from_session_per_vemap(iss, + vemap_alltgt); + rw_exit(iss->iss_lockp); } } @@ -1616,7 +1599,7 @@ stmf_targetident_to_ilport(uint8_t *target_ident, uint16_t ident_size) stmf_i_scsi_session_t * stmf_lookup_session_for_hostident(stmf_i_local_port_t *ilport, - uint8_t *host_ident, uint16_t ident_size) + uint8_t *host_ident, uint16_t ident_size) { stmf_i_scsi_session_t *iss; uint8_t *id; @@ -1734,8 +1717,8 @@ stmf_validate_lun_view_entry(stmf_id_data_t *hg, stmf_id_data_t *tg, int stmf_validate_lun_ve(uint8_t *hgname, uint16_t hgname_size, - uint8_t *tgname, uint16_t tgname_size, - uint8_t *luNbr, uint32_t *err_detail) + uint8_t *tgname, uint16_t tgname_size, + uint8_t *luNbr, uint32_t *err_detail) { 
stmf_id_data_t *hg; stmf_id_data_t *tg; diff --git a/usr/src/uts/common/io/comstar/stmf/lun_map.h b/usr/src/uts/common/io/comstar/stmf/lun_map.h index b9e8d8ce40..250db4997a 100644 --- a/usr/src/uts/common/io/comstar/stmf/lun_map.h +++ b/usr/src/uts/common/io/comstar/stmf/lun_map.h @@ -48,8 +48,6 @@ void stmf_view_init(); void stmf_view_clear_config(); stmf_status_t stmf_session_create_lun_map(stmf_i_local_port_t *ilport, stmf_i_scsi_session_t *iss); -stmf_status_t stmf_session_destroy_lun_map(stmf_i_local_port_t *ilport, - stmf_i_scsi_session_t *iss); stmf_xfer_data_t *stmf_session_prepare_report_lun_data(stmf_lun_map_t *sm); void stmf_add_lu_to_active_sessions(stmf_lu_t *lu); void stmf_session_lu_unmapall(stmf_lu_t *lu); diff --git a/usr/src/uts/common/io/comstar/stmf/stmf.c b/usr/src/uts/common/io/comstar/stmf/stmf.c index a79a27e8b3..14f5d4eecf 100644 --- a/usr/src/uts/common/io/comstar/stmf/stmf.c +++ b/usr/src/uts/common/io/comstar/stmf/stmf.c @@ -18,11 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* - * Copyright 2012, Nexenta Systems, Inc. All rights reserved. + * Copyright 2019 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */ @@ -124,7 +125,6 @@ void stmf_delete_all_ppds(); void stmf_trace_clear(); void stmf_worker_init(); stmf_status_t stmf_worker_fini(); -void stmf_worker_mgmt(); void stmf_worker_task(void *arg); static void stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss); static stmf_status_t stmf_ic_lu_reg(stmf_ic_reg_dereg_lun_msg_t *msg, @@ -164,9 +164,14 @@ static void stmf_update_kstat_lu_q(scsi_task_t *, void()); static void stmf_update_kstat_lport_q(scsi_task_t *, void()); static void stmf_update_kstat_lu_io(scsi_task_t *, stmf_data_buf_t *); static void stmf_update_kstat_lport_io(scsi_task_t *, stmf_data_buf_t *); +static hrtime_t stmf_update_rport_timestamps(hrtime_t *start_tstamp, + hrtime_t *done_tstamp, stmf_i_scsi_task_t *itask); static int stmf_irport_compare(const void *void_irport1, const void *void_irport2); +static void stmf_create_kstat_rport(stmf_i_remote_port_t *irport); +static void stmf_destroy_kstat_rport(stmf_i_remote_port_t *irport); +static int stmf_kstat_rport_update(kstat_t *ksp, int rw); static stmf_i_remote_port_t *stmf_irport_create(scsi_devid_desc_t *rport_devid); static void stmf_irport_destroy(stmf_i_remote_port_t *irport); static stmf_i_remote_port_t *stmf_irport_register( @@ -179,7 +184,7 @@ extern struct mod_ops mod_driverops; /* =====[ Tunables ]===== */ /* Internal tracing */ -volatile int stmf_trace_on = 1; +volatile int stmf_trace_on = 0; volatile int stmf_trace_buf_size = (1 * 1024 * 1024); /* * The reason default task timeout is 75 is because we want the @@ -192,14 +197,12 @@ volatile int stmf_default_task_timeout = 75; */ volatile int stmf_allow_modunload = 0; -volatile int stmf_max_nworkers = 256; -volatile int stmf_min_nworkers = 4; -volatile int stmf_worker_scale_down_delay = 20; +volatile int stmf_nworkers = 512; /* === [ Debugging and fault injection ] === */ #ifdef DEBUG -volatile uint32_t stmf_drop_task_counter = 0; -volatile uint32_t stmf_drop_buf_counter = 0; +volatile int stmf_drop_task_counter = 0; +volatile int stmf_drop_buf_counter = 0; #endif @@ -221,20 +224,13 @@ static enum { STMF_WORKERS_ENABLING, STMF_WORKERS_ENABLED } stmf_workers_state = STMF_WORKERS_DISABLED; -static int stmf_i_max_nworkers; -static int 
stmf_i_min_nworkers; -static int stmf_nworkers_cur; /* # of workers currently running */ -static int stmf_nworkers_needed; /* # of workers need to be running */ +static kmutex_t stmf_worker_sel_mx; +volatile uint32_t stmf_nworkers_cur = 0; /* # of workers currently running */ static int stmf_worker_sel_counter = 0; static uint32_t stmf_cur_ntasks = 0; -static clock_t stmf_wm_last = 0; -/* - * This is equal to stmf_nworkers_cur while we are increasing # workers and - * stmf_nworkers_needed while we are decreasing the worker count. - */ +static clock_t stmf_wm_next = 0; static int stmf_nworkers_accepting_cmds; static stmf_worker_t *stmf_workers = NULL; -static clock_t stmf_worker_mgmt_delay = 2; static clock_t stmf_worker_scale_down_timer = 0; static int stmf_worker_scale_down_qd = 0; @@ -300,6 +296,7 @@ _init(void) trace_buf_size = stmf_trace_buf_size; trace_buf_curndx = 0; mutex_init(&trace_buf_lock, NULL, MUTEX_DRIVER, 0); + mutex_init(&stmf_worker_sel_mx, NULL, MUTEX_ADAPTIVE, 0); bzero(&stmf_state, sizeof (stmf_state_t)); /* STMF service is off by default */ stmf_state.stmf_service_running = 0; @@ -370,6 +367,7 @@ _fini(void) kmem_free(stmf_trace_buf, stmf_trace_buf_size); mutex_destroy(&trace_buf_lock); mutex_destroy(&stmf_state.stmf_lock); + mutex_destroy(&stmf_worker_sel_mx); cv_destroy(&stmf_state.stmf_cv); return (ret); } @@ -1655,10 +1653,13 @@ find_task_from_msgid(uint8_t *lu_id, stmf_ic_msgid_t task_msgid) mutex_enter(&ilu->ilu_task_lock); for (itask = ilu->ilu_tasks; itask != NULL; itask = itask->itask_lu_next) { + mutex_enter(&itask->itask_mutex); if (itask->itask_flags & (ITASK_IN_FREE_LIST | ITASK_BEING_ABORTED)) { + mutex_exit(&itask->itask_mutex); continue; } + mutex_exit(&itask->itask_mutex); if (itask->itask_proxy_msg_id == task_msgid) { break; } @@ -1903,16 +1904,15 @@ stmf_proxy_scsi_cmd(scsi_task_t *task, stmf_data_buf_t *dbuf) * we can recognize this on return since we won't be completing * the proxied task in that case. 
*/ + mutex_enter(&itask->itask_mutex); if (task->task_mgmt_function) { itask->itask_proxy_msg_id |= MSG_ID_TM_BIT; } else { - uint32_t new, old; - do { - new = old = itask->itask_flags; - if (new & ITASK_BEING_ABORTED) - return (STMF_FAILURE); - new |= ITASK_DEFAULT_HANDLING | ITASK_PROXY_TASK; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&itask->itask_mutex); + return (STMF_FAILURE); + } + itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_PROXY_TASK; } if (dbuf) { ic_cmd_msg = ic_scsi_cmd_msg_alloc(itask->itask_proxy_msg_id, @@ -1922,6 +1922,7 @@ stmf_proxy_scsi_cmd(scsi_task_t *task, stmf_data_buf_t *dbuf) ic_cmd_msg = ic_scsi_cmd_msg_alloc(itask->itask_proxy_msg_id, task, 0, NULL, itask->itask_proxy_msg_id); } + mutex_exit(&itask->itask_mutex); if (ic_cmd_msg) { ic_ret = ic_tx_msg(ic_cmd_msg); if (ic_ret == STMF_IC_MSG_SUCCESS) { @@ -2525,7 +2526,8 @@ stmf_load_ppd_ioctl(stmf_ppioctl_data_t *ppi, uint64_t *ppi_token, } /* Free any existing lists and add this one to the ppd */ - nvlist_free(ppd->ppd_nv); + if (ppd->ppd_nv) + nvlist_free(ppd->ppd_nv); ppd->ppd_nv = nv; /* set the token for writes */ @@ -2597,7 +2599,8 @@ stmf_delete_ppd(stmf_pp_data_t *ppd) return; *pppd = ppd->ppd_next; - nvlist_free(ppd->ppd_nv); + if (ppd->ppd_nv) + nvlist_free(ppd->ppd_nv); kmem_free(ppd, ppd->ppd_alloc_size); } @@ -2706,6 +2709,8 @@ stmf_delete_all_ppds() */ #define STMF_KSTAT_LU_SZ (STMF_GUID_INPUT + 1 + 256) #define STMF_KSTAT_TGT_SZ (256 * 2 + 16) +#define STMF_KSTAT_RPORT_DATAMAX (sizeof (stmf_kstat_rport_info_t) / \ + sizeof (kstat_named_t)) /* * This array matches the Protocol Identifier in stmf_ioctl.h @@ -2783,6 +2788,96 @@ stmf_update_kstat_lport_io(scsi_task_t *task, stmf_data_buf_t *dbuf) } static void +stmf_update_kstat_rport_io(scsi_task_t *task, stmf_data_buf_t *dbuf) +{ + stmf_i_scsi_session_t *iss; + stmf_i_remote_port_t *irport; + kstat_io_t *kip; + + iss = task->task_session->ss_stmf_private; + irport = iss->iss_irport; + if (irport->irport_kstat_io != NULL) { + kip = KSTAT_IO_PTR(irport->irport_kstat_io); + mutex_enter(irport->irport_kstat_io->ks_lock); + STMF_UPDATE_KSTAT_IO(kip, dbuf); + mutex_exit(irport->irport_kstat_io->ks_lock); + } +} + +static void +stmf_update_kstat_rport_estat(scsi_task_t *task) +{ + stmf_i_scsi_task_t *itask; + stmf_i_scsi_session_t *iss; + stmf_i_remote_port_t *irport; + stmf_kstat_rport_estat_t *ks_estat; + hrtime_t lat = 0; + uint32_t n = 0; + + itask = task->task_stmf_private; + iss = task->task_session->ss_stmf_private; + irport = iss->iss_irport; + + if (irport->irport_kstat_estat == NULL) + return; + + ks_estat = (stmf_kstat_rport_estat_t *)KSTAT_NAMED_PTR( + irport->irport_kstat_estat); + + mutex_enter(irport->irport_kstat_estat->ks_lock); + + if (task->task_flags & TF_READ_DATA) + n = atomic_dec_32_nv(&irport->irport_nread_tasks); + else if (task->task_flags & TF_WRITE_DATA) + n = atomic_dec_32_nv(&irport->irport_nwrite_tasks); + + if (itask->itask_read_xfer > 0) { + ks_estat->i_nread_tasks.value.ui64++; + lat = stmf_update_rport_timestamps( + &irport->irport_rdstart_timestamp, + &irport->irport_rddone_timestamp, itask); + if (n == 0) + ks_estat->i_rport_read_latency.value.ui64 += lat; + } else if ((itask->itask_write_xfer > 0) || + (task->task_flags & TF_INITIAL_BURST)) { + ks_estat->i_nwrite_tasks.value.ui64++; + lat = stmf_update_rport_timestamps( + &irport->irport_wrstart_timestamp, + &irport->irport_wrdone_timestamp, itask); + if (n == 0) + 
ks_estat->i_rport_write_latency.value.ui64 += lat; + } + + if (n == 0) { + if (task->task_flags & TF_READ_DATA) { + irport->irport_rdstart_timestamp = LLONG_MAX; + irport->irport_rddone_timestamp = 0; + } else if (task->task_flags & TF_WRITE_DATA) { + irport->irport_wrstart_timestamp = LLONG_MAX; + irport->irport_wrdone_timestamp = 0; + } + } + + mutex_exit(irport->irport_kstat_estat->ks_lock); +} + +static hrtime_t +stmf_update_rport_timestamps(hrtime_t *start_tstamp, hrtime_t *done_tstamp, + stmf_i_scsi_task_t *itask) +{ + *start_tstamp = MIN(*start_tstamp, itask->itask_start_timestamp); + if ((*done_tstamp == 0) && + (itask->itask_xfer_done_timestamp == 0)) { + *done_tstamp = *start_tstamp; + } else { + *done_tstamp = MAX(*done_tstamp, + itask->itask_xfer_done_timestamp); + } + + return (*done_tstamp - *start_tstamp); +} + +static void stmf_update_kstat_lu_io(scsi_task_t *task, stmf_data_buf_t *dbuf) { stmf_i_lu_t *ilu; @@ -3443,6 +3538,8 @@ stmf_irport_create(scsi_devid_desc_t *rport_devid) sizeof (scsi_devid_desc_t) + rport_devid->ident_length - 1); irport->irport_refcnt = 1; mutex_init(&irport->irport_mutex, NULL, MUTEX_DEFAULT, NULL); + irport->irport_rdstart_timestamp = LLONG_MAX; + irport->irport_wrstart_timestamp = LLONG_MAX; return (irport); } @@ -3450,12 +3547,132 @@ stmf_irport_create(scsi_devid_desc_t *rport_devid) static void stmf_irport_destroy(stmf_i_remote_port_t *irport) { + stmf_destroy_kstat_rport(irport); id_free(stmf_state.stmf_irport_inst_space, irport->irport_instance); mutex_destroy(&irport->irport_mutex); kmem_free(irport, sizeof (*irport) + sizeof (scsi_devid_desc_t) + irport->irport_id->ident_length - 1); } +static void +stmf_create_kstat_rport(stmf_i_remote_port_t *irport) +{ + scsi_devid_desc_t *id = irport->irport_id; + char ks_nm[KSTAT_STRLEN]; + stmf_kstat_rport_info_t *ks_info; + stmf_kstat_rport_estat_t *ks_estat; + char *ident = NULL; + + ks_info = kmem_zalloc(sizeof (*ks_info), KM_NOSLEEP); + if (ks_info == NULL) + goto err_out; + + (void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_%"PRIxPTR"", + (uintptr_t)irport); + irport->irport_kstat_info = kstat_create(STMF_MODULE_NAME, 0, + ks_nm, "misc", KSTAT_TYPE_NAMED, + STMF_KSTAT_RPORT_DATAMAX - STMF_RPORT_INFO_LIMIT, + KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE); + if (irport->irport_kstat_info == NULL) { + kmem_free(ks_info, sizeof (*ks_info)); + goto err_out; + } + + irport->irport_kstat_info->ks_data = ks_info; + irport->irport_kstat_info->ks_private = irport; + irport->irport_kstat_info->ks_update = stmf_kstat_rport_update; + ident = kmem_alloc(id->ident_length + 1, KM_NOSLEEP); + if (ident == NULL) { + kstat_delete(irport->irport_kstat_info); + irport->irport_kstat_info = NULL; + kmem_free(ks_info, sizeof (*ks_info)); + goto err_out; + } + + (void) memcpy(ident, id->ident, id->ident_length); + ident[id->ident_length] = '\0'; + kstat_named_init(&ks_info->i_rport_name, "name", KSTAT_DATA_STRING); + kstat_named_init(&ks_info->i_protocol, "protocol", + KSTAT_DATA_STRING); + + kstat_named_setstr(&ks_info->i_rport_name, ident); + kstat_named_setstr(&ks_info->i_protocol, + protocol_ident[irport->irport_id->protocol_id]); + irport->irport_kstat_info->ks_lock = &irport->irport_mutex; + irport->irport_info_dirty = B_TRUE; + kstat_install(irport->irport_kstat_info); + + (void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_io_%"PRIxPTR"", + (uintptr_t)irport); + irport->irport_kstat_io = kstat_create(STMF_MODULE_NAME, 0, ks_nm, + "io", KSTAT_TYPE_IO, 1, 0); + if (irport->irport_kstat_io == NULL) + goto err_out; + + 
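/* + * The io kstat exports the standard kstat_io_t counters for this + * remote port; they are filled in through STMF_UPDATE_KSTAT_IO in + * stmf_update_kstat_rport_io(). + */ +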
irport->irport_kstat_io->ks_lock = &irport->irport_mutex; + kstat_install(irport->irport_kstat_io); + + (void) snprintf(ks_nm, KSTAT_STRLEN, "stmf_rport_st_%"PRIxPTR"", + (uintptr_t)irport); + irport->irport_kstat_estat = kstat_create(STMF_MODULE_NAME, 0, ks_nm, + "misc", KSTAT_TYPE_NAMED, + sizeof (*ks_estat) / sizeof (kstat_named_t), 0); + if (irport->irport_kstat_estat == NULL) + goto err_out; + + ks_estat = (stmf_kstat_rport_estat_t *)KSTAT_NAMED_PTR( + irport->irport_kstat_estat); + kstat_named_init(&ks_estat->i_rport_read_latency, + "rlatency", KSTAT_DATA_UINT64); + kstat_named_init(&ks_estat->i_rport_write_latency, + "wlatency", KSTAT_DATA_UINT64); + kstat_named_init(&ks_estat->i_nread_tasks, "rntasks", + KSTAT_DATA_UINT64); + kstat_named_init(&ks_estat->i_nwrite_tasks, "wntasks", + KSTAT_DATA_UINT64); + irport->irport_kstat_estat->ks_lock = &irport->irport_mutex; + kstat_install(irport->irport_kstat_estat); + + return; + +err_out: + (void) memcpy(ks_nm, id->ident, MAX(KSTAT_STRLEN - 1, + id->ident_length)); + ks_nm[id->ident_length] = '\0'; + cmn_err(CE_WARN, "STMF: remote port kstat creation failed: %s", ks_nm); +} + +static void +stmf_destroy_kstat_rport(stmf_i_remote_port_t *irport) +{ + if (irport->irport_kstat_io != NULL) { + kstat_delete(irport->irport_kstat_io); + } + if (irport->irport_kstat_estat != NULL) { + kstat_delete(irport->irport_kstat_estat); + } + if (irport->irport_kstat_info != NULL) { + stmf_kstat_rport_info_t *ks_info; + kstat_named_t *knp; + void *ptr; + int i; + + ks_info = (stmf_kstat_rport_info_t *)KSTAT_NAMED_PTR( + irport->irport_kstat_info); + kstat_delete(irport->irport_kstat_info); + ptr = KSTAT_NAMED_STR_PTR(&ks_info->i_rport_name); + kmem_free(ptr, KSTAT_NAMED_STR_BUFLEN(&ks_info->i_rport_name)); + + for (i = 0, knp = ks_info->i_rport_uinfo; + i < STMF_RPORT_INFO_LIMIT; i++, knp++) { + ptr = KSTAT_NAMED_STR_PTR(knp); + if (ptr != NULL) + kmem_free(ptr, KSTAT_NAMED_STR_BUFLEN(knp)); + } + kmem_free(ks_info, sizeof (*ks_info)); + } +} + static stmf_i_remote_port_t * stmf_irport_register(scsi_devid_desc_t *rport_devid) { @@ -3478,6 +3695,7 @@ stmf_irport_register(scsi_devid_desc_t *rport_devid) return (NULL); } + stmf_create_kstat_rport(irport); avl_add(&stmf_state.stmf_irportlist, irport); mutex_exit(&stmf_state.stmf_lock); @@ -3601,6 +3819,109 @@ stmf_register_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss) return (STMF_SUCCESS); } +stmf_status_t +stmf_add_rport_info(stmf_scsi_session_t *ss, + const char *prop_name, const char *prop_value) +{ + stmf_i_scsi_session_t *iss = ss->ss_stmf_private; + stmf_i_remote_port_t *irport = iss->iss_irport; + kstat_named_t *knp; + char *s; + int i; + + s = strdup(prop_value); + + mutex_enter(irport->irport_kstat_info->ks_lock); + /* Make sure the caller doesn't try to add already existing property */ + knp = KSTAT_NAMED_PTR(irport->irport_kstat_info); + for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) { + if (KSTAT_NAMED_STR_PTR(knp) == NULL) + break; + + ASSERT(strcmp(knp->name, prop_name) != 0); + } + + if (i == STMF_KSTAT_RPORT_DATAMAX) { + mutex_exit(irport->irport_kstat_info->ks_lock); + kmem_free(s, strlen(s) + 1); + return (STMF_FAILURE); + } + + irport->irport_info_dirty = B_TRUE; + kstat_named_init(knp, prop_name, KSTAT_DATA_STRING); + kstat_named_setstr(knp, s); + mutex_exit(irport->irport_kstat_info->ks_lock); + + return (STMF_SUCCESS); +} + +void +stmf_remove_rport_info(stmf_scsi_session_t *ss, + const char *prop_name) +{ + stmf_i_scsi_session_t *iss = ss->ss_stmf_private; + 
stmf_i_remote_port_t *irport = iss->iss_irport; + kstat_named_t *knp; + char *s; + int i; + uint32_t len; + + mutex_enter(irport->irport_kstat_info->ks_lock); + knp = KSTAT_NAMED_PTR(irport->irport_kstat_info); + for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) { + if ((knp->name != NULL) && (strcmp(knp->name, prop_name) == 0)) + break; + } + + if (i == STMF_KSTAT_RPORT_DATAMAX) { + mutex_exit(irport->irport_kstat_info->ks_lock); + return; + } + + s = KSTAT_NAMED_STR_PTR(knp); + len = KSTAT_NAMED_STR_BUFLEN(knp); + + for (; i < STMF_KSTAT_RPORT_DATAMAX - 1; i++, knp++) { + kstat_named_init(knp, knp[1].name, KSTAT_DATA_STRING); + kstat_named_setstr(knp, KSTAT_NAMED_STR_PTR(&knp[1])); + } + kstat_named_init(knp, "", KSTAT_DATA_STRING); + + irport->irport_info_dirty = B_TRUE; + mutex_exit(irport->irport_kstat_info->ks_lock); + kmem_free(s, len); +} + +static int +stmf_kstat_rport_update(kstat_t *ksp, int rw) +{ + stmf_i_remote_port_t *irport = ksp->ks_private; + kstat_named_t *knp; + uint_t ndata = 0; + size_t dsize = 0; + int i; + + if (rw == KSTAT_WRITE) + return (EACCES); + + if (!irport->irport_info_dirty) + return (0); + + knp = KSTAT_NAMED_PTR(ksp); + for (i = 0; i < STMF_KSTAT_RPORT_DATAMAX; i++, knp++) { + if (KSTAT_NAMED_STR_PTR(knp) == NULL) + break; + ndata++; + dsize += KSTAT_NAMED_STR_BUFLEN(knp); + } + + ksp->ks_ndata = ndata; + ksp->ks_data_size = sizeof (kstat_named_t) * ndata + dsize; + irport->irport_info_dirty = B_FALSE; + + return (0); +} + void stmf_deregister_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss) { @@ -3610,6 +3931,10 @@ stmf_deregister_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss) int found = 0; stmf_ic_msg_t *ic_session_dereg; stmf_status_t ic_ret = STMF_FAILURE; + stmf_lun_map_t *sm; + stmf_i_lu_t *ilu; + uint16_t n; + stmf_lun_map_ent_t *ent; DTRACE_PROBE2(session__offline, stmf_local_port_t *, lport, stmf_scsi_session_t *, ss); @@ -3659,15 +3984,46 @@ try_dereg_ss_again: ilport->ilport_nsessions--; stmf_irport_deregister(iss->iss_irport); - (void) stmf_session_destroy_lun_map(ilport, iss); + /* + * to avoid conflict with updating session's map, + * which only grab stmf_lock + */ + sm = iss->iss_sm; + iss->iss_sm = NULL; + iss->iss_hg = NULL; + rw_exit(&ilport->ilport_lock); - mutex_exit(&stmf_state.stmf_lock); + + if (sm->lm_nentries) { + for (n = 0; n < sm->lm_nentries; n++) { + if ((ent = (stmf_lun_map_ent_t *)sm->lm_plus[n]) + != NULL) { + if (ent->ent_itl_datap) { + stmf_do_itl_dereg(ent->ent_lu, + ent->ent_itl_datap, + STMF_ITL_REASON_IT_NEXUS_LOSS); + } + ilu = (stmf_i_lu_t *) + ent->ent_lu->lu_stmf_private; + atomic_dec_32(&ilu->ilu_ref_cnt); + kmem_free(sm->lm_plus[n], + sizeof (stmf_lun_map_ent_t)); + } + } + kmem_free(sm->lm_plus, + sizeof (stmf_lun_map_ent_t *) * sm->lm_nentries); + } + kmem_free(sm, sizeof (*sm)); if (iss->iss_flags & ISS_NULL_TPTID) { stmf_remote_port_free(ss->ss_rport); } + + mutex_exit(&stmf_state.stmf_lock); } + + stmf_i_scsi_session_t * stmf_session_id_to_issptr(uint64_t session_id, int stay_locked) { @@ -3863,54 +4219,6 @@ dai_scan_done: return (STMF_SUCCESS); } -stmf_status_t -stmf_get_itl_handle(stmf_lu_t *lu, uint8_t *lun, stmf_scsi_session_t *ss, - uint64_t session_id, void **itl_handle_retp) -{ - stmf_i_scsi_session_t *iss; - stmf_lun_map_ent_t *ent; - stmf_lun_map_t *lm; - stmf_status_t ret; - int i; - uint16_t n; - - if (ss == NULL) { - iss = stmf_session_id_to_issptr(session_id, 1); - if (iss == NULL) - return (STMF_NOT_FOUND); - } else { - iss = (stmf_i_scsi_session_t 
*)ss->ss_stmf_private; - rw_enter(iss->iss_lockp, RW_WRITER); - } - - ent = NULL; - if (lun == NULL) { - lm = iss->iss_sm; - for (i = 0; i < lm->lm_nentries; i++) { - if (lm->lm_plus[i] == NULL) - continue; - ent = (stmf_lun_map_ent_t *)lm->lm_plus[i]; - if (ent->ent_lu == lu) - break; - } - } else { - n = ((uint16_t)lun[1] | (((uint16_t)(lun[0] & 0x3F)) << 8)); - ent = (stmf_lun_map_ent_t *) - stmf_get_ent_from_map(iss->iss_sm, n); - if (lu && (ent->ent_lu != lu)) - ent = NULL; - } - if (ent && ent->ent_itl_datap) { - *itl_handle_retp = ent->ent_itl_datap->itl_handle; - ret = STMF_SUCCESS; - } else { - ret = STMF_NOT_FOUND; - } - - rw_exit(iss->iss_lockp); - return (ret); -} - stmf_data_buf_t * stmf_alloc_dbuf(scsi_task_t *task, uint32_t size, uint32_t *pminsize, uint32_t flags) @@ -4044,13 +4352,29 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss, } else { lu = lun_map_ent->ent_lu; } + ilu = lu->lu_stmf_private; if (ilu->ilu_flags & ILU_RESET_ACTIVE) { rw_exit(iss->iss_lockp); return (NULL); } - ASSERT(lu == dlun0 || (ilu->ilu_state != STMF_STATE_OFFLINING && - ilu->ilu_state != STMF_STATE_OFFLINE)); + + /* + * if the LUN is being offlined or is offline then only commands + * that query the LUN are allowed. These are handled in + * stmf via the dlun0 vector. It is possible that a race condition + * will cause other commands to arrive while the LUN is in the + * process of being offlined. Check for those and just let the + * protocol stack handle the error. + */ + if ((ilu->ilu_state == STMF_STATE_OFFLINING) || + (ilu->ilu_state == STMF_STATE_OFFLINE)) { + if (lu != dlun0) { + rw_exit(iss->iss_lockp); + return (NULL); + } + } + do { if (ilu->ilu_free_tasks == NULL) { new_task = 1; @@ -4098,15 +4422,6 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss, return (NULL); } task->task_lu = lu; - l = task->task_lun_no; - l[0] = lun[0]; - l[1] = lun[1]; - l[2] = lun[2]; - l[3] = lun[3]; - l[4] = lun[4]; - l[5] = lun[5]; - l[6] = lun[6]; - l[7] = lun[7]; task->task_cdb = (uint8_t *)task->task_port_private; if ((ulong_t)(task->task_cdb) & 7ul) { task->task_cdb = (uint8_t *)(((ulong_t) @@ -4115,7 +4430,25 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss, itask = (stmf_i_scsi_task_t *)task->task_stmf_private; itask->itask_cdb_buf_size = cdb_length; mutex_init(&itask->itask_audit_mutex, NULL, MUTEX_DRIVER, NULL); + mutex_init(&itask->itask_mutex, NULL, MUTEX_DRIVER, NULL); } + + /* + * Since a LUN can be mapped as different LUN ids to different initiator + * groups, we need to set LUN id for a new task and reset LUN id for + * a reused task. 
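+ * For example, the same LU might be exported as LUN 0 to one host + * group and as LUN 5 to another. With flat space addressing the + * 16-bit LUN number used for map lookups is recovered as + * ((uint16_t)lun[1] | (((uint16_t)(lun[0] & 0x3F)) << 8)).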
+ */ + l = task->task_lun_no; + l[0] = lun[0]; + l[1] = lun[1]; + l[2] = lun[2]; + l[3] = lun[3]; + l[4] = lun[4]; + l[5] = lun[5]; + l[6] = lun[6]; + l[7] = lun[7]; + + mutex_enter(&itask->itask_mutex); task->task_session = ss; task->task_lport = lport; task->task_cdb_length = cdb_length_in; @@ -4125,6 +4458,9 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss, itask->itask_lport_read_time = itask->itask_lport_write_time = 0; itask->itask_read_xfer = itask->itask_write_xfer = 0; itask->itask_audit_index = 0; + bzero(&itask->itask_audit_records[0], + sizeof (stmf_task_audit_rec_t) * ITASK_TASK_AUDIT_DEPTH); + mutex_exit(&itask->itask_mutex); if (new_task) { if (lu->lu_task_alloc(task) != STMF_SUCCESS) { @@ -4165,6 +4501,7 @@ stmf_task_alloc(struct stmf_local_port *lport, stmf_scsi_session_t *ss, return (task); } +/* ARGSUSED */ static void stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss) { @@ -4173,8 +4510,19 @@ stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss) stmf_i_lu_t *ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private; ASSERT(rw_lock_held(iss->iss_lockp)); + ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0); + ASSERT((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0); + ASSERT((itask->itask_flags & ITASK_IN_TRANSITION) == 0); + ASSERT((itask->itask_flags & ITASK_KNOWN_TO_LU) == 0); + ASSERT(mutex_owned(&itask->itask_mutex)); + itask->itask_flags = ITASK_IN_FREE_LIST; + itask->itask_ncmds = 0; itask->itask_proxy_msg_id = 0; + atomic_dec_32(itask->itask_ilu_task_cntr); + itask->itask_worker_next = NULL; + mutex_exit(&itask->itask_mutex); + mutex_enter(&ilu->ilu_task_lock); itask->itask_lu_free_next = ilu->ilu_free_tasks; ilu->ilu_free_tasks = itask; @@ -4182,7 +4530,6 @@ stmf_task_lu_free(scsi_task_t *task, stmf_i_scsi_session_t *iss) if (ilu->ilu_ntasks == ilu->ilu_ntasks_free) cv_signal(&ilu->ilu_offline_pending_cv); mutex_exit(&ilu->ilu_task_lock); - atomic_dec_32(itask->itask_ilu_task_cntr); } void @@ -4259,7 +4606,40 @@ stmf_check_freetask() } } -void +/* + * Since this method is looking to find tasks that are stuck, lost, or senile, + * it should be more willing to give up scanning during this time period. This + * is why mutex_tryenter is now used instead of the standard mutex_enter. + * There has been at least one case where the following occurred. + * + * 1) The iscsit_deferred() method is trying to register a session and + * needs the global lock which is held. + * 2) Another thread which holds the global lock is trying to deregister a + * session and needs the session lock. + * 3) A third thread is allocating a stmf task that has grabbed the session + * lock and is trying to grab the lun task lock. + * 4) There's a timeout thread that has the lun task lock and is trying to grab + * a specific task lock. + * 5) The thread that has the task lock is waiting for the ref count to go to + * zero. + * 6) There's a task that would drop the count to zero, but it's in the task + * queue waiting to run and is stuck because the thread in #1 is currently + * blocked. + * + * This method is number 4 in the above chain of events. Had this code + * originally used mutex_tryenter the chain would have been broken and the + * system wouldn't have hung. That is why this method now uses mutex_tryenter. 
+ */ +/* ---- Only one thread calls stmf_do_ilu_timeouts so no lock required ---- */ +typedef struct stmf_bailout_cnt { + int no_ilu_lock; + int no_task_lock; + int tasks_checked; +} stmf_bailout_cnt_t; + +stmf_bailout_cnt_t stmf_bailout; + +static void stmf_do_ilu_timeouts(stmf_i_lu_t *ilu) { clock_t l = ddi_get_lbolt(); @@ -4268,11 +4648,21 @@ stmf_do_ilu_timeouts(stmf_i_lu_t *ilu) scsi_task_t *task; uint32_t to; - mutex_enter(&ilu->ilu_task_lock); + if (mutex_tryenter(&ilu->ilu_task_lock) == 0) { + stmf_bailout.no_ilu_lock++; + return; + } + for (itask = ilu->ilu_tasks; itask != NULL; itask = itask->itask_lu_next) { + if (mutex_tryenter(&itask->itask_mutex) == 0) { + stmf_bailout.no_task_lock++; + continue; + } + stmf_bailout.tasks_checked++; if (itask->itask_flags & (ITASK_IN_FREE_LIST | ITASK_BEING_ABORTED)) { + mutex_exit(&itask->itask_mutex); continue; } task = itask->itask_task; @@ -4280,8 +4670,12 @@ stmf_do_ilu_timeouts(stmf_i_lu_t *ilu) to = stmf_default_task_timeout; else to = task->task_timeout; - if ((itask->itask_start_time + (to * ps)) > l) + + if ((itask->itask_start_time + (to * ps)) > l) { + mutex_exit(&itask->itask_mutex); continue; + } + mutex_exit(&itask->itask_mutex); stmf_abort(STMF_QUEUE_TASK_ABORT, task, STMF_TIMEOUT, NULL); } @@ -4336,11 +4730,14 @@ stmf_task_lu_killall(stmf_lu_t *lu, scsi_task_t *tm_task, stmf_status_t s) stmf_i_scsi_task_t *itask; mutex_enter(&ilu->ilu_task_lock); - for (itask = ilu->ilu_tasks; itask != NULL; itask = itask->itask_lu_next) { - if (itask->itask_flags & ITASK_IN_FREE_LIST) + mutex_enter(&itask->itask_mutex); + if (itask->itask_flags & ITASK_IN_FREE_LIST) { + mutex_exit(&itask->itask_mutex); continue; + } + mutex_exit(&itask->itask_mutex); if (itask->itask_task == tm_task) continue; stmf_abort(STMF_QUEUE_TASK_ABORT, itask->itask_task, s, NULL); @@ -4396,9 +4793,12 @@ stmf_task_free(scsi_task_t *task) task->task_stmf_private; stmf_i_scsi_session_t *iss = (stmf_i_scsi_session_t *) task->task_session->ss_stmf_private; + stmf_lu_t *lu = task->task_lu; stmf_task_audit(itask, TE_TASK_FREE, CMD_OR_IOF_NA, NULL); - + ASSERT(mutex_owned(&itask->itask_mutex)); + if ((lu != NULL) && (lu->lu_task_done != NULL)) + lu->lu_task_done(task); stmf_free_task_bufs(itask, lport); stmf_itl_task_done(itask); DTRACE_PROBE2(stmf__task__end, scsi_task_t *, task, @@ -4412,7 +4812,14 @@ stmf_task_free(scsi_task_t *task) } } + /* + * To prevent a deadlock condition we must release the itask_mutex, + * grab a reader lock on iss_lockp and then reacquire the itask_mutex. 
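+ * (Other paths, stmf_task_alloc() for one, take iss_lockp before + * itask_mutex, so keeping itask_mutex held while acquiring iss_lockp + * here would invert that lock order.)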
+ */ + mutex_exit(&itask->itask_mutex); rw_enter(iss->iss_lockp, RW_READER); + mutex_enter(&itask->itask_mutex); + lport->lport_task_free(task); if (itask->itask_worker) { atomic_dec_32(&stmf_cur_ntasks); @@ -4433,9 +4840,9 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf) task->task_stmf_private; stmf_i_lu_t *ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private; int nv; - uint32_t old, new; + uint32_t new; uint32_t ct; - stmf_worker_t *w, *w1; + stmf_worker_t *w; uint8_t tm; if (task->task_max_nbufs > 4) @@ -4445,44 +4852,28 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf) ct = atomic_inc_32_nv(&stmf_cur_ntasks); /* Select the next worker using round robin */ - nv = (int)atomic_inc_32_nv((uint32_t *)&stmf_worker_sel_counter); - if (nv >= stmf_nworkers_accepting_cmds) { - int s = nv; - do { - nv -= stmf_nworkers_accepting_cmds; - } while (nv >= stmf_nworkers_accepting_cmds); - if (nv < 0) - nv = 0; - /* Its ok if this cas fails */ - (void) atomic_cas_32((uint32_t *)&stmf_worker_sel_counter, - s, nv); - } - w = &stmf_workers[nv]; + mutex_enter(&stmf_worker_sel_mx); + stmf_worker_sel_counter++; + if (stmf_worker_sel_counter >= stmf_nworkers) + stmf_worker_sel_counter = 0; + nv = stmf_worker_sel_counter; - /* - * A worker can be pinned by interrupt. So select the next one - * if it has lower load. - */ - if ((nv + 1) >= stmf_nworkers_accepting_cmds) { - w1 = stmf_workers; - } else { - w1 = &stmf_workers[nv + 1]; + /* if the selected worker is not idle then bump to the next worker */ + if (stmf_workers[nv].worker_queue_depth > 0) { + stmf_worker_sel_counter++; + if (stmf_worker_sel_counter >= stmf_nworkers) + stmf_worker_sel_counter = 0; + nv = stmf_worker_sel_counter; } - if (w1->worker_queue_depth < w->worker_queue_depth) - w = w1; + mutex_exit(&stmf_worker_sel_mx); + w = &stmf_workers[nv]; + + mutex_enter(&itask->itask_mutex); mutex_enter(&w->worker_lock); - if (((w->worker_flags & STMF_WORKER_STARTED) == 0) || - (w->worker_flags & STMF_WORKER_TERMINATE)) { - /* - * Maybe we are in the middle of a change. Just go to - * the 1st worker. - */ - mutex_exit(&w->worker_lock); - w = stmf_workers; - mutex_enter(&w->worker_lock); - } + itask->itask_worker = w; + /* * Track max system load inside the worker as we already have the * worker lock (no point implementing another lock). 
The service @@ -4492,39 +4883,33 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf) if (w->worker_max_sys_qdepth_pu < ct) w->worker_max_sys_qdepth_pu = ct; - do { - old = new = itask->itask_flags; - new |= ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE; - if (task->task_mgmt_function) { - tm = task->task_mgmt_function; - if ((tm == TM_TARGET_RESET) || - (tm == TM_TARGET_COLD_RESET) || - (tm == TM_TARGET_WARM_RESET)) { - new |= ITASK_DEFAULT_HANDLING; - } - } else if (task->task_cdb[0] == SCMD_REPORT_LUNS) { + new = itask->itask_flags; + new |= ITASK_KNOWN_TO_TGT_PORT; + if (task->task_mgmt_function) { + tm = task->task_mgmt_function; + if ((tm == TM_TARGET_RESET) || + (tm == TM_TARGET_COLD_RESET) || + (tm == TM_TARGET_WARM_RESET)) { new |= ITASK_DEFAULT_HANDLING; } - new &= ~ITASK_IN_TRANSITION; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + } else if (task->task_cdb[0] == SCMD_REPORT_LUNS) { + new |= ITASK_DEFAULT_HANDLING; + } + new &= ~ITASK_IN_TRANSITION; + itask->itask_flags = new; stmf_itl_task_start(itask); - itask->itask_worker_next = NULL; - if (w->worker_task_tail) { - w->worker_task_tail->itask_worker_next = itask; - } else { - w->worker_task_head = itask; - } - w->worker_task_tail = itask; - if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { - w->worker_max_qdepth_pu = w->worker_queue_depth; - } - /* Measure task waitq time */ - itask->itask_waitq_enter_timestamp = gethrtime(); - atomic_inc_32(&w->worker_ref_count); itask->itask_cmd_stack[0] = ITASK_CMD_NEW_TASK; itask->itask_ncmds = 1; + + if ((task->task_flags & TF_INITIAL_BURST) && + !(curthread->t_flag & T_INTR_THREAD)) { + stmf_update_kstat_lu_io(task, dbuf); + stmf_update_kstat_lport_io(task, dbuf); + stmf_update_kstat_rport_io(task, dbuf); + } + stmf_task_audit(itask, TE_TASK_START, CMD_OR_IOF_NA, dbuf); if (dbuf) { itask->itask_allocated_buf_map = 1; @@ -4535,13 +4920,10 @@ stmf_post_task(scsi_task_t *task, stmf_data_buf_t *dbuf) itask->itask_dbufs[0] = NULL; } - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) { - w->worker_signal_timestamp = gethrtime(); - DTRACE_PROBE2(worker__signal, stmf_worker_t *, w, - scsi_task_t *, task); - cv_signal(&w->worker_cv); - } + STMF_ENQUEUE_ITASK(w, itask); + mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); /* * This can only happen if during stmf_task_alloc(), ILU_RESET_ACTIVE @@ -4597,26 +4979,30 @@ stmf_xfer_data(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t ioflags) stmf_task_audit(itask, TE_XFER_START, ioflags, dbuf); + mutex_enter(&itask->itask_mutex); if (ioflags & STMF_IOF_LU_DONE) { - uint32_t new, old; - do { - new = old = itask->itask_flags; - if (new & ITASK_BEING_ABORTED) - return (STMF_ABORTED); - new &= ~ITASK_KNOWN_TO_LU; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&itask->itask_mutex); + return (STMF_ABORTED); + } + itask->itask_flags &= ~ITASK_KNOWN_TO_LU; } - if (itask->itask_flags & ITASK_BEING_ABORTED) + if ((itask->itask_flags & ITASK_BEING_ABORTED) != 0) { + mutex_exit(&itask->itask_mutex); return (STMF_ABORTED); + } + mutex_exit(&itask->itask_mutex); + #ifdef DEBUG if (!(ioflags & STMF_IOF_STATS_ONLY) && stmf_drop_buf_counter > 0) { - if (atomic_dec_32_nv(&stmf_drop_buf_counter) == 1) + if (atomic_dec_32_nv((uint32_t *)&stmf_drop_buf_counter) == 1) return (STMF_SUCCESS); } #endif stmf_update_kstat_lu_io(task, dbuf); stmf_update_kstat_lport_io(task, dbuf); + stmf_update_kstat_rport_io(task, dbuf); 
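+ /* + * stmf_lport_xfer_start() begins the transfer timing that + * stmf_lport_xfer_done() completes once the port finishes the I/O. + */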
stmf_lport_xfer_start(itask, dbuf); if (ioflags & STMF_IOF_STATS_ONLY) { stmf_lport_xfer_done(itask, dbuf); @@ -4646,7 +5032,7 @@ stmf_data_xfer_done(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t iof) (stmf_i_scsi_task_t *)task->task_stmf_private; stmf_i_local_port_t *ilport; stmf_worker_t *w = itask->itask_worker; - uint32_t new, old; + uint32_t new; uint8_t update_queue_flags, free_it, queue_it; stmf_lport_xfer_done(itask, dbuf); @@ -4667,74 +5053,67 @@ stmf_data_xfer_done(scsi_task_t *task, stmf_data_buf_t *dbuf, uint32_t iof) return; } + mutex_enter(&itask->itask_mutex); mutex_enter(&w->worker_lock); - do { - new = old = itask->itask_flags; - if (old & ITASK_BEING_ABORTED) { - mutex_exit(&w->worker_lock); - return; - } + new = itask->itask_flags; + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); + return; + } + free_it = 0; + if (iof & STMF_IOF_LPORT_DONE) { + new &= ~ITASK_KNOWN_TO_TGT_PORT; + task->task_completion_status = dbuf->db_xfer_status; + free_it = 1; + } + /* + * If the task is known to LU then queue it. But if + * it is already queued (multiple completions) then + * just update the buffer information by grabbing the + * worker lock. If the task is not known to LU, + * completed/aborted, then see if we need to + * free this task. + */ + if (itask->itask_flags & ITASK_KNOWN_TO_LU) { free_it = 0; - if (iof & STMF_IOF_LPORT_DONE) { - new &= ~ITASK_KNOWN_TO_TGT_PORT; - task->task_completion_status = dbuf->db_xfer_status; - free_it = 1; - } - /* - * If the task is known to LU then queue it. But if - * it is already queued (multiple completions) then - * just update the buffer information by grabbing the - * worker lock. If the task is not known to LU, - * completed/aborted, then see if we need to - * free this task. 
- */ - if (old & ITASK_KNOWN_TO_LU) { - free_it = 0; - update_queue_flags = 1; - if (old & ITASK_IN_WORKER_QUEUE) { - queue_it = 0; - } else { - queue_it = 1; - new |= ITASK_IN_WORKER_QUEUE; - } - } else { - update_queue_flags = 0; + update_queue_flags = 1; + if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) { queue_it = 0; + } else { + queue_it = 1; } - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + } else { + update_queue_flags = 0; + queue_it = 0; + } + itask->itask_flags = new; if (update_queue_flags) { uint8_t cmd = (dbuf->db_handle << 5) | ITASK_CMD_DATA_XFER_DONE; + ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0); ASSERT(itask->itask_ncmds < ITASK_MAX_NCMDS); + itask->itask_cmd_stack[itask->itask_ncmds++] = cmd; if (queue_it) { - itask->itask_worker_next = NULL; - if (w->worker_task_tail) { - w->worker_task_tail->itask_worker_next = itask; - } else { - w->worker_task_head = itask; - } - w->worker_task_tail = itask; - /* Measure task waitq time */ - itask->itask_waitq_enter_timestamp = gethrtime(); - if (++(w->worker_queue_depth) > - w->worker_max_qdepth_pu) { - w->worker_max_qdepth_pu = w->worker_queue_depth; - } - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) - cv_signal(&w->worker_cv); + STMF_ENQUEUE_ITASK(w, itask); } + mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); + return; } - mutex_exit(&w->worker_lock); + mutex_exit(&w->worker_lock); if (free_it) { if ((itask->itask_flags & (ITASK_KNOWN_TO_LU | ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE | ITASK_BEING_ABORTED)) == 0) { stmf_task_free(task); + return; } } + mutex_exit(&itask->itask_mutex); } stmf_status_t @@ -4747,22 +5126,25 @@ stmf_send_scsi_status(scsi_task_t *task, uint32_t ioflags) stmf_task_audit(itask, TE_SEND_STATUS, ioflags, NULL); + mutex_enter(&itask->itask_mutex); if (ioflags & STMF_IOF_LU_DONE) { - uint32_t new, old; - do { - new = old = itask->itask_flags; - if (new & ITASK_BEING_ABORTED) - return (STMF_ABORTED); - new &= ~ITASK_KNOWN_TO_LU; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&itask->itask_mutex); + return (STMF_ABORTED); + } + itask->itask_flags &= ~ITASK_KNOWN_TO_LU; } if (!(itask->itask_flags & ITASK_KNOWN_TO_TGT_PORT)) { + mutex_exit(&itask->itask_mutex); return (STMF_SUCCESS); } - if (itask->itask_flags & ITASK_BEING_ABORTED) + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&itask->itask_mutex); return (STMF_ABORTED); + } + mutex_exit(&itask->itask_mutex); if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) { task->task_status_ctrl = 0; @@ -4790,66 +5172,57 @@ stmf_send_status_done(scsi_task_t *task, stmf_status_t s, uint32_t iof) stmf_i_scsi_task_t *itask = (stmf_i_scsi_task_t *)task->task_stmf_private; stmf_worker_t *w = itask->itask_worker; - uint32_t new, old; + uint32_t new; uint8_t free_it, queue_it; stmf_task_audit(itask, TE_SEND_STATUS_DONE, iof, NULL); + mutex_enter(&itask->itask_mutex); mutex_enter(&w->worker_lock); - do { - new = old = itask->itask_flags; - if (old & ITASK_BEING_ABORTED) { - mutex_exit(&w->worker_lock); - return; - } + new = itask->itask_flags; + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); + return; + } + free_it = 0; + if (iof & STMF_IOF_LPORT_DONE) { + new &= ~ITASK_KNOWN_TO_TGT_PORT; + free_it = 1; + } + /* + * If the task is known to LU then queue it. 
But if + * it is already queued (multiple completions) then + * just update the buffer information by grabbing the + * worker lock. If the task is not known to LU, + * completed/aborted, then see if we need to + * free this task. + */ + if (itask->itask_flags & ITASK_KNOWN_TO_LU) { free_it = 0; - if (iof & STMF_IOF_LPORT_DONE) { - new &= ~ITASK_KNOWN_TO_TGT_PORT; - free_it = 1; - } - /* - * If the task is known to LU then queue it. But if - * it is already queued (multiple completions) then - * just update the buffer information by grabbing the - * worker lock. If the task is not known to LU, - * completed/aborted, then see if we need to - * free this task. - */ - if (old & ITASK_KNOWN_TO_LU) { - free_it = 0; - queue_it = 1; - if (old & ITASK_IN_WORKER_QUEUE) { - cmn_err(CE_PANIC, "status completion received" - " when task is already in worker queue " - " task = %p", (void *)task); - } - new |= ITASK_IN_WORKER_QUEUE; - } else { - queue_it = 0; + queue_it = 1; + if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) { + cmn_err(CE_PANIC, "status completion received" + " when task is already in worker queue " + " task = %p", (void *)task); } - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + } else { + queue_it = 0; + } + itask->itask_flags = new; task->task_completion_status = s; - if (queue_it) { ASSERT(itask->itask_ncmds < ITASK_MAX_NCMDS); itask->itask_cmd_stack[itask->itask_ncmds++] = ITASK_CMD_STATUS_DONE; - itask->itask_worker_next = NULL; - if (w->worker_task_tail) { - w->worker_task_tail->itask_worker_next = itask; - } else { - w->worker_task_head = itask; - } - w->worker_task_tail = itask; - /* Measure task waitq time */ - itask->itask_waitq_enter_timestamp = gethrtime(); - if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { - w->worker_max_qdepth_pu = w->worker_queue_depth; - } - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) - cv_signal(&w->worker_cv); + + STMF_ENQUEUE_ITASK(w, itask); + mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); + return; } + mutex_exit(&w->worker_lock); if (free_it) { @@ -4857,12 +5230,14 @@ stmf_send_status_done(scsi_task_t *task, stmf_status_t s, uint32_t iof) ITASK_KNOWN_TO_TGT_PORT | ITASK_IN_WORKER_QUEUE | ITASK_BEING_ABORTED)) == 0) { stmf_task_free(task); + return; } else { cmn_err(CE_PANIC, "LU is done with the task but LPORT " " is not done, itask %p itask_flags %x", (void *)itask, itask->itask_flags); } } + mutex_exit(&itask->itask_mutex); } void @@ -4871,33 +5246,32 @@ stmf_task_lu_done(scsi_task_t *task) stmf_i_scsi_task_t *itask = (stmf_i_scsi_task_t *)task->task_stmf_private; stmf_worker_t *w = itask->itask_worker; - uint32_t new, old; + mutex_enter(&itask->itask_mutex); mutex_enter(&w->worker_lock); - do { - new = old = itask->itask_flags; - if (old & ITASK_BEING_ABORTED) { - mutex_exit(&w->worker_lock); - return; - } - if (old & ITASK_IN_WORKER_QUEUE) { - cmn_err(CE_PANIC, "task_lu_done received" - " when task is in worker queue " - " task = %p", (void *)task); - } - new &= ~ITASK_KNOWN_TO_LU; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + if (itask->itask_flags & ITASK_BEING_ABORTED) { + mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); + return; + } + if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) { + cmn_err(CE_PANIC, "task_lu_done received" + " when task is in worker queue " + " task = %p", (void *)task); + } + itask->itask_flags &= ~ITASK_KNOWN_TO_LU; mutex_exit(&w->worker_lock); - if ((itask->itask_flags & (ITASK_KNOWN_TO_LU | ITASK_KNOWN_TO_TGT_PORT | 
ITASK_IN_WORKER_QUEUE | ITASK_BEING_ABORTED)) == 0) { stmf_task_free(task); + return; } else { cmn_err(CE_PANIC, "stmf_lu_done should be the last stage but " " the task is still not done, task = %p", (void *)task); } + mutex_exit(&itask->itask_mutex); } void @@ -4906,54 +5280,41 @@ stmf_queue_task_for_abort(scsi_task_t *task, stmf_status_t s) stmf_i_scsi_task_t *itask = (stmf_i_scsi_task_t *)task->task_stmf_private; stmf_worker_t *w; - uint32_t old, new; stmf_task_audit(itask, TE_TASK_ABORT, CMD_OR_IOF_NA, NULL); - do { - old = new = itask->itask_flags; - if ((old & ITASK_BEING_ABORTED) || - ((old & (ITASK_KNOWN_TO_TGT_PORT | - ITASK_KNOWN_TO_LU)) == 0)) { - return; - } - new |= ITASK_BEING_ABORTED; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + mutex_enter(&itask->itask_mutex); + if ((itask->itask_flags & ITASK_BEING_ABORTED) || + ((itask->itask_flags & (ITASK_KNOWN_TO_TGT_PORT | + ITASK_KNOWN_TO_LU)) == 0)) { + mutex_exit(&itask->itask_mutex); + return; + } + itask->itask_flags |= ITASK_BEING_ABORTED; task->task_completion_status = s; - itask->itask_start_time = ddi_get_lbolt(); if (((w = itask->itask_worker) == NULL) || (itask->itask_flags & ITASK_IN_TRANSITION)) { + mutex_exit(&itask->itask_mutex); return; } /* Queue it and get out */ - mutex_enter(&w->worker_lock); if (itask->itask_flags & ITASK_IN_WORKER_QUEUE) { - mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); return; } - atomic_or_32(&itask->itask_flags, ITASK_IN_WORKER_QUEUE); - itask->itask_worker_next = NULL; - if (w->worker_task_tail) { - w->worker_task_tail->itask_worker_next = itask; - } else { - w->worker_task_head = itask; - } - w->worker_task_tail = itask; - if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { - w->worker_max_qdepth_pu = w->worker_queue_depth; - } - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) - cv_signal(&w->worker_cv); + mutex_enter(&w->worker_lock); + STMF_ENQUEUE_ITASK(w, itask); mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); } void stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg) { stmf_i_scsi_task_t *itask = NULL; - uint32_t old, new, f, rf; + uint32_t f, rf; DTRACE_PROBE2(scsi__task__abort, scsi_task_t *, task, stmf_status_t, s); @@ -4976,17 +5337,24 @@ stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg) default: return; } + itask = (stmf_i_scsi_task_t *)task->task_stmf_private; + mutex_enter(&itask->itask_mutex); f |= ITASK_BEING_ABORTED | rf; - do { - old = new = itask->itask_flags; - if ((old & f) != f) { - return; - } - new &= ~rf; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + + if ((itask->itask_flags & f) != f) { + mutex_exit(&itask->itask_mutex); + return; + } + itask->itask_flags &= ~rf; + mutex_exit(&itask->itask_mutex); + } +/* + * NOTE: stmf_abort_task_offline will release and then reacquire the + * itask_mutex. This is required to prevent a lock order violation. 
+ */ void stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof) { @@ -4995,7 +5363,7 @@ stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof) unsigned long long st; stmf_task_audit(itask, TE_TASK_LU_ABORTED, iof, NULL); - + ASSERT(mutex_owned(&itask->itask_mutex)); st = s; /* gcc fix */ if ((s != STMF_ABORT_SUCCESS) && (s != STMF_NOT_FOUND)) { (void) snprintf(info, sizeof (info), @@ -5015,16 +5383,19 @@ stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof) stmf_abort_task_offline(task, 1, info); } +/* + * NOTE: stmf_abort_task_offline will release and then reacquire the + * itask_mutex. This is required to prevent a lock order violation. + */ void stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof) { char info[STMF_CHANGE_INFO_LEN]; stmf_i_scsi_task_t *itask = TASK_TO_ITASK(task); unsigned long long st; - uint32_t old, new; + ASSERT(mutex_owned(&itask->itask_mutex)); stmf_task_audit(itask, TE_TASK_LPORT_ABORTED, iof, NULL); - st = s; if ((s != STMF_ABORT_SUCCESS) && (s != STMF_NOT_FOUND)) { (void) snprintf(info, sizeof (info), @@ -5038,18 +5409,24 @@ stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof) /* * LPORT abort successfully */ - do { - old = new = itask->itask_flags; - if (!(old & ITASK_KNOWN_TO_TGT_PORT)) - return; - new &= ~ITASK_KNOWN_TO_TGT_PORT; - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + atomic_and_32(&itask->itask_flags, ~ITASK_KNOWN_TO_TGT_PORT); return; } stmf_abort_task_offline(task, 0, info); } +void +stmf_task_lport_aborted_unlocked(scsi_task_t *task, stmf_status_t s, + uint32_t iof) +{ + stmf_i_scsi_task_t *itask = TASK_TO_ITASK(task); + + mutex_enter(&itask->itask_mutex); + stmf_task_lport_aborted(task, s, iof); + mutex_exit(&itask->itask_mutex); +} + stmf_status_t stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout) { @@ -5058,15 +5435,18 @@ stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout) stmf_worker_t *w = itask->itask_worker; int i; + mutex_enter(&itask->itask_mutex); ASSERT(itask->itask_flags & ITASK_KNOWN_TO_LU); mutex_enter(&w->worker_lock); if (itask->itask_ncmds >= ITASK_MAX_NCMDS) { mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); return (STMF_BUSY); } for (i = 0; i < itask->itask_ncmds; i++) { if (itask->itask_cmd_stack[i] == ITASK_CMD_POLL_LU) { mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); return (STMF_SUCCESS); } } @@ -5080,21 +5460,10 @@ stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout) itask->itask_poll_timeout = ddi_get_lbolt() + t; } if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) { - itask->itask_worker_next = NULL; - if (w->worker_task_tail) { - w->worker_task_tail->itask_worker_next = itask; - } else { - w->worker_task_head = itask; - } - w->worker_task_tail = itask; - if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { - w->worker_max_qdepth_pu = w->worker_queue_depth; - } - atomic_or_32(&itask->itask_flags, ITASK_IN_WORKER_QUEUE); - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) - cv_signal(&w->worker_cv); + STMF_ENQUEUE_ITASK(w, itask); } mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); return (STMF_SUCCESS); } @@ -5106,15 +5475,18 @@ stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout) stmf_worker_t *w = itask->itask_worker; int i; + mutex_enter(&itask->itask_mutex); ASSERT(itask->itask_flags & ITASK_KNOWN_TO_TGT_PORT); mutex_enter(&w->worker_lock); if (itask->itask_ncmds >= ITASK_MAX_NCMDS) { mutex_exit(&w->worker_lock); + 
mutex_exit(&itask->itask_mutex); return (STMF_BUSY); } for (i = 0; i < itask->itask_ncmds; i++) { if (itask->itask_cmd_stack[i] == ITASK_CMD_POLL_LPORT) { mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); return (STMF_SUCCESS); } } @@ -5128,20 +5500,10 @@ stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout) itask->itask_poll_timeout = ddi_get_lbolt() + t; } if ((itask->itask_flags & ITASK_IN_WORKER_QUEUE) == 0) { - itask->itask_worker_next = NULL; - if (w->worker_task_tail) { - w->worker_task_tail->itask_worker_next = itask; - } else { - w->worker_task_head = itask; - } - w->worker_task_tail = itask; - if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { - w->worker_max_qdepth_pu = w->worker_queue_depth; - } - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) - cv_signal(&w->worker_cv); + STMF_ENQUEUE_ITASK(w, itask); } mutex_exit(&w->worker_lock); + mutex_exit(&itask->itask_mutex); return (STMF_SUCCESS); } @@ -5152,22 +5514,22 @@ stmf_do_task_abort(scsi_task_t *task) stmf_lu_t *lu; stmf_local_port_t *lport; unsigned long long ret; - uint32_t old, new; + uint32_t new = 0; uint8_t call_lu_abort, call_port_abort; char info[STMF_CHANGE_INFO_LEN]; lu = task->task_lu; lport = task->task_lport; - do { - old = new = itask->itask_flags; - if ((old & (ITASK_KNOWN_TO_LU | ITASK_LU_ABORT_CALLED)) == - ITASK_KNOWN_TO_LU) { - new |= ITASK_LU_ABORT_CALLED; - call_lu_abort = 1; - } else { - call_lu_abort = 0; - } - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + mutex_enter(&itask->itask_mutex); + new = itask->itask_flags; + if ((itask->itask_flags & (ITASK_KNOWN_TO_LU | + ITASK_LU_ABORT_CALLED)) == ITASK_KNOWN_TO_LU) { + new |= ITASK_LU_ABORT_CALLED; + call_lu_abort = 1; + } else { + call_lu_abort = 0; + } + itask->itask_flags = new; if (call_lu_abort) { if ((itask->itask_flags & ITASK_DEFAULT_HANDLING) == 0) { @@ -5195,16 +5557,22 @@ stmf_do_task_abort(scsi_task_t *task) } } - do { - old = new = itask->itask_flags; - if ((old & (ITASK_KNOWN_TO_TGT_PORT | - ITASK_TGT_PORT_ABORT_CALLED)) == ITASK_KNOWN_TO_TGT_PORT) { - new |= ITASK_TGT_PORT_ABORT_CALLED; - call_port_abort = 1; - } else { - call_port_abort = 0; - } - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + /* + * NOTE: After the call to either stmf_abort_task_offline() or + * stmf_task_lu_abort() the itask_mutex was dropped and reacquired + * to avoid a deadlock situation with stmf_state.stmf_lock. 
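+ * Because the mutex may have been dropped in those calls, itask_flags + * is re-read below rather than reusing a value captured before the + * LU abort was issued.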
+ */ + + new = itask->itask_flags; + if ((itask->itask_flags & (ITASK_KNOWN_TO_TGT_PORT | + ITASK_TGT_PORT_ABORT_CALLED)) == ITASK_KNOWN_TO_TGT_PORT) { + new |= ITASK_TGT_PORT_ABORT_CALLED; + call_port_abort = 1; + } else { + call_port_abort = 0; + } + itask->itask_flags = new; + if (call_port_abort) { ret = lport->lport_abort(lport, STMF_LPORT_ABORT_TASK, task, 0); if ((ret == STMF_ABORT_SUCCESS) || (ret == STMF_NOT_FOUND)) { @@ -5228,6 +5596,7 @@ stmf_abort_task_offline(itask->itask_task, 0, info); } } + mutex_exit(&itask->itask_mutex); } stmf_status_t @@ -5559,7 +5928,7 @@ stmf_prepare_tpgs_data(uint8_t ilu_alua) mutex_enter(&stmf_state.stmf_lock); /* check if any ports are standby and create second group */ - for (ilport = stmf_state.stmf_ilportlist; ilport; + for (ilport = stmf_state.stmf_ilportlist; ilport != NULL; ilport = ilport->ilport_next) { if (ilport->ilport_standby == 1) { nports_standby++; @@ -5568,14 +5937,38 @@ } } - /* The spec only allows for 255 ports to be reported per group */ + /* + * Section 6.25 REPORT TARGET PORT GROUPS + * The reply can contain many group replies. Each group is limited + * to 255 port identifiers so we'll need to limit the amount of + * data returned. For FC ports there's a physical limitation in + * machines that makes reaching 255 ports very, very unlikely. For + * iSCSI, on the other hand, recent changes mean the port count could + * be as high as 4096 (current limit). Limiting the data returned + * for iSCSI isn't as bad as it sounds. This information is only + * important for ALUA, which isn't supported for iSCSI. iSCSI uses + * virtual IP addresses to deal with node failover in a cluster. + */ nports = min(nports, 255); nports_standby = min(nports_standby, 255); + + /* + * The first 4 bytes of the returned data are the length. The + * size of the Target Port Group header is 8 bytes. So, that's where + * the 12 comes from. Each port entry is 4 bytes in size. + */ sz = (nports * 4) + 12; - if (nports_standby && ilu_alua) { + if (nports_standby != 0 && ilu_alua != 0) { + /* ---- Only add 8 bytes since it's just the Group header ---- */ sz += (nports_standby * 4) + 8; } - asz = sz + sizeof (*xd) - 4; + + /* + * The stmf_xfer_data structure contains 4 bytes that will be + * part of the data buffer. So, subtract the 4 bytes from the space + * needed. 
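+ * As a worked example (hypothetical counts): with two active ports + * and one standby port, sz = (2 * 4) + 12 + ((1 * 4) + 8) = 32, and + * asz = sizeof (*xd) + 32 - 4.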
+ */ + asz = sizeof (*xd) + sz - 4; xd = (stmf_xfer_data_t *)kmem_zalloc(asz, KM_NOSLEEP); if (xd == NULL) { mutex_exit(&stmf_state.stmf_lock); @@ -5586,8 +5979,11 @@ p = xd->buf; + /* ---- length values never include the field that holds the size ---- */ *((uint32_t *)p) = BE_32(sz - 4); p += 4; + + /* ---- Now fill out the first Target Group header ---- */ p[0] = 0x80; /* PREF */ p[1] = 5; /* AO_SUP, S_SUP */ if (stmf_state.stmf_alua_node == 1) { @@ -5597,15 +5993,16 @@ } p[7] = nports & 0xff; p += 8; - for (ilport = stmf_state.stmf_ilportlist; ilport; + for (ilport = stmf_state.stmf_ilportlist; ilport != NULL && nports != 0; ilport = ilport->ilport_next) { if (ilport->ilport_standby == 1) { continue; } ((uint16_t *)p)[1] = BE_16(ilport->ilport_rtpid); p += 4; + nports--; } - if (nports_standby && ilu_alua) { + if (nports_standby != 0 && ilu_alua != 0) { p[0] = 0x02; /* Non PREF, Standby */ p[1] = 5; /* AO_SUP, S_SUP */ if (stmf_state.stmf_alua_node == 1) { @@ -5615,13 +6012,14 @@ } p[7] = nports_standby & 0xff; p += 8; - for (ilport = stmf_state.stmf_ilportlist; ilport; - ilport = ilport->ilport_next) { + for (ilport = stmf_state.stmf_ilportlist; ilport != NULL && + nports_standby != 0; ilport = ilport->ilport_next) { if (ilport->ilport_standby == 0) { continue; } ((uint16_t *)p)[1] = BE_16(ilport->ilport_rtpid); p += 4; + nports_standby--; } } @@ -5862,7 +6260,9 @@ stmf_scsilib_handle_report_tpgs(scsi_task_t *task, stmf_data_buf_t *dbuf) stmf_xfer_data_t *xd; uint32_t sz, minsz; + mutex_enter(&itask->itask_mutex); itask->itask_flags |= ITASK_DEFAULT_HANDLING; + task->task_cmd_xfer_length = ((((uint32_t)task->task_cdb[6]) << 24) | (((uint32_t)task->task_cdb[7]) << 16) | @@ -5874,6 +6274,7 @@ task->task_expected_xfer_length = task->task_cmd_xfer_length; } + mutex_exit(&itask->itask_mutex); if (task->task_cmd_xfer_length == 0) { stmf_scsilib_send_status(task, STATUS_GOOD, 0); @@ -5979,7 +6380,9 @@ stmf_handle_lun_reset(scsi_task_t *task) * was responsible for setting the ILU_RESET_ACTIVE. In case this * task itself gets aborted, we will clear ILU_RESET_ACTIVE. */ + mutex_enter(&itask->itask_mutex); itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_CAUSING_LU_RESET; + mutex_exit(&itask->itask_mutex); /* Initiate abort on all commands on this LU except this one */ stmf_abort(STMF_QUEUE_ABORT_LU, task, STMF_ABORTED, task->task_lu); @@ -6055,8 +6458,10 @@ stmf_handle_target_reset(scsi_task_t *task) } /* ok, start the damage */ + mutex_enter(&itask->itask_mutex); itask->itask_flags |= ITASK_DEFAULT_HANDLING | ITASK_CAUSING_TARGET_RESET; + mutex_exit(&itask->itask_mutex); for (i = 0; i < lm->lm_nentries; i++) { if (lm->lm_plus[i] == NULL) continue; @@ -6114,34 +6519,46 @@ void stmf_worker_init() { uint32_t i; + stmf_worker_t *w; /* Make local copy of global tunables */ - stmf_i_max_nworkers = stmf_max_nworkers; - stmf_i_min_nworkers = stmf_min_nworkers; + /* + * Allow workers to be scaled down to a very low number for cases + * where the load is light. If the number of threads gets below + * 4, assume it is a mistake and force the threads back to a + * reasonable number. The low limit of 4 is simply legacy and + * may be too low. 
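+ * The count comes from the stmf_nworkers tunable; like other stmf + * globals it can normally be overridden in /etc/system, for example + * "set stmf:stmf_nworkers = 128" (the value shown is only an example).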
+ */ ASSERT(stmf_workers == NULL); - if (stmf_i_min_nworkers < 4) { - stmf_i_min_nworkers = 4; - } - if (stmf_i_max_nworkers < stmf_i_min_nworkers) { - stmf_i_max_nworkers = stmf_i_min_nworkers; + if (stmf_nworkers < 4) { + stmf_nworkers = 64; } + stmf_workers = (stmf_worker_t *)kmem_zalloc( - sizeof (stmf_worker_t) * stmf_i_max_nworkers, KM_SLEEP); - for (i = 0; i < stmf_i_max_nworkers; i++) { + sizeof (stmf_worker_t) * stmf_nworkers, KM_SLEEP); + for (i = 0; i < stmf_nworkers; i++) { stmf_worker_t *w = &stmf_workers[i]; mutex_init(&w->worker_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&w->worker_cv, NULL, CV_DRIVER, NULL); } - stmf_worker_mgmt_delay = drv_usectohz(20 * 1000); stmf_workers_state = STMF_WORKERS_ENABLED; - /* Workers will be started by stmf_worker_mgmt() */ + /* Check if we are starting */ + if (stmf_nworkers_cur < stmf_nworkers - 1) { + for (i = stmf_nworkers_cur; i < stmf_nworkers; i++) { + w = &stmf_workers[i]; + w->worker_tid = thread_create(NULL, 0, stmf_worker_task, + (void *)&stmf_workers[i], 0, &p0, TS_RUN, + minclsyspri); + stmf_nworkers_accepting_cmds++; + } + return; + } /* Let's wait for at least one worker to start */ while (stmf_nworkers_cur == 0) delay(drv_usectohz(20 * 1000)); - stmf_worker_mgmt_delay = drv_usectohz(3 * 1000 * 1000); } stmf_status_t @@ -6154,7 +6571,6 @@ stmf_worker_fini() return (STMF_SUCCESS); ASSERT(stmf_workers); stmf_workers_state = STMF_WORKERS_DISABLED; - stmf_worker_mgmt_delay = drv_usectohz(20 * 1000); cv_signal(&stmf_state.stmf_cv); sb = ddi_get_lbolt() + drv_usectohz(10 * 1000 * 1000); @@ -6166,12 +6582,12 @@ } delay(drv_usectohz(100 * 1000)); } - for (i = 0; i < stmf_i_max_nworkers; i++) { + for (i = 0; i < stmf_nworkers; i++) { stmf_worker_t *w = &stmf_workers[i]; mutex_destroy(&w->worker_lock); cv_destroy(&w->worker_cv); } - kmem_free(stmf_workers, sizeof (stmf_worker_t) * stmf_i_max_nworkers); + kmem_free(stmf_workers, sizeof (stmf_worker_t) * stmf_nworkers); stmf_workers = NULL; return (STMF_SUCCESS); @@ -6188,7 +6604,6 @@ stmf_worker_task(void *arg) stmf_lu_t *lu; clock_t wait_timer = 0; clock_t wait_ticks, wait_delta = 0; - uint32_t old, new; uint8_t curcmd; uint8_t abort_free; uint8_t wait_queue; @@ -6200,7 +6615,9 @@ DTRACE_PROBE1(worker__create, stmf_worker_t, w); mutex_enter(&w->worker_lock); w->worker_flags |= STMF_WORKER_STARTED | STMF_WORKER_ACTIVE; -stmf_worker_loop:; + atomic_inc_32(&stmf_nworkers_cur); + +stmf_worker_loop: if ((w->worker_ref_count == 0) && (w->worker_flags & STMF_WORKER_TERMINATE)) { w->worker_flags &= ~(STMF_WORKER_STARTED | @@ -6208,10 +6625,13 @@ w->worker_tid = NULL; mutex_exit(&w->worker_lock); DTRACE_PROBE1(worker__destroy, stmf_worker_t, w); + atomic_dec_32(&stmf_nworkers_cur); thread_exit(); } + /* CONSTCOND */ while (1) { + /* worker lock is held at this point */ dec_qdepth = 0; if (wait_timer && (ddi_get_lbolt() >= wait_timer)) { wait_timer = 0; @@ -6229,42 +6649,41 @@ NULL; } } - if ((itask = w->worker_task_head) == NULL) { + + STMF_DEQUEUE_ITASK(w, itask); + if (itask == NULL) break; - } + + ASSERT((itask->itask_flags & ITASK_IN_FREE_LIST) == 0); task = itask->itask_task; DTRACE_PROBE2(worker__active, stmf_worker_t, w, scsi_task_t *, task); - w->worker_task_head = itask->itask_worker_next; - if (w->worker_task_head == NULL) - w->worker_task_tail = NULL; - wait_queue = 0; abort_free = 0; + mutex_exit(&w->worker_lock); + mutex_enter(&itask->itask_mutex); + mutex_enter(&w->worker_lock); + if 
+ if (itask->itask_ncmds > 0) { curcmd = itask->itask_cmd_stack[itask->itask_ncmds - 1]; } else { ASSERT(itask->itask_flags & ITASK_BEING_ABORTED); } - do { - old = itask->itask_flags; - if (old & ITASK_BEING_ABORTED) { - itask->itask_ncmds = 1; - curcmd = itask->itask_cmd_stack[0] = - ITASK_CMD_ABORT; - goto out_itask_flag_loop; - } else if ((curcmd & ITASK_CMD_MASK) == - ITASK_CMD_NEW_TASK) { - /* - * set ITASK_KSTAT_IN_RUNQ, this flag - * will not reset until task completed - */ - new = old | ITASK_KNOWN_TO_LU | - ITASK_KSTAT_IN_RUNQ; - } else { - goto out_itask_flag_loop; - } - } while (atomic_cas_32(&itask->itask_flags, old, new) != old); + if (itask->itask_flags & ITASK_BEING_ABORTED) { + itask->itask_ncmds = 1; + curcmd = itask->itask_cmd_stack[0] = + ITASK_CMD_ABORT; + goto out_itask_flag_loop; + } else if ((curcmd & ITASK_CMD_MASK) == ITASK_CMD_NEW_TASK) { + /* + * Set ITASK_KSTAT_IN_RUNQ; this flag is not + * cleared until the task completes. + */ + itask->itask_flags |= ITASK_KNOWN_TO_LU | + ITASK_KSTAT_IN_RUNQ; + } else { + goto out_itask_flag_loop; + } out_itask_flag_loop:
@@ -6323,24 +6742,29 @@ out_itask_flag_loop: lu = task->task_lu; else lu = dlun0; + dbuf = itask->itask_dbufs[ITASK_CMD_BUF_NDX(curcmd)]; mutex_exit(&w->worker_lock); curcmd &= ITASK_CMD_MASK; stmf_task_audit(itask, TE_PROCESS_CMD, curcmd, dbuf); + mutex_exit(&itask->itask_mutex); + switch (curcmd) { case ITASK_CMD_NEW_TASK: iss = (stmf_i_scsi_session_t *) task->task_session->ss_stmf_private; stmf_itl_lu_new_task(itask); if (iss->iss_flags & ISS_LUN_INVENTORY_CHANGED) { - if (stmf_handle_cmd_during_ic(itask)) + if (stmf_handle_cmd_during_ic(itask)) { break; + } } #ifdef DEBUG if (stmf_drop_task_counter > 0) { - if (atomic_dec_32_nv(&stmf_drop_task_counter) - == 1) + if (atomic_dec_32_nv( + (uint32_t *)&stmf_drop_task_counter) == 1) { break; + } } #endif DTRACE_PROBE1(scsi__task__start, scsi_task_t *, task);
@@ -6354,6 +6778,7 @@ out_itask_flag_loop: break; case ITASK_CMD_ABORT: if (abort_free) { + mutex_enter(&itask->itask_mutex); stmf_task_free(task); } else { stmf_do_task_abort(task);
@@ -6372,6 +6797,7 @@ out_itask_flag_loop: /* case ITASK_CMD_XFER_DATA: */ break; } + mutex_enter(&w->worker_lock); if (dec_qdepth) { w->worker_queue_depth--;
@@ -6399,146 +6825,6 @@ out_itask_flag_loop: goto stmf_worker_loop; } -void -stmf_worker_mgmt() -{ - int i; - int workers_needed; - uint32_t qd; - clock_t tps, d = 0; - uint32_t cur_max_ntasks = 0; - stmf_worker_t *w; - - /* Check if we are trying to increase the # of threads */ - for (i = stmf_nworkers_cur; i < stmf_nworkers_needed; i++) { - if (stmf_workers[i].worker_flags & STMF_WORKER_STARTED) { - stmf_nworkers_cur++; - stmf_nworkers_accepting_cmds++; - } else { - /* Wait for transition to complete */ - return; - } - } - /* Check if we are trying to decrease the # of workers */ - for (i = (stmf_nworkers_cur - 1); i >= stmf_nworkers_needed; i--) { - if ((stmf_workers[i].worker_flags & STMF_WORKER_STARTED) == 0) { - stmf_nworkers_cur--; - /* - * stmf_nworkers_accepting_cmds has already been - * updated by the request to reduce the # of workers.
- */ - } else { - /* Wait for transition to complete */ - return; - } - } - /* Check if we are being asked to quit */ - if (stmf_workers_state != STMF_WORKERS_ENABLED) { - if (stmf_nworkers_cur) { - workers_needed = 0; - goto worker_mgmt_trigger_change; - } - return; - } - /* Check if we are starting */ - if (stmf_nworkers_cur < stmf_i_min_nworkers) { - workers_needed = stmf_i_min_nworkers; - goto worker_mgmt_trigger_change; - } - - tps = drv_usectohz(1 * 1000 * 1000); - if ((stmf_wm_last != 0) && - ((d = ddi_get_lbolt() - stmf_wm_last) > tps)) { - qd = 0; - for (i = 0; i < stmf_nworkers_accepting_cmds; i++) { - qd += stmf_workers[i].worker_max_qdepth_pu; - stmf_workers[i].worker_max_qdepth_pu = 0; - if (stmf_workers[i].worker_max_sys_qdepth_pu > - cur_max_ntasks) { - cur_max_ntasks = - stmf_workers[i].worker_max_sys_qdepth_pu; - } - stmf_workers[i].worker_max_sys_qdepth_pu = 0; - } - } - stmf_wm_last = ddi_get_lbolt(); - if (d <= tps) { - /* still ramping up */ - return; - } - /* max qdepth cannot be more than max tasks */ - if (qd > cur_max_ntasks) - qd = cur_max_ntasks; - - /* See if we have more workers */ - if (qd < stmf_nworkers_accepting_cmds) { - /* - * Since we dont reduce the worker count right away, monitor - * the highest load during the scale_down_delay. - */ - if (qd > stmf_worker_scale_down_qd) - stmf_worker_scale_down_qd = qd; - if (stmf_worker_scale_down_timer == 0) { - stmf_worker_scale_down_timer = ddi_get_lbolt() + - drv_usectohz(stmf_worker_scale_down_delay * - 1000 * 1000); - return; - } - if (ddi_get_lbolt() < stmf_worker_scale_down_timer) { - return; - } - /* Its time to reduce the workers */ - if (stmf_worker_scale_down_qd < stmf_i_min_nworkers) - stmf_worker_scale_down_qd = stmf_i_min_nworkers; - if (stmf_worker_scale_down_qd > stmf_i_max_nworkers) - stmf_worker_scale_down_qd = stmf_i_max_nworkers; - if (stmf_worker_scale_down_qd == stmf_nworkers_cur) - return; - workers_needed = stmf_worker_scale_down_qd; - stmf_worker_scale_down_qd = 0; - goto worker_mgmt_trigger_change; - } - stmf_worker_scale_down_qd = 0; - stmf_worker_scale_down_timer = 0; - if (qd > stmf_i_max_nworkers) - qd = stmf_i_max_nworkers; - if (qd < stmf_i_min_nworkers) - qd = stmf_i_min_nworkers; - if (qd == stmf_nworkers_cur) - return; - workers_needed = qd; - goto worker_mgmt_trigger_change; - - /* NOTREACHED */ - return; - -worker_mgmt_trigger_change: - ASSERT(workers_needed != stmf_nworkers_cur); - if (workers_needed > stmf_nworkers_cur) { - stmf_nworkers_needed = workers_needed; - for (i = stmf_nworkers_cur; i < workers_needed; i++) { - w = &stmf_workers[i]; - w->worker_tid = thread_create(NULL, 0, stmf_worker_task, - (void *)&stmf_workers[i], 0, &p0, TS_RUN, - minclsyspri); - } - return; - } - /* At this point we know that we are decreasing the # of workers */ - stmf_nworkers_accepting_cmds = workers_needed; - stmf_nworkers_needed = workers_needed; - /* Signal the workers that its time to quit */ - for (i = (stmf_nworkers_cur - 1); i >= stmf_nworkers_needed; i--) { - w = &stmf_workers[i]; - ASSERT(w && (w->worker_flags & STMF_WORKER_STARTED)); - mutex_enter(&w->worker_lock); - w->worker_flags |= STMF_WORKER_TERMINATE; - if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) - cv_signal(&w->worker_cv); - mutex_exit(&w->worker_lock); - } -} - /* * Fills out a dbuf from stmf_xfer_data_t (contained in the db_lu_private). 
* If all the data has been filled out, frees the xd and makes @@ -6867,6 +7153,12 @@ stmf_dlun0_ctl(struct stmf_lu *lu, int cmd, void *arg) cmn_err(CE_WARN, "stmf_dlun0_ctl called with cmd %x", cmd); } +/* ARGSUSED */ +void +stmf_dlun0_task_done(struct scsi_task *task) +{ +} + void stmf_dlun_init() { @@ -6881,6 +7173,7 @@ stmf_dlun_init() dlun0->lu_abort = stmf_dlun0_abort; dlun0->lu_task_poll = stmf_dlun0_task_poll; dlun0->lu_ctl = stmf_dlun0_ctl; + dlun0->lu_task_done = stmf_dlun0_task_done; ilu = (stmf_i_lu_t *)dlun0->lu_stmf_private; ilu->ilu_cur_task_cntr = &ilu->ilu_task_cntr1; @@ -7144,17 +7437,28 @@ stmf_itl_task_start(stmf_i_scsi_task_t *itask) stmf_itl_data_t *itl = itask->itask_itl_datap; scsi_task_t *task = itask->itask_task; stmf_i_lu_t *ilu; + stmf_i_scsi_session_t *iss = + itask->itask_task->task_session->ss_stmf_private; + stmf_i_remote_port_t *irport = iss->iss_irport; if (itl == NULL || task->task_lu == dlun0) return; ilu = (stmf_i_lu_t *)task->task_lu->lu_stmf_private; itask->itask_start_timestamp = gethrtime(); + itask->itask_xfer_done_timestamp = 0; if (ilu->ilu_kstat_io != NULL) { mutex_enter(ilu->ilu_kstat_io->ks_lock); stmf_update_kstat_lu_q(itask->itask_task, kstat_waitq_enter); mutex_exit(ilu->ilu_kstat_io->ks_lock); } + if (irport->irport_kstat_estat != NULL) { + if (task->task_flags & TF_READ_DATA) + atomic_inc_32(&irport->irport_nread_tasks); + else if (task->task_flags & TF_WRITE_DATA) + atomic_inc_32(&irport->irport_nwrite_tasks); + } + stmf_update_kstat_lport_q(itask->itask_task, kstat_waitq_enter); } @@ -7193,6 +7497,8 @@ stmf_itl_task_done(stmf_i_scsi_task_t *itask) if (ilu->ilu_kstat_io == NULL) return; + stmf_update_kstat_rport_estat(task); + mutex_enter(ilu->ilu_kstat_io->ks_lock); if (itask->itask_flags & ITASK_KSTAT_IN_RUNQ) { @@ -7206,6 +7512,23 @@ stmf_itl_task_done(stmf_i_scsi_task_t *itask) } } +void +stmf_lu_xfer_done(scsi_task_t *task, boolean_t read, hrtime_t elapsed_time) +{ + stmf_i_scsi_task_t *itask = task->task_stmf_private; + + if (task->task_lu == dlun0) + return; + + if (read) { + atomic_add_64((uint64_t *)&itask->itask_lu_read_time, + elapsed_time); + } else { + atomic_add_64((uint64_t *)&itask->itask_lu_write_time, + elapsed_time); + } +} + static void stmf_lport_xfer_start(stmf_i_scsi_task_t *itask, stmf_data_buf_t *dbuf) { @@ -7233,7 +7556,9 @@ stmf_lport_xfer_done(stmf_i_scsi_task_t *itask, stmf_data_buf_t *dbuf) xfer_size = (dbuf->db_xfer_status == STMF_SUCCESS) ? 
dbuf->db_data_size : 0; - elapsed_time = gethrtime() - dbuf->db_xfer_start_timestamp; + itask->itask_xfer_done_timestamp = gethrtime(); + elapsed_time = itask->itask_xfer_done_timestamp - + dbuf->db_xfer_start_timestamp; if (dbuf->db_flags & DB_DIRECTION_TO_RPORT) { atomic_add_64((uint64_t *)&itask->itask_lport_read_time, elapsed_time);
@@ -7257,7 +7582,8 @@ stmf_svc_init() { if (stmf_state.stmf_svc_flags & STMF_SVC_STARTED) return; - stmf_state.stmf_svc_tailp = &stmf_state.stmf_svc_active; + list_create(&stmf_state.stmf_svc_list, sizeof (stmf_svc_req_t), + offsetof(stmf_svc_req_t, svc_list_entry)); stmf_state.stmf_svc_taskq = ddi_taskq_create(0, "STMF_SVC_TASKQ", 1, TASKQ_DEFAULTPRI, 0); (void) ddi_taskq_dispatch(stmf_state.stmf_svc_taskq,
@@ -7286,6 +7612,7 @@ stmf_svc_fini() if (i == 500) return (STMF_BUSY); + list_destroy(&stmf_state.stmf_svc_list); ddi_taskq_destroy(stmf_state.stmf_svc_taskq); return (STMF_SUCCESS);
@@ -7311,7 +7638,7 @@ stmf_svc(void *arg) stmf_state.stmf_svc_flags |= STMF_SVC_STARTED | STMF_SVC_ACTIVE; while (!(stmf_state.stmf_svc_flags & STMF_SVC_TERMINATE)) { - if (stmf_state.stmf_svc_active == NULL) { + if (list_is_empty(&stmf_state.stmf_svc_list)) { stmf_svc_timeout(&clks); continue; }
@@ -7322,11 +7649,9 @@ stmf_svc(void *arg) * so it should be safe to access it without holding the * stmf state lock. */ - req = stmf_state.stmf_svc_active; - stmf_state.stmf_svc_active = req->svc_next; - - if (stmf_state.stmf_svc_active == NULL) - stmf_state.stmf_svc_tailp = &stmf_state.stmf_svc_active; + req = list_remove_head(&stmf_state.stmf_svc_list); + if (req == NULL) + continue; switch (req->svc_cmd) { case STMF_CMD_LPORT_ONLINE:
@@ -7398,7 +7723,7 @@ stmf_svc_timeout(struct stmf_svc_clocks *clks) ddi_get_lbolt() + drv_usectohz(1*1000*1000); } - if (stmf_state.stmf_svc_active) + if (!list_is_empty(&stmf_state.stmf_svc_list)) return; }
@@ -7423,17 +7748,10 @@ stmf_svc_timeout(struct stmf_svc_clocks *clks) ddi_get_lbolt() + drv_usectohz(1*1000*1000); } - if (stmf_state.stmf_svc_active) + if (!list_is_empty(&stmf_state.stmf_svc_list)) return; } - /* Check if we need to run worker_mgmt */ - if (ddi_get_lbolt() > clks->worker_delay) { - stmf_worker_mgmt(); - clks->worker_delay = ddi_get_lbolt() + - stmf_worker_mgmt_delay; - } - /* Check if any active session got its 1st LUN */ if (stmf_state.stmf_process_initial_luns) { int stmf_level = 0;
@@ -7560,11 +7878,9 @@ stmf_svc_queue(int cmd, void *obj, stmf_state_change_info_t *info) info->st_additional_info); } req->svc_req_alloc_size = s; - req->svc_next = NULL; mutex_enter(&stmf_state.stmf_lock); - *stmf_state.stmf_svc_tailp = req; - stmf_state.stmf_svc_tailp = &req->svc_next; + list_insert_tail(&stmf_state.stmf_svc_list, req); if ((stmf_state.stmf_svc_flags & STMF_SVC_ACTIVE) == 0) { cv_signal(&stmf_state.stmf_cv); }
@@ -7574,29 +7890,15 @@ stmf_svc_queue(int cmd, void *obj, stmf_state_change_info_t *info) static void stmf_svc_kill_obj_requests(void *obj) { - stmf_svc_req_t *prev_req = NULL; stmf_svc_req_t *next_req; stmf_svc_req_t *req; ASSERT(mutex_owned(&stmf_state.stmf_lock)); - for (req = stmf_state.stmf_svc_active; req != NULL; req = next_req) { - next_req = req->svc_next; - + for (req = list_head(&stmf_state.stmf_svc_list); req != NULL; + req = next_req) { + /* capture the next entry before a match can free req */ + next_req = list_next(&stmf_state.stmf_svc_list, req); if (req->svc_obj == obj) { - if (prev_req != NULL) - prev_req->svc_next = next_req; - else - stmf_state.stmf_svc_active = next_req; - - if (next_req == NULL) - stmf_state.stmf_svc_tailp = (prev_req != NULL) ? - &prev_req->svc_next : - &stmf_state.stmf_svc_active; - + list_remove(&stmf_state.stmf_svc_list, req); kmem_free(req, req->svc_req_alloc_size); - } else { - prev_req = req; } } }
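stmf_svc_kill_obj_requests walks the service list while unlinking and freeing matching entries, which is why the next pointer has to be captured before the current node can be freed. In a plain singly linked list the same job is often done with the pointer-to-pointer idiom instead of prev-node bookkeeping; a small userland sketch with hypothetical types:

#include <stdlib.h>

typedef struct node {
	struct node	*n_next;
	void		*n_obj;
} node_t;

/* Unlink and free every node whose n_obj matches obj. */
static void
kill_obj_nodes(node_t **headp, void *obj)
{
	node_t **np = headp;
	node_t *n;

	while ((n = *np) != NULL) {
		if (n->n_obj == obj) {
			*np = n->n_next;	/* unlink before freeing */
			free(n);
		} else {
			np = &n->n_next;	/* advance past a survivor */
		}
	}
}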
@@ -7642,6 +7944,14 @@ stmf_trace_clear() mutex_exit(&trace_buf_lock); } +/* + * NOTE: Due to lock order problems that are not possible to fix, this + * method drops and reacquires the itask_mutex around the call to stmf_ctl. + * Another possible workaround would be to use a dispatch queue and have + * the call to stmf_ctl run on another thread that's not holding the + * itask_mutex. The problem with that approach is that it's difficult to + * determine what impact an asynchronous change would have on the system state. + */ static void stmf_abort_task_offline(scsi_task_t *task, int offline_lu, char *info) {
@@ -7649,10 +7959,14 @@ stmf_abort_task_offline(scsi_task_t *task, int offline_lu, char *info) void *ctl_private; uint32_t ctl_cmd; int msg = 0; + stmf_i_scsi_task_t *itask = + (stmf_i_scsi_task_t *)task->task_stmf_private; stmf_trace("FROM STMF", "abort_task_offline called for %s: %s", offline_lu ? "LU" : "LPORT", info ? info : "no additional info"); change_info.st_additional_info = info; + ASSERT(mutex_owned(&itask->itask_mutex)); + if (offline_lu) { change_info.st_rflags = STMF_RFLAG_RESET | STMF_RFLAG_LU_ABORT;
@@ -7680,7 +7994,9 @@ offline_lu ? "LU" : "LPORT", info ? info : "<no additional info>"); } + mutex_exit(&itask->itask_mutex); (void) stmf_ctl(ctl_cmd, ctl_private, &change_info); + mutex_enter(&itask->itask_mutex); } static char
@@ -7737,7 +8053,7 @@ stmf_scsilib_tptid_validate(scsi_transport_id_t *tptid, uint32_t total_sz, return (B_FALSE); break; - case PROTOCOL_iSCSI: + case PROTOCOL_iSCSI: /* CSTYLED */ { iscsi_transport_id_t *iscsiid; uint16_t adn_len, name_len;
@@ -7782,7 +8098,7 @@ stmf_scsilib_tptid_validate(scsi_transport_id_t *tptid, uint32_t total_sz, case PROTOCOL_SAS: case PROTOCOL_ADT: case PROTOCOL_ATAPI: - default: + default: /* CSTYLED */ { stmf_dflt_scsi_tptid_t *dflttpd;
@@ -7810,7 +8126,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2) switch (tpd1->protocol_id) { - case PROTOCOL_iSCSI: + case PROTOCOL_iSCSI: /* CSTYLED */ { iscsi_transport_id_t *iscsitpd1, *iscsitpd2; uint16_t len;
@@ -7825,7 +8141,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2) } break; - case PROTOCOL_SRP: + case PROTOCOL_SRP: /* CSTYLED */ { scsi_srp_transport_id_t *srptpd1, *srptpd2;
@@ -7837,7 +8153,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2) } break; - case PROTOCOL_FIBRE_CHANNEL: + case PROTOCOL_FIBRE_CHANNEL: /* CSTYLED */ { scsi_fc_transport_id_t *fctpd1, *fctpd2;
@@ -7855,7 +8171,7 @@ stmf_scsilib_tptid_compare(scsi_transport_id_t *tpd1, scsi_transport_id_t *tpd2) case PROTOCOL_SAS: case PROTOCOL_ADT: case PROTOCOL_ATAPI: - default: + default: /* CSTYLED */ { stmf_dflt_scsi_tptid_t *dflt1, *dflt2; uint16_t len;
@@ -7988,3 +8304,74 @@ stmf_remote_port_free(stmf_remote_port_t *rpt) */ kmem_free(rpt, sizeof (stmf_remote_port_t) + rpt->rport_tptid_sz); } + +stmf_lu_t * +stmf_check_and_hold_lu(scsi_task_t *task, uint8_t *guid) +{ + stmf_i_scsi_session_t *iss; + stmf_lu_t *lu; + stmf_i_lu_t *ilu = NULL; + stmf_lun_map_t *sm; + stmf_lun_map_ent_t *lme; + int i; + + iss = (stmf_i_scsi_session_t *)task->task_session->ss_stmf_private; + rw_enter(iss->iss_lockp, RW_READER); + sm = iss->iss_sm; + + for (i = 0; i <
sm->lm_nentries; i++) { + if (sm->lm_plus[i] == NULL) + continue; + lme = (stmf_lun_map_ent_t *)sm->lm_plus[i]; + lu = lme->ent_lu; + if (bcmp(lu->lu_id->ident, guid, 16) == 0) { + break; + } + lu = NULL; + } + + if (!lu) { + goto hold_lu_done; + } + + ilu = lu->lu_stmf_private; + mutex_enter(&ilu->ilu_task_lock); + ilu->ilu_additional_ref++; + mutex_exit(&ilu->ilu_task_lock); + +hold_lu_done: + rw_exit(iss->iss_lockp); + return (lu); +} + +void +stmf_release_lu(stmf_lu_t *lu) +{ + stmf_i_lu_t *ilu; + + ilu = lu->lu_stmf_private; + ASSERT(ilu->ilu_additional_ref != 0); + mutex_enter(&ilu->ilu_task_lock); + ilu->ilu_additional_ref--; + mutex_exit(&ilu->ilu_task_lock); +} + +int +stmf_is_task_being_aborted(scsi_task_t *task) +{ + stmf_i_scsi_task_t *itask; + + itask = (stmf_i_scsi_task_t *)task->task_stmf_private; + if (itask->itask_flags & ITASK_BEING_ABORTED) + return (1); + + return (0); +} + +volatile boolean_t stmf_pgr_aptpl_always = B_FALSE; + +boolean_t +stmf_is_pgr_aptpl_always() +{ + return (stmf_pgr_aptpl_always); +} diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h index d477224863..b590726168 100644 --- a/usr/src/uts/common/io/comstar/stmf/stmf_impl.h +++ b/usr/src/uts/common/io/comstar/stmf/stmf_impl.h @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifndef _STMF_IMPL_H @@ -89,7 +91,7 @@ typedef struct stmf_i_lu { uint32_t ilu_ntasks; /* # of tasks in the ilu_task list */ uint32_t ilu_ntasks_free; /* # of tasks that are free */ uint32_t ilu_ntasks_min_free; /* # minimal free tasks */ - uint32_t rsvd1; + uint32_t ilu_additional_ref; uint32_t ilu_proxy_registered; uint64_t ilu_reg_msgid; struct stmf_i_scsi_task *ilu_tasks; @@ -164,6 +166,18 @@ typedef struct stmf_i_remote_port { int irport_refcnt; id_t irport_instance; avl_node_t irport_ln; + /* number of active read tasks */ + uint32_t irport_nread_tasks; + /* number of active write tasks */ + uint32_t irport_nwrite_tasks; + hrtime_t irport_rdstart_timestamp; + hrtime_t irport_rddone_timestamp; + hrtime_t irport_wrstart_timestamp; + hrtime_t irport_wrdone_timestamp; + kstat_t *irport_kstat_info; + kstat_t *irport_kstat_io; + kstat_t *irport_kstat_estat; /* extended stats */ + boolean_t irport_info_dirty; } stmf_i_remote_port_t; /* @@ -237,6 +251,7 @@ typedef struct stmf_i_scsi_task { scsi_task_t *itask_task; uint32_t itask_alloc_size; uint32_t itask_flags; + kmutex_t itask_mutex; /* protects flags and lists */ uint64_t itask_proxy_msg_id; stmf_data_buf_t *itask_proxy_dbuf; struct stmf_worker *itask_worker; @@ -245,7 +260,6 @@ typedef struct stmf_i_scsi_task { struct stmf_i_scsi_task *itask_lu_next; struct stmf_i_scsi_task *itask_lu_prev; struct stmf_i_scsi_task *itask_lu_free_next; - struct stmf_i_scsi_task *itask_abort_next; struct stmf_itl_data *itask_itl_datap; clock_t itask_start_time; /* abort and normal */ /* For now we only support 4 parallel buffers. Should be enough. 
*/ @@ -259,6 +273,7 @@ typedef struct stmf_i_scsi_task { /* Task profile data */ hrtime_t itask_start_timestamp; hrtime_t itask_done_timestamp; + hrtime_t itask_xfer_done_timestamp; hrtime_t itask_waitq_enter_timestamp; hrtime_t itask_waitq_time; hrtime_t itask_lu_read_time;
@@ -275,6 +290,41 @@ typedef struct stmf_i_scsi_task { #define ITASK_DEFAULT_ABORT_TIMEOUT 5 /* + * Common code to enqueue an itask onto the worker_task queue is placed + * in this macro to simplify future maintenance activity. + */ +#define STMF_ENQUEUE_ITASK(w, i) \ + ASSERT(((i)->itask_flags & ITASK_IN_FREE_LIST) == 0); \ + ASSERT(mutex_owned(&(i)->itask_mutex)); \ + ASSERT(mutex_owned(&w->worker_lock)); \ + i->itask_worker_next = NULL; \ + if (w->worker_task_tail) { \ + w->worker_task_tail->itask_worker_next = i; \ + } else { \ + w->worker_task_head = i; \ + } \ + w->worker_task_tail = i; \ + if (++(w->worker_queue_depth) > w->worker_max_qdepth_pu) { \ + w->worker_max_qdepth_pu = w->worker_queue_depth; \ + } \ + atomic_inc_32(&w->worker_ref_count); \ + atomic_or_32(&(i)->itask_flags, ITASK_IN_WORKER_QUEUE); \ + i->itask_waitq_enter_timestamp = gethrtime(); \ + if ((w->worker_flags & STMF_WORKER_ACTIVE) == 0) \ + cv_signal(&w->worker_cv); + +#define STMF_DEQUEUE_ITASK(w, itask) \ + ASSERT(mutex_owned(&w->worker_lock)); \ + if ((itask = w->worker_task_head) != NULL) { \ + w->worker_task_head = itask->itask_worker_next; \ + if (w->worker_task_head == NULL) { \ + w->worker_task_tail = NULL; \ + } \ + } else { \ + w->worker_task_tail = NULL; \ + } + +/* * itask_flags */ #define ITASK_IN_FREE_LIST 0x0001 diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_state.h b/usr/src/uts/common/io/comstar/stmf/stmf_state.h index 2d7a81f93d..221c732887 100644 --- a/usr/src/uts/common/io/comstar/stmf/stmf_state.h +++ b/usr/src/uts/common/io/comstar/stmf/stmf_state.h
@@ -22,7 +22,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ /* - * Copyright 2011, Nexenta Systems, Inc. All rights reserved. + * Copyright 2016, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifndef _STMF_STATE_H
@@ -62,8 +62,7 @@ typedef struct stmf_state { uint32_t stmf_svc_flags; stmf_i_lu_t *stmf_svc_ilu_draining; stmf_i_lu_t *stmf_svc_ilu_timing; - struct stmf_svc_req *stmf_svc_active; - struct stmf_svc_req **stmf_svc_tailp; + list_t stmf_svc_list; stmf_id_list_t stmf_hg_list; stmf_id_list_t stmf_tg_list;
@@ -86,7 +85,7 @@ typedef struct stmf_state { * different types of services) are added to the stmf_svc_thread.
*/ typedef struct stmf_svc_req { - struct stmf_svc_req *svc_next; + list_node_t svc_list_entry; int svc_req_alloc_size; int svc_cmd; void *svc_obj; diff --git a/usr/src/uts/common/io/comstar/stmf/stmf_stats.h b/usr/src/uts/common/io/comstar/stmf/stmf_stats.h index 2247c1ffd7..3b4184e96a 100644 --- a/usr/src/uts/common/io/comstar/stmf/stmf_stats.h +++ b/usr/src/uts/common/io/comstar/stmf/stmf_stats.h @@ -29,6 +29,8 @@ extern "C" { #endif +#include <sys/portif.h> + typedef struct stmf_kstat_itl_info { kstat_named_t i_rport_name; kstat_named_t i_rport_alias; @@ -58,6 +60,21 @@ typedef struct stmf_kstat_tgt_info { kstat_named_t i_protocol; } stmf_kstat_tgt_info_t; +#define STMF_RPORT_INFO_LIMIT 8 + +typedef struct stmf_kstat_rport_info { + kstat_named_t i_rport_name; + kstat_named_t i_protocol; + kstat_named_t i_rport_uinfo[STMF_RPORT_INFO_LIMIT]; +} stmf_kstat_rport_info_t; + +typedef struct stmf_kstat_rport_estat { + kstat_named_t i_rport_read_latency; + kstat_named_t i_rport_write_latency; + kstat_named_t i_nread_tasks; + kstat_named_t i_nwrite_tasks; +} stmf_kstat_rport_estat_t; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/io/idm/idm.c b/usr/src/uts/common/io/idm/idm.c index 877a0e6f20..1361b8dfb9 100644 --- a/usr/src/uts/common/io/idm/idm.c +++ b/usr/src/uts/common/io/idm/idm.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ #include <sys/cpuvar.h> @@ -61,7 +62,7 @@ static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf); static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf); static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf); static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf); -static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, +static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type); static void idm_task_aborted(idm_task_t *idt, idm_status_t status); static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, @@ -1523,11 +1524,12 @@ idm_task_rele(idm_task_t *idt) idm_refcnt_rele(&idt->idt_refcnt); } -void +stmf_status_t idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) { idm_task_t *task; int idx; + stmf_status_t s = STMF_SUCCESS; /* * Passing NULL as the task indicates that all tasks @@ -1549,7 +1551,7 @@ idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) (task->idt_state != TASK_COMPLETE) && (task->idt_ic == ic)) { rw_exit(&idm.idm_taskid_table_lock); - idm_task_abort_one(ic, task, abort_type); + s = idm_task_abort_one(ic, task, abort_type); rw_enter(&idm.idm_taskid_table_lock, RW_READER); } else mutex_exit(&task->idt_mutex); @@ -1557,8 +1559,9 @@ idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) rw_exit(&idm.idm_taskid_table_lock); } else { mutex_enter(&idt->idt_mutex); - idm_task_abort_one(ic, idt, abort_type); + s = idm_task_abort_one(ic, idt, abort_type); } + return (s); } static void @@ -1589,9 +1592,11 @@ idm_task_abort_unref_cb(void *ref) * Abort the idm task. 
* Caller must hold the task mutex, which will be released before return */ -static void +static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) { + stmf_status_t s = STMF_SUCCESS; + /* Caller must hold connection mutex */ ASSERT(mutex_owned(&idt->idt_mutex)); switch (idt->idt_state) { @@ -1610,7 +1615,7 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) */ idm_refcnt_async_wait_ref(&idt->idt_refcnt, &idm_task_abort_unref_cb); - return; + return (s); case AT_INTERNAL_ABORT: case AT_TASK_MGMT_ABORT: idt->idt_state = TASK_ABORTING; @@ -1624,7 +1629,7 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) */ idm_refcnt_async_wait_ref(&idt->idt_refcnt, &idm_task_abort_unref_cb); - return; + return (s); default: ASSERT(0); } @@ -1664,7 +1669,7 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) */ idm_refcnt_async_wait_ref(&idt->idt_refcnt, &idm_task_abort_unref_cb); - return; + return (s); default: ASSERT(0); } @@ -1683,17 +1688,15 @@ idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) } break; case TASK_COMPLETE: - /* - * In this case, let it go. The status has already been - * sent (which may or may not get successfully transmitted) - * and we don't want to end up in a race between completing - * the status PDU and marking the task suspended. - */ + idm_refcnt_wait_ref(&idt->idt_refcnt); + s = STMF_ABORT_SUCCESS; break; default: ASSERT(0); } mutex_exit(&idt->idt_mutex); + + return (s); } static void @@ -2267,6 +2270,29 @@ idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt, mutex_exit(&refcnt->ir_mutex); } +/* + * used to determine the status of the refcnt. + * + * if refcnt is 0 return is 0 + * if refcnt is negative return is -1 + * if refcnt > 0 and no waiters return is 1 + * if refcnt > 0 and waiters return is 2 + */ +int +idm_refcnt_is_held(idm_refcnt_t *refcnt) +{ + if (refcnt->ir_refcnt < 0) + return (-1); + + if (refcnt->ir_refcnt == 0) + return (0); + + if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0) + return (1); + + return (2); +} + void idm_conn_hold(idm_conn_t *ic) { diff --git a/usr/src/uts/common/io/idm/idm_conn_sm.c b/usr/src/uts/common/io/idm/idm_conn_sm.c index 52e515a983..205b7b43d0 100644 --- a/usr/src/uts/common/io/idm/idm_conn_sm.c +++ b/usr/src/uts/common/io/idm/idm_conn_sm.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ #include <sys/cpuvar.h> @@ -1246,7 +1246,7 @@ idm_update_state(idm_conn_t *ic, idm_conn_state_t new_state, } /* Stop executing active tasks */ - idm_task_abort(ic, NULL, AT_INTERNAL_SUSPEND); + (void) idm_task_abort(ic, NULL, AT_INTERNAL_SUSPEND); /* Start logout timer */ IDM_SM_TIMER_CHECK(ic); @@ -1302,7 +1302,7 @@ idm_update_state(idm_conn_t *ic, idm_conn_state_t new_state, } /* Abort all tasks */ - idm_task_abort(ic, NULL, AT_INTERNAL_ABORT); + (void) idm_task_abort(ic, NULL, AT_INTERNAL_ABORT); /* * Handle terminal state actions on the global taskq so diff --git a/usr/src/uts/common/io/idm/idm_so.c b/usr/src/uts/common/io/idm/idm_so.c index 345483eeba..5c3bbd1cd9 100644 --- a/usr/src/uts/common/io/idm/idm_so.c +++ b/usr/src/uts/common/io/idm/idm_so.c @@ -24,6 +24,7 @@ */ /* * Copyright (c) 2013 by Delphix. All rights reserved. 
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017, Joyent, Inc. All rights reserved. */
@@ -859,6 +860,11 @@ idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu) /* * Check actual AHS length against the amount available in the buffer */ + if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) && + (bhs->hlength != 0)) { + /* ---- hlength is only valid for a SCSI Request ---- */ + return (IDM_STATUS_FAIL); + } pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + (bhs->hlength * sizeof (uint32_t)); pdu->isp_datalen = n2h24(bhs->dlength);
@@ -868,7 +874,7 @@ idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu) "idm_sorecvhdr: invalid data segment length"); return (IDM_STATUS_FAIL); } - if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) { + if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) { /* Allocate a new header segment and change the callback */ new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP); bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t)); diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c index 38795beab5..3d98c3b80d 100644 --- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.c
@@ -21,6 +21,7 @@ /* * Copyright 2000 by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org> * * iSCSI Software Initiator
@@ -848,6 +849,7 @@ iscsi_tran_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt, icmdp->cmd_sig = ISCSI_SIG_CMD; icmdp->cmd_state = ISCSI_CMD_STATE_FREE; icmdp->cmd_lun = ilp; + iscsi_lun_hold(ilp); icmdp->cmd_type = ISCSI_CMD_TYPE_SCSI; /* add the report lun addressing type on to the lun */ icmdp->cmd_un.scsi.lun = ilp->lun_addr_type << 14;
@@ -1091,6 +1093,7 @@ iscsi_tran_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt) ASSERT(icmdp->cmd_sig == ISCSI_SIG_CMD); ASSERT(icmdp->cmd_state == ISCSI_CMD_STATE_FREE); + iscsi_lun_rele(icmdp->cmd_lun); mutex_destroy(&icmdp->cmd_mutex); cv_destroy(&icmdp->cmd_completion); scsi_hba_pkt_free(ap, pkt); diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h index 62b6487866..67e860b550 100644 --- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi.h
@@ -22,7 +22,7 @@ /* * Copyright 2000 by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014-2015 Nexenta Systems, Inc. All rights reserved.
*/ #ifndef _ISCSI_H @@ -549,6 +549,8 @@ typedef struct iscsi_lun { uchar_t lun_pid[ISCSI_INQ_PID_BUF_LEN]; /* Product ID */ uchar_t lun_type; + kmutex_t lun_mutex; + int lun_refcnt; } iscsi_lun_t; #define ISCSI_LUN_STATE_CLEAR 0 /* used to clear all states */ @@ -1328,8 +1330,9 @@ void iscsi_conn_update_state_locked(iscsi_conn_t *icp, /* iscsi_lun.c */ iscsi_status_t iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type, struct scsi_inquiry *inq, char *guid); -iscsi_status_t iscsi_lun_destroy(iscsi_hba_t *ihp, - iscsi_lun_t *ilp); +void iscsi_lun_hold(iscsi_lun_t *ilp); +void iscsi_lun_rele(iscsi_lun_t *ilp); +iscsi_status_t iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp); void iscsi_lun_online(iscsi_hba_t *ihp, iscsi_lun_t *ilp); iscsi_status_t iscsi_lun_offline(iscsi_hba_t *ihp, diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c index 3d9f59b9e2..287f2e35d2 100644 --- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_cmd.c @@ -22,6 +22,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * iSCSI command interfaces */ @@ -796,8 +797,8 @@ iscsi_cmd_state_active(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg) */ ISCSI_CMD_SET_REASON_STAT( t_icmdp, CMD_TIMEOUT, STAT_ABORTED); - idm_task_abort(icp->conn_ic, t_icmdp->cmd_itp, - AT_TASK_MGMT_ABORT); + (void) idm_task_abort(icp->conn_ic, + t_icmdp->cmd_itp, AT_TASK_MGMT_ABORT); } else { cv_broadcast(&t_icmdp->cmd_completion); } @@ -942,7 +943,7 @@ iscsi_cmd_state_active(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg) */ ISCSI_CMD_SET_REASON_STAT(t_icmdp, CMD_TIMEOUT, STAT_TIMEOUT); - idm_task_abort(icp->conn_ic, + (void) idm_task_abort(icp->conn_ic, t_icmdp->cmd_itp, AT_TASK_MGMT_ABORT); } else { @@ -1027,7 +1028,7 @@ iscsi_cmd_state_active(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg) mutex_exit(&icp->conn_queue_idm_aborting.mutex); ISCSI_CMD_SET_REASON_STAT(icmdp, CMD_TRAN_ERR, icmdp->cmd_un.scsi.pkt_stat); - idm_task_abort(icp->conn_ic, icmdp->cmd_itp, + (void) idm_task_abort(icp->conn_ic, icmdp->cmd_itp, AT_TASK_MGMT_ABORT); break; @@ -1208,7 +1209,7 @@ iscsi_cmd_state_aborting(iscsi_cmd_t *icmdp, iscsi_cmd_event_t event, void *arg) ISCSI_CMD_SET_REASON_STAT(icmdp, CMD_TRAN_ERR, icmdp->cmd_un.scsi.pkt_stat); - idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp, + (void) idm_task_abort(icmdp->cmd_conn->conn_ic, icmdp->cmd_itp, AT_TASK_MGMT_ABORT); break; diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c index 365ca58528..354c343a7c 100644 --- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_io.c @@ -21,7 +21,7 @@ /* * Copyright 2000 by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2011, 2015 Nexenta Systems, Inc. All rights reserved. 
* * iSCSI Pseudo HBA Driver */
@@ -3511,11 +3511,14 @@ iscsi_timeout_checks(iscsi_sess_t *isp) icp = isp->sess_conn_list; while (icp != NULL) { - if (icp->conn_timeout == B_TRUE) { + mutex_enter(&icp->conn_state_mutex); + if ((icp->conn_timeout == B_TRUE) && + (icp->conn_state_idm_connected == B_TRUE)) { /* timeout on this connection detected */ idm_ini_conn_disconnect(icp->conn_ic); icp->conn_timeout = B_FALSE; } + mutex_exit(&icp->conn_state_mutex); icp = icp->conn_next; } rw_exit(&isp->sess_conn_list_rwlock); diff --git a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c index b5e8b17fb2..2e707d9fcc 100644 --- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_lun.c
@@ -21,12 +21,17 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * + */ + +/* + * Copyright 2019 Nexenta Systems, Inc. + */ + +/* * iSCSI logical unit interfaces */ #include "iscsi.h" -#include <sys/fs/dv_node.h> /* devfs_clean */ #include <sys/bootprops.h> #include <sys/sysevent/eventdefs.h> #include <sys/sysevent/dev.h>
@@ -123,6 +128,11 @@ iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type, ilp->lun_addr = addr; ilp->lun_type = inq->inq_dtype & DTYPE_MASK; ilp->lun_oid = oid_tmp; + /* + * Setting refcnt to 1 is the first hold for the LUN structure. + */ + ilp->lun_refcnt = 1; + mutex_init(&ilp->lun_mutex, NULL, MUTEX_DRIVER, NULL); bcopy(inq->inq_vid, ilp->lun_vid, sizeof (inq->inq_vid)); bcopy(inq->inq_pid, ilp->lun_pid, sizeof (inq->inq_pid));
@@ -189,6 +199,7 @@ iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type, kmem_free(ilp->lun_guid, ilp->lun_guid_size); ilp->lun_guid = NULL; } + mutex_destroy(&ilp->lun_mutex); kmem_free(ilp, sizeof (iscsi_lun_t)); } else { ilp->lun_state &= ISCSI_LUN_STATE_CLEAR;
@@ -215,6 +226,81 @@ iscsi_lun_create(iscsi_sess_t *isp, uint16_t lun_num, uint8_t lun_addr_type, return (rtn); } +void +iscsi_lun_hold(iscsi_lun_t *ilp) +{ + mutex_enter(&ilp->lun_mutex); + /* + * By design lun_refcnt should never be zero when this routine + * is called. When the LUN is created the refcnt is set to 1. + * If iscsi_lun_rele is called and the refcnt goes to zero the + * structure will be freed so this method shouldn't be called + * afterwards. + */ + ASSERT(ilp->lun_refcnt > 0); + ilp->lun_refcnt++; + mutex_exit(&ilp->lun_mutex); +} + +void +iscsi_lun_rele(iscsi_lun_t *ilp) +{ + ASSERT(ilp != NULL); + + mutex_enter(&ilp->lun_mutex); + ASSERT(ilp->lun_refcnt > 0); + if (--ilp->lun_refcnt == 0) { + iscsi_sess_t *isp; + + isp = ilp->lun_sess; + ASSERT(isp != NULL); + + /* ---- release its memory ---- */ + kmem_free(ilp->lun_addr, (strlen((char *)isp->sess_name) + + ADDR_EXT_SIZE + 1)); + + if (ilp->lun_guid != NULL) { + kmem_free(ilp->lun_guid, ilp->lun_guid_size); + } + /* ---- last reference; no other holders can exist ---- */ + mutex_exit(&ilp->lun_mutex); + mutex_destroy(&ilp->lun_mutex); + kmem_free(ilp, sizeof (iscsi_lun_t)); + } else { + mutex_exit(&ilp->lun_mutex); + } +}
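iscsi_lun_hold and iscsi_lun_rele above are a conventional mutex-protected reference count in which creation takes the first hold and the final release tears the object down. The same lifetime rule in a compact userland sketch (obj_t and its fields are hypothetical):

#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

typedef struct obj {
	pthread_mutex_t	o_lock;
	int		o_refcnt;	/* starts at 1 on creation */
} obj_t;

void
obj_hold(obj_t *o)
{
	pthread_mutex_lock(&o->o_lock);
	assert(o->o_refcnt > 0);	/* never resurrect a dying object */
	o->o_refcnt++;
	pthread_mutex_unlock(&o->o_lock);
}

void
obj_rele(obj_t *o)
{
	pthread_mutex_lock(&o->o_lock);
	assert(o->o_refcnt > 0);
	if (--o->o_refcnt == 0) {
		/* last reference: no other holder can race with us */
		pthread_mutex_unlock(&o->o_lock);
		pthread_mutex_destroy(&o->o_lock);
		free(o);
		return;
	}
	pthread_mutex_unlock(&o->o_lock);
}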
+ +/* + * iscsi_lun_cmd_cancel -- as the name implies, cancel all commands for the lun + * + * This code is similar to the timeout function with a lot less checking of + * state before sending the ABORT event for commands on the pending queue. + * + * This function is only used by iscsi_lun_destroy(). + */ +static void +iscsi_lun_cmd_cancel(iscsi_lun_t *ilp) +{ + iscsi_sess_t *isp; + iscsi_cmd_t *icmdp, *nicmdp; + + isp = ilp->lun_sess; + rw_enter(&isp->sess_state_rwlock, RW_READER); + mutex_enter(&isp->sess_queue_pending.mutex); + for (icmdp = isp->sess_queue_pending.head; + icmdp; icmdp = nicmdp) { + nicmdp = icmdp->cmd_next; + + /* + * For commands on the pending queue we can go straight + * to an abort request, which will free the command + * and call back to the complete function. + */ + iscsi_cmd_state_machine(icmdp, ISCSI_CMD_EVENT_E4, isp); + } + mutex_exit(&isp->sess_queue_pending.mutex); + rw_exit(&isp->sess_state_rwlock); +} + /* * iscsi_lun_destroy - offline and remove lun *
@@ -240,6 +326,9 @@ iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp) isp = ilp->lun_sess; ASSERT(isp != NULL); + /* flush all outstanding commands first */ + iscsi_lun_cmd_cancel(ilp); + /* attempt to offline and free solaris node */ status = iscsi_lun_offline(ihp, ilp, B_TRUE);
@@ -269,16 +358,7 @@ iscsi_lun_destroy(iscsi_hba_t *ihp, iscsi_lun_t *ilp) } } - /* release its memory */ - kmem_free(ilp->lun_addr, (strlen((char *)isp->sess_name) + - ADDR_EXT_SIZE + 1)); - ilp->lun_addr = NULL; - if (ilp->lun_guid != NULL) { - kmem_free(ilp->lun_guid, ilp->lun_guid_size); - ilp->lun_guid = NULL; - } - kmem_free(ilp, sizeof (iscsi_lun_t)); - ilp = NULL; + iscsi_lun_rele(ilp); } return (status);
@@ -641,56 +721,18 @@ iscsi_lun_offline(iscsi_hba_t *ihp, iscsi_lun_t *ilp, boolean_t lun_free) { iscsi_status_t status = ISCSI_STATUS_SUCCESS; int circ = 0; - dev_info_t *cdip, *pdip; - char *devname = NULL; + dev_info_t *cdip; char *pathname = NULL; - int rval; boolean_t offline = B_FALSE; nvlist_t *attr_list = NULL; ASSERT(ilp != NULL); ASSERT((ilp->lun_pip != NULL) || (ilp->lun_dip != NULL)); - /* - * Since we carry the logical units parent - * lock across the offline call it will not - * issue devfs_clean() and may fail with a - * devi_ref count > 0. - */ - if (ilp->lun_pip == NULL) { + if (ilp->lun_pip == NULL) cdip = ilp->lun_dip; - } else { + else cdip = mdi_pi_get_client(ilp->lun_pip); - } - - if ((cdip != NULL) && - (lun_free == B_TRUE) && - (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) { - /* - * Make sure node is attached otherwise - * it won't have related cache nodes to - * clean up. i_ddi_devi_attached is - * similiar to i_ddi_node_state(cdip) >= - * DS_ATTACHED. We should clean up only - * when lun_free is set.
- */ - if (i_ddi_devi_attached(cdip)) { - - /* Get parent dip */ - pdip = ddi_get_parent(cdip); - - /* Get full devname */ - devname = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); - ndi_devi_enter(pdip, &circ); - (void) ddi_deviname(cdip, devname); - /* Release lock before devfs_clean() */ - ndi_devi_exit(pdip, circ); - - /* Clean cache */ - (void) devfs_clean(pdip, devname + 1, DV_CLEAN_FORCE); - kmem_free(devname, MAXNAMELEN + 1); - } - } if (cdip != NULL && ilp->lun_type == DTYPE_DIRECT) { pathname = kmem_zalloc(MAXNAMELEN + 1, KM_SLEEP); @@ -699,18 +741,9 @@ iscsi_lun_offline(iscsi_hba_t *ihp, iscsi_lun_t *ilp, boolean_t lun_free) /* Attempt to offline the logical units */ if (ilp->lun_pip != NULL) { - /* virt/mdi */ ndi_devi_enter(scsi_vhci_dip, &circ); - if ((lun_free == B_TRUE) && - (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) { - rval = mdi_pi_offline(ilp->lun_pip, - NDI_DEVI_REMOVE); - } else { - rval = mdi_pi_offline(ilp->lun_pip, 0); - } - - if (rval == MDI_SUCCESS) { + if (mdi_pi_offline(ilp->lun_pip, 0) == MDI_SUCCESS) { ilp->lun_state &= ISCSI_LUN_STATE_CLEAR; ilp->lun_state |= ISCSI_LUN_STATE_OFFLINE; if (lun_free == B_TRUE) { @@ -728,18 +761,14 @@ iscsi_lun_offline(iscsi_hba_t *ihp, iscsi_lun_t *ilp, boolean_t lun_free) ndi_devi_exit(scsi_vhci_dip, circ); } else { - /* phys/ndi */ + int flags = NDI_DEVFS_CLEAN; + ndi_devi_enter(ihp->hba_dip, &circ); - if ((lun_free == B_TRUE) && - (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) { - rval = ndi_devi_offline( - ilp->lun_dip, NDI_DEVI_REMOVE); - } else { - rval = ndi_devi_offline( - ilp->lun_dip, 0); - } - if (rval != NDI_SUCCESS) { + if (lun_free == B_TRUE && + (ilp->lun_state & ISCSI_LUN_STATE_ONLINE)) + flags |= NDI_DEVI_REMOVE; + if (ndi_devi_offline(ilp->lun_dip, flags) != NDI_SUCCESS) { status = ISCSI_STATUS_BUSY; if (lun_free == B_FALSE) { ilp->lun_state |= ISCSI_LUN_STATE_INVALID; diff --git a/usr/src/uts/common/netsmb/smb_dev.h b/usr/src/uts/common/netsmb/smb_dev.h index 817d214b3e..ceb22371b7 100644 --- a/usr/src/uts/common/netsmb/smb_dev.h +++ b/usr/src/uts/common/netsmb/smb_dev.h @@ -126,7 +126,7 @@ #define SMB2_DIALECT_0302 0x0302 /* Maximum supported dialect (for ssn_maxver) */ -#define SMB2_DIALECT_MAX SMB2_DIALECT_0210 +#define SMB2_DIALECT_MAX SMB2_DIALECT_0302 /* * Option flags in smbioc_oshare.ioc_opt diff --git a/usr/src/uts/common/sys/idm/idm.h b/usr/src/uts/common/sys/idm/idm.h index 0be365e42e..875a3fce4a 100644 --- a/usr/src/uts/common/sys/idm/idm.h +++ b/usr/src/uts/common/sys/idm/idm.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -31,6 +32,8 @@ extern "C" { #endif +#include <sys/stmf_defines.h> + typedef enum { IDM_STATUS_SUCCESS = 0, IDM_STATUS_FAIL, @@ -45,31 +48,29 @@ typedef enum { IDM_STATUS_LOGIN_FAIL } idm_status_t; +#define IDM_CLIENT_NOTIFY_LIST() \ + item(CN_UNDEFINED) \ + item(CN_CONNECT_ACCEPT) /* Target only */ \ + item(CN_LOGIN_FAIL) \ + item(CN_READY_FOR_LOGIN) /* Initiator only */ \ + item(CN_FFP_ENABLED) \ + item(CN_FFP_DISABLED) \ + item(CN_CONNECT_LOST) \ + item(CN_CONNECT_DESTROY) \ + item(CN_CONNECT_FAIL) \ + item(CN_MAX) typedef enum { - CN_CONNECT_ACCEPT = 1, /* Target only */ - CN_LOGIN_FAIL, - CN_READY_FOR_LOGIN, /* Initiator only */ - CN_FFP_ENABLED, - CN_FFP_DISABLED, - CN_CONNECT_LOST, - CN_CONNECT_DESTROY, - CN_CONNECT_FAIL, - CN_MAX +#define item(a) a, + IDM_CLIENT_NOTIFY_LIST() +#undef item } idm_client_notify_t; #ifdef IDM_CN_NOTIFY_STRINGS static const char *idm_cn_strings[CN_MAX + 1] = { - "CN_UNDEFINED", - "CN_CONNECT_ACCEPT", - "CN_LOGIN_FAIL", - "CN_READY_FOR_LOGIN", - "CN_FFP_ENABLED", - "CN_FFP_DISABLED", - "CN_CONNECT_LOST", - "CN_CONNECT_DESTROY", - "CN_CONNECT_FAIL", - "CN_MAX" +#define item(a) #a, + IDM_CLIENT_NOTIFY_LIST() +#undef item }; #endif @@ -85,27 +86,27 @@ typedef enum { AT_TASK_MGMT_ABORT } idm_abort_type_t; +#define IDM_TASK_STATE_LIST() \ + item(TASK_IDLE) \ + item(TASK_ACTIVE) \ + item(TASK_SUSPENDING) \ + item(TASK_SUSPENDED) \ + item(TASK_ABORTING) \ + item(TASK_ABORTED) \ + item(TASK_COMPLETE) \ + item(TASK_MAX_STATE) + typedef enum { - TASK_IDLE, - TASK_ACTIVE, - TASK_SUSPENDING, - TASK_SUSPENDED, - TASK_ABORTING, - TASK_ABORTED, - TASK_COMPLETE, - TASK_MAX_STATE +#define item(a) a, + IDM_TASK_STATE_LIST() +#undef item } idm_task_state_t; #ifdef IDM_TASK_SM_STRINGS static const char *idm_ts_name[TASK_MAX_STATE+1] = { - "TASK_IDLE", - "TASK_ACTIVE", - "TASK_SUSPENDING", - "TASK_SUSPENDED", - "TASK_ABORTING", - "TASK_ABORTED", - "TASK_COMPLETE", - "TASK_MAX_STATE" +#define item(a) #a, + IDM_TASK_STATE_LIST() +#undef item }; #endif @@ -422,12 +423,12 @@ idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type); extern boolean_t idm_pattern_checking; -#define IDM_BUFPAT_SET(CHK_BUF) \ +#define IDM_BUFPAT_SET(CHK_BUF) \ if (idm_pattern_checking && (CHK_BUF)->idb_bufalloc) { \ idm_bufpat_set(CHK_BUF); \ } -#define IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE) \ +#define IDM_BUFPAT_CHECK(CHK_BUF, CHK_LEN, CHK_TYPE) \ if (idm_pattern_checking) { \ (void) idm_bufpat_check(CHK_BUF, CHK_LEN, CHK_TYPE); \ } @@ -441,7 +442,7 @@ idm_task_alloc(idm_conn_t *ic); void idm_task_start(idm_task_t *idt, uintptr_t handle); -void +stmf_status_t idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type); void @@ -524,6 +525,8 @@ idm_refcnt_wait_ref(idm_refcnt_t *refcnt); void idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func); +int +idm_refcnt_is_held(idm_refcnt_t *refcnt); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/idm/idm_conn_sm.h b/usr/src/uts/common/sys/idm/idm_conn_sm.h index 38a7526368..8b9056fb56 100644 --- a/usr/src/uts/common/sys/idm/idm_conn_sm.h +++ b/usr/src/uts/common/sys/idm/idm_conn_sm.h @@ -65,158 +65,106 @@ extern "C" { #define IDM_LOGOUT_SECONDS 20 #define IDM_CLEANUP_SECONDS 0 +#define IDM_CONN_EVENT_LIST() \ + item(CE_UNDEFINED) \ + /* Initiator events */ \ + item(CE_CONNECT_REQ) \ + item(CE_CONNECT_FAIL) \ + item(CE_CONNECT_SUCCESS) \ + item(CE_LOGIN_SND) \ + item(CE_LOGIN_SUCCESS_RCV) \ + item(CE_LOGIN_FAIL_RCV) \ + item(CE_LOGOUT_THIS_CONN_SND) \ + 
item(CE_LOGOUT_OTHER_CONN_SND) \ + item(CE_LOGOUT_SESSION_SND) \ + item(CE_LOGOUT_SUCCESS_RCV) \ + item(CE_LOGOUT_FAIL_RCV) \ + item(CE_ASYNC_LOGOUT_RCV) \ + item(CE_ASYNC_DROP_CONN_RCV) \ + item(CE_ASYNC_DROP_ALL_CONN_RCV) \ + /* Target events */ \ + item(CE_CONNECT_ACCEPT) \ + item(CE_CONNECT_REJECT) \ + item(CE_LOGIN_RCV) \ + item(CE_LOGIN_TIMEOUT) \ + item(CE_LOGIN_SUCCESS_SND) \ + item(CE_LOGIN_FAIL_SND) \ + item(CE_LOGIN_FAIL_SND_DONE) \ + item(CE_LOGOUT_THIS_CONN_RCV) \ + item(CE_LOGOUT_OTHER_CONN_RCV) \ + item(CE_LOGOUT_SESSION_RCV) \ + item(CE_LOGOUT_SUCCESS_SND) \ + item(CE_LOGOUT_SUCCESS_SND_DONE) \ + item(CE_LOGOUT_FAIL_SND) \ + item(CE_LOGOUT_FAIL_SND_DONE) \ + item(CE_CLEANUP_TIMEOUT) \ + item(CE_ASYNC_LOGOUT_SND) \ + item(CE_ASYNC_DROP_CONN_SND) \ + item(CE_ASYNC_DROP_ALL_CONN_SND) \ + item(CE_LOGOUT_TIMEOUT) \ + /* Common events */ \ + item(CE_TRANSPORT_FAIL) \ + item(CE_MISC_TX) \ + item(CE_TX_PROTOCOL_ERROR) \ + item(CE_MISC_RX) \ + item(CE_RX_PROTOCOL_ERROR) \ + item(CE_LOGOUT_SESSION_SUCCESS) \ + item(CE_CONN_REINSTATE) \ + item(CE_CONN_REINSTATE_SUCCESS) \ + item(CE_CONN_REINSTATE_FAIL) \ + item(CE_ENABLE_DM_SUCCESS) \ + item(CE_ENABLE_DM_FAIL) \ + /* Add new events above CE_MAX_EVENT */ \ + item(CE_MAX_EVENT) + /* Update idm_ce_name table whenever connection events are modified */ typedef enum { - CE_UNDEFINED = 0, - - /* Initiator events */ - CE_CONNECT_REQ, - CE_CONNECT_FAIL, - CE_CONNECT_SUCCESS, - CE_LOGIN_SND, - CE_LOGIN_SUCCESS_RCV, - CE_LOGIN_FAIL_RCV, - CE_LOGOUT_THIS_CONN_SND, - CE_LOGOUT_OTHER_CONN_SND, - CE_LOGOUT_SESSION_SND, - CE_LOGOUT_SUCCESS_RCV, - CE_LOGOUT_FAIL_RCV, - CE_ASYNC_LOGOUT_RCV, - CE_ASYNC_DROP_CONN_RCV, - CE_ASYNC_DROP_ALL_CONN_RCV, - - /* Target events */ - CE_CONNECT_ACCEPT, - CE_CONNECT_REJECT, - CE_LOGIN_RCV, - CE_LOGIN_TIMEOUT, - CE_LOGIN_SUCCESS_SND, - CE_LOGIN_FAIL_SND, - CE_LOGIN_FAIL_SND_DONE, - CE_LOGOUT_THIS_CONN_RCV, - CE_LOGOUT_OTHER_CONN_RCV, - CE_LOGOUT_SESSION_RCV, - CE_LOGOUT_SUCCESS_SND, - CE_LOGOUT_SUCCESS_SND_DONE, - CE_LOGOUT_FAIL_SND, - CE_LOGOUT_FAIL_SND_DONE, - CE_CLEANUP_TIMEOUT, - CE_ASYNC_LOGOUT_SND, - CE_ASYNC_DROP_CONN_SND, - CE_ASYNC_DROP_ALL_CONN_SND, - CE_LOGOUT_TIMEOUT, - - /* Common events */ - CE_TRANSPORT_FAIL, - CE_MISC_TX, - CE_TX_PROTOCOL_ERROR, - CE_MISC_RX, - CE_RX_PROTOCOL_ERROR, - CE_LOGOUT_SESSION_SUCCESS, - CE_CONN_REINSTATE, - CE_CONN_REINSTATE_SUCCESS, - CE_CONN_REINSTATE_FAIL, - CE_ENABLE_DM_SUCCESS, - CE_ENABLE_DM_FAIL, - - /* Add new events above CE_MAX_EVENT */ - CE_MAX_EVENT +#define item(a) a, + IDM_CONN_EVENT_LIST() +#undef item } idm_conn_event_t; #ifdef IDM_CONN_SM_STRINGS /* An array of event text values, for use in logging events */ static const char *idm_ce_name[CE_MAX_EVENT+1] = { - "CE_UNDEFINED", - "CE_CONNECT_REQ", - "CE_CONNECT_FAIL", - "CE_CONNECT_SUCCESS", - "CE_LOGIN_SND", - "CE_LOGIN_SUCCESS_RCV", - "CE_LOGIN_FAIL_RCV", - "CE_LOGOUT_THIS_CONN_SND", - "CE_LOGOUT_OTHER_CONN_SND", - "CE_LOGOUT_SESSION_SND", - "CE_LOGOUT_SUCCESS_RCV", - "CE_LOGOUT_FAIL_RCV", - "CE_ASYNC_LOGOUT_RCV", - "CE_ASYNC_DROP_CONN_RCV", - "CE_ASYNC_DROP_ALL_CONN_RCV", - "CE_CONNECT_ACCEPT", - "CE_CONNECT_REJECT", - "CE_LOGIN_RCV", - "CE_LOGIN_TIMEOUT", - "CE_LOGIN_SUCCESS_SND", - "CE_LOGIN_FAIL_SND", - "CE_LOGIN_FAIL_SND_DONE", - "CE_LOGOUT_THIS_CONN_RCV", - "CE_LOGOUT_OTHER_CONN_RCV", - "CE_LOGOUT_SESSION_RCV", - "CE_LOGOUT_SUCCESS_SND", - "CE_LOGOUT_SUCCESS_SND_DONE", - "CE_LOGOUT_FAIL_SND", - "CE_LOGOUT_FAIL_SND_DONE", - "CE_CLEANUP_TIMEOUT", - "CE_ASYNC_LOGOUT_SND", - "CE_ASYNC_DROP_CONN_SND", - 
"CE_ASYNC_DROP_ALL_CONN_SND", - "CE_LOGOUT_TIMEOUT", - "CE_TRANSPORT_FAIL", - "CE_MISC_TX", - "CE_TX_PROTOCOL_ERROR", - "CE_MISC_RX", - "CE_RX_PROTOCOL_ERROR", - "CE_LOGOUT_SESSION_SUCCESS", - "CE_CONN_REINSTATE", - "CE_CONN_REINSTATE_SUCCESS", - "CE_CONN_REINSTATE_FAIL", - "CE_ENABLE_DM_SUCCESS", - "CE_ENABLE_DM_FAIL", - "CE_MAX_EVENT" +#define item(a) #a, + IDM_CONN_EVENT_LIST() +#undef item }; #endif +#define CONN_STATE_LIST() \ + item(CS_S0_UNDEFINED) \ + item(CS_S1_FREE) \ + item(CS_S2_XPT_WAIT) \ + item(CS_S3_XPT_UP) \ + item(CS_S4_IN_LOGIN) \ + item(CS_S5_LOGGED_IN) \ + item(CS_S6_IN_LOGOUT) \ + item(CS_S7_LOGOUT_REQ) \ + item(CS_S8_CLEANUP) \ + item(CS_S9_INIT_ERROR) \ + item(CS_S10_IN_CLEANUP) \ + item(CS_S11_COMPLETE) \ + item(CS_S12_ENABLE_DM) \ + item(CS_S9A_REJECTED) \ + item(CS_S9B_WAIT_SND_DONE) \ + /* Add new connection states above CS_MAX_STATE */ \ + item(CS_MAX_STATE) + /* Update idm_cs_name table whenever connection states are modified */ typedef enum { - CS_S0_UNDEFINED = 0, - - CS_S1_FREE, - CS_S2_XPT_WAIT, - CS_S3_XPT_UP, - CS_S4_IN_LOGIN, - CS_S5_LOGGED_IN, - CS_S6_IN_LOGOUT, - CS_S7_LOGOUT_REQ, - CS_S8_CLEANUP, - CS_S9_INIT_ERROR, - CS_S10_IN_CLEANUP, - CS_S11_COMPLETE, - CS_S12_ENABLE_DM, - CS_S9A_REJECTED, - CS_S9B_WAIT_SND_DONE, - - /* Add new connection states above CS_MAX_STATE */ - CS_MAX_STATE +#define item(a) a, + CONN_STATE_LIST() +#undef item } idm_conn_state_t; #ifdef IDM_CONN_SM_STRINGS /* An array of state text values, for use in logging state transitions */ static const char *idm_cs_name[CS_MAX_STATE+1] = { - "CS_S0_UNDEFINED", - "CS_S1_FREE", - "CS_S2_XPT_WAIT", - "CS_S3_XPT_UP", - "CS_S4_IN_LOGIN", - "CS_S5_LOGGED_IN", - "CS_S6_IN_LOGOUT", - "CS_S7_LOGOUT_REQ", - "CS_S8_CLEANUP", - "CS_S9_INIT_ERROR", - "CS_S10_IN_CLEANUP", - "CS_S11_COMPLETE", - "CS_S12_ENABLE_DM", - "CS_S9A_REJECTED", - "CS_S9B_WAIT_SND_DONE", - "CS_MAX_STATE" +#define item(a) #a, + CONN_STATE_LIST() +#undef item }; #endif diff --git a/usr/src/uts/common/sys/idm/idm_impl.h b/usr/src/uts/common/sys/idm/idm_impl.h index 87d3674ad1..346611719d 100644 --- a/usr/src/uts/common/sys/idm/idm_impl.h +++ b/usr/src/uts/common/sys/idm/idm_impl.h @@ -22,7 +22,7 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014-2015 Nexenta Systems, Inc. All rights reserved. */ #ifndef _IDM_IMPL_H_ @@ -155,7 +155,7 @@ typedef struct idm_conn_s { void *ic_handle; idm_refcnt_t ic_refcnt; idm_svc_t *ic_svc_binding; /* Target conn. only */ - idm_sockaddr_t ic_ini_dst_addr; + idm_sockaddr_t ic_ini_dst_addr; struct sockaddr_storage ic_laddr; /* conn local address */ struct sockaddr_storage ic_raddr; /* conn remote address */ @@ -321,7 +321,7 @@ typedef enum { BP_CHECK_ASSERT } idm_bufpat_check_type_t; -#define BUFPAT_MATCH(bc_bufpat, bc_idb) \ +#define BUFPAT_MATCH(bc_bufpat, bc_idb) \ ((bufpat->bufpat_idb == bc_idb) && \ (bufpat->bufpat_bufmagic == IDM_BUF_MAGIC)) @@ -409,9 +409,19 @@ typedef struct { #define OSD_EXT_CDB_AHSLEN (200 - 15) #define BIDI_AHS_LENGTH 5 +/* + * Additional Header Segment (AHS) + * AHS is only valid for SCSI Requests and contains SCSI CDB information + * which doesn't fit in the standard 16 byte area of the PDU. Commonly + * this only holds true for OSD device commands. + * + * IDM_SORX_CACHE_ASHLEN is the amount of memory which is preallocated in bytes. 
#define IDM_SORX_CACHE_HDRLEN (sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN) /* diff --git a/usr/src/uts/common/sys/lpif.h b/usr/src/uts/common/sys/lpif.h index 7b04a22f7b..168916f02e 100644 --- a/usr/src/uts/common/sys/lpif.h +++ b/usr/src/uts/common/sys/lpif.h
@@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ #ifndef _LPIF_H #define _LPIF_H
@@ -76,6 +77,7 @@ typedef struct stmf_lu { uint32_t lu_proxy_reg_arg_len; void (*lu_dbuf_free)(struct scsi_task *task, struct stmf_data_buf *dbuf); + void (*lu_task_done)(struct scsi_task *task); } stmf_lu_t; /*
@@ -84,6 +86,7 @@ typedef struct stmf_lu { #define STMF_LU_ABORT_TASK 1 #define STMF_LU_RESET_STATE 2 #define STMF_LU_ITL_HANDLE_REMOVED 3 +#define STMF_LU_SET_ABORT 4 /* * Asymmetric access state
@@ -132,6 +135,9 @@ stmf_status_t stmf_deregister_lu(stmf_lu_t *lup); stmf_status_t stmf_set_lu_access(stmf_lu_t *lup, uint8_t access_state); stmf_status_t stmf_proxy_scsi_cmd(scsi_task_t *, stmf_data_buf_t *dbuf); int stmf_is_standby_port(scsi_task_t *); +void stmf_lu_xfer_done(struct scsi_task *task, boolean_t read, + hrtime_t elapsed_time); +boolean_t stmf_is_pgr_aptpl_always(); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/portif.h b/usr/src/uts/common/sys/portif.h index 861a983315..792abd94b7 100644 --- a/usr/src/uts/common/sys/portif.h +++ b/usr/src/uts/common/sys/portif.h
@@ -138,6 +138,10 @@ stmf_status_t stmf_register_local_port(stmf_local_port_t *lportp); stmf_status_t stmf_deregister_local_port(stmf_local_port_t *lport); stmf_status_t stmf_register_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss); +stmf_status_t stmf_add_rport_info(stmf_scsi_session_t *ss, + const char *prop_name, const char *prop_value); +void stmf_remove_rport_info(stmf_scsi_session_t *ss, + const char *prop_name); void stmf_deregister_scsi_session(stmf_local_port_t *lport, stmf_scsi_session_t *ss); void stmf_set_port_standby(stmf_local_port_t *lport, uint16_t rtpid); diff --git a/usr/src/uts/common/sys/scsi/generic/commands.h b/usr/src/uts/common/sys/scsi/generic/commands.h index e15a80a800..ed45141bcd 100644 --- a/usr/src/uts/common/sys/scsi/generic/commands.h +++ b/usr/src/uts/common/sys/scsi/generic/commands.h
@@ -329,6 +329,7 @@ extern "C" { */ #define SCMD_GROUP4 0x80 #define SCMD_EXTENDED_COPY 0x83 +#define SCMD_RECV_COPY_RESULTS 0x84 #define SCMD_VERIFY_G4 0x8f /*
@@ -357,6 +358,7 @@ extern "C" { */ #define SCMD_WRITE_FILE_MARK_G4 0x80 #define SCMD_READ_REVERSE_G4 0x81 +#define SCMD_COMPARE_AND_WRITE 0x89 #define SCMD_READ_ATTRIBUTE 0x8c #define SCMD_WRITE_ATTRIBUTE 0x8d #define SCMD_SPACE_G4 0x91 diff --git a/usr/src/uts/common/sys/scsi/generic/mode.h b/usr/src/uts/common/sys/scsi/generic/mode.h index f1b8880cce..b2e1d925e3 100644 --- a/usr/src/uts/common/sys/scsi/generic/mode.h +++ b/usr/src/uts/common/sys/scsi/generic/mode.h
@@ -191,6 +191,8 @@ struct mode_page { */ #define MODEPAGE_DISCO_RECO 0x02 +#define MODEPAGE_FORMAT 0x03 +#define MODEPAGE_GEOMETRY 0x04 #define MODEPAGE_CACHING
0x08 #define MODEPAGE_PDEVICE 0x09 #define MODEPAGE_CTRL_MODE 0x0A diff --git a/usr/src/uts/common/sys/scsi/scsi_names.h b/usr/src/uts/common/sys/scsi/scsi_names.h index 349020d58a..bd2150ae9c 100644 --- a/usr/src/uts/common/sys/scsi/scsi_names.h +++ b/usr/src/uts/common/sys/scsi/scsi_names.h
@@ -35,13 +35,13 @@ extern "C" { #define SNS_NAA_32 32 #define SNS_WWN_16 16 -/* - * Maximum number of bytes needed to store SCSI Name Strings in UTF-8 format, - * assuming that (per RFC3629) one UTF-8 character can take up to 4 bytes. - */ -#define SNS_EUI_U8_LEN_MAX (SNS_EUI_16 * 4) -#define SNS_IQN_U8_LEN_MAX (SNS_IQN_223 * 4) -#define SNS_NAA_U8_LEN_MAX (SNS_NAA_32 * 4) +#define SNS_EUI_LEN_MAX sizeof (SNS_EUI) + SNS_EUI_16 +#define SNS_IQN_LEN_MAX SNS_IQN_223 +#define SNS_MAC_LEN_MAX sizeof (SNS_MAC) + SNS_MAC_12 +#define SNS_NAA_LEN_MAX sizeof (SNS_NAA) + SNS_NAA_32 +#define SNS_WWN_LEN_MAX sizeof (SNS_WWN) + SNS_WWN_16 + +#define SNS_LEN_MAX SNS_IQN_LEN_MAX #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/stmf.h b/usr/src/uts/common/sys/stmf.h index ad95cd0b15..76f378b226 100644 --- a/usr/src/uts/common/sys/stmf.h +++ b/usr/src/uts/common/sys/stmf.h
@@ -18,10 +18,13 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ + #ifndef _STMF_H #define _STMF_H
@@ -205,6 +208,11 @@ typedef struct scsi_task { #define TASK_AF_ACCEPT_LU_DBUF 0x08 /* + * Indicating a PPPT task + */ +#define TASK_AF_PPPT_TASK 0x10 + +/* * scsi_task_t extension identifiers */ #define STMF_TASK_EXT_NONE 0
@@ -379,7 +387,8 @@ void stmf_send_status_done(scsi_task_t *task, stmf_status_t s, uint32_t iof); void stmf_task_lu_done(scsi_task_t *task); void stmf_abort(int abort_cmd, scsi_task_t *task, stmf_status_t s, void *arg); void stmf_task_lu_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof); -void stmf_task_lport_aborted(scsi_task_t *task, stmf_status_t s, uint32_t iof); +void stmf_task_lport_aborted_unlocked(scsi_task_t *task, stmf_status_t s, + uint32_t iof); stmf_status_t stmf_task_poll_lu(scsi_task_t *task, uint32_t timeout); stmf_status_t stmf_task_poll_lport(scsi_task_t *task, uint32_t timeout); stmf_status_t stmf_ctl(int cmd, void *obj, void *arg);
@@ -416,6 +425,9 @@ boolean_t stmf_scsilib_tptid_compare(struct scsi_transport_id *, struct scsi_transport_id *); struct stmf_remote_port *stmf_remote_port_alloc(uint16_t); void stmf_remote_port_free(struct stmf_remote_port *); +struct stmf_lu *stmf_check_and_hold_lu(struct scsi_task *task, uint8_t *guid); +void stmf_release_lu(struct stmf_lu *lu); +int stmf_is_task_being_aborted(struct scsi_task *task); #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/stmf_defines.h b/usr/src/uts/common/sys/stmf_defines.h index bb9e442677..0fbab9455f 100644 --- a/usr/src/uts/common/sys/stmf_defines.h +++ b/usr/src/uts/common/sys/stmf_defines.h
@@ -123,7 +123,7 @@ typedef uint64_t stmf_status_t; #define STMF_SAA_INVALID_FIELD_IN_PARAM_LIST 0x052600 #define STMF_SAA_INVALID_RELEASE_OF_PR 0x052604 #define STMF_SAA_MEDIUM_REMOVAL_PREVENTED 0x055302 -#define STMF_SAA_INSUFFICIENT_REG_RESOURCES 0x055504 +#define STMF_SAA_INSUFFICIENT_REG_RESRCS 0x055504 #define STMF_SAA_POR 0x062900 #define STMF_SAA_MODE_PARAMETERS_CHANGED 0x062A01 #define STMF_SAA_ASYMMETRIC_ACCESS_CHANGED 0x062A06
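The STMF_SAA_* constants pack a SCSI sense key, additional sense code, and qualifier into one 24-bit value; 0x052600 above, for instance, is sense key 5h (ILLEGAL REQUEST) with ASC 26h and ASCQ 00h. A small sketch of unpacking the three fields (the helper name is hypothetical):

#include <stdio.h>
#include <stdint.h>

/* decode key/asc/ascq from a packed 0xKKAAQQ constant such as 0x052600 */
static void
saa_decode(uint32_t saa, uint8_t *key, uint8_t *asc, uint8_t *ascq)
{
	*key = (saa >> 16) & 0xff;
	*asc = (saa >> 8) & 0xff;
	*ascq = saa & 0xff;
}

int
main(void)
{
	uint8_t key, asc, ascq;

	saa_decode(0x052600, &key, &asc, &ascq);	/* INVALID_FIELD_IN_PARAM_LIST */
	printf("key=%02xh asc=%02xh ascq=%02xh\n", key, asc, ascq);
	return (0);
}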