diff options
author | hiremath <none@none> | 2006-12-14 05:33:04 -0800 |
---|---|---|
committer | hiremath <none@none> | 2006-12-14 05:33:04 -0800 |
commit | 934f0bcca2426560672b4f167f4446289ec3e7af (patch) | |
tree | fb79c962e6699a948900b9a23136b17d48980d57 | |
parent | 7d0d5588e73fc4ac036733d158e41d506289722a (diff) | |
download | illumos-joyent-934f0bcca2426560672b4f167f4446289ec3e7af.tar.gz |
6428685 RC Channel opened after ibt_recycle_rc succeed free_channel, without close_channel
6434915 ibt_close_rc_channel incorrectly blocks in interrupt context
6479638 Add redirect support to SAA
-rw-r--r-- | usr/src/uts/common/io/ib/ibtl/ibtl_qp.c | 19 | ||||
-rw-r--r-- | usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c | 47 | ||||
-rw-r--r-- | usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c | 70 | ||||
-rw-r--r-- | usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c | 216 | ||||
-rw-r--r-- | usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c | 424 | ||||
-rw-r--r-- | usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_utils.c | 40 | ||||
-rw-r--r-- | usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h | 25 | ||||
-rw-r--r-- | usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h | 15 |
8 files changed, 712 insertions, 144 deletions
diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c b/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c index b215a4b31e..5e884252ad 100644 --- a/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -186,7 +185,7 @@ ibt_alloc_qp(ibt_hca_hdl_t hca_hdl, ibt_qp_type_t type, hca_hdl->ha_qp_cnt++; mutex_exit(&hca_hdl->ha_mutex); - IBTF_DPRINTF_L2(ibtf_qp, "ibt_alloc_qp: SUCCESS: qp %p owned by '%s'", + IBTF_DPRINTF_L3(ibtf_qp, "ibt_alloc_qp: SUCCESS: qp %p owned by '%s'", chanp, hca_hdl->ha_clnt_devp->clnt_name); *ibt_qp_p = chanp; @@ -572,7 +571,7 @@ ibtl_cm_chan_is_closed(ibt_channel_hdl_t chan) /* decrement ha_qpn_cnt and check for close in progress */ ibtl_close_hca_check(ibtl_hca); } else - IBTF_DPRINTF_L1(ibtf_qp, "ibtl_cm_chan_is_closed: " + IBTF_DPRINTF_L2(ibtf_qp, "ibtl_cm_chan_is_closed: " "ibc_release_qpn failed: status = %d\n", status); ibtl_qp_flow_control_exit(); } @@ -593,7 +592,7 @@ ibtl_cm_chan_is_reused(ibt_channel_hdl_t chan) IBTL_RC_QP_CONNECTED)); /* channel is no longer in closed state, shall be re-used */ - chan->ch_transport.rc.rc_free_flags &= ~IBTL_RC_QP_CLOSED; + chan->ch_transport.rc.rc_free_flags = 0; mutex_exit(&ibtl_free_qp_mutex); @@ -650,7 +649,7 @@ ibt_free_qp(ibt_qp_hdl_t ibt_qp) 
mutex_enter(&ibtl_hca->ha_mutex); ibtl_hca->ha_qp_cnt--; mutex_exit(&ibtl_hca->ha_mutex); - IBTF_DPRINTF_L2(ibtf_qp, "ibt_free_qp(%p) - " + IBTF_DPRINTF_L3(ibtf_qp, "ibt_free_qp(%p) - " "SUCCESS", ibt_qp); } else IBTF_DPRINTF_L2(ibtf_qp, "ibt_free_qp: " @@ -673,7 +672,7 @@ ibt_free_qp(ibt_qp_hdl_t ibt_qp) mutex_enter(&ibtl_hca->ha_mutex); ibtl_hca->ha_qp_cnt--; mutex_exit(&ibtl_hca->ha_mutex); - IBTF_DPRINTF_L2(ibtf_qp, "ibt_free_qp(%p) - SUCCESS", ibt_qp); + IBTF_DPRINTF_L3(ibtf_qp, "ibt_free_qp(%p) - SUCCESS", ibt_qp); } else { IBTF_DPRINTF_L2(ibtf_qp, "ibt_free_qp: " "ibc_free_qp failed with error %d", status); @@ -848,7 +847,7 @@ ibt_migrate_path(ibt_channel_hdl_t rc_chan) ibt_cep_modify_flags_t cep_flags; int retries = 1; - IBTF_DPRINTF_L2(ibtf_qp, "ibt_migrate_path: channel %p", rc_chan); + IBTF_DPRINTF_L3(ibtf_qp, "ibt_migrate_path: channel %p", rc_chan); if (rc_chan->ch_qp.qp_type != IBT_RC_SRV) { IBTF_DPRINTF_L2(ibtf_qp, "ibt_migrate_path: " diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c index 450b8d8fcc..c7f4578d54 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c @@ -220,6 +220,13 @@ ibcm_flow_t ibcm_saa_flow; ibcm_flow_t ibcm_close_flow; ibcm_flow_t ibcm_lapr_flow; +/* NONBLOCKING close requests are queued */ +struct ibcm_close_s { + kmutex_t mutex; + ibcm_state_data_t *tail; + ibcm_state_data_t head; +} ibcm_close; + static ibt_clnt_modinfo_t ibcm_ibt_modinfo = { /* Client's modinfop */ IBTI_V2, IBT_CM, @@ -846,6 +853,8 @@ ibcm_hca_attach(ib_guid_t hcaguid) /* Store the static hca attribute data */ hcap->hca_caps = hca_attrs.hca_flags; + hcap->hca_vendor_id = hca_attrs.hca_vendor_id; + hcap->hca_device_id = hca_attrs.hca_device_id; hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay; hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp; hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp; @@ -1568,12 +1577,19 @@ 
ibcm_rc_flow_control_init(void) ibcm_open.tail = &ibcm_open.head; ibcm_open.head.open_link = NULL; mutex_exit(&ibcm_open.mutex); + + mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL); + mutex_enter(&ibcm_close.mutex); + ibcm_close.tail = &ibcm_close.head; + ibcm_close.head.close_link = NULL; + mutex_exit(&ibcm_close.mutex); } static void ibcm_rc_flow_control_fini(void) { mutex_destroy(&ibcm_open.mutex); + mutex_destroy(&ibcm_close.mutex); } static ibcm_flow1_t * @@ -1703,6 +1719,37 @@ ibcm_flow_dec(hrtime_t time, char *mad_type) } void +ibcm_close_enqueue(ibcm_state_data_t *statep) +{ + mutex_enter(&ibcm_close.mutex); + statep->close_link = NULL; + ibcm_close.tail->close_link = statep; + ibcm_close.tail = statep; + mutex_exit(&ibcm_close.mutex); + ibcm_run_tlist_thread(); +} + +void +ibcm_check_for_async_close() +{ + ibcm_state_data_t *statep; + + mutex_enter(&ibcm_close.mutex); + + while (ibcm_close.head.close_link) { + statep = ibcm_close.head.close_link; + ibcm_close.head.close_link = statep->close_link; + statep->close_link = NULL; + if (ibcm_close.tail == statep) + ibcm_close.tail = &ibcm_close.head; + mutex_exit(&ibcm_close.mutex); + ibcm_close_start(statep); + mutex_enter(&ibcm_close.mutex); + } + mutex_exit(&ibcm_close.mutex); +} + +void ibcm_close_enter(void) { ibcm_flow_enter(&ibcm_close_flow); diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c index 21b4ee7b46..a68fa0a581 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -472,6 +471,14 @@ int ibcm_recv_tasks = 0; int ibcm_max_recv_tasks = 24; int ibcm_recv_timeouts = 0; +/* + * Tunable MAX MRA Service Timeout value in MicroSECONDS. + * 0 - Tunable parameter not used. + * + * Ex: 60000000 - Max MRA Service Delay is 60 Seconds. + */ +clock_t ibcm_mra_service_timeout_max = 0; + #ifdef DEBUG static void print_modify_qp(char *prefix, @@ -1024,6 +1031,22 @@ new_req: return; } + /* Allocate dreq_msg buf to be used during teardown. */ + if (ibcm_alloc_out_msg(cm_mad_addr->ibmf_hdl, + &statep->dreq_msg, MAD_METHOD_SEND) != IBT_SUCCESS) { + + IBCM_REF_CNT_DECR(statep); + statep->state = IBCM_STATE_DELETE; + mutex_exit(&statep->state_mutex); + IBTF_DPRINTF_L2(cmlog, "ibcm_process_req_msg: " + "statep 0x%p: Failed to allocate dreq_msg", statep); + + /* HCA res cnt decremented via ibcm_delete_state_data */ + ibcm_inc_hca_res_cnt(hcap); + ibcm_delete_state_data(statep); + return; + } + /* initialize some "statep" fields */ statep->mode = IBCM_PASSIVE_MODE; statep->hcap = hcap; @@ -1919,6 +1942,7 @@ ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ((statep->state == IBCM_STATE_ESTABLISHED) && (statep->ap_state == IBCM_AP_STATE_LAP_SENT))) { timeout_id_t timer_val = statep->timerid; + clock_t service_timeout; if (statep->state == IBCM_STATE_REQ_SENT) { mra_msg = IBT_CM_MRA_TYPE_REQ; @@ -1937,6 +1961,23 @@ ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, (void) untimeout(timer_val); + service_timeout = + ibt_ib2usec(mra_msgp->mra_service_timeout_plus >> 3); + + /* + * If tunable MAX MRA Service Timeout parameter is set, then + * verify whether the requested timer value exceeds the MAX + * value 
and reset the timer value to the MAX value. + */ + if (ibcm_mra_service_timeout_max && + ibcm_mra_service_timeout_max < service_timeout) { + IBTF_DPRINTF_L2(cmlog, "ibcm_process_mra_msg: " + "Unexpected MRA Service Timeout value (%ld), Max " + "allowed is (%ld)", service_timeout, + ibcm_mra_service_timeout_max); + service_timeout = ibcm_mra_service_timeout_max; + } + /* * Invoke client handler to pass the MRA private data */ @@ -1953,8 +1994,7 @@ ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, event.cm_event.mra.mra_msg_type = mra_msg; - event.cm_event.mra.mra_service_time = ibt_ib2usec( - mra_msgp->mra_service_timeout_plus >> 3); + event.cm_event.mra.mra_service_time = service_timeout; /* Client cannot return private data */ (void) statep->cm_handler(statep->state_cm_private, @@ -1979,8 +2019,7 @@ ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, */ statep->timer_stored_state = statep->state; statep->timer_value = statep->pkt_life_time + - ibt_ib2usec(mra_msgp->mra_service_timeout_plus - >> 3); + service_timeout; statep->timerid = IBCM_TIMEOUT(statep, statep->timer_value); } @@ -2783,12 +2822,12 @@ ibcm_process_drep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, } /* copy the private to close channel, if specified */ - if ((statep->close_priv_data != NULL) && - (statep->close_priv_data_len != NULL) && - (*statep->close_priv_data_len > 0)) { + if ((statep->close_ret_priv_data != NULL) && + (statep->close_ret_priv_data_len != NULL) && + (*statep->close_ret_priv_data_len > 0)) { bcopy(drep_msgp->drep_private_data, - statep->close_priv_data, - min(*statep->close_priv_data_len, + statep->close_ret_priv_data, + min(*statep->close_ret_priv_data_len, IBT_DREP_PRIV_DATA_SZ)); } @@ -4058,8 +4097,8 @@ ibcm_process_dreq_timeout(ibcm_state_data_t *statep) /* signal waiting CVs - blocking in ibt_close_channel() */ statep->close_done = B_TRUE; - if (statep->close_priv_data_len != NULL) - *statep->close_priv_data_len = 0; + if 
(statep->close_ret_priv_data_len != NULL) + *statep->close_ret_priv_data_len = 0; /* unblock any close channel with no callbacks option */ statep->close_nocb_state = IBCM_FAIL; @@ -4168,6 +4207,7 @@ ibcm_process_tlist() } mutex_exit(&ibcm_timeout_list_lock); ibcm_check_for_opens(); + ibcm_check_for_async_close(); mutex_enter(&ibcm_timeout_list_lock); /* First, handle pending RC statep's, followed by UD's */ diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c index 99dbe17f8a..e0995f6974 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c @@ -55,6 +55,9 @@ static void ibcm_process_async_join_mcg(void *tq_arg); static ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *, uint64_t c_mask, void *, size_t *); +static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel, + ibcm_state_data_t *statep, ibt_execution_mode_t mode); + /* Address Record management definitions */ #define IBCM_DAPL_ATS_NAME "DAPL Address Translation Service" #define IBCM_DAPL_ATS_SID 0x10000CE100415453ULL @@ -62,6 +65,16 @@ static ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *, ibcm_svc_info_t *ibcm_ar_svcinfop; ibcm_ar_t *ibcm_ar_list; +/* + * Tunable parameter to turn off the overriding of pi_path_mtu value. + * 1 By default override the path record's pi_path_mtu value to + * IB_MTU_1K for all RC channels. This is done only for + * channels established on a Tavor HCA when the path's pi_path_mtu + * is greater than IB_MTU_1K. + * 0 Do not override, use pi_path_mtu by default. 
+ */ +int ibcm_override_path_mtu = 1; + #ifdef DEBUG static void ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr); @@ -112,7 +125,7 @@ ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags, boolean_t alternate_grh = B_FALSE; ib_lid_t base_lid; ib_com_id_t local_comid; - ibmf_msg_t *ibmf_msg; + ibmf_msg_t *ibmf_msg, *ibmf_msg_dreq; ibcm_req_msg_t *req_msgp; uint8_t rdma_in, rdma_out; @@ -486,7 +499,7 @@ ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags, return (IBT_INSUFF_KERNEL_RESOURCE); } - /* allocate an IBMF mad buffer */ + /* allocate an IBMF mad buffer (REQ) */ if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND)) != IBT_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: " @@ -497,6 +510,18 @@ ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags, return (status); } + /* allocate an IBMF mad buffer (DREQ) */ + if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq, + MAD_METHOD_SEND)) != IBT_SUCCESS) { + IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: " + "chan 0x%p ibcm_alloc_out_msg failed", channel); + (void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg); + ibcm_release_qp(cm_qp_entry); + ibcm_free_comid(hcap, local_comid); + ibcm_dec_hca_acc_cnt(hcap); + return (status); + } + /* Init to Init, if QP's port does not match with path information */ if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num != IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) { @@ -530,6 +555,7 @@ ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags, ibcm_free_comid(hcap, local_comid); ibcm_dec_hca_acc_cnt(hcap); (void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg); + (void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq); return (status); } else IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: " @@ -567,6 +593,7 @@ ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags, IBCM_SET_CHAN_PRIVATE(statep->channel, statep); 
statep->stored_msg = ibmf_msg; + statep->dreq_msg = ibmf_msg_dreq; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp)) @@ -652,8 +679,20 @@ ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags, req_msgp->req_max_cm_retries_plus |= (1 << 3); } - req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 | - chan_args->oc_path_rnr_retry_cnt; + /* + * By default on Tavor, we override the PathMTU to 1K. + * To turn this off, set ibcm_override_path_mtu = 0. + */ + if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) && + (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) { + req_msgp->req_mtu_plus = IB_MTU_1K << 4 | + chan_args->oc_path_rnr_retry_cnt; + IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU" + " overidden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K, + chan_args->oc_path->pi_path_mtu); + } else + req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 | + chan_args->oc_path_rnr_retry_cnt; IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d" " staring PSN %x", channel, cm_retries, starting_psn); @@ -1159,9 +1198,7 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status, void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p) { - ibcm_hca_info_t *hcap; ibcm_state_data_t *statep; - ibt_status_t status; IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %p)", channel, mode, priv_data, priv_data_len, @@ -1196,41 +1233,94 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, return (IBT_INVALID_PARAM); } + if (ibtl_cm_is_chan_closing(channel) || + ibtl_cm_is_chan_closed(channel)) { + if (ret_status) + *ret_status = IBT_CM_CLOSED_ALREADY; + + /* No private data to return to the client */ + if (ret_priv_data_len_p != NULL) + *ret_priv_data_len_p = 0; + + IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p " + "already marked for closing", channel); + + return 
(IBT_SUCCESS); + } + /* get the statep */ IBCM_GET_CHAN_PRIVATE(channel, statep); - if (statep == NULL) { - IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p " "statep NULL", channel); - if (ibtl_cm_is_chan_closing(channel) || - ibtl_cm_is_chan_closed(channel)) { - if (ret_status) - *ret_status = IBT_CM_CLOSED_ALREADY; + return (IBT_CHAN_STATE_INVALID); + } - /* No private data to return to the client */ - if (ret_priv_data_len_p != NULL) - *ret_priv_data_len_p = 0; + mutex_enter(&statep->state_mutex); - return (IBT_SUCCESS); - } + if (statep->dreq_msg == NULL) { + IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p " + "Fatal Error: dreq_msg is NULL", channel); + IBCM_RELEASE_CHAN_PRIVATE(channel); + mutex_exit(&statep->state_mutex); return (IBT_CHAN_STATE_INVALID); } - mutex_enter(&statep->state_mutex); + if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) { + statep->close_ret_priv_data = NULL; + statep->close_ret_priv_data_len = NULL; + } else { + statep->close_ret_priv_data = ret_priv_data; + statep->close_ret_priv_data_len = ret_priv_data_len_p; + } + + priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ); + if ((priv_data != NULL) && (priv_data_len > 0)) { + bcopy(priv_data, ((ibcm_dreq_msg_t *) + IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data, + priv_data_len); + } + statep->close_ret_status = ret_status; + IBCM_RELEASE_CHAN_PRIVATE(channel); IBCM_REF_CNT_INCR(statep); + + if (mode != IBT_NONBLOCKING) { + return (ibcm_close_rc_channel(channel, statep, mode)); + } + + /* IBT_NONBLOCKING */ + ibcm_close_enqueue(statep); mutex_exit(&statep->state_mutex); - IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p statep %p", - channel, statep); + return (IBT_SUCCESS); +} +void +ibcm_close_start(ibcm_state_data_t *statep) +{ mutex_enter(&statep->state_mutex); + (void) ibcm_close_rc_channel(statep->channel, statep, IBT_NONBLOCKING); +} + +static +ibt_status_t +ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t 
*statep, + ibt_execution_mode_t mode) +{ + ibcm_hca_info_t *hcap; + + _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex)); + ASSERT(MUTEX_HELD(&statep->state_mutex)); + + IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p", + channel, statep); + hcap = statep->hcap; /* HCA must have been in active state. If not, it's a client bug */ if (!IBCM_ACCESS_HCA_OK(hcap)) { - IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p " + IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p " "hcap 0x%p not active", channel, hcap); IBCM_REF_CNT_DECR(statep); mutex_exit(&statep->state_mutex); @@ -1246,18 +1336,16 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT) cv_wait(&statep->block_mad_cv, &statep->state_mutex); - IBTF_DPRINTF_L4(cmlog, "ibt_close_rc_channel: chan 0x%p " + IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p " "connection state is %x", channel, statep->state); - statep->close_ret_status = ret_status; - /* If state is in pre-established states, abort the connection est */ if (statep->state != IBCM_STATE_ESTABLISHED) { statep->cm_retries++; /* ensure connection trace is dumped */ /* No DREP private data possible */ - if (ret_priv_data_len_p != NULL) - *ret_priv_data_len_p = 0; + if (statep->close_ret_priv_data_len != NULL) + *statep->close_ret_priv_data_len = 0; /* * If waiting for a response mad, then cancel the timer, @@ -1270,7 +1358,7 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, timeout_id_t timer_val = statep->timerid; ibcm_conn_state_t old_state; - IBTF_DPRINTF_L4(cmlog, "ibt_close_rc_channel: " + IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: " "chan 0x%p connection aborted in state %x", channel, statep->state); @@ -1304,17 +1392,17 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, (void) untimeout(timer_val); /* wait until cm handler returns for BLOCKING cases */ + 
mutex_enter(&statep->state_mutex); if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS)) { - mutex_enter(&statep->state_mutex); while (statep->close_done != B_TRUE) cv_wait(&statep->block_client_cv, &statep->state_mutex); - mutex_exit(&statep->state_mutex); } - if (ret_status) - *ret_status = IBT_CM_CLOSED_ABORT; + if (statep->close_ret_status) + *statep->close_ret_status = IBT_CM_CLOSED_ABORT; + mutex_exit(&statep->state_mutex); /* * It would ideal to post a REJ MAD, but that would @@ -1336,7 +1424,7 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, /* take control of statep */ statep->abort_flag |= IBCM_ABORT_CLIENT; - IBTF_DPRINTF_L4(cmlog, "ibt_close_rc_channel: " + IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: " "chan 0x%p connection aborted in state = %x", channel, statep->state); @@ -1353,15 +1441,15 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, if (mode == IBT_NOCALLBACKS) statep->cm_handler = NULL; IBCM_REF_CNT_DECR(statep); - mutex_exit(&statep->state_mutex); /* * In rare situations, connection attempt could be * terminated for some other reason, before abort is * processed, but CM still returns ret_status as abort */ - if (ret_status) - *ret_status = IBT_CM_CLOSED_ABORT; + if (statep->close_ret_status) + *statep->close_ret_status = IBT_CM_CLOSED_ABORT; + mutex_exit(&statep->state_mutex); /* * REJ MAD is posted by the CM state machine for this @@ -1375,11 +1463,12 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, /* State already in timewait, so no return priv data */ IBCM_REF_CNT_DECR(statep); - mutex_exit(&statep->state_mutex); /* The teardown has already been done */ - if (ret_status) - *ret_status = IBT_CM_CLOSED_ALREADY; + if (statep->close_ret_status) + *statep->close_ret_status = + IBT_CM_CLOSED_ALREADY; + mutex_exit(&statep->state_mutex); return (IBT_SUCCESS); @@ -1406,8 +1495,9 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, 
cv_wait(&statep->block_client_cv, &statep->state_mutex); statep->cm_handler = NULL; /* sanity setting */ - if (ret_status) - *ret_status = IBT_CM_CLOSED_ALREADY; + if (statep->close_ret_status) + *statep->close_ret_status = + IBT_CM_CLOSED_ALREADY; } else if (mode == IBT_BLOCKING) { /* wait until state is moved to timewait */ while (statep->close_done != B_TRUE) @@ -1436,8 +1526,8 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, &statep->state_mutex); } - if (ret_status) - *ret_status = IBT_CM_CLOSED_ABORT; + if (statep->close_ret_status) + *statep->close_ret_status = IBT_CM_CLOSED_ABORT; IBCM_REF_CNT_DECR(statep); mutex_exit(&statep->state_mutex); @@ -1456,7 +1546,7 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, statep->close_nocb_state = IBCM_FAIL; statep->cm_handler = NULL; ibtl_cm_chan_is_closing(statep->channel); - IBTF_DPRINTF_L4(cmlog, "ibt_close_rc_channel: " + IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: " "NOCALLBACKS on in statep = %p", statep); } mutex_exit(&statep->state_mutex); @@ -1486,40 +1576,6 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, statep->close_flow = 1; mutex_exit(&statep->state_mutex); - if (statep->dreq_msg == NULL) { - if ((status = ibcm_alloc_out_msg( - statep->stored_reply_addr.ibmf_hdl, &statep->dreq_msg, - MAD_METHOD_SEND)) != IBT_SUCCESS) { - - IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: " - "chan 0x%p ibcm_alloc_out_msg failed ", channel); - mutex_enter(&statep->state_mutex); - ibcm_close_exit(); - statep->state = IBCM_STATE_ESTABLISHED; - IBCM_REF_CNT_DECR(statep); - cv_broadcast(&statep->block_mad_cv); - statep->close_flow = 0; - mutex_exit(&statep->state_mutex); - return (status); - } - } else - IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: " - "DREQ MAD already allocated in statep %p", statep); - - if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) { - statep->close_priv_data = NULL; - statep->close_priv_data_len = NULL; 
- } else { - statep->close_priv_data = ret_priv_data; - statep->close_priv_data_len = ret_priv_data_len_p; - } - - priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ); - if ((priv_data != NULL) && (priv_data_len > 0)) - bcopy(priv_data, ((ibcm_dreq_msg_t *) - IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data, - priv_data_len); - ibcm_post_dreq_mad(statep); mutex_enter(&statep->state_mutex); @@ -1532,7 +1588,7 @@ lost_race: cv_wait(&statep->block_client_cv, &statep->state_mutex); - IBTF_DPRINTF_L4(cmlog, "ibt_close_rc_channel: chan 0x%p " + IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p " "done blocking", channel); } @@ -1540,7 +1596,7 @@ lost_race: mutex_exit(&statep->state_mutex); /* If this message isn't seen then ibt_close_rc_channel failed */ - IBTF_DPRINTF_L5(cmlog, "ibt_close_rc_channel: chan 0x%p done", + IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done", channel); return (IBT_SUCCESS); @@ -4029,7 +4085,7 @@ ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel, /* the state machine processing is done in a separate thread */ /* proceed_targs is freed in ibcm_proceed_via_taskq */ - proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t), + proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t), KM_SLEEP); proceed_targs->status = status; diff --git a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c index 3d10dae54e..b083ca7c0e 100644 --- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c +++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -41,6 +40,9 @@ static int ibmf_saa_impl_new_smlid_retry(saa_port_t *saa_portp, ibmf_msg_t *msgp, ibmf_msg_cb_t ibmf_callback, void *ibmf_callback_arg, int transport_flags); static int +ibmf_saa_impl_revert_to_qp1(saa_port_t *saa_portp, ibmf_msg_t *msgp, + ibmf_msg_cb_t ibmf_callback, void *ibmf_callback_args, int transport_flags); +static int ibmf_saa_check_sa_and_retry(saa_port_t *saa_portp, ibmf_msg_t *msgp, ibmf_msg_cb_t ibmf_callback, void *ibmf_callback_arg, hrtime_t trans_send_time, int transport_flags); @@ -68,6 +70,8 @@ static int ibmf_saa_impl_get_port_guid(ibt_hca_portinfo_t *ibt_portinfop, ib_guid_t *guid_ret); static void ibmf_saa_impl_set_transaction_params(saa_port_t *saa_portp, ibt_hca_portinfo_t *portinfop); +static void ibmf_saa_impl_update_sa_address_info(saa_port_t *saa_portp, + ibmf_msg_t *msgp); static void ibmf_saa_impl_ibmf_unreg(saa_port_t *saa_portp); int ibmf_saa_max_wait_time = IBMF_SAA_MAX_WAIT_TIME_IN_SECS; @@ -626,6 +630,8 @@ ibmf_saa_impl_create_port(ib_guid_t pt_guid, saa_port_t **saa_portpp) saa_portp->saa_pt_reference_count = 1; saa_portp->saa_pt_current_tid = pt_guid << 32; + saa_portp->saa_pt_redirect_active = B_FALSE; + /* set sa_uptime now in case we never receive anything from SA */ saa_portp->saa_pt_sa_uptime = gethrtime(); @@ -1229,9 +1235,9 @@ ibmf_saa_impl_get_cpi_cb(void *arg, size_t length, char *buffer, int status) classportinfo = (ib_mad_classportinfo_t *)buffer; - resp_time_value = b2h32(classportinfo->RespTimeValue) & 0x1f; + resp_time_value = classportinfo->RespTimeValue & 0x1f; - sa_cap_mask = b2h16(classportinfo->CapabilityMask); + sa_cap_mask = classportinfo->CapabilityMask; 
IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_saa_impl_get_cpi_cb, IBMF_TNF_TRACE, "", @@ -1308,6 +1314,7 @@ ibmf_saa_impl_send_request(saa_impl_trans_info_t *trans_info) int ibmf_status = IBMF_SUCCESS; int retry_count; uint16_t mad_status; + boolean_t sa_is_redirected = B_FALSE; IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_saa_impl_send_request_start, @@ -1346,6 +1353,7 @@ ibmf_saa_impl_send_request(saa_impl_trans_info_t *trans_info) mutex_enter(&saa_portp->saa_pt_mutex); sa_cap_mask = saa_portp->saa_pt_sa_cap_mask; + sa_is_redirected = saa_portp->saa_pt_redirect_active; mutex_exit(&saa_portp->saa_pt_mutex); @@ -1442,13 +1450,21 @@ ibmf_saa_impl_send_request(saa_impl_trans_info_t *trans_info) /* * if the transaction timed out and this was a synchronous * request there's a possiblity we were talking to the wrong - * master smlid. Check this and retry if necessary. + * master smlid or that the SA has stopped responding on the + * redirected destination (if redirect is active). + * Check this and retry if necessary. */ if ((ibmf_status == IBMF_TRANS_TIMEOUT) && (sleep_flag == B_TRUE)) { - ibmf_status = ibmf_saa_impl_new_smlid_retry(saa_portp, - msgp, ibmf_callback, ibmf_callback_arg, - transport_flags); + if (sa_is_redirected == B_TRUE) { + ibmf_status = ibmf_saa_impl_revert_to_qp1( + saa_portp, msgp, ibmf_callback, + ibmf_callback_arg, transport_flags); + } else { + ibmf_status = ibmf_saa_impl_new_smlid_retry( + saa_portp, msgp, ibmf_callback, + ibmf_callback_arg, transport_flags); + } } /* @@ -1477,22 +1493,38 @@ ibmf_saa_impl_send_request(saa_impl_trans_info_t *trans_info) mad_status = b2h16(msgp->im_msgbufs_recv. 
im_bufs_mad_hdr->Status); - if (mad_status != MAD_STATUS_BUSY) + if ((mad_status != MAD_STATUS_BUSY) && + (mad_status != MAD_STATUS_REDIRECT_REQUIRED)) break; - IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L2, - ibmf_saa_impl_send_request, IBMF_TNF_TRACE, "", - "ibmf_saa_impl_send_request: %s, retry_count = %d\n", - tnf_string, msg, "response returned busy status", - tnf_int, retry_count, retry_count); + if (mad_status == MAD_STATUS_REDIRECT_REQUIRED) { + + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L2, + ibmf_saa_impl_send_request, IBMF_TNF_TRACE, "", + "ibmf_saa_impl_send_request: %s, retry_count %d\n", + tnf_string, msg, + "response returned redirect status", + tnf_int, retry_count, retry_count); + + /* update address info and copy it into msgp */ + ibmf_saa_impl_update_sa_address_info(saa_portp, msgp); + } else { + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L2, + ibmf_saa_impl_send_request, IBMF_TNF_TRACE, "", + "ibmf_saa_impl_send_request: %s, retry_count %d\n", + tnf_string, msg, "response returned busy status", + tnf_int, retry_count, retry_count); + } retry_count++; /* * since this is a blocking call, sleep for some time - * to allow SA to transition from busy state + * to allow SA to transition from busy state (if busy) */ - delay(drv_usectohz(IBMF_SAA_BUSY_RETRY_SLEEP_SECS * 1000000)); + if (mad_status == MAD_STATUS_BUSY) + delay(drv_usectohz( + IBMF_SAA_BUSY_RETRY_SLEEP_SECS * 1000000)); } if (ibmf_status != IBMF_SUCCESS) { @@ -1849,6 +1881,18 @@ ibmf_saa_impl_init_msg(saa_impl_trans_info_t *trans_info, boolean_t sleep_flag, bcopy(&saa_portp->saa_pt_ibmf_addr_info, &ibmf_msg->im_local_addr, sizeof (ibmf_addr_info_t)); + /* copy global addressing information to message if in use */ + if (saa_portp->saa_pt_ibmf_msg_flags & IBMF_MSG_FLAGS_GLOBAL_ADDRESS) { + + ibmf_msg->im_msg_flags = IBMF_MSG_FLAGS_GLOBAL_ADDRESS; + + bcopy(&saa_portp->saa_pt_ibmf_global_addr, + &ibmf_msg->im_global_addr, + sizeof (ibmf_global_addr_info_t)); + } else { + ibmf_msg->im_msg_flags = 0; + } + 
mutex_exit(&saa_portp->saa_pt_mutex); *msgp = ibmf_msg; @@ -2015,6 +2059,141 @@ bail: } /* + * ibmf_saa_impl_revert_to_qp1() + * + * The SA that we had contact with via redirect may fail to respond. If this + * occurs SA should revert back to qp1 and the SMLID set in the port. + * msg_transport for the message that timed out will be retried with + * these new parameters. + * + * msgp, ibmf_callback, ibmf_callback_args, and transport flags should be the + * same values passed to the original ibmf_msg_transport that timed out. The + * ibmf_retrans parameter will be re-retrieved from the saa_portp structure. + * + * Input Arguments + * saa_portp pointer to saa_port structure + * msgp ibmf message that timed out + * ibmf_callback callback that should be called by msg_transport + * ibmf_callback_args args for ibmf_callback + * transport_flags flags for ibmf_msg_transport + * + * Output Arguments + * none + * + * Returns + * IBMF_TRANSPORT_FAILURE on port query failure, else msg_transport status + */ +static int +ibmf_saa_impl_revert_to_qp1(saa_port_t *saa_portp, ibmf_msg_t *msgp, + ibmf_msg_cb_t ibmf_callback, void *ibmf_callback_args, int transport_flags) +{ + ibt_hca_portinfo_t *ibt_portinfop; + ib_lid_t master_sm_lid, base_lid; + uint8_t sm_sl; + int subnet_timeout; + uint_t nports, size; + ibmf_retrans_t ibmf_retrans; + int ibmf_status; + ibt_status_t ibt_status; + + IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, + ibmf_saa_impl_revert_to_qp1_start, + IBMF_TNF_TRACE, "", "ibmf_saa_impl_revert_to_qp1() enter\n"); + + _NOTE(ASSUMING_PROTECTED(*msgp)) + _NOTE(ASSUMING_PROTECTED(*msgp->im_msgbufs_send.im_bufs_mad_hdr)) + + /* first query the portinfo to see if the lid changed */ + ibt_status = ibt_query_hca_ports_byguid(saa_portp->saa_pt_node_guid, + saa_portp->saa_pt_port_num, &ibt_portinfop, &nports, &size); + + if (ibt_status != IBT_SUCCESS) { + + IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, + ibmf_saa_impl_revert_to_qp1_err, IBMF_TNF_ERROR, "", + "ibmf_saa_impl_revert_to_qp1: %s, ibmf_status =" + " %d\n", tnf_string, msg, 
"ibt_query_hca_ports_byguid() failed", + tnf_int, ibt_status, ibt_status); + + ibmf_status = IBMF_TRANSPORT_FAILURE; + + goto bail; + } + + master_sm_lid = ibt_portinfop->p_sm_lid; + base_lid = ibt_portinfop->p_base_lid; + sm_sl = ibt_portinfop->p_sm_sl; + subnet_timeout = ibt_portinfop->p_subnet_timeout; + + ibt_free_portinfo(ibt_portinfop, size); + + + mutex_enter(&saa_portp->saa_pt_mutex); + + saa_portp->saa_pt_redirect_active = B_FALSE; + + /* update the address info in ibmf_saa */ + saa_portp->saa_pt_ibmf_addr_info.ia_local_lid = base_lid; + saa_portp->saa_pt_ibmf_addr_info.ia_remote_lid = master_sm_lid; + saa_portp->saa_pt_ibmf_addr_info.ia_service_level = sm_sl; + saa_portp->saa_pt_ibmf_addr_info.ia_remote_qno = 1; + saa_portp->saa_pt_ibmf_addr_info.ia_p_key = IB_PKEY_DEFAULT_LIMITED; + saa_portp->saa_pt_ibmf_addr_info.ia_q_key = IB_GSI_QKEY; + saa_portp->saa_pt_ibmf_msg_flags = 0; + + /* new tid needed */ + msgp->im_msgbufs_send.im_bufs_mad_hdr->TransactionID = + h2b64(saa_portp->saa_pt_current_tid++); + + bcopy(&saa_portp->saa_pt_ibmf_retrans, &ibmf_retrans, + sizeof (ibmf_retrans_t)); + + /* update the subnet timeout since this may be a new sm/sa */ + saa_portp->saa_pt_timeout = subnet_timeout; + + /* place upper bound on subnet timeout in case of faulty SM */ + if (saa_portp->saa_pt_timeout > IBMF_SAA_MAX_SUBNET_TIMEOUT) + saa_portp->saa_pt_timeout = IBMF_SAA_MAX_SUBNET_TIMEOUT; + + /* increment the reference count to account for the cpi call */ + saa_portp->saa_pt_reference_count++; + + mutex_exit(&saa_portp->saa_pt_mutex); + + /* update the address info for this particular message */ + bcopy(&saa_portp->saa_pt_ibmf_addr_info, &msgp->im_local_addr, + sizeof (ibmf_addr_info_t)); + msgp->im_msg_flags = 0; /* No GRH */ + + /* get the classportinfo again since this may be a new sm/sa */ + ibmf_saa_impl_get_classportinfo(saa_portp); + + ibmf_status = ibmf_msg_transport(saa_portp->saa_pt_ibmf_handle, + saa_portp->saa_pt_qp_handle, msgp, &ibmf_retrans, + 
ibmf_callback, ibmf_callback_args, transport_flags); + + if (ibmf_status != IBMF_SUCCESS) { + + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L2, + ibmf_saa_impl_revert_to_qp1, IBMF_TNF_TRACE, "", + "ibmf_saa_impl_revert_to_qp1: %s, ibmf_status = " + "%d\n", tnf_string, msg, + "ibmf_msg_transport() failed", + tnf_int, ibmf_status, ibmf_status); + } + +bail: + + IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3, + ibmf_saa_impl_revert_to_qp1_end, + IBMF_TNF_TRACE, "", "ibmf_saa_impl_revert_to_qp1() exiting" + " ibmf_status = %d\n", tnf_int, result, ibmf_status); + + return (ibmf_status); +} + +/* * ibmf_saa_impl_async_event_cb: * ibmf event callback, argument to ibmf_register * ibmf_handle is unused @@ -2371,6 +2550,7 @@ ibmf_saa_async_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args) int ibmf_status; boolean_t ignore_data; ibmf_retrans_t ibmf_retrans; + boolean_t sa_is_redirected = B_FALSE; IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_saa_async_cb_start, IBMF_TNF_TRACE, "", "ibmf_saa_async_cb() enter\n"); @@ -2380,7 +2560,30 @@ ibmf_saa_async_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args) client_data = trans_info->si_trans_client_data; saa_portp = trans_info->si_trans_port; - if (msgp->im_msg_status == IBMF_TRANS_TIMEOUT) { + mutex_enter(&saa_portp->saa_pt_mutex); + sa_is_redirected = saa_portp->saa_pt_redirect_active; + mutex_exit(&saa_portp->saa_pt_mutex); + + if ((msgp->im_msg_status == IBMF_TRANS_TIMEOUT) && + (sa_is_redirected == B_TRUE)) { + + /* + * We should retry the request using SM_LID and QP1 if we + * have been using redirect up until now + */ + ibmf_status = ibmf_saa_impl_revert_to_qp1( + saa_portp, msgp, ibmf_saa_async_cb, args, + trans_info->si_trans_transport_flags); + + /* + * If revert_to_qp1 returns success msg was resent. + * Otherwise msg could not be resent. 
Continue normally + */ + if (ibmf_status == IBMF_SUCCESS) + goto bail; + + } else if (msgp->im_msg_status == IBMF_TRANS_TIMEOUT) { + ibmf_status = ibmf_saa_impl_new_smlid_retry(saa_portp, msgp, ibmf_saa_async_cb, args, @@ -2409,7 +2612,11 @@ ibmf_saa_async_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args) goto bail; } - /* if SA returned success but mad status is busy, retry a few times */ + /* + * If SA returned success but mad status is busy, retry a few times. + * If SA returned success but mad status says redirect is required, + * update the address info and retry the request to the new SA address + */ if (msgp->im_msg_status == IBMF_SUCCESS) { ASSERT(msgp->im_msgbufs_recv.im_bufs_mad_hdr != NULL); @@ -2445,6 +2652,34 @@ ibmf_saa_async_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, void *args) */ if (ibmf_status == IBMF_SUCCESS) goto bail; + } else if (b2h16(msgp->im_msgbufs_recv.im_bufs_mad_hdr->Status) + == MAD_STATUS_REDIRECT_REQUIRED) { + + IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L2, + ibmf_saa_async_cb, IBMF_TNF_TRACE, "", + "ibmf_saa_async_cb: " + "async response returned redirect status\n"); + + /* update address info and copy it into msgp */ + ibmf_saa_impl_update_sa_address_info(saa_portp, msgp); + + /* retry with new address info */ + bcopy(&saa_portp->saa_pt_ibmf_retrans, &ibmf_retrans, + sizeof (ibmf_retrans_t)); + + ibmf_status = ibmf_msg_transport( + saa_portp->saa_pt_ibmf_handle, + saa_portp->saa_pt_qp_handle, msgp, &ibmf_retrans, + ibmf_saa_async_cb, args, + trans_info->si_trans_transport_flags); + + /* + * if retry is successful, quit here since async_cb will + * get called again; otherwise, let this function call + * handle the cleanup + */ + if (ibmf_status == IBMF_SUCCESS) + goto bail; } } @@ -3221,6 +3456,7 @@ ibmf_saa_impl_set_transaction_params(saa_port_t *saa_portp, */ saa_portp->saa_pt_sa_cap_mask = 0xFFFF; + saa_portp->saa_pt_ibmf_msg_flags = 0; saa_portp->saa_pt_ibmf_addr_info.ia_remote_qno = 1; 
saa_portp->saa_pt_ibmf_addr_info.ia_p_key = IB_PKEY_DEFAULT_LIMITED; @@ -3258,6 +3494,154 @@ ibmf_saa_impl_set_transaction_params(saa_port_t *saa_portp, "ibmf_saa_impl_set_transaction_params() exit\n"); } + +/* + * ibmf_saa_impl_update_sa_address_info + */ +static void +ibmf_saa_impl_update_sa_address_info(saa_port_t *saa_portp, ibmf_msg_t *msgp) +{ + void *result; + ib_sa_hdr_t *sa_hdr; + int rv; + size_t length; + uint16_t attr_id; + ib_mad_classportinfo_t *cpi; + ibmf_global_addr_info_t *gaddrp = &saa_portp->saa_pt_ibmf_global_addr; + ibt_hca_portinfo_t *ibt_pinfo; + uint_t nports, size; + ibt_status_t ibt_status; + + IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, + ibmf_saa_impl_update_sa_address_info, + IBMF_TNF_TRACE, "", + "ibmf_saa_impl_update_sa_address_info() enter\n"); + + /* + * decode the response of msgp as a classportinfo attribute + */ + rv = ibmf_saa_utils_unpack_sa_hdr(msgp->im_msgbufs_recv.im_bufs_cl_hdr, + msgp->im_msgbufs_recv.im_bufs_cl_hdr_len, &sa_hdr, KM_NOSLEEP); + if (rv != IBMF_SUCCESS) { + + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1, + ibmf_saa_impl_update_sa_address_err, + IBMF_TNF_TRACE, "", "ibmf_saa_impl_update_sa_address_info: " + "%s, ibmf_status = %d\n", tnf_string, msg, + "Could not unpack sa hdr", tnf_int, ibmf_status, rv); + + return; + } + + attr_id = b2h16(msgp->im_msgbufs_recv.im_bufs_mad_hdr->AttributeID); + if (attr_id != MAD_ATTR_ID_CLASSPORTINFO) { + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1, + ibmf_saa_impl_update_sa_address_info_err, + IBMF_TNF_TRACE, "", "ibmf_saa_impl_update_sa_address_info: " + "%s, attrID = %x\n", tnf_string, msg, + "Wrong attribute ID", tnf_int, ibmf_status, attr_id); + + kmem_free(sa_hdr, sizeof (ib_sa_hdr_t)); + return; + } + rv = ibmf_saa_utils_unpack_payload( + msgp->im_msgbufs_recv.im_bufs_cl_data, + msgp->im_msgbufs_recv.im_bufs_cl_data_len, attr_id, &result, + &length, sa_hdr->AttributeOffset, B_TRUE, KM_NOSLEEP); + if (rv != IBMF_SUCCESS) { + + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1, 
ibmf_saa_impl_update_sa_address_err, + IBMF_TNF_TRACE, "", "ibmf_saa_impl_update_sa_address_info: " + "%s, ibmf_status = %d\n", tnf_string, msg, + "Could not unpack payload", tnf_int, ibmf_status, rv); + + kmem_free(sa_hdr, sizeof (ib_sa_hdr_t)); + return; + } + + kmem_free(sa_hdr, sizeof (ib_sa_hdr_t)); + + /* + * Use the classportinfo contents to update the SA address info + */ + cpi = (ib_mad_classportinfo_t *)result; + mutex_enter(&saa_portp->saa_pt_mutex); + saa_portp->saa_pt_ibmf_addr_info.ia_remote_lid = cpi->RedirectLID; + saa_portp->saa_pt_ibmf_addr_info.ia_remote_qno = cpi->RedirectQP; + saa_portp->saa_pt_ibmf_addr_info.ia_p_key = cpi->RedirectP_Key; + saa_portp->saa_pt_ibmf_addr_info.ia_q_key = cpi->RedirectQ_Key; + saa_portp->saa_pt_ibmf_addr_info.ia_service_level = cpi->RedirectSL; + + saa_portp->saa_pt_redirect_active = B_TRUE; + + if ((cpi->RedirectGID_hi != 0) || (cpi->RedirectGID_lo != 0)) { + + mutex_exit(&saa_portp->saa_pt_mutex); + ibt_status = ibt_query_hca_ports_byguid( + saa_portp->saa_pt_node_guid, saa_portp->saa_pt_port_num, + &ibt_pinfo, &nports, &size); + if (ibt_status != IBT_SUCCESS) { + + IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1, + ibmf_saa_impl_update_sa_address_err, IBMF_TNF_TRACE, + "", "ibmf_saa_impl_update_sa_address_info: " + "%s, ibt_status = %d\n", tnf_string, msg, + "Could not query hca port", + tnf_int, ibt_status, ibt_status); + + kmem_free(result, length); + return; + } + + mutex_enter(&saa_portp->saa_pt_mutex); + /* + * Fill in global address info parameters + * + * NOTE: The HopLimit value is not specified through the + * contents of ClassPortInfo. It may be possible to find + * out the proper value to use even for SA being redirected + * to another subnet. 
But we do only support redirect within + * our local subnet + */ + gaddrp->ig_sender_gid.gid_prefix = + ibt_pinfo->p_sgid_tbl[0].gid_prefix; + gaddrp->ig_sender_gid.gid_guid = saa_portp->saa_pt_port_guid; + gaddrp->ig_recver_gid.gid_prefix = cpi->RedirectGID_hi; + gaddrp->ig_recver_gid.gid_guid = cpi->RedirectGID_lo; + gaddrp->ig_flow_label = cpi->RedirectFL; + gaddrp->ig_tclass = cpi->RedirectTC; + gaddrp->ig_hop_limit = 0; + + saa_portp->saa_pt_ibmf_msg_flags = + IBMF_MSG_FLAGS_GLOBAL_ADDRESS; + + mutex_exit(&saa_portp->saa_pt_mutex); + ibt_free_portinfo(ibt_pinfo, size); + } else { + saa_portp->saa_pt_ibmf_msg_flags = 0; + mutex_exit(&saa_portp->saa_pt_mutex); + } + kmem_free(result, length); + + /* + * Update the address info of msgp with the new address parameters + */ + mutex_enter(&saa_portp->saa_pt_mutex); + bcopy(&saa_portp->saa_pt_ibmf_addr_info, &msgp->im_local_addr, + sizeof (ibmf_addr_info_t)); + if (saa_portp->saa_pt_ibmf_msg_flags & IBMF_MSG_FLAGS_GLOBAL_ADDRESS) { + + msgp->im_msg_flags = IBMF_MSG_FLAGS_GLOBAL_ADDRESS; + + bcopy(&saa_portp->saa_pt_ibmf_global_addr, + &msgp->im_global_addr, sizeof (ibmf_global_addr_info_t)); + } else { + msgp->im_msg_flags = 0; + } + mutex_exit(&saa_portp->saa_pt_mutex); +} + /* * ibmf_saa_impl_ibmf_unreg: */ diff --git a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_utils.c b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_utils.c index fc1820d5fb..4c76719724 100644 --- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_utils.c +++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_utils.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -78,6 +77,7 @@ extern int ibmf_trace_level; /* These functions have only been tested on a big-endian system */ +static void ibmf_saa_classportinfo_parse_buffer(uchar_t *buffer, void *record); static void ibmf_saa_notice_parse_buffer(uchar_t *buffer, void *record); static void ibmf_saa_informinfo_parse_buffer(uchar_t *buffer, void *record); static void ibmf_saa_node_record_parse_buffer(uchar_t *buffer, void *record); @@ -112,6 +112,7 @@ static void ibmf_saa_multipath_record_parse_buffer(uchar_t *buffer, static void ibmf_saa_service_assn_record_parse_buffer(uchar_t *buffer, void *record); +static void ibmf_saa_classportinfo_to_buf(void *record, uchar_t *buffer); static void ibmf_saa_notice_to_buf(void *record, uchar_t *buffer); static void ibmf_saa_informinfo_to_buf(void *record, uchar_t *buffer); static void ibmf_saa_node_record_to_buf(void *record, uchar_t *buffer); @@ -155,9 +156,17 @@ static void ibmf_saa_service_assn_record_to_buf(void *record, uchar_t *buffer); */ static void -ibmf_saa_notice_parse_buffer(uchar_t *buffer, void *record) +ibmf_saa_classportinfo_parse_buffer(uchar_t *buffer, void *record) { + ib_mad_classportinfo_t *cpi = (ib_mad_classportinfo_t *)record; + + ibmf_utils_unpack_data("2csl2Ll2s2l2Ll2s2l", buffer, + IB_MAD_CLASSPORTINFO_SIZE, cpi, sizeof (ib_mad_classportinfo_t)); +} +static void +ibmf_saa_notice_parse_buffer(uchar_t *buffer, void *record) +{ ib_mad_notice_t *notice = (ib_mad_notice_t *)record; ibmf_utils_unpack_data("4c3s54c2L", buffer, IB_MAD_NOTICE_SIZE, @@ -553,9 +562,18 @@ ibmf_saa_sysimg_guid_chg_trap_parse_buffer(uchar_t *buffer, */ static void -ibmf_saa_notice_to_buf(void *record, uchar_t *buffer) 
+ibmf_saa_classportinfo_to_buf(void *record, uchar_t *buffer) { + ib_mad_classportinfo_t *cpi = (ib_mad_classportinfo_t *)record; + ibmf_utils_pack_data("2csl2Ll2s2l2Ll2s2l", + cpi, sizeof (ib_mad_classportinfo_t), + buffer, IB_MAD_CLASSPORTINFO_SIZE); +} + +static void +ibmf_saa_notice_to_buf(void *record, uchar_t *buffer) +{ ib_mad_notice_t *notice = (ib_mad_notice_t *)record; ibmf_utils_pack_data("4c3s54c2L", notice, sizeof (ib_mad_notice_t), @@ -968,6 +986,11 @@ ibmf_saa_utils_pack_payload(uchar_t *structs_payload, size_t ASSERT(attr_id != SA_TRACERECORD_ATTRID); switch (attr_id) { + case SA_CLASSPORTINFO_ATTRID: + struct_size = sizeof (ib_mad_classportinfo_t); + buf_size = IB_MAD_CLASSPORTINFO_SIZE; + pack_data_fn = ibmf_saa_classportinfo_to_buf; + break; case SA_NOTICE_ATTRID: struct_size = sizeof (ib_mad_notice_t); buf_size = IB_MAD_NOTICE_SIZE; @@ -1151,6 +1174,11 @@ ibmf_saa_utils_unpack_payload(uchar_t *buf_payload, size_t buf_payload_length, } switch (attr_id) { + case SA_CLASSPORTINFO_ATTRID: + struct_size = sizeof (ib_mad_classportinfo_t); + buf_size = IB_MAD_CLASSPORTINFO_SIZE; + unpack_data_fn = ibmf_saa_classportinfo_parse_buffer; + break; case SA_NOTICE_ATTRID: struct_size = sizeof (ib_mad_notice_t); buf_size = IB_MAD_NOTICE_SIZE; diff --git a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h index 1e49919cf7..0ac8454fcb 100644 --- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h +++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -455,12 +454,14 @@ typedef struct ibcm_state_data_s { /* Return data pointers in various cm api calls */ ibt_rc_returns_t *open_return_data; ibt_ap_returns_t *ap_return_data; - uint8_t *close_priv_data; - ibt_priv_data_len_t *close_priv_data_len; + uint8_t *close_ret_priv_data; + ibt_priv_data_len_t *close_ret_priv_data_len; uint8_t *close_ret_status; /* for queuing of open_rc_channel requests */ struct ibcm_state_data_s *open_link; + /* for queuing of non-blocking close_rc_channel requests */ + struct ibcm_state_data_s *close_link; struct ibcm_conn_trace_s *conn_trace; @@ -481,8 +482,8 @@ _NOTE(READ_ONLY_DATA(ibcm_state_data_s::{mode channel svcid hcap conn_trace})) _NOTE(SCHEME_PROTECTS_DATA("Serailized access by block_client_cv", - ibcm_state_data_s::{open_return_data ap_return_data close_priv_data - close_priv_data_len close_ret_status})) + ibcm_state_data_s::{open_return_data ap_return_data close_ret_priv_data + close_ret_priv_data_len close_ret_status})) _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_state_data_s::{timedout_state cm_handler mra_msg abort_flag})) @@ -549,12 +550,15 @@ _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_state_data_s::{timedout_state extern void ibcm_open_enqueue(ibcm_state_data_t *statep); extern void ibcm_open_done(ibcm_state_data_t *statep); +extern void ibcm_close_enqueue(ibcm_state_data_t *statep); extern void ibcm_close_done(ibcm_state_data_t *statep, int send_done); extern void ibcm_close_enter(void); extern void ibcm_close_exit(void); extern void ibcm_lapr_enter(void); extern void ibcm_lapr_exit(void); extern void ibcm_check_for_opens(void); +extern void ibcm_check_for_async_close(void); +extern void 
ibcm_close_start(ibcm_state_data_t *statep); extern void ibcm_run_tlist_thread(void); /* @@ -954,6 +958,8 @@ typedef struct ibcm_hca_info_s { ibcm_hca_state_t hca_state; /* Is HCA attached? */ ib_guid_t hca_guid; /* HCA's guid value */ ibt_hca_flags_t hca_caps; /* HCA capabilities */ + uint32_t hca_vendor_id:24; + uint16_t hca_device_id; ib_time_t hca_ack_delay; /* HCA ack delay */ uint8_t hca_max_rdma_in_qp; /* Max RDMA in Reads */ uint8_t hca_max_rdma_out_qp; /* Max RDMA out Reads */ @@ -986,6 +992,9 @@ _NOTE(READ_ONLY_DATA(ibcm_hca_info_s::{hca_guid hca_caps hca_ack_delay hca_max_rdma_in_qp hca_max_rdma_out_qp hca_comid_arena hca_reqid_arena hca_passive_tree hca_active_tree hca_passive_comid_tree hca_num_ports })) +/* Are we on Tavor HCA */ +#define IBCM_IS_HCA_TAVOR(hcap) \ + (((hcap)->hca_device_id == 0x5a44) && ((hcap)->hca_vendor_id == 0x15b3)) /* * called to ensure that HCA is in "attached" state and is willing to diff --git a/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h b/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h index f3b9bd8e4d..947c90453a 100644 --- a/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h +++ b/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -89,6 +88,9 @@ typedef struct saa_port_s { uint16_t saa_pt_sa_cap_mask; /* from classportinfo */ ibmf_addr_info_t saa_pt_ibmf_addr_info; + ibmf_global_addr_info_t saa_pt_ibmf_global_addr; + uint32_t saa_pt_ibmf_msg_flags; + boolean_t saa_pt_redirect_active; /* SA has redirected */ ibmf_retrans_t saa_pt_ibmf_retrans; uint64_t saa_pt_current_tid; @@ -119,7 +121,10 @@ _NOTE(MUTEX_PROTECTS_DATA(saa_port_t::saa_pt_mutex, saa_port_t::saa_pt_current_tid saa_port_t::saa_pt_num_outstanding_trans saa_port_t::saa_pt_timeout - saa_port_t::saa_pt_ibmf_addr_info)) + saa_port_t::saa_pt_ibmf_addr_info + saa_port_t::saa_pt_ibmf_global_addr + saa_port_t::saa_pt_ibmf_msg_flags + saa_port_t::saa_pt_redirect_active)) _NOTE(MUTEX_PROTECTS_DATA(saa_port_t::saa_pt_kstat_mutex, saa_port_t::saa_pt_kstatp)) |