diff options
author | Ramaswamy Tummala <Ramaswamy.Tummala@Sun.COM> | 2008-11-11 13:14:57 -0800 |
---|---|---|
committer | Ramaswamy Tummala <Ramaswamy.Tummala@Sun.COM> | 2008-11-11 13:14:57 -0800 |
commit | 00a3eaf3896a33935e11fd5c5fb5c1714225c067 (patch) | |
tree | 3ea400ad9807f014d695cc03c10ee70584a81fcb | |
parent | 6f02aa444eb77edda1b97ff8a1215c417932a62e (diff) | |
download | illumos-joyent-00a3eaf3896a33935e11fd5c5fb5c1714225c067.tar.gz |
6566278 Panic when ibd is configured after HCA is unconfigured and configured
6727497 IB nexus should create one pathinfo node per HCA per IB nexus child
6726179 ls -l /devices/ib shows zero devices nodes after doing rem_drv and add_drv of HCA driver
6368026 IOC enumeration issues in multi HCA system
6745259 Calling ddi_remove_minor_node in async thread causes deadlock
6500304 RDS: Support HCA DR
6739584 The 'cfgadm -x list_clients' command has a display problem
6739581 The output of 'cfgadm -x unconfig_clients' command doesn't display the HCA ap_id correctly
6759972 ibmf_unregister() returned busy during HCA unconfigure operation
6754808 IB Stack should recover from HCA unconfigure failures
6725241 ibdm_handle_hca_detach() should update ibdm.ibdm_hca_list_tail pointer
6751608 panic when configuring an IB HCA child
6607950 raise IB port settling time
21 files changed, 1465 insertions, 642 deletions
diff --git a/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c b/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c index fe8284cd14..164eb8b5ee 100644 --- a/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c +++ b/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "cfga_ib.h" /* @@ -864,8 +862,6 @@ cfga_change_state(cfga_cmd_t state_change_cmd, const char *ap_id, char **errstring, cfga_flags_t flags) { int ret; - int len; - char *msg; char *devpath; nvlist_t *nvl = NULL; boolean_t static_ap_id = B_TRUE; @@ -923,19 +919,10 @@ cfga_change_state(cfga_cmd_t state_change_cmd, const char *ap_id, rv = CFGA_IB_OK; /* Other status don't matter */ - len = strlen(IB_CONFIRM0) + strlen(IB_CONFIRM1) + - strlen("Configure") + strlen(ap_id); - if ((msg = (char *)calloc(len + 3, 1)) != NULL) { - (void) snprintf(msg, len + 3, "Configure %s%s\n%s", - IB_CONFIRM0, ap_id, IB_CONFIRM1); - } - - if (!ib_confirm(confp, msg)) { - free(msg); + if (!ib_confirm(confp, IB_CONFIRM1)) { ib_cleanup_after_devctl_cmd(hdl, nvl); return (CFGA_NACK); } - free(msg); if (devctl_ap_configure(hdl, nvl) != 0) { DPRINTF("cfga_change_state: devctl_ap_configure " @@ -986,19 +973,11 @@ cfga_change_state(cfga_cmd_t state_change_cmd, const char *ap_id, } rv = CFGA_IB_OK; /* Other statuses don't matter */ - len = strlen(IB_CONFIRM0) + strlen(IB_CONFIRM1) + - strlen("Unconfigure") + strlen(ap_id); - if ((msg = (char *)calloc(len + 3, 1)) != NULL) { - (void) snprintf(msg, len + 3, "Unconfigure %s%s\n%s", - IB_CONFIRM0, ap_id, IB_CONFIRM1); - } - if (!ib_confirm(confp, msg)) { - free(msg); + if (!ib_confirm(confp, IB_CONFIRM1)) { ib_cleanup_after_devctl_cmd(hdl, nvl); return (CFGA_NACK); } - free(msg); devpath = ib_get_devicepath(ap_id); if (devpath == NULL) { @@ -1127,7 +1106,7 @@ cfga_private_func(const char *func, const char *ap_id, const char *options, ap_id, errno)); } - if ((msg = (char *)calloc(80, 1)) == NULL) { + if 
((msg = (char *)calloc(256, 1)) == NULL) { DPRINTF("cfga_private_func: malloc for msg failed. " "errno: %d\n", errno); return (ib_err_msg(errstring, CFGA_IB_ALLOC_FAIL, @@ -1152,7 +1131,7 @@ cfga_private_func(const char *func, const char *ap_id, const char *options, errno)); } - (void) snprintf(msg, 80, "Ap_Id\t\t\t IB Client\t\t " + (void) snprintf(msg, 256, "Ap_Id\t\t\t IB Client\t\t " "Alternate HCA\n"); cfga_msg(msgp, msg); @@ -1173,7 +1152,7 @@ cfga_private_func(const char *func, const char *ap_id, const char *options, /* check at the end; print message per client found */ if (count == 3) { count = 0; - (void) snprintf(msg, 80, "%-31s%-26s%s\n", + (void) snprintf(msg, 256, "%-30s %-25s %s\n", clnt_apid, clnt_name, alt_hca); cfga_msg(msgp, msg); } @@ -1201,19 +1180,12 @@ cfga_private_func(const char *func, const char *ap_id, const char *options, ap_id, errno)); } - /* Check w/ user if it is ok to do this operation */ - len = strlen(IB_CONFIRM2) + strlen(IB_CONFIRM3) + strlen(ap_id); - if ((msg = (char *)calloc(len + 3, 1)) != NULL) { - (void) snprintf(msg, len + 3, "%s %s\n%s", - IB_CONFIRM2, ap_id, IB_CONFIRM3); - } - - /* If the user fails to confirm, bailout */ - if (!ib_confirm(confp, msg)) { - free(msg); + /* + * Check w/ user if it is ok to do this operation + * If the user fails to confirm, bailout + */ + if (!ib_confirm(confp, IB_CONFIRM3)) return (CFGA_NACK); - } - free(msg); /* Get device-paths of all the IOC/Port/Pseudo devices */ rv = ib_do_control_ioctl((char *)ap_id, IBNEX_UNCFG_CLNTS_SZ, diff --git a/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.h b/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.h index a4e9c04fee..1c07e5c013 100644 --- a/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.h +++ b/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _CFGA_IB_H #define _CFGA_IB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -200,10 +197,8 @@ typedef enum { /* config */ /* for confirm operation */ -#define IB_CONFIRM0 "the device: " #define IB_CONFIRM1 \ "This operation will suspend activity on the IB device\nContinue" -#define IB_CONFIRM2 "Unconfigure Clients of HCA" #define IB_CONFIRM3 \ "This operation will unconfigure IB clients of this HCA\nContinue" #define IB_CONFIRM4 \ diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c index 41d0b44644..8a501acba5 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -72,8 +72,6 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/ib/clients/rds/rdsib_cm.h> #include <sys/ib/clients/rds/rdsib_ib.h> #include <sys/ib/clients/rds/rdsib_buf.h> @@ -237,8 +235,15 @@ rds_init_recv_caches(rds_state_t *statep) mem_attr.mr_as = NULL; mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; + rw_enter(&statep->rds_hca_lock, RW_WRITER); + hcap = statep->rds_hcalistp; while (hcap != NULL) { + if (hcap->hca_state != RDS_HCA_STATE_OPEN) { + hcap = hcap->hca_nextp; + continue; + } + ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr, &hcap->hca_mrhdl, &mem_desc); if (ret != IBT_SUCCESS) { @@ -260,15 +265,18 @@ rds_init_recv_caches(rds_state_t *statep) } kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); kmem_free(memp, memsize); + rw_exit(&statep->rds_hca_lock); mutex_exit(&rds_dpool.pool_lock); return (-1); } + hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED; hcap->hca_lkey = mem_desc.md_lkey; hcap->hca_rkey = mem_desc.md_rkey; hcap = hcap->hca_nextp; } + rw_exit(&statep->rds_hca_lock); /* Initialize data pool */ rds_dpool.pool_memp = memp; @@ -316,6 +324,8 @@ rds_init_recv_caches(rds_state_t *statep) return (0); } +rds_hca_t *rds_lkup_hca(ib_guid_t hca_guid); + void rds_free_send_pool(rds_ep_t *ep) { @@ -334,7 +344,7 @@ rds_free_send_pool(rds_ep_t *ep) } /* get the hcap for the HCA hosting this channel */ - hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); + hcap = rds_lkup_hca(ep->ep_hca_guid); if (hcap == NULL) { RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found", ep->ep_hca_guid); @@ -858,6 +868,7 @@ rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) { rds_buf_t *ackbp; + rds_buf_t *prev_ackbp; /* * If the last one is acknowledged then everything @@ -865,6 +876,7 @@ rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) */ bp = spool->pool_tailp; ackbp = *(rds_buf_t **)ep->ep_ack_addr; + prev_ackbp = ackbp; RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " "Checking for acknowledgements", ep); while (bp 
!= ackbp) { @@ -877,6 +889,13 @@ rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) bp = spool->pool_tailp; ackbp = *(rds_buf_t **)ep->ep_ack_addr; + if (ackbp == prev_ackbp) { + RDS_DPRINTF2("rds_is_sendq_empty", + "There has been no progress," + "give up and proceed"); + break; + } + prev_ackbp = ackbp; } } } else if (spool->pool_nbusy != 0) { diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c index 6d0561d729..5a2d3fa4c9 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c @@ -116,14 +116,6 @@ rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); - /* validate service id */ - if (reqp->req_service_id == RDS_SERVICE_ID) { - RDS_DPRINTF2(LABEL, "Version Mismatch: Remote system " - "(GUID: 0x%llx) is running an older version of RDS", - rgid.gid_guid); - return (IBT_CM_REJECT); - } - /* * CM private data brings IP information * Private data received is a stream of bytes and may not be properly @@ -536,6 +528,7 @@ rds_handle_cm_conn_closed(ibt_cm_event_t *evp) switch (sp->session_state) { case RDS_SESSION_STATE_CONNECTED: + case RDS_SESSION_STATE_HCA_CLOSING: sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); @@ -737,19 +730,6 @@ rds_register_service(ibt_clnt_hdl_t rds_ibhdl) srvdesc.sd_flags = IBT_SRV_NO_FLAGS; /* - * Register the old service id for backward compatibility - * REQs received on this service id would be rejected - */ - ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, - 1, &rdsib_statep->rds_old_srvhdl, NULL); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2(LABEL, - "RDS Service (0x%llx) Registration Failed: %d", - RDS_SERVICE_ID, ret); - return (NULL); - } - - /* * This is the new service 
id as per: * Annex A11: RDMA IP CM Service */ @@ -779,16 +759,33 @@ rds_bind_service(rds_state_t *statep) RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); + rw_enter(&statep->rds_hca_lock, RW_READER); + hcap = statep->rds_hcalistp; while (hcap != NULL) { + + /* skip the HCAs that are not fully online */ + if ((hcap->hca_state != RDS_HCA_STATE_OPEN) && + (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED)) { + RDS_DPRINTF2("rds_bind_service", + "Skipping HCA: 0x%llx, state: %d", + hcap->hca_guid, hcap->hca_state); + hcap = hcap->hca_nextp; + continue; + } + + /* currently, we have space for only 4 bindhdls */ + ASSERT(hcap->hca_nports < 4); for (jx = 0; jx < hcap->hca_nports; jx++) { nports++; if (hcap->hca_pinfop[jx].p_linkstate != IBT_PORT_ACTIVE) { /* * service bind will be called in the async - * handler when the port comes up + * handler when the port comes up. Clear any + * stale bind handle. */ + hcap->hca_bindhdl[jx] = NULL; continue; } @@ -800,7 +797,7 @@ rds_bind_service(rds_state_t *statep) /* pass statep as cm_private */ ret = ibt_bind_service(statep->rds_srvhdl, gid, - NULL, statep, NULL); + NULL, statep, &hcap->hca_bindhdl[jx]); if (ret != IBT_SUCCESS) { RDS_DPRINTF2(LABEL, "Bind service for " "HCA: 0x%llx Port: %d gid %llx:%llx " @@ -811,21 +808,12 @@ rds_bind_service(rds_state_t *statep) } nbinds++; - - /* bind the old service, ignore if it fails */ - ret = ibt_bind_service(statep->rds_old_srvhdl, gid, - NULL, statep, NULL); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2(LABEL, "Bind service for " - "HCA: 0x%llx Port: %d gid %llx:%llx " - "failed: %d", hcap->hca_guid, - hcap->hca_pinfop[jx].p_port_num, - gid.gid_prefix, gid.gid_guid, ret); - } } hcap = hcap->hca_nextp; } + rw_exit(&statep->rds_hca_lock); + RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", nbinds, nports); diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c index bb85b74019..ff67d283e8 100644 --- 
a/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c @@ -479,6 +479,7 @@ rds_session_init(rds_session_t *sp) } hca_guid = hcap->hca_guid; + sp->session_hca_guid = hca_guid; /* allocate and initialize the ctrl channel */ ret = rds_ep_init(&sp->session_ctrlep, hca_guid); @@ -555,6 +556,8 @@ rds_session_reinit(rds_session_t *sp, ib_gid_t lgid) RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs"); + sp->session_hca_guid = hcap->hca_guid; + /* re-initialize the control channel */ ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid); if (ret != 0) { @@ -807,13 +810,14 @@ rds_destroy_session(rds_session_t *sp) } /* This is called on the taskq thread */ -static void +void rds_failover_session(void *arg) { rds_session_t *sp = (rds_session_t *)arg; ib_gid_t lgid, rgid; ipaddr_t myip, remip; int ret, cnt = 0; + uint8_t sp_state; RDS_DPRINTF2("rds_failover_session", "Enter: (%p)", sp); @@ -838,13 +842,16 @@ rds_failover_session(void *arg) return; } sp->session_failover = 1; + sp_state = sp->session_state; rw_exit(&sp->session_lock); /* * The session is in ERROR state but close both channels * for a clean start. 
*/ - rds_session_close(sp, IBT_BLOCKING, 1); + if (sp_state == RDS_SESSION_STATE_ERROR) { + rds_session_close(sp, IBT_BLOCKING, 1); + } /* wait 1 sec before re-connecting */ delay(drv_usectohz(1000000)); @@ -1036,6 +1043,50 @@ rds_passive_session_fini(rds_session_t *sp) RDS_DPRINTF2("rds_passive_session_fini", "Return: SP (%p)", sp); } +void +rds_close_this_session(rds_session_t *sp, uint8_t wait) +{ + switch (sp->session_state) { + case RDS_SESSION_STATE_CONNECTED: + sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; + rw_exit(&sp->session_lock); + + rds_session_close(sp, IBT_BLOCKING, wait); + + rw_enter(&sp->session_lock, RW_WRITER); + sp->session_state = RDS_SESSION_STATE_CLOSED; + RDS_DPRINTF3("rds_close_sessions", + "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); + rds_session_fini(sp); + sp->session_state = RDS_SESSION_STATE_FINI; + sp->session_failover = 0; + RDS_DPRINTF3("rds_close_sessions", + "SP(%p) State RDS_SESSION_STATE_FINI", sp); + break; + + case RDS_SESSION_STATE_ERROR: + case RDS_SESSION_STATE_PASSIVE_CLOSING: + case RDS_SESSION_STATE_INIT: + sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; + rw_exit(&sp->session_lock); + + rds_session_close(sp, IBT_BLOCKING, wait); + + rw_enter(&sp->session_lock, RW_WRITER); + sp->session_state = RDS_SESSION_STATE_CLOSED; + RDS_DPRINTF3("rds_close_sessions", + "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); + /* FALLTHRU */ + case RDS_SESSION_STATE_CLOSED: + rds_session_fini(sp); + sp->session_state = RDS_SESSION_STATE_FINI; + sp->session_failover = 0; + RDS_DPRINTF3("rds_close_sessions", + "SP(%p) State RDS_SESSION_STATE_FINI", sp); + break; + } +} + /* * Can be called: * 1. 
on driver detach @@ -1066,47 +1117,7 @@ rds_close_sessions(void *arg) rw_enter(&sp->session_lock, RW_WRITER); RDS_DPRINTF2("rds_close_sessions", "SP(%p) State: %d", sp, sp->session_state); - - switch (sp->session_state) { - case RDS_SESSION_STATE_CONNECTED: - sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; - rw_exit(&sp->session_lock); - - rds_session_close(sp, IBT_BLOCKING, 1); - - rw_enter(&sp->session_lock, RW_WRITER); - sp->session_state = RDS_SESSION_STATE_CLOSED; - RDS_DPRINTF3("rds_close_sessions", - "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); - rds_session_fini(sp); - sp->session_state = RDS_SESSION_STATE_FINI; - sp->session_failover = 0; - RDS_DPRINTF3("rds_close_sessions", - "SP(%p) State RDS_SESSION_STATE_FINI", sp); - break; - - case RDS_SESSION_STATE_ERROR: - case RDS_SESSION_STATE_PASSIVE_CLOSING: - case RDS_SESSION_STATE_INIT: - sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; - rw_exit(&sp->session_lock); - - rds_session_close(sp, IBT_BLOCKING, 1); - - rw_enter(&sp->session_lock, RW_WRITER); - sp->session_state = RDS_SESSION_STATE_CLOSED; - RDS_DPRINTF3("rds_close_sessions", - "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); - /* FALLTHRU */ - case RDS_SESSION_STATE_CLOSED: - rds_session_fini(sp); - sp->session_state = RDS_SESSION_STATE_FINI; - sp->session_failover = 0; - RDS_DPRINTF3("rds_close_sessions", - "SP(%p) State RDS_SESSION_STATE_FINI", sp); - break; - } - + rds_close_this_session(sp, 2); rw_exit(&sp->session_lock); sp = sp->session_nextp; } @@ -1367,6 +1378,23 @@ rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip, } void +rds_handle_close_session_request(void *arg) +{ + rds_session_t *sp = (rds_session_t *)arg; + + RDS_DPRINTF2("rds_handle_close_session_request", + "Enter: Closing this Session (%p)", sp); + + rw_enter(&sp->session_lock, RW_WRITER); + RDS_DPRINTF2("rds_handle_close_session_request", + "SP(%p) State: %d", sp, sp->session_state); + rds_close_this_session(sp, 2); + rw_exit(&sp->session_lock); + 
+ RDS_DPRINTF2("rds_handle_close_session_request", "Return SP(%p)", sp); +} + +void rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt) { RDS_DPRINTF4("rds_handle_control_message", "Enter: SP(%p) code: %d " @@ -1389,6 +1417,12 @@ rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt) break; case RDS_CTRL_CODE_HEARTBEAT: break; + case RDS_CTRL_CODE_CLOSE_SESSION: + RDS_DPRINTF2("rds_handle_control_message", + "SP(%p) Remote Requested to close this session", sp); + (void) ddi_taskq_dispatch(rds_taskq, + rds_handle_close_session_request, (void *)sp, DDI_SLEEP); + break; default: RDS_DPRINTF2(LABEL, "ERROR: Invalid Control code: %d", cpkt->rcp_code); @@ -2140,7 +2174,7 @@ rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, return (ENOMEM); } } else { - RDS_DPRINTF2("rds_sendmsg", "SP(%p): Session is in %d state", + RDS_DPRINTF4("rds_sendmsg", "SP(%p): Session is in %d state", sp, sp->session_state); rw_exit(&sp->session_lock); return (ENOMEM); diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c index 9e84042cd0..380ad1af17 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c @@ -72,8 +72,6 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/ddi.h> #include <sys/sunddi.h> @@ -171,6 +169,143 @@ rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp) } } +/* Return hcap, given the hca guid */ +rds_hca_t * +rds_lkup_hca(ib_guid_t hca_guid) +{ + rds_hca_t *hcap; + + RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p " + "guid: %llx", rdsib_statep, hca_guid); + + rw_enter(&rdsib_statep->rds_hca_lock, RW_READER); + + hcap = rdsib_statep->rds_hcalistp; + while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { + hcap = hcap->hca_nextp; + } + + rw_exit(&rdsib_statep->rds_hca_lock); + + RDS_DPRINTF4("rds_lkup_hca", "return"); + + return (hcap); +} + + +static rds_hca_t * +rdsib_init_hca(ib_guid_t hca_guid) +{ + rds_hca_t *hcap; + boolean_t alloc = B_FALSE; + int ret; + + RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid); + + /* Do a HCA lookup */ + hcap = rds_lkup_hca(hca_guid); + + if (hcap != NULL && hcap->hca_hdl != NULL) { + /* + * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA + * that we have already opened. Just return NULL so that + * we'll not end up reinitializing the HCA again. 
+ */ + RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized"); + return (NULL); + } + + if (hcap == NULL) { + RDS_DPRINTF2("rdsib_init_hca", "New HCA is added"); + hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP); + alloc = B_TRUE; + } + + hcap->hca_guid = hca_guid; + ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid, + &hcap->hca_hdl); + if (ret != IBT_SUCCESS) { + if (ret == IBT_HCA_IN_USE) { + RDS_DPRINTF2("rdsib_init_hca", + "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE", + hca_guid); + } else { + RDS_DPRINTF2("rdsib_init_hca", + "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret); + } + if (alloc == B_TRUE) { + kmem_free(hcap, sizeof (rds_hca_t)); + } + return (NULL); + } + + ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_init_hca", + "Query HCA: 0x%llx failed: %d", hca_guid, ret); + ret = ibt_close_hca(hcap->hca_hdl); + ASSERT(ret == IBT_SUCCESS); + if (alloc == B_TRUE) { + kmem_free(hcap, sizeof (rds_hca_t)); + } else { + hcap->hca_hdl = NULL; + } + return (NULL); + } + + ret = ibt_query_hca_ports(hcap->hca_hdl, 0, + &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_init_hca", + "Query HCA 0x%llx ports failed: %d", hca_guid, + ret); + ret = ibt_close_hca(hcap->hca_hdl); + hcap->hca_hdl = NULL; + ASSERT(ret == IBT_SUCCESS); + if (alloc == B_TRUE) { + kmem_free(hcap, sizeof (rds_hca_t)); + } else { + hcap->hca_hdl = NULL; + } + return (NULL); + } + + /* Only one PD per HCA is allocated, so do it here */ + ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS, + &hcap->hca_pdhdl); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_init_hca", + "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret); + (void) ibt_free_portinfo(hcap->hca_pinfop, + hcap->hca_pinfo_sz); + ret = ibt_close_hca(hcap->hca_hdl); + ASSERT(ret == IBT_SUCCESS); + hcap->hca_hdl = NULL; + if (alloc == B_TRUE) { + kmem_free(hcap, sizeof (rds_hca_t)); + } else { + 
hcap->hca_hdl = NULL; + } + return (NULL); + } + + rdsib_validate_chan_sizes(&hcap->hca_attr); + + rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); + hcap->hca_state = RDS_HCA_STATE_OPEN; + if (alloc == B_TRUE) { + /* this is a new HCA, add it to the list */ + rdsib_statep->rds_nhcas++; + hcap->hca_nextp = rdsib_statep->rds_hcalistp; + rdsib_statep->rds_hcalistp = hcap; + } + rw_exit(&rdsib_statep->rds_hca_lock); + + RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid); + + return (hcap); +} + /* * Called from attach */ @@ -178,7 +313,7 @@ int rdsib_initialize_ib() { ib_guid_t *guidp; - rds_hca_t *hcap, *hcap1; + rds_hca_t *hcap; uint_t ix, hcaix, nhcas; int ret; @@ -216,65 +351,11 @@ rdsib_initialize_ib() * opened. * Initialize a HCA only if all the information is available. */ - hcap1 = NULL; for (ix = 0, hcaix = 0; ix < nhcas; ix++) { RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]); - hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP); - - ret = ibt_open_hca(rdsib_statep->rds_ibhdl, guidp[ix], - &hcap->hca_hdl); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2("rdsib_initialize_ib", - "ibt_open_hca: 0x%llx failed: %d", guidp[ix], ret); - kmem_free(hcap, sizeof (rds_hca_t)); - continue; - } - - hcap->hca_guid = guidp[ix]; - - ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2("rdsib_initialize_ib", - "Query HCA: 0x%llx failed: %d", guidp[ix], ret); - ret = ibt_close_hca(hcap->hca_hdl); - ASSERT(ret == IBT_SUCCESS); - kmem_free(hcap, sizeof (rds_hca_t)); - continue; - } - - ret = ibt_query_hca_ports(hcap->hca_hdl, 0, - &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2("rdsib_initialize_ib", - "Query HCA 0x%llx ports failed: %d", guidp[ix], - ret); - ret = ibt_close_hca(hcap->hca_hdl); - ASSERT(ret == IBT_SUCCESS); - kmem_free(hcap, sizeof (rds_hca_t)); - continue; - } - - /* Only one PD per HCA is allocated, so do it here */ - ret = 
ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS, - &hcap->hca_pdhdl); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2("rdsib_initialize_ib", - "ibt_alloc_pd 0x%llx failed: %d", guidp[ix], ret); - (void) ibt_free_portinfo(hcap->hca_pinfop, - hcap->hca_pinfo_sz); - ret = ibt_close_hca(hcap->hca_hdl); - ASSERT(ret == IBT_SUCCESS); - kmem_free(hcap, sizeof (rds_hca_t)); - continue; - } - - rdsib_validate_chan_sizes(&hcap->hca_attr); - - /* this HCA is fully initialized, go to the next one */ - hcaix++; - hcap->hca_nextp = hcap1; - hcap1 = hcap; + hcap = rdsib_init_hca(guidp[ix]); + if (hcap != NULL) hcaix++; } /* free the HCA list, we are done with it */ @@ -293,9 +374,6 @@ rdsib_initialize_ib() (nhcas - hcaix), nhcas); } - rdsib_statep->rds_hcalistp = hcap1; - rdsib_statep->rds_nhcas = hcaix; - RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep); return (0); @@ -317,6 +395,8 @@ rdsib_deinitialize_ib() /* Release all HCA resources */ rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); + RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d", + rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas); hcap = rdsib_statep->rds_hcalistp; rdsib_statep->rds_hcalistp = NULL; rdsib_statep->rds_nhcas = 0; @@ -325,13 +405,16 @@ rdsib_deinitialize_ib() while (hcap != NULL) { nextp = hcap->hca_nextp; - ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); - ASSERT(ret == IBT_SUCCESS); + if (hcap->hca_hdl != NULL) { + ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); + ASSERT(ret == IBT_SUCCESS); - (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz); + (void) ibt_free_portinfo(hcap->hca_pinfop, + hcap->hca_pinfo_sz); - ret = ibt_close_hca(hcap->hca_hdl); - ASSERT(ret == IBT_SUCCESS); + ret = ibt_close_hca(hcap->hca_hdl); + ASSERT(ret == IBT_SUCCESS); + } kmem_free(hcap, sizeof (rds_hca_t)); hcap = nextp; @@ -405,22 +488,6 @@ rdsib_close_ib() } else { rdsib_statep->rds_srvhdl = NULL; } - - ret = ibt_unbind_all_services(rdsib_statep->rds_old_srvhdl); - if (ret 
!= 0) { - RDS_DPRINTF2("rdsib_close_ib", - "ibt_unbind_all_services failed for old service" - ": %d\n", ret); - } - ret = ibt_deregister_service(rdsib_statep->rds_ibhdl, - rdsib_statep->rds_old_srvhdl); - if (ret != 0) { - RDS_DPRINTF2("rdsib_close_ib", - "ibt_deregister_service failed for old service:" - "%d\n", ret); - } else { - rdsib_statep->rds_old_srvhdl = NULL; - } } RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep); @@ -442,11 +509,24 @@ rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid) hcap = hcap->hca_nextp; } + /* + * don't let anyone use this HCA until the RECV memory + * is registered with this HCA + */ + if ((hcap != NULL) && + (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) { + ASSERT(hcap->hca_mrhdl != NULL); + rw_exit(&statep->rds_hca_lock); + return (hcap); + } + + RDS_DPRINTF2("rds_get_hcap", + "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid); rw_exit(&statep->rds_hca_lock); RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return"); - return (hcap); + return (NULL); } /* Return hcap, given a gid */ @@ -463,6 +543,19 @@ rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid) hcap = statep->rds_hcalistp; while (hcap != NULL) { + + /* + * don't let anyone use this HCA until the RECV memory + * is registered with this HCA + */ + if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) { + RDS_DPRINTF3("rds_gid_to_hcap", + "HCA (0x%p, 0x%llx) is not initialized", + hcap, gid.gid_guid); + hcap = hcap->hca_nextp; + continue; + } + for (ix = 0; ix < hcap->hca_nports; ix++) { if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix == gid.gid_prefix) && @@ -633,7 +726,7 @@ rds_post_recv_buf(void *arg) RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep); /* get the hcap for the HCA hosting this channel */ - hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); + hcap = rds_lkup_hca(ep->ep_hca_guid); if (hcap == NULL) { RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found", ep->ep_hca_guid); @@ -643,7 +736,8 @@ rds_post_recv_buf(void *arg) /* Make 
sure the session is still connected */ rw_enter(&sp->session_lock, RW_READER); if ((sp->session_state != RDS_SESSION_STATE_INIT) && - (sp->session_state != RDS_SESSION_STATE_CONNECTED)) { + (sp->session_state != RDS_SESSION_STATE_CONNECTED) && + (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) { RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not " "in active state (%d)", ep, sp->session_state); rw_exit(&sp->session_lock); @@ -1313,23 +1407,24 @@ rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, RDS_DPRINTF2("rds_handle_portup_event", "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); - /* If RDS service is not registered then no bind is needed */ - if (statep->rds_srvhdl == NULL) { - RDS_DPRINTF2("rds_handle_portup_event", - "RDS Service is not registered, so no action needed"); - return; + rw_enter(&statep->rds_hca_lock, RW_WRITER); + + hcap = statep->rds_hcalistp; + while ((hcap != NULL) && (hcap->hca_guid != event->ev_hca_guid)) { + hcap = hcap->hca_nextp; } - hcap = rds_get_hcap(statep, event->ev_hca_guid); if (hcap == NULL) { RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " "not in our list", event->ev_hca_guid); + rw_exit(&statep->rds_hca_lock); return; } ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); if (ret != IBT_SUCCESS) { RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); + rw_exit(&statep->rds_hca_lock); return; } @@ -1338,36 +1433,284 @@ rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, hcap->hca_pinfop = newpinfop; hcap->hca_pinfo_sz = newsize; - /* structure copy */ - gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; + (void) ibt_free_portinfo(oldpinfop, oldsize); - /* bind RDS service on the port, pass statep as cm_private */ - ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, NULL); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2(LABEL, "Bind service for HCA: 0x%llx Port: %d " - "gid %llx:%llx returned: %d", event->ev_hca_guid, - event->ev_port, 
gid.gid_prefix, gid.gid_guid, ret); + /* If RDS service is not registered then no bind is needed */ + if (statep->rds_srvhdl == NULL) { + RDS_DPRINTF2("rds_handle_portup_event", + "RDS Service is not registered, so no action needed"); + rw_exit(&statep->rds_hca_lock); + return; } - (void) ibt_free_portinfo(oldpinfop, oldsize); + /* + * If the service was previously bound on this port and + * if this port has changed state down and now up, we do not + * need to bind the service again. The bind is expected to + * persist across state changes. If the service was never bound + * before then we bind it this time. + */ + if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) { + + /* structure copy */ + gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; + + /* bind RDS service on the port, pass statep as cm_private */ + ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, + &hcap->hca_bindhdl[event->ev_port - 1]); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rds_handle_portup_event", + "Bind service for HCA: 0x%llx Port: %d " + "gid %llx:%llx returned: %d", event->ev_hca_guid, + event->ev_port, gid.gid_prefix, gid.gid_guid, ret); + } + } + + rw_exit(&statep->rds_hca_lock); RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", event->ev_hca_guid); } static void +rdsib_add_hca(ib_guid_t hca_guid) +{ + rds_hca_t *hcap; + ibt_mr_attr_t mem_attr; + ibt_mr_desc_t mem_desc; + int ret; + + RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid); + + hcap = rdsib_init_hca(hca_guid); + if (hcap == NULL) + return; + + /* register the recv memory with this hca */ + mutex_enter(&rds_dpool.pool_lock); + if (rds_dpool.pool_memp == NULL) { + /* no memory to register */ + RDS_DPRINTF2("rdsib_add_hca", "No memory to register"); + mutex_exit(&rds_dpool.pool_lock); + return; + } + + mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp; + mem_attr.mr_len = rds_dpool.pool_memsize; + mem_attr.mr_as = NULL; + mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; + + ret 
= ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr, + &hcap->hca_mrhdl, &mem_desc); + + mutex_exit(&rds_dpool.pool_lock); + + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d", + ret); + } else { + rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); + hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED; + hcap->hca_lkey = mem_desc.md_lkey; + hcap->hca_rkey = mem_desc.md_rkey; + rw_exit(&rdsib_statep->rds_hca_lock); + } + + RDS_DPRINTF2("rdsib_add_hca", "Retrun: GUID: 0x%llx", hca_guid); +} + +void rds_close_this_session(rds_session_t *sp, uint8_t wait); +int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port); + +static void +rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid) +{ + rds_session_t *sp; + rds_hca_t *hcap; + rds_hca_state_t saved_state; + int ret, ix; + + RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid); + + /* + * This should be a write lock as we don't want anyone to get access + * to the hcap while we are modifing its contents + */ + rw_enter(&statep->rds_hca_lock, RW_WRITER); + + hcap = statep->rds_hcalistp; + while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { + hcap = hcap->hca_nextp; + } + + /* Prevent initiating any new activity on this HCA */ + ASSERT(hcap != NULL); + saved_state = hcap->hca_state; + hcap->hca_state = RDS_HCA_STATE_STOPPING; + + rw_exit(&statep->rds_hca_lock); + + /* + * stop the outgoing traffic and close any active sessions on this hca. + * Any pending messages in the SQ will be allowed to complete. + */ + rw_enter(&statep->rds_sessionlock, RW_READER); + sp = statep->rds_sessionlistp; + while (sp) { + if (sp->session_hca_guid != hca_guid) { + sp = sp->session_nextp; + continue; + } + + rw_enter(&sp->session_lock, RW_WRITER); + RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, + sp->session_state); + /* + * We are changing the session state in advance. This prevents + * further messages to be posted to the SQ. 
We then + * send a control message to the remote and tell it close + * the session. + */ + sp->session_state = RDS_SESSION_STATE_HCA_CLOSING; + RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " + "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); + rw_exit(&sp->session_lock); + + /* + * wait until the sendq is empty then tell the remote to + * close this session. This enables for graceful shutdown of + * the session + */ + rds_is_sendq_empty(&sp->session_dataep, 2); + (void) rds_post_control_message(sp, + RDS_CTRL_CODE_CLOSE_SESSION, 0); + + sp = sp->session_nextp; + } + + /* wait until all the sessions are off this HCA */ + sp = statep->rds_sessionlistp; + while (sp) { + if (sp->session_hca_guid != hca_guid) { + sp = sp->session_nextp; + continue; + } + + rw_enter(&sp->session_lock, RW_READER); + RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, + sp->session_state); + + while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) || + (sp->session_state == RDS_SESSION_STATE_ERROR) || + (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) || + (sp->session_state == RDS_SESSION_STATE_CLOSED)) { + rw_exit(&sp->session_lock); + delay(drv_usectohz(1000000)); + rw_enter(&sp->session_lock, RW_READER); + RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, + sp->session_state); + } + + rw_exit(&sp->session_lock); + + sp = sp->session_nextp; + } + rw_exit(&statep->rds_sessionlock); + + /* + * if rdsib_close_ib was called before this, then that would have + * unbound the service on all ports. In that case, the HCA structs + * will contain stale bindhdls. Hence, we do not call unbind unless + * the service is still registered. 
+ */ + if (statep->rds_srvhdl != NULL) { + /* unbind RDS service on all ports on this HCA */ + for (ix = 0; ix < hcap->hca_nports; ix++) { + if (hcap->hca_bindhdl[ix] == NULL) { + continue; + } + + RDS_DPRINTF2("rdsib_del_hca", + "Unbinding Service: port: %d, bindhdl: %p", + ix + 1, hcap->hca_bindhdl[ix]); + (void) ibt_unbind_service(rdsib_statep->rds_srvhdl, + hcap->hca_bindhdl[ix]); + hcap->hca_bindhdl[ix] = NULL; + } + } + + RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap, + hcap->hca_state); + + switch (saved_state) { + case RDS_HCA_STATE_MEM_REGISTERED: + ASSERT(hcap->hca_mrhdl != NULL); + ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_del_hca", + "ibt_deregister_mr failed: %d", ret); + return; + } + hcap->hca_mrhdl = NULL; + /* FALLTHRU */ + case RDS_HCA_STATE_OPEN: + ASSERT(hcap->hca_hdl != NULL); + ASSERT(hcap->hca_pdhdl != NULL); + + + ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_del_hca", + "ibt_free_pd failed: %d", ret); + } + + (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz); + + ret = ibt_close_hca(hcap->hca_hdl); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rdsib_del_hca", + "ibt_close_hca failed: %d", ret); + } + + hcap->hca_hdl = NULL; + hcap->hca_pdhdl = NULL; + hcap->hca_lkey = 0; + hcap->hca_rkey = 0; + } + + /* + * This should be a write lock as we don't want anyone to get access + * to the hcap while we are modifing its contents + */ + rw_enter(&statep->rds_hca_lock, RW_WRITER); + hcap->hca_state = RDS_HCA_STATE_REMOVED; + rw_exit(&statep->rds_hca_lock); + + RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid); +} + +static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, ibt_async_event_t *event) { - rds_state_t *statep; + rds_state_t *statep = (rds_state_t *)clntp; RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); switch (code) { case IBT_EVENT_PORT_UP: - 
statep = (rds_state_t *)clntp; rds_handle_portup_event(statep, hdl, event); break; + case IBT_HCA_ATTACH_EVENT: + /* + * NOTE: In some error recovery paths, it is possible to + * receive IBT_HCA_ATTACH_EVENTs on already known HCAs. + */ + (void) rdsib_add_hca(event->ev_hca_guid); + break; + case IBT_HCA_DETACH_EVENT: + (void) rdsib_del_hca(statep, event->ev_hca_guid); + break; default: RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex.c b/usr/src/uts/common/io/ib/ibnex/ibnex.c index 5944d652c7..c5038c1a1d 100644 --- a/usr/src/uts/common/io/ib/ibnex/ibnex.c +++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c @@ -40,8 +40,6 @@ * device tree nodes. */ - - #include <sys/conf.h> #include <sys/stat.h> #include <sys/modctl.h> @@ -118,11 +116,11 @@ static int ibnex_devname2port(char *, int *); static int ibnex_config_ioc_node(char *, dev_info_t *); static int ibnex_devname_to_node_n_ioc_guids( char *, ib_guid_t *, ib_guid_t *, char **); -static int ibnex_is_ioc_present(ib_guid_t); static void ibnex_ioc_node_cleanup(); static void ibnex_delete_ioc_node_data(ibnex_node_data_t *); -int ibnex_ioc_initnode(ibdm_ioc_info_t *, int, - dev_info_t *); +int ibnex_ioc_initnode_all_pi(ibdm_ioc_info_t *); +static int ibnex_ioc_initnode_pdip(ibnex_node_data_t *, + ibdm_ioc_info_t *, dev_info_t *); static int ibnex_create_ioc_node_prop( ibdm_ioc_info_t *, dev_info_t *); static int ibnex_create_ioc_compatible_prop( @@ -156,10 +154,11 @@ extern int ibnex_offline_childdip(dev_info_t *); static int ibnex_ioc_create_pi( ibdm_ioc_info_t *, ibnex_node_data_t *, - dev_info_t *); + dev_info_t *, int *); static int ibnex_bus_power(dev_info_t *, void *, pm_bus_power_op_t, void *, void *); -int ibnex_pseudo_create_pi(ibnex_node_data_t *, +int ibnex_pseudo_create_all_pi(ibnex_node_data_t *); +static int ibnex_pseudo_create_pi_pdip(ibnex_node_data_t *, dev_info_t *); static int ibnex_pseudo_config_one( ibnex_node_data_t *, char *, dev_info_t *); @@ 
-170,6 +169,11 @@ static int ibnex_ioc_bus_config_one(dev_info_t **, uint_t, ddi_bus_config_op_t, void *, dev_info_t **, int *); static int ibnex_is_merge_node(dev_info_t *); static void ibnex_hw_in_dev_tree(char *); +static int ibnex_ioc_config_from_pdip(ibdm_ioc_info_t *, + dev_info_t *, int); +static int ibnex_ioc_pi_exists(ibnex_node_data_t *, dev_info_t *); +static int ibnex_ioc_pi_reachable(ibdm_ioc_info_t *, + dev_info_t *); /* * The bus_ops structure defines the capabilities of HCA nexus driver. @@ -326,12 +330,12 @@ _NOTE(MUTEX_PROTECTS_DATA(ibnex.ibnex_mutex, ibnex_s)) _NOTE(DATA_READABLE_WITHOUT_LOCK(ibnex.ibnex_num_comm_svcs ibnex.ibnex_comm_svc_names ibnex.ibnex_nvppa_comm_svcs ibnex.ibnex_vppa_comm_svc_names ibnex.ibnex_nhcasvc_comm_svcs - ibnex.ibnex_hcasvc_comm_svc_names)) + ibnex.ibnex_hcasvc_comm_svc_names ibnex.ibnex_ioc_list)) _NOTE(MUTEX_PROTECTS_DATA(ibnex.ibnex_mutex, ibnex_node_data_s)) _NOTE(LOCK_ORDER(ibdm.ibdm_hl_mutex ibnex.ibnex_mutex)) /* The port settling time in seconds */ -int ibnex_port_settling_time = 8; +int ibnex_port_settling_time = 30; /* create an array of properties supported, easier to add new ones here */ static struct ibnex_property { @@ -1113,13 +1117,27 @@ ibnex_bus_config(dev_info_t *parent, uint_t flag, ibnex_port_node_t *port_node; int use_mdi_devi_locking = 0; + if (parent != ibnex.ibnex_dip) { + /* + * This must be an HCA.In a normal case HCA is setup as a phci. + * If an HCA is in maintenance mode, its phci is not set up + * but the driver is attached to update the firmware. In this + * case, do not configure the MPxIO clients. 
+ */ + if (mdi_component_is_phci(parent, NULL) == MDI_FAILURE) { + if (op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER) + return (NDI_SUCCESS); + else + return (NDI_FAILURE); + } - /* Set use_mdi_devi_locking appropriately */ - if ((mdi_component_is_phci(parent, NULL) == MDI_SUCCESS) && - ((op != BUS_CONFIG_ONE) || (op == BUS_CONFIG_ONE && - strncmp((char *)devname, IBNEX_IBPORT_CNAME, 6) != 0))) { - IBTF_DPRINTF_L4("ibnex", "\tbus_config: using mdi_devi_enter"); - use_mdi_devi_locking = 1; + /* Set use_mdi_devi_locking appropriately */ + if ((op != BUS_CONFIG_ONE) || (op == BUS_CONFIG_ONE && + strncmp((char *)devname, IBNEX_IBPORT_CNAME, 6) != 0)) { + IBTF_DPRINTF_L4("ibnex", + "\tbus_config: using mdi_devi_enter"); + use_mdi_devi_locking = 1; + } } if (use_mdi_devi_locking) @@ -1156,8 +1174,11 @@ ibnex_bus_config(dev_info_t *parent, uint_t flag, "\tbus_config: cname %s addr %s", cname, caddr); cdip = ndi_devi_findchild(parent, device_name1); + if (cdip) + node_data = ddi_get_parent_data(cdip); kmem_free(device_name1, len); - if (cdip == NULL) { + if (cdip == NULL || (node_data != NULL && + node_data->node_dip == NULL)) { /* Node is not present */ if (strncmp(cname, IBNEX_IOC_CNAME, 3) == 0) { if (use_mdi_devi_locking) @@ -1280,6 +1301,35 @@ ibnex_bus_config(dev_info_t *parent, uint_t flag, * ibdm and configure all children. */ if (parent == ibnex.ibnex_dip) { + ibdm_ioc_info_t *ioc_list; + + /* + * Optimize the calls for each BUS_CONFIG_ALL request + * to the IB Nexus dip. This is currently done for + * each PDIP. 
+ */ + if (ibnex.ibnex_ioc_list) { + IBTF_DPRINTF_L4("ibnex", + "\tbus_config: freeing ioc_list %p", + ibnex.ibnex_ioc_list); + ibdm_ibnex_free_ioc_list(ibnex.ibnex_ioc_list); + mutex_enter(&ibnex.ibnex_mutex); + ibnex.ibnex_ioc_list = NULL; + mutex_exit(&ibnex.ibnex_mutex); + } + + /* Enumerate all the IOC's */ + ibdm_ibnex_port_settle_wait(0, + ibnex_port_settling_time); + + ioc_list = ibdm_ibnex_get_ioc_list( + IBDM_IBNEX_NORMAL_PROBE); + IBTF_DPRINTF_L4("ibnex", + "\tbus_config: alloc ioc_list %p", ioc_list); + mutex_enter(&ibnex.ibnex_mutex); + ibnex.ibnex_ioc_list = ioc_list; + mutex_exit(&ibnex.ibnex_mutex); + ret = mdi_vhci_bus_config(parent, flag, op, devname, child, NULL); return (ret); @@ -1317,7 +1367,6 @@ ibnex_bus_config(dev_info_t *parent, uint_t flag, return (ret); } - IBTF_DPRINTF_L2("ibnex", "\tbus_config: Failure End"); return (NDI_FAILURE); } @@ -1392,12 +1441,11 @@ ibnex_config_root_iocnode(dev_info_t *parent, char *device_name) return (IBNEX_FAILURE); } mutex_enter(&ibnex.ibnex_mutex); - if (ibnex_is_ioc_present(ioc_guid) == IBNEX_SUCCESS) { - IBTF_DPRINTF_L4("ibnex", "\tconfig_root_iocnode: IOC present"); - ret = IBNEX_SUCCESS; - } else - ret = ibnex_ioc_initnode(ioc_info, IBNEX_DEVFS_ENUMERATE, - parent); + if ((ret = ibnex_ioc_config_from_pdip(ioc_info, parent, 0)) != + IBNEX_SUCCESS) { + IBTF_DPRINTF_L2("ibnex", + "\tconfig_root_ioc_node failed for pdip %p", parent); + } mutex_exit(&ibnex.ibnex_mutex); ibdm_ibnex_free_ioc_list(ioc_info); return (ret); @@ -1437,8 +1485,7 @@ static void ibnex_config_all_children(dev_info_t *parent) { int ii; - time_t wait_time; - ibdm_ioc_info_t *ioc_list, *ioc; + ibdm_ioc_info_t *ioc_list; ibdm_hca_list_t *hca_list; ib_guid_t hca_guid; int circ; @@ -1453,11 +1500,7 @@ ibnex_config_all_children(dev_info_t *parent) */ ndi_devi_enter(parent, &circ); hca_guid = ibtl_ibnex_hcadip2guid(parent); - wait_time = ibdm_ibnex_get_waittime( - hca_guid, &ibnex_port_settling_time); - if (wait_time) { - 
delay(drv_usectohz(wait_time * 1000000)); - } + ibdm_ibnex_port_settle_wait(hca_guid, ibnex_port_settling_time); hca_list = ibdm_ibnex_get_hca_info_by_guid(hca_guid); if (hca_list == NULL) { ndi_devi_exit(parent, circ); @@ -1474,19 +1517,6 @@ ibnex_config_all_children(dev_info_t *parent) ndi_devi_exit(parent, circ); /* - * Check if ibtc_attach() is called and the phci is - * set up for this device before the IB nexus starts - * enumerating MPxIO clients. - * - * If an HCA is in maintenance mode, its phci is not set up - * but the driver is attached to update the firmware. In the - * case, do not configure the MPxIO clients. - */ - if (mdi_component_is_phci(parent, NULL) == MDI_FAILURE) { - return; - } - - /* * Use mdi_devi_enter() for locking. IB Nexus is * enumerating MPxIO clients. */ @@ -1494,23 +1524,12 @@ ibnex_config_all_children(dev_info_t *parent) ibnex_pseudo_initnodes(); - /* Enumerate all the IOC's */ - wait_time = ibdm_ibnex_get_waittime( - 0, &ibnex_port_settling_time); - if (wait_time) - delay(drv_usectohz(wait_time * 1000000)); - - ioc_list = ibdm_ibnex_get_ioc_list(IBDM_IBNEX_NORMAL_PROBE); - ioc = ioc_list; + ioc_list = ibnex.ibnex_ioc_list; mutex_enter(&ibnex.ibnex_mutex); while (ioc_list) { - if (ibnex_is_ioc_present( - ioc_list->ioc_profile.ioc_guid) != IBNEX_SUCCESS) { - (void) ibnex_ioc_initnode(ioc_list, - IBNEX_DEVFS_ENUMERATE, parent); - } + (void) ibnex_ioc_config_from_pdip(ioc_list, parent, 0); ioc_list = ioc_list->ioc_next; } @@ -1518,7 +1537,6 @@ ibnex_config_all_children(dev_info_t *parent) ibnex_config_pseudo_all(parent); mutex_exit(&ibnex.ibnex_mutex); - ibdm_ibnex_free_ioc_list(ioc); mdi_devi_exit(parent, circ); IBTF_DPRINTF_L4("ibnex", "\tconfig_all_children: End"); @@ -1583,7 +1601,7 @@ ibnex_create_vppa_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr) } rval = ibnex_get_dip_from_guid( port_attr->pa_port_guid, idx, pkey, &dip); - if (rval != IBNEX_SUCCESS) { + if ((rval != IBNEX_SUCCESS) || (dip == NULL)) { (void) 
ibnex_commsvc_initnode(parent, port_attr, idx, IBNEX_VPPA_COMMSVC_NODE, pkey, &rval, IBNEX_CFGADM_ENUMERATE); @@ -1623,34 +1641,6 @@ ibnex_create_hcasvc_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr) mutex_exit(&ibnex.ibnex_mutex); } -/* - * ibnex_is_ioc_present() - * Returns IBNEX_SUCCESS if an entry found in the global linked list - * Returns IBNEX_FAILURE, if no match found - */ -static int -ibnex_is_ioc_present(ib_guid_t ioc_guid) -{ - ibnex_node_data_t *head; - ibnex_ioc_node_t *ioc; - int ret = IBNEX_FAILURE; - - IBTF_DPRINTF_L4("ibnex", "\tis_ioc_present: Begin"); - ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); - - head = ibnex.ibnex_ioc_node_head; - while (head) { - ioc = &head->node_data.ioc_node; - if (ioc->ioc_guid == ioc_guid) - break; - head = head->node_next; - } - if (head) - ret = IBNEX_SUCCESS; - - return (ret); -} - /* * ibnex_bus_unconfig() @@ -1662,7 +1652,87 @@ static int ibnex_bus_unconfig(dev_info_t *parent, uint_t flag, ddi_bus_config_op_t op, void *device_name) { - return (ndi_busop_bus_unconfig(parent, flag, op, device_name)); + ibnex_node_data_t *ndp; + major_t major = (major_t)(uintptr_t)device_name; + dev_info_t *dip = NULL; + + if (ndi_busop_bus_unconfig(parent, flag, op, device_name) != + DDI_SUCCESS) + return (DDI_FAILURE); + + /* + * We can come into this routine with dip as ibnexus dip or hca dip. + * When the dip is that of ib nexus we need to clean up the IOC and + * pseudo nodes. When the dip is that of an HCA (not IB nexus dip) + * cleanup the port nodes. + */ + if ((op == BUS_UNCONFIG_ALL || op == BUS_UNCONFIG_DRIVER) && + (flag & (NDI_UNCONFIG | NDI_DETACH_DRIVER))) { + mutex_enter(&ibnex.ibnex_mutex); + if (parent != ibnex.ibnex_dip) { + if (major == -1) { + /* + * HCA dip. When major number is -1 HCA is + * going away cleanup all the port nodes. 
+ */ + for (ndp = ibnex.ibnex_port_node_head; + ndp; ndp = ndp->node_next) { + ibnex_port_node_t *port_node; + + port_node = &ndp->node_data.port_node; + if (port_node->port_pdip == parent) { + port_node->port_pdip = NULL; + ndp->node_dip = NULL; + ndp->node_state = + IBNEX_CFGADM_UNCONFIGURED; + } + } + } else { + /* + * HCA dip. Cleanup only the port nodes that + * match the major number. + */ + for (ndp = ibnex.ibnex_port_node_head; + ndp; ndp = ndp->node_next) { + ibnex_port_node_t *port_node; + + port_node = &ndp->node_data.port_node; + dip = ndp->node_dip; + if (dip && (ddi_driver_major(dip) == + major) && port_node->port_pdip == + parent) { + port_node->port_pdip = NULL; + ndp->node_dip = NULL; + ndp->node_state = + IBNEX_CFGADM_UNCONFIGURED; + } + } + } + } else { + /* + * IB dip. here we handle IOC and pseudo nodes which + * are the children of IB nexus. Cleanup only the nodes + * with matching major number. We also need to cleanup + * the PathInfo links to the PHCI here. + */ + for (ndp = ibnex.ibnex_ioc_node_head; + ndp; ndp = ndp->node_next) { + dip = ndp->node_dip; + if (dip && (ddi_driver_major(dip) == major)) { + ibnex_offline_childdip(dip); + } + } + for (ndp = ibnex.ibnex_pseudo_node_head; + ndp; ndp = ndp->node_next) { + dip = ndp->node_dip; + if (dip && (ddi_driver_major(dip) == major)) { + ibnex_offline_childdip(dip); + } + } + } + mutex_exit(&ibnex.ibnex_mutex); + } + return (DDI_SUCCESS); } @@ -1683,7 +1753,6 @@ ibnex_config_port_node(dev_info_t *parent, char *devname) { int ii, index; int rval; - time_t wait_time; uint8_t port_num; ib_guid_t hca_guid, port_guid; ib_pkey_t pkey; @@ -1718,11 +1787,8 @@ ibnex_config_port_node(dev_info_t *parent, char *devname) } if (port_attr->pa_state != IBT_PORT_ACTIVE) { - wait_time = ibdm_ibnex_get_waittime( - hca_guid, &ibnex_port_settling_time); - if (wait_time) { - delay(drv_usectohz(wait_time * 1000000)); - } + ibdm_ibnex_port_settle_wait( + hca_guid, ibnex_port_settling_time); 
ibdm_ibnex_free_port_attr(port_attr); if ((port_attr = ibdm_ibnex_probe_hcaport( hca_guid, port_num)) == NULL) { @@ -2013,7 +2079,6 @@ static int ibnex_config_ioc_node(char *device_name, dev_info_t *pdip) { int ret; - time_t wait_time; ib_guid_t iou_guid, ioc_guid; ibdm_ioc_info_t *ioc_info; @@ -2024,9 +2089,7 @@ ibnex_config_ioc_node(char *device_name, dev_info_t *pdip) return (IBNEX_FAILURE); } - wait_time = ibdm_ibnex_get_waittime(0, &ibnex_port_settling_time); - if (wait_time) - delay(drv_usectohz(wait_time * 1000000)); + ibdm_ibnex_port_settle_wait(0, ibnex_port_settling_time); if ((ioc_info = ibdm_ibnex_probe_ioc(iou_guid, ioc_guid, 0)) == NULL) { @@ -2034,14 +2097,11 @@ ibnex_config_ioc_node(char *device_name, dev_info_t *pdip) return (IBNEX_FAILURE); } mutex_enter(&ibnex.ibnex_mutex); - if (ibnex_is_ioc_present(ioc_guid) == IBNEX_SUCCESS) { - IBTF_DPRINTF_L4("ibnex", "\tconfig_ioc_node: IOC present"); - ret = IBNEX_SUCCESS; - } else - ret = ibnex_ioc_initnode(ioc_info, IBNEX_DEVFS_ENUMERATE, - pdip); + ret = ibnex_ioc_config_from_pdip(ioc_info, pdip, 0); mutex_exit(&ibnex.ibnex_mutex); ibdm_ibnex_free_ioc_list(ioc_info); + IBTF_DPRINTF_L4("ibnex", "\tconfig_ioc_node: ret %x", + ret); return (ret); } @@ -2086,7 +2146,6 @@ ibnex_devname_to_node_n_ioc_guids( } -/*ARGSUSED*/ /* * ibnex_ioc_initnode() * Allocate a pathinfo node for the IOC @@ -2095,55 +2154,42 @@ ibnex_devname_to_node_n_ioc_guids( * Update IBnex global data * Returns IBNEX_SUCCESS/IBNEX_FAILURE/IBNEX_BUSY */ -int -ibnex_ioc_initnode(ibdm_ioc_info_t *ioc_info, int flag, dev_info_t *pdip) +static int +ibnex_ioc_initnode_pdip(ibnex_node_data_t *node_data, + ibdm_ioc_info_t *ioc_info, dev_info_t *pdip) { - int rval; - ibnex_node_data_t *node_data; + int rval, node_valid; + ibnex_node_state_t prev_state; ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); + ASSERT(node_data); - node_data = ibnex_is_node_data_present(IBNEX_IOC_NODE, - (void *)ioc_info, 0, 0); - - /* - * prevent any races - * we have seen this 
node_data and it has been initialized - * Note that node_dip is already NULL if unconfigure is in - * progress. - */ - if (node_data && node_data->node_dip) { - return ((node_data->node_state == IBNEX_CFGADM_CONFIGURING) ? - IBNEX_BUSY : IBNEX_SUCCESS); - } else if (node_data == NULL) { - node_data = ibnex_init_child_nodedata(IBNEX_IOC_NODE, - ioc_info, 0, 0); - } /* * Return EBUSY if another configure/unconfigure * operation is in progress */ if (node_data->node_state == IBNEX_CFGADM_UNCONFIGURING) { + IBTF_DPRINTF_L4("ibnex", + "\tioc_initnode_pdip : BUSY"); return (IBNEX_BUSY); } - ASSERT(node_data->node_state != IBNEX_CFGADM_CONFIGURED); + prev_state = node_data->node_state; node_data->node_state = IBNEX_CFGADM_CONFIGURING; - - mutex_exit(&ibnex.ibnex_mutex); - rval = ibnex_ioc_create_pi(ioc_info, node_data, pdip); + rval = ibnex_ioc_create_pi(ioc_info, node_data, pdip, &node_valid); mutex_enter(&ibnex.ibnex_mutex); if (rval == IBNEX_SUCCESS) node_data->node_state = IBNEX_CFGADM_CONFIGURED; + else if (node_valid) + node_data->node_state = prev_state; return (rval); } - /* * ibnex_config_pseudo_all() * Configure all the pseudo nodes @@ -2170,15 +2216,17 @@ ibnex_pseudo_config_one(ibnex_node_data_t *node_data, char *caddr, dev_info_t *pdip) { int rval; + ibnex_pseudo_node_t *pseudo; + ibnex_node_state_t prev_state; - IBTF_DPRINTF_L4("ibnex", "\tpseudo_config_one(%p, %p, %p):Begin", + IBTF_DPRINTF_L4("ibnex", "\tpseudo_config_one(%p, %p, %p)", node_data, caddr, pdip); ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); if (node_data == NULL) { - IBTF_DPRINTF_L4("ibnex", "\tpseudo_config_one: caddr = %s", - caddr); + IBTF_DPRINTF_L4("ibnex", + "\tpseudo_config_one: caddr = %s", caddr); /* * This function is now called with PHCI / HCA driver @@ -2193,68 +2241,71 @@ ibnex_pseudo_config_one(ibnex_node_data_t *node_data, char *caddr, (void *)caddr, 0, 0); } + if (node_data == NULL) { + IBTF_DPRINTF_L2("ibnex", + "\tpseudo_config_one: Invalid node"); + return (IBNEX_FAILURE); + } + 
+ if (node_data->node_ap_state == IBNEX_NODE_AP_UNCONFIGURED) { + IBTF_DPRINTF_L4("ibnex", + "\tpseudo_config_one: Unconfigured node"); + return (IBNEX_FAILURE); + } + + pseudo = &node_data->node_data.pseudo_node; + /* * Do not enumerate nodes with ib-node-type set as "merge" */ - if (node_data && node_data->node_data.pseudo_node.pseudo_merge_node - == 1) { - IBTF_DPRINTF_L4("ibnex", "\tpseudo_config_one: merge_node"); + if (pseudo->pseudo_merge_node == 1) { + IBTF_DPRINTF_L4("ibnex", + "\tpseudo_config_one: merge_node"); return (IBNEX_FAILURE); } /* - * prevent any races - * we have seen this node_data and it has been initialized - * Note that node_dip is already NULL if unconfigure is in - * progress. + * Check if a PI has already been created for the PDIP. + * If so, return SUCCESS. */ - if (node_data && node_data->node_dip) { - return ((node_data->node_state == IBNEX_CFGADM_CONFIGURING) ? - IBNEX_BUSY : IBNEX_SUCCESS); - } else if (node_data == NULL) { - IBTF_DPRINTF_L2("ibnex", "\tpseudo_config_one: Invalid node"); - return (IBNEX_FAILURE); + if (node_data->node_dip != NULL && mdi_pi_find(pdip, + pseudo->pseudo_node_addr, pseudo->pseudo_node_addr) != NULL) { + IBTF_DPRINTF_L4("ibnex", + "\tpseudo_config_one: PI created," + " pdip %p, addr %s", pdip, pseudo->pseudo_node_addr); + return (IBNEX_SUCCESS); } /* - * Return EBUSY if another configure/unconfigure + * Return EBUSY if another unconfigure * operation is in progress */ if (node_data->node_state == IBNEX_CFGADM_UNCONFIGURING) { + IBTF_DPRINTF_L4("ibnex", + "\tpseudo_config_one: BUSY"); return (IBNEX_BUSY); } - if (node_data->node_state == IBNEX_CFGADM_CONFIGURED) - return (IBNEX_SUCCESS); - - /* - * Prevent configuring pseudo nodes specifically unconfigured - * by cfgadm. 
This is done by checking if this is a newly - * created node, not yet configured by BUS_CONFIG or cfgadm - */ - if (node_data->node_data.pseudo_node.pseudo_new_node != 1) - return (IBNEX_FAILURE); - node_data->node_data.pseudo_node.pseudo_new_node = 0; + prev_state = node_data->node_state; node_data->node_state = IBNEX_CFGADM_CONFIGURING; mutex_exit(&ibnex.ibnex_mutex); - rval = ibnex_pseudo_create_pi(node_data, pdip); + rval = ibnex_pseudo_create_pi_pdip(node_data, pdip); mutex_enter(&ibnex.ibnex_mutex); - if (rval == IBNEX_SUCCESS) + if (rval == IBNEX_SUCCESS) { node_data->node_state = IBNEX_CFGADM_CONFIGURED; - else { - node_data->node_dip = NULL; - node_data->node_state = IBNEX_CFGADM_UNCONFIGURED; - node_data->node_data.pseudo_node.pseudo_new_node = 1; + } else { + node_data->node_state = prev_state; } + IBTF_DPRINTF_L4("ibnex", "\tpseudo_config_one: ret %x", + rval); return (rval); } - /* * ibnex_pseudo_mdi_config_one() * This is similar to ibnex_pseudo_config_one. Few @@ -2302,161 +2353,174 @@ ibnex_pseudo_mdi_config_one(int flag, void *devname, dev_info_t **child, return (rval); } + /* - * ibnex_pseudo_create_pi() - * Create a path info node for each pseudo entry + * ibnex_pseudo_create_all_pi() + * Create all path infos node for a pseudo entry */ int -ibnex_pseudo_create_pi(ibnex_node_data_t *nodep, dev_info_t *parent) +ibnex_pseudo_create_all_pi(ibnex_node_data_t *nodep) { - mdi_pathinfo_t *pip; - int rval, hcacnt; - dev_info_t *hca_dip, *cdip = NULL; - ibdm_hca_list_t *hca_list, *head; - ibnex_pseudo_node_t *pseudo; - - IBTF_DPRINTF_L4("ibnex", "\tibnex_pseudo_create_pi: %p", nodep); - - pseudo = &nodep->node_data.pseudo_node; - + int hcacnt, rc; + int hcafailcnt = 0; + dev_info_t *hca_dip; + ibdm_hca_list_t *hca_list, *head; + IBTF_DPRINTF_L4("ibnex", "\tpseudo_create_all_pi(%p)", + nodep); ibdm_ibnex_get_hca_list(&hca_list, &hcacnt); head = hca_list; + /* + * We return failure even if we fail for all HCAs. 
+ */ for (; hca_list != NULL; hca_list = hca_list->hl_next) { - hca_dip = ibtl_ibnex_hcaguid2dip(hca_list->hl_hca_guid); + rc = ibnex_pseudo_create_pi_pdip(nodep, hca_dip); + if (rc != IBNEX_SUCCESS) + hcafailcnt++; + } + if (head) + ibdm_ibnex_free_hca_list(head); - /* - * For CONFIG_ONE requests through HCA dip, alloc - * for HCA dip driving BUS_CONFIG request. - */ - if (parent != NULL && hca_dip != parent) - continue; + if (hcafailcnt == hcacnt) + rc = IBNEX_FAILURE; + else + rc = IBNEX_SUCCESS; - rval = mdi_pi_alloc(hca_dip, - pseudo->pseudo_devi_name, pseudo->pseudo_node_addr, - pseudo->pseudo_node_addr, 0, &pip); + IBTF_DPRINTF_L4("ibnex", "\tpseudo_create_all_pi rc %x", + rc); + return (rc); +} - if (rval != MDI_SUCCESS) { - (void) ibnex_offline_childdip(cdip); - return (IBNEX_FAILURE); - } - cdip = mdi_pi_get_client(pip); +static int +ibnex_pseudo_create_pi_pdip(ibnex_node_data_t *nodep, dev_info_t *hca_dip) +{ + mdi_pathinfo_t *pip; + int rval; + dev_info_t *cdip = NULL; + ibnex_pseudo_node_t *pseudo; + int first_pi = 0; + + IBTF_DPRINTF_L4("ibnex", "\tpseudo_create_pi_pdip: %p, %p", + nodep, hca_dip); + + pseudo = &nodep->node_data.pseudo_node; + + rval = mdi_pi_alloc(hca_dip, + pseudo->pseudo_devi_name, pseudo->pseudo_node_addr, + pseudo->pseudo_node_addr, 0, &pip); + if (rval != MDI_SUCCESS) { + IBTF_DPRINTF_L2("ibnex", "\tpseudo_create_pi_pdip:" + " mdi_pi_alloc failed"); + return (IBNEX_FAILURE); + } + cdip = mdi_pi_get_client(pip); + + if (nodep->node_dip == NULL) { IBTF_DPRINTF_L4("ibnex", "\tpseudo_create_pi: New dip %p", cdip); + first_pi = 1; nodep->node_dip = cdip; ddi_set_parent_data(cdip, nodep); + } - rval = mdi_pi_online(pip, 0); + rval = mdi_pi_online(pip, 0); - if (rval != MDI_SUCCESS) { + if (rval != MDI_SUCCESS) { + IBTF_DPRINTF_L2("ibnex", + "\tpseudo_create_pi: " + "mdi_pi_online: failed for pseudo dip %p," + " rval %d", cdip, rval); + rval = IBNEX_FAILURE; + if (first_pi == 1) { ddi_set_parent_data(cdip, NULL); - 
IBTF_DPRINTF_L2("ibnex", "\tpseudo_create_pi:" - "mdi_pi_online: failed for pseudo dip %p," - " rval %d", cdip, rval); (void) ibnex_offline_childdip(cdip); - rval = IBNEX_FAILURE; - break; + nodep->node_dip = NULL; } else - rval = IBNEX_SUCCESS; - - if (parent != NULL && hca_dip != parent) - break; - } - if (head) - ibdm_ibnex_free_hca_list(head); + (void) mdi_pi_free(pip, 0); + } else + rval = IBNEX_SUCCESS; return (rval); } - /* * ibnex_ioc_create_pi() * Create a pathinfo node for the ioc node */ static int ibnex_ioc_create_pi(ibdm_ioc_info_t *ioc_info, ibnex_node_data_t *node_data, - dev_info_t *pdip) + dev_info_t *pdip, int *node_valid) { - char ioc_guid[33], phci_guid[66]; mdi_pathinfo_t *pip; int rval = DDI_FAILURE; - dev_info_t *hca_dip, *cdip = NULL; - int flag = 1; - ibdm_hca_list_t *hca_list; - - IBTF_DPRINTF_L4("ibnex", "\tibnex_ioc_create_pi Begin"); + dev_info_t *cdip = NULL; + int create_prop = 0; + ibnex_ioc_node_t *ioc = &node_data->node_data.ioc_node; - (void) snprintf(ioc_guid, 33, "%llX", - (longlong_t)ioc_info->ioc_profile.ioc_guid); - (void) snprintf(phci_guid, 66, "%llX,%llX", - (longlong_t)ioc_info->ioc_profile.ioc_guid, - (longlong_t)ioc_info->ioc_iou_guid); - - hca_list = ioc_info->ioc_hca_list; - - for (; hca_list != NULL; hca_list = hca_list->hl_next) { - - hca_dip = ibtl_ibnex_hcaguid2dip(hca_list->hl_hca_guid); - - /* - * For CONFIG_ONE requests through HCA dip, alloc - * for HCA dip driving BUS_CONFIG request. 
- */ - if (pdip != NULL && hca_dip != pdip) - continue; - - IBTF_DPRINTF_L4("ibnex", "\tioc_create_pi " - "hca guid %llX", hca_list->hl_hca_guid); + IBTF_DPRINTF_L4("ibnex", + "\tibnex_ioc_create_pi(%p, %p, %p)", ioc_info, node_data, pdip); + *node_valid = 1; - rval = mdi_pi_alloc(hca_dip, - IBNEX_IOC_CNAME, ioc_guid, phci_guid, 0, &pip); - if (rval != MDI_SUCCESS) { - (void) ibnex_offline_childdip(cdip); - return (IBNEX_FAILURE); - } - cdip = mdi_pi_get_client(pip); + /* + * For CONFIG_ONE requests through HCA dip, alloc + * for HCA dip driving BUS_CONFIG request. + */ + rval = mdi_pi_alloc(pdip, IBNEX_IOC_CNAME, ioc->ioc_guid_str, + ioc->ioc_phci_guid, 0, &pip); + if (rval != MDI_SUCCESS) { + IBTF_DPRINTF_L2("ibnex", + "\tioc_create_pi: mdi_pi_alloc(%p, %s. %s) failed", + pdip, ioc->ioc_guid_str, ioc->ioc_phci_guid); + return (IBNEX_FAILURE); + } + cdip = mdi_pi_get_client(pip); - IBTF_DPRINTF_L4("ibnex", - "\tioc_create_pi: New IOC dip %p", cdip); + IBTF_DPRINTF_L4("ibnex", "\tioc_create_pi: IOC dip %p", + cdip); + if (node_data->node_dip == NULL) { node_data->node_dip = cdip; ddi_set_parent_data(cdip, node_data); + create_prop = 1; + IBTF_DPRINTF_L4("ibnex", + "\tioc_create_pi: creating prop"); + if ((rval = ibnex_create_ioc_node_prop( + ioc_info, cdip)) != IBNEX_SUCCESS) { + IBTF_DPRINTF_L4("ibnex", + "\tioc_create_pi: creating prop failed"); + ibnex_delete_ioc_node_data(node_data); + *node_valid = 0; + ddi_prop_remove_all(cdip); + ddi_set_parent_data(cdip, NULL); - if (flag) { - if ((rval = ibnex_create_ioc_node_prop( - ioc_info, cdip)) != IBNEX_SUCCESS) { - ibnex_delete_ioc_node_data(node_data); - ddi_prop_remove_all(cdip); - ddi_set_parent_data(cdip, NULL); - - (void) ibnex_offline_childdip(cdip); - return (IBNEX_FAILURE); - } - flag = 0; + (void) ibnex_offline_childdip(cdip); + return (IBNEX_FAILURE); } + } - rval = mdi_pi_online(pip, 0); + rval = mdi_pi_online(pip, 0); - if (rval != MDI_SUCCESS) { - ibnex_delete_ioc_node_data(node_data); - 
ddi_prop_remove_all(cdip); + if (rval != MDI_SUCCESS) { + IBTF_DPRINTF_L2("ibnex", "\tioc_create_pi: " + "mdi_pi_online() failed ioc dip %p, rval %d", + cdip, rval); + rval = IBNEX_FAILURE; + if (create_prop) { ddi_set_parent_data(cdip, NULL); - IBTF_DPRINTF_L2("ibnex", "\tioc_create_pi: " - "mdi_pi_online() failed ioc dip %p, rval %d", - cdip, rval); + ddi_prop_remove_all(cdip); + ibnex_delete_ioc_node_data(node_data); + *node_valid = 0; (void) ibnex_offline_childdip(cdip); - rval = IBNEX_FAILURE; - break; } else - rval = IBNEX_SUCCESS; + (void) mdi_pi_free(pip, 0); + } else + rval = IBNEX_SUCCESS; - if (pdip != NULL && hca_dip != pdip) - break; - } + IBTF_DPRINTF_L4("ibnex", "\tioc_create_pi ret %x", rval); return (rval); } @@ -2985,10 +3049,17 @@ ibnex_comm_svc_init(char *property, ibnex_node_type_t type) int j; len = strlen(servicep[count]); - if (len == 0 || len > 4) { + /* + * ib.conf has NULL strings for port-svc-list & + * hca-svc-list, by default. Do not have L2 message + * for these. + */ + if (len == 1 || len > 4) { IBTF_DPRINTF_L2("ibnex", "\tcomm_svc_init : " - "Service name %s invalid : length %d", - servicep[count], len); + "Service name %s for property %s invalid : " + "length %d", servicep[count], property, len); + continue; + } else if (len == 0) { continue; } if (ibnex_unique_svcname(servicep[count]) != IBNEX_SUCCESS) { @@ -3115,6 +3186,8 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr, char *svcname; dev_info_t *cdip; ibnex_node_data_t *node_data; + ibnex_port_node_t *port_node; + char devname[MAXNAMELEN]; ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); @@ -3128,6 +3201,17 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr, */ node_data = ibnex_is_node_data_present(node_type, (void *)port_attr, index, pkey); + + /* + * If this node has been explicity unconfigured by cfgadm, then it can + * be configured back again only by cfgadm configure. 
+ */ + if (node_data && (node_data->node_ap_state == + IBNEX_NODE_AP_UNCONFIGURED)) { + *rval = IBNEX_FAILURE; + return (NULL); + } + if (node_data && node_data->node_dip) { /* * Return NULL if another configure @@ -3143,6 +3227,7 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr, /* allocate a new ibnex_node_data_t */ node_data = ibnex_init_child_nodedata(node_type, port_attr, index, pkey); + node_data->node_data.port_node.port_pdip = parent; } /* @@ -3156,35 +3241,54 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr, ASSERT(node_data->node_state != IBNEX_CFGADM_CONFIGURED); node_data->node_state = IBNEX_CFGADM_CONFIGURING; - ndi_devi_alloc_sleep(parent, - IBNEX_IBPORT_CNAME, (pnode_t)DEVI_SID_NODEID, &cdip); - - node_data->node_dip = cdip; - ddi_set_parent_data(cdip, node_data); - mutex_exit(&ibnex.ibnex_mutex); - switch (node_type) { case IBNEX_VPPA_COMMSVC_NODE : svcname = ibnex.ibnex_vppa_comm_svc_names[index]; + port_node = &node_data->node_data.port_node; + (void) snprintf(devname, MAXNAMELEN, "%s@%x,%x,%s", + IBNEX_IBPORT_CNAME, port_node->port_num, + port_node->port_pkey, svcname); break; case IBNEX_HCASVC_COMMSVC_NODE : svcname = ibnex.ibnex_hcasvc_comm_svc_names[index]; + port_node = &node_data->node_data.port_node; + (void) snprintf(devname, MAXNAMELEN, "%s@%x,0,%s", + IBNEX_IBPORT_CNAME, port_node->port_num, svcname); break; case IBNEX_PORT_COMMSVC_NODE : svcname = ibnex.ibnex_comm_svc_names[index]; + port_node = &node_data->node_data.port_node; + (void) snprintf(devname, MAXNAMELEN, "%s@%x,0,%s", + IBNEX_IBPORT_CNAME, port_node->port_num, svcname); break; default : IBTF_DPRINTF_L2("ibnex", "\tcommsvc_initnode:" "\tInvalid Node type"); *rval = IBNEX_FAILURE; + mutex_exit(&ibnex.ibnex_mutex); ibnex_delete_port_node_data(node_data); - ddi_prop_remove_all(cdip); - ddi_set_parent_data(cdip, NULL); - (void) ndi_devi_free(cdip); mutex_enter(&ibnex.ibnex_mutex); return (NULL); } + if ((cdip = 
ndi_devi_findchild(parent, devname)) != NULL) { + if (i_ddi_devi_attached(cdip)) { + node_data->node_dip = cdip; + node_data->node_data.port_node.port_pdip = parent; + node_data->node_state = IBNEX_CFGADM_CONFIGURED; + ddi_set_parent_data(cdip, node_data); + return (cdip); + } + } else { + ndi_devi_alloc_sleep(parent, + IBNEX_IBPORT_CNAME, (pnode_t)DEVI_SID_NODEID, &cdip); + } + + node_data->node_dip = cdip; + ddi_set_parent_data(cdip, node_data); + mutex_exit(&ibnex.ibnex_mutex); + + if (ibnex_create_port_node_prop(port_attr, cdip, svcname, pkey) == IBNEX_SUCCESS) { if (flag == IBNEX_DEVFS_ENUMERATE) @@ -3194,13 +3298,13 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr, if (ret == NDI_SUCCESS) { mutex_enter(&ibnex.ibnex_mutex); node_data->node_state = IBNEX_CFGADM_CONFIGURED; + node_data->node_data.port_node.port_pdip = parent; return (cdip); } } + *rval = IBNEX_FAILURE; ibnex_delete_port_node_data(node_data); - ddi_prop_remove_all(cdip); - ddi_set_parent_data(cdip, NULL); (void) ndi_devi_free(cdip); mutex_enter(&ibnex.ibnex_mutex); IBTF_DPRINTF_L4("ibnex", "\tcommsvc_initnode: failure exit"); @@ -3614,9 +3718,6 @@ ibnex_pseudo_initnodes() if (node_type && strcmp(node_type, "merge") == 0) nodep->node_data.pseudo_node.pseudo_merge_node = 1; - /* Mark this as a new psuedo node */ - nodep->node_data.pseudo_node.pseudo_new_node = 1; - IBTF_DPRINTF_L3("ibnex", "\tpseudo_initnodes: unit addr = %s" " : drv name = %s", unit_addr, spec->hwc_devi_name); } @@ -3645,6 +3746,7 @@ ibnex_init_child_nodedata(ibnex_node_type_t node_type, void *attr, int index, ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); node_data = kmem_zalloc(sizeof (ibnex_node_data_t), KM_SLEEP); + node_data->node_ap_state = IBNEX_NODE_AP_CONFIGURED; node_data->node_state = IBNEX_CFGADM_CONFIGURING; node_data->node_type = node_type; @@ -4028,29 +4130,72 @@ ibnex_handle_reprobe_dev(void *arg) */ /*ARGSUSED*/ static int -ib_vhci_pi_init(dev_info_t *dip, mdi_pathinfo_t *pip, int flag) 
+ib_vhci_pi_init(dev_info_t *vdip, mdi_pathinfo_t *pip, int flag) { - IBTF_DPRINTF_L4("ibnex", "\tpi_init: dip %p pip %p", dip, pip); + IBTF_DPRINTF_L4("ibnex", "\tpi_init: dip %p pip %p", vdip, pip); return (MDI_SUCCESS); } /*ARGSUSED*/ static int -ib_vhci_pi_uninit(dev_info_t *dip, mdi_pathinfo_t *pip, int flag) +ib_vhci_pi_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flag) { - IBTF_DPRINTF_L4("ibnex", "\tpi_uninit: dip %p pip %p", dip, pip); + dev_info_t *cdip; + ibnex_node_data_t *node_data; + int clnt_num_pi; + IBTF_DPRINTF_L4("ibnex", "\tpi_uninit: dip %p pip %p", vdip, pip); + + if (pip == NULL) + return (MDI_FAILURE); + /* + * Get the Client dev_info from the pathinfo. + */ + cdip = mdi_pi_get_client(pip); + if (cdip == NULL) + return (MDI_FAILURE); + + /* + * How many PIs do we have from this cdip ? + */ + clnt_num_pi = mdi_client_get_path_count(cdip); + + /* + * If this is the last PI that is being free'd ( called from + * mdi_pi_free) we have to clean up the node data for the cdip since + * the client would have been detached by mdi_devi_offline. 
+ */ + if (clnt_num_pi == 1) { + for (node_data = ibnex.ibnex_ioc_node_head; + node_data; node_data = node_data->node_next) { + if (node_data->node_dip == cdip) { + node_data->node_dip = NULL; + node_data->node_state = + IBNEX_CFGADM_UNCONFIGURED; + return (MDI_SUCCESS); + } + } + for (node_data = ibnex.ibnex_pseudo_node_head; + node_data; node_data = node_data->node_next) { + if (node_data->node_dip == cdip) { + node_data->node_dip = NULL; + node_data->node_state = + IBNEX_CFGADM_UNCONFIGURED; + return (MDI_SUCCESS); + } + } + } return (MDI_SUCCESS); } /*ARGSUSED*/ static int -ib_vhci_pi_state_change(dev_info_t *dip, mdi_pathinfo_t *pip, +ib_vhci_pi_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, uint32_t arg1, int arg2) { IBTF_DPRINTF_L4("ibnex", - "\tpi_state_change: dip %p pip %p state %x", dip, pip, state); + "\tpi_state_change: dip %p pip %p state %x", vdip, pip, state); return (MDI_SUCCESS); } @@ -4135,18 +4280,6 @@ ibnex_ioc_bus_config_one(dev_info_t **pdipp, uint_t flag, if (ret == MDI_SUCCESS) *need_bus_config = 0; } else { - /* - * Check if ibtc_attach() is called and the phci is - * set up for this device. - * - * If an HCA is in maintenance mode, its phci is not set up - * but the driver is attached to update the firmware. In the - * case, do not configure the ioc node because the IB does not - * work properly. 
- */ - if (mdi_component_is_phci(pdip, NULL) == MDI_FAILURE) { - return (IBNEX_FAILURE); - } mdi_devi_enter(pdip, &circ); if (strstr((char *)devname, ":port=") != NULL) { ret = ibnex_config_root_iocnode(pdip, devname); @@ -4206,3 +4339,149 @@ ibnex_hw_in_dev_tree(char *driver_name) if (devnamesp[major].dn_head != (dev_info_t *)NULL) ibnex_hw_status = IBNEX_HW_IN_DEVTREE; } + +/* + * Configure the IOC through every HCA on its ioc_hca_list. Returns + * IBNEX_SUCCESS if at least one HCA could be configured, IBNEX_FAILURE + * otherwise. Caller must hold ibnex.ibnex_mutex. + */ +int +ibnex_ioc_initnode_all_pi(ibdm_ioc_info_t *ioc_info) +{ + ibdm_hca_list_t *hca_list; + dev_info_t *hca_dip; + int rc = IBNEX_FAILURE; + + ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); + /* + * We return failure only if we fail for all HCAs + */ + for (hca_list = ioc_info->ioc_hca_list; hca_list; + hca_list = hca_list->hl_next) { + hca_dip = ibtl_ibnex_hcaguid2dip(hca_list->hl_hca_guid); + if (ibnex_ioc_config_from_pdip(ioc_info, hca_dip, 1) == + IBNEX_SUCCESS) + rc = IBNEX_SUCCESS; + } + return (rc); +} + +/* + * Configure the IOC under the HCA dip pdip. If pdip_reachable_checked + * is 0, first verify that pdip is one of the HCAs on the IOC's HCA list. + * A node whose node_ap_state is IBNEX_NODE_AP_UNCONFIGURED is refused. + * A pathinfo is created via ibnex_ioc_initnode_pdip() only when the node + * is new or no pathinfo exists yet for pdip. + */ +static int +ibnex_ioc_config_from_pdip(ibdm_ioc_info_t *ioc_info, dev_info_t *pdip, + int pdip_reachable_checked) +{ + ibnex_node_data_t *node_data; + int create_pdip = 0; + int rc = IBNEX_SUCCESS; + + + ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); + IBTF_DPRINTF_L4("ibnex", + "\tioc_config_from_pdip(%p, %p, %d)", ioc_info, pdip, + pdip_reachable_checked); + + if (pdip_reachable_checked == 0) { + if (ibnex_ioc_pi_reachable(ioc_info, pdip) == IBNEX_FAILURE) { + IBTF_DPRINTF_L4("ibnex", + "\tioc_config_from_pdip: ioc %p not reachable " + "from %p", ioc_info, pdip); + return (IBNEX_FAILURE); + } + } + + node_data = ibnex_is_node_data_present(IBNEX_IOC_NODE, + (void *)ioc_info, 0, 0); + + if (node_data && node_data->node_ap_state == + IBNEX_NODE_AP_UNCONFIGURED) { + IBTF_DPRINTF_L4("ibnex", + "\tioc_config_from_pdip: Unconfigured node"); + return (IBNEX_FAILURE); + } + + + if (node_data == NULL) { + ibnex_ioc_node_t *ioc; + + create_pdip = 1; + + node_data = ibnex_init_child_nodedata(IBNEX_IOC_NODE, + ioc_info, 0, 0); + ASSERT(node_data); + ioc = &node_data->node_data.ioc_node; + (void)
snprintf(ioc->ioc_guid_str, IBNEX_IOC_GUID_LEN, + "%llX", + (longlong_t)ioc_info->ioc_profile.ioc_guid); + (void) snprintf(ioc->ioc_phci_guid, IBNEX_PHCI_GUID_LEN, + "%llX,%llX", + (longlong_t)ioc_info->ioc_profile.ioc_guid, + (longlong_t)ioc_info->ioc_iou_guid); + } else if (ibnex_ioc_pi_exists(node_data, pdip) == IBNEX_FAILURE) { + create_pdip = 1; + } + + if (create_pdip) { + rc = ibnex_ioc_initnode_pdip(node_data, ioc_info, pdip); + } + + IBTF_DPRINTF_L4("ibnex", "\tioc_config_from_pdip ret %x", + rc); + return (rc); +} + +/* + * This function checks if a pathinfo has already been created + * for the HCA parent. The function returns SUCCESS if a pathinfo + * has already been created, FAILURE if not. + */ +static int +ibnex_ioc_pi_exists(ibnex_node_data_t *node_data, dev_info_t *parent) +{ + int rc; + ibnex_ioc_node_t *ioc; + + ioc = &node_data->node_data.ioc_node; + if (mdi_pi_find(parent, (char *)ioc->ioc_guid_str, + (char *)ioc->ioc_phci_guid) != NULL) + rc = IBNEX_SUCCESS; + else + rc = IBNEX_FAILURE; + + IBTF_DPRINTF_L4("ibnex", "\tioc_pi_exists- client_guid %s, " + "phci_guid %s, parent %p, rc %x", + ioc->ioc_guid_str, ioc->ioc_phci_guid, parent, rc); + return (rc); +} + +/* + * Returns IBNEX_SUCCESS if pdip is the dip of one of the HCAs on the + * IOC's HCA list, IBNEX_FAILURE otherwise. + */ +static int +ibnex_ioc_pi_reachable(ibdm_ioc_info_t *ioc_info, dev_info_t *pdip) +{ + ibdm_hca_list_t *hca_list; + dev_info_t *hca_dip; + + IBTF_DPRINTF_L4("ibnex", "\tioc_pi_reachable(%p, %p)", + ioc_info, pdip); + for (hca_list = ioc_info->ioc_hca_list; hca_list; + hca_list = hca_list->hl_next) { + hca_dip = ibtl_ibnex_hcaguid2dip(hca_list->hl_hca_guid); + if (hca_dip == pdip) { + IBTF_DPRINTF_L4("ibnex", + "\tioc_pi_reachable SUCCESS"); + return (IBNEX_SUCCESS); + } + } + + IBTF_DPRINTF_L4("ibnex", "\tioc_pi_reachable FAILURE"); + return (IBNEX_FAILURE); +} diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c index 605b901a0e..c685266992 100644 --- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c +++
b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -27,9 +27,6 @@ * This file contains support required for IB cfgadm plugin. */ -#pragma ident "%Z%%M% %I% %E% SMI" - - #include <sys/conf.h> #include <sys/stat.h> #include <sys/modctl.h> @@ -79,8 +76,7 @@ static ibnex_rval_t ibnex_commsvc_fininode(dev_info_t *); static ibnex_rval_t ibnex_pseudo_fininode(dev_info_t *); extern uint64_t ibnex_str2hex(char *, int, int *); -extern int ibnex_ioc_initnode(ibdm_ioc_info_t *, int, - dev_info_t *); +extern int ibnex_ioc_initnode_all_pi(ibdm_ioc_info_t *); extern dev_info_t *ibnex_commsvc_initnode(dev_info_t *, ibdm_port_attr_t *, int, int, ib_pkey_t, int *, int); @@ -88,8 +84,7 @@ extern int ibnex_get_dip_from_guid(ib_guid_t, int, ib_pkey_t, dev_info_t **); extern void ibnex_reprobe_ioc_dev(void *arg); extern void ibnex_reprobe_ioc_all(); -extern int ibnex_pseudo_create_pi(ibnex_node_data_t *, - dev_info_t *); +extern int ibnex_pseudo_create_all_pi(ibnex_node_data_t *); extern void ibnex_pseudo_initnodes(void); extern ibnex_t ibnex; @@ -894,8 +889,10 @@ ibnex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, if (ret_val != IBNEX_SUCCESS) { nodep->node_dip = apid_dip; nodep->node_state = IBNEX_CFGADM_CONFIGURED; - } else + } else { nodep->node_state = IBNEX_CFGADM_UNCONFIGURED; + nodep->node_ap_state = IBNEX_NODE_AP_UNCONFIGURED; + } rv = (ret_val != IBNEX_SUCCESS) ? EIO : 0; ndi_devi_exit(pdip, circ); @@ -928,6 +925,35 @@ ibnex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, } /* + * Let's get the node if it already exists. + * NOTE: ibnex_get_dip_from_apid() finds a valid dip + * and also does a ndi_devi_hold() on the child. 
+ */ + nodep = NULL; + ret_val = ibnex_get_dip_from_apid(apid_n, &apid_dip, &nodep); + /* + * We need the node_data but not the dip. If we get a dip for + * this apid, it means it's already configured. We need to + * return. + */ + if (apid_dip != NULL) { + ndi_rele_devi(apid_dip); + ndi_devi_exit(ibnex.ibnex_dip, circ); + rv = 0; + break; + } + + /* + * A node exits for this apid but not a dip. So we must have + * unconfigured it earlier. Set the node_ap_state to configuring + * to allow configure operation. + */ + if (nodep != NULL) { + nodep->node_ap_state = IBNEX_NODE_AP_CONFIGURING; + } + + + /* * Five types of APIDs are supported: * o HCA_GUID,0,service-name (HCA-SVC device) * o IOC_GUID (IOC device) @@ -954,8 +980,25 @@ ibnex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, } } /* end of else */ - if (ret_val != IBNEX_SUCCESS) + if (ret_val != IBNEX_SUCCESS) { rv = (ret_val == IBNEX_BUSY) ? EBUSY : EIO; + } else { + /* + * Get the newly created node and set the state to + * IBNEX_NODE_AP_CONFIGURED. + * NOTE: ibnex_get_dip_from_apid() finds a valid dip + * and also does a ndi_devi_hold() on the child. + */ + if (!nodep) + ret_val = ibnex_get_dip_from_apid(apid_n, + &apid_dip, &nodep); + if (nodep != NULL) { + nodep->node_ap_state = IBNEX_NODE_AP_CONFIGURED; + } + if (apid_dip != NULL) { + ndi_rele_devi(apid_dip); + } + } IBTF_DPRINTF_L2("ibnex", "%s: DONE !! It %s", msg, rv ? "failed" : "succeeded"); ndi_devi_exit(ibnex.ibnex_dip, circ); @@ -1011,7 +1054,8 @@ ibnex_get_num_devices(void) continue; for (l = 0; l < ibnex.ibnex_nvppa_comm_svcs; - l++, ++num_nodes); + l++, ++num_nodes) + ; } /* end of pa_npkeys */ } /* end of hl_nports */ } /* end of hca_list != NULL */ @@ -1770,7 +1814,7 @@ ibnex_get_dip_from_apid(char *apid, dev_info_t **ret_dip, ibnex.ibnex_nhcasvc_comm_svcs) && (strstr(svc_str, ibnex. ibnex_hcasvc_comm_svc_names[index]) - != NULL)))) || + != NULL)))) || /* next the VPPA strings */ ((pkey_val != 0) && (strstr(svc_str, ibnex. 
ibnex_vppa_comm_svc_names[index]) != @@ -1883,10 +1927,9 @@ ibnex_handle_pseudo_configure(char *apid) ASSERT(nodep->node_state != IBNEX_CFGADM_CONFIGURED); nodep->node_state = IBNEX_CFGADM_CONFIGURING; - nodep->node_data.pseudo_node.pseudo_new_node = 0; mutex_exit(&ibnex.ibnex_mutex); - retval = ibnex_pseudo_create_pi(nodep, NULL); + retval = ibnex_pseudo_create_all_pi(nodep); mutex_enter(&ibnex.ibnex_mutex); if (retval == NDI_SUCCESS) { nodep->node_state = IBNEX_CFGADM_CONFIGURED; @@ -1945,7 +1988,7 @@ ibnex_handle_ioc_configure(char *apid) return (retval); } - retval = ibnex_ioc_initnode(ioc_info, IBNEX_CFGADM_ENUMERATE, NULL); + retval = ibnex_ioc_initnode_all_pi(ioc_info); ibdm_ibnex_free_ioc_list(ioc_info); IBTF_DPRINTF_L4("ibnex", "\tibnex_handle_ioc_configure: " @@ -1963,7 +2006,7 @@ ibnex_handle_ioc_configure(char *apid) static ibnex_rval_t ibnex_handle_commsvcnode_configure(char *apid) { - int ret, str_len; + int ret, str_len, circ; int sndx; int port_pkey = 0; char *pkey_str = strchr(apid, ','); @@ -2106,12 +2149,14 @@ ibnex_handle_commsvcnode_configure(char *apid) node_type = IBNEX_VPPA_COMMSVC_NODE; mutex_enter(&ibnex.ibnex_mutex); + ndi_devi_enter(parent, &circ); if (ibnex_commsvc_initnode(parent, port_attr, sndx, node_type, port_pkey, &ret, IBNEX_CFGADM_ENUMERATE) != NULL) { retval = IBNEX_SUCCESS; } else { retval = (ret == IBNEX_BUSY) ? 
IBNEX_BUSY : IBNEX_FAILURE; } + ndi_devi_exit(parent, circ); if (is_hcasvc_node == B_FALSE) ibdm_ibnex_free_port_attr(port_attr); diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c index 95f6352eea..130a0012a3 100644 --- a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c @@ -1398,17 +1398,16 @@ ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp) clntp->clnt_async_cnt++; hca_devp->hd_async_task_cnt++; - (void) taskq_dispatch(ibtl_async_taskq, - ibtl_tell_client_about_new_hca, new_hcap, - TQ_SLEEP); + mutex_exit(&ibtl_clnt_list_mutex); + (void) ibtl_tell_client_about_new_hca( + new_hcap); + mutex_enter(&ibtl_clnt_list_mutex); } break; } clntp = clntp->clnt_list_link; } - if (clntp != NULL) - while (clntp->clnt_async_cnt > 0) - cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex); + clntp = ibtl_clnt_list; while (clntp != NULL) { if (clntp->clnt_modinfop->mi_clnt_class == IBT_CM) { @@ -1564,15 +1563,13 @@ ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp) mutex_exit(&ibtl_async_mutex); hca_devp->hd_async_task_cnt++; - (void) taskq_dispatch(ibtl_async_taskq, - ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP); + mutex_exit(&ibtl_clnt_list_mutex); + ibtl_hca_client_async_task(ibt_hca); + mutex_enter(&ibtl_clnt_list_mutex); + break; } ibt_hca = ibt_hca->ha_clnt_link; } - /* wait for IBDM to complete */ - while (hca_devp->hd_async_task_cnt != 0) { - cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex); - } /* * Next inform IBCM. 
@@ -1645,8 +1642,16 @@ ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp) retval = 0; bailout: - hca_devp->hd_async_busy = 0; - cv_broadcast(&hca_devp->hd_async_busy_cv); + if (retval) { + hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */ + mutex_exit(&ibtl_clnt_list_mutex); + ibtl_announce_new_hca(hca_devp); + mutex_enter(&ibtl_clnt_list_mutex); + } else { + hca_devp->hd_async_busy = 0; + cv_broadcast(&hca_devp->hd_async_busy_cv); + } + return (retval); } diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_ibnex.c b/usr/src/uts/common/io/ib/ibtl/ibtl_ibnex.c index 28bd222128..ff036672f6 100644 --- a/usr/src/uts/common/io/ib/ibtl/ibtl_ibnex.c +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_ibnex.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,13 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/systm.h> +#include <sys/sunndi.h> +#include <sys/sunmdi.h> #include <sys/ib/ibtl/impl/ibtl.h> #include <sys/ib/ibtl/impl/ibtl_ibnex.h> @@ -264,8 +263,8 @@ ibtl_ibnex_get_hca_info(ib_guid_t hca_guid, int flag, char **buffer, return (ibt_get_module_failure(IBT_FAILURE_IBTL, 0)); } - IBTF_DPRINTF_L4(ibtl_ibnex, - "ibtl_ibnex_get_hca_info: size = %x", *bufsiz); + IBTF_DPRINTF_L4(ibtl_ibnex, "ibtl_ibnex_get_hca_info: size = %x", + *bufsiz); nvlist_free(nvl); return (IBT_SUCCESS); } @@ -548,3 +547,78 @@ ibtl_ibnex_valid_hca_parent(dev_info_t *pdip) return (IBT_NO_HCAS_AVAILABLE); } } + +/* + * Function: + * ibtl_ibnex_phci_register + * Input: + * hca_dip - The HCA dip + * Output: + * NONE + * Returns: + * IBT_SUCCESS/IBT_FAILURE + * Description: + * Register the HCA dip as the MPxIO PHCI. + */ +ibt_status_t +ibtl_ibnex_phci_register(dev_info_t *hca_dip) +{ + /* Register with MPxIO as PHCI */ + if (mdi_phci_register(MDI_HCI_CLASS_IB, hca_dip, 0) != + MDI_SUCCESS) { + return (IBT_FAILURE); + } + return (IBT_SUCCESS); +} + +/* + * Function: + * ibtl_ibnex_phci_unregister + * Input: + * hca_dip - The HCA dip + * Output: + * NONE + * Returns: + * IBT_SUCCESS/IBT_FAILURE + * Description: + * Free up any pending MPxIO Pathinfos and unregister the HCA dip as the + * MPxIO PHCI. + */ +ibt_status_t +ibtl_ibnex_phci_unregister(dev_info_t *hca_dip) +{ + mdi_pathinfo_t *pip = NULL; + dev_info_t *vdip = NULL; + int circ = 0, circ1 = 0; + + /* + * Should free all the Pathinfos associated with the HCA pdip before + * unregistering the PHCI. + * + * mdi_pi_free will call ib_vhci_pi_uninit() callback for each PI where + * the ibnex internal datastructures (ibnex_node_data) will have to be + * cleaned up if needed.
+ */ + vdip = mdi_devi_get_vdip(hca_dip); + ndi_devi_enter(vdip, &circ1); + ndi_devi_enter(hca_dip, &circ); + while ((pip = mdi_get_next_client_path(hca_dip, NULL)) != NULL) { + if (mdi_pi_free(pip, 0) == MDI_SUCCESS) { + continue; + } + ndi_devi_exit(hca_dip, circ); + ndi_devi_exit(vdip, circ1); + IBTF_DPRINTF_L1(ibtl_ibnex, "ibtl_ibnex_phci_unregister: " + "mdi_pi_free failed"); + return (IBT_FAILURE); + } + ndi_devi_exit(hca_dip, circ); + ndi_devi_exit(vdip, circ1); + + if (mdi_phci_unregister(hca_dip, 0) != MDI_SUCCESS) { + IBTF_DPRINTF_L1(ibtl_ibnex, "ibtl_ibnex_phci_unregister: PHCI " + "unregister failed"); + return (IBT_FAILURE); + } + return (IBT_SUCCESS); +} diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c index 909a49b2c4..5542ac9f71 100644 --- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c @@ -32,7 +32,6 @@ #include <sys/modctl.h> #include <sys/sunndi.h> -#include <sys/sunmdi.h> #include <sys/ib/ibtl/impl/ibtl.h> #include <sys/ib/ibtl/impl/ibtl_ibnex.h> @@ -595,8 +594,7 @@ ibc_attach(ibc_clnt_hdl_t *ibc_hdl_p, ibc_hca_info_t *info_p) } /* Register the with MPxIO as PHCI */ - if (mdi_phci_register(MDI_HCI_CLASS_IB, info_p->hca_dip, 0) != - MDI_SUCCESS) { + if (ibtl_ibnex_phci_register(info_p->hca_dip) != IBT_SUCCESS) { mutex_exit(&ibtl_clnt_list_mutex); IBTF_DPRINTF_L1(ibtf, "ibc_attach: MPxIO register failed"); kmem_free(hca_devp, sizeof (ibtl_hca_devinfo_t) + @@ -749,7 +747,7 @@ ibc_pre_detach(ibc_clnt_hdl_t hca_devp, ddi_detach_cmd_t cmd) hcapp = &(*hcapp)->hd_hca_dev_link; } - if (mdi_phci_unregister(hca_devp->hd_hca_dip, 0) != MDI_SUCCESS) { + if (ibtl_ibnex_phci_unregister(hca_devp->hd_hca_dip) != IBT_SUCCESS) { hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */ mutex_exit(&ibtl_clnt_list_mutex); IBTF_DPRINTF_L1(ibtf, "ibc_pre_detach: PHCI unregister failed"); diff --git a/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c
b/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c index c0e6914232..12da927fff 100644 --- a/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c +++ b/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c @@ -161,7 +161,7 @@ int ibdm_ignore_saa_event = 0; /* Modload support */ static struct modlmisc ibdm_modlmisc = { &mod_miscops, - "InfiniBand Device Manager", + "InfiniBand Device Manager" }; struct modlinkage ibdm_modlinkage = { @@ -257,6 +257,7 @@ ibdm_init(void) mutex_init(&ibdm.ibdm_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibdm.ibdm_hl_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibdm.ibdm_ibnex_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ibdm.ibdm_port_settle_cv, NULL, CV_DRIVER, NULL); mutex_enter(&ibdm.ibdm_mutex); ibdm.ibdm_state |= IBDM_LOCKS_ALLOCED; } @@ -478,6 +479,7 @@ ibdm_fini() mutex_destroy(&ibdm.ibdm_mutex); mutex_destroy(&ibdm.ibdm_hl_mutex); mutex_destroy(&ibdm.ibdm_ibnex_mutex); + cv_destroy(&ibdm.ibdm_port_settle_cv); } if (ibdm.ibdm_state & IBDM_CVS_ALLOCED) { ibdm.ibdm_state &= ~IBDM_CVS_ALLOCED; @@ -548,6 +550,7 @@ ibdm_event_hdlr(void *clnt_hdl, } ibdm_initialize_port(port); hca_list->hl_nports_active++; + cv_broadcast(&ibdm.ibdm_port_settle_cv); mutex_exit(&ibdm.ibdm_hl_mutex); break; @@ -565,6 +568,7 @@ ibdm_event_hdlr(void *clnt_hdl, port_sa_hdl = port->pa_sa_hdl; (void) ibdm_fini_port(port); port->pa_state = IBT_PORT_DOWN; + cv_broadcast(&ibdm.ibdm_port_settle_cv); mutex_exit(&ibdm.ibdm_hl_mutex); ibdm_reset_all_dgids(port_sa_hdl); break; @@ -874,6 +878,7 @@ ibdm_handle_hca_attach(ib_guid_t hca_guid) mutex_enter(&ibdm.ibdm_hl_mutex); hca_list->hl_nports_active++; ibdm_initialize_port(port_attr); + cv_broadcast(&ibdm.ibdm_port_settle_cv); mutex_exit(&ibdm.ibdm_hl_mutex); } } @@ -936,6 +941,8 @@ ibdm_handle_hca_detach(ib_guid_t hca_guid) ibdm.ibdm_hca_list_head = head->hl_next; else prev->hl_next = head->hl_next; + if (ibdm.ibdm_hca_list_tail == head) + ibdm.ibdm_hca_list_tail = prev; ibdm.ibdm_hca_count--; break; } @@ -4504,22 +4511,22 @@ 
ibdm_ibnex_unregister_callback() mutex_exit(&ibdm.ibdm_ibnex_mutex); } - /* - * ibdm_ibnex_get_waittime() + * ibdm_get_waittime() * Calculates the wait time based on the last HCA attach time */ -time_t -ibdm_ibnex_get_waittime(ib_guid_t hca_guid, int *dft_wait) +static time_t +ibdm_get_waittime(ib_guid_t hca_guid, int dft_wait) { int ii; time_t temp, wait_time = 0; ibdm_hca_list_t *hca; - IBTF_DPRINTF_L4("ibdm", "\tibnex_get_waittime hcaguid:%llx" - "\tport settling time %d", hca_guid, *dft_wait); + IBTF_DPRINTF_L4("ibdm", "\tget_waittime hcaguid:%llx" + "\tport settling time %d", hca_guid, dft_wait); + + ASSERT(mutex_owned(&ibdm.ibdm_hl_mutex)); - mutex_enter(&ibdm.ibdm_hl_mutex); hca = ibdm.ibdm_hca_list_head; if (hca_guid) { @@ -4528,29 +4535,43 @@ ibdm_ibnex_get_waittime(ib_guid_t hca_guid, int *dft_wait) (hca->hl_nports != hca->hl_nports_active)) { wait_time = ddi_get_time() - hca->hl_attach_time; - wait_time = ((wait_time >= *dft_wait) ? - 0 : (*dft_wait - wait_time)); + wait_time = ((wait_time >= dft_wait) ? + 0 : (dft_wait - wait_time)); break; } hca = hca->hl_next; } - mutex_exit(&ibdm.ibdm_hl_mutex); - IBTF_DPRINTF_L4("ibdm", "\tibnex_get_waittime %llx", wait_time); + IBTF_DPRINTF_L4("ibdm", "\tget_waittime %llx", wait_time); return (wait_time); } for (ii = 0; ii < ibdm.ibdm_hca_count; ii++) { if (hca->hl_nports != hca->hl_nports_active) { temp = ddi_get_time() - hca->hl_attach_time; - temp = ((temp >= *dft_wait) ? 0 : (*dft_wait - temp)); + temp = ((temp >= dft_wait) ? 0 : (dft_wait - temp)); wait_time = (temp > wait_time) ? 
temp : wait_time; } } - mutex_exit(&ibdm.ibdm_hl_mutex); - IBTF_DPRINTF_L4("ibdm", "\tibnex_get_waittime %llx", wait_time); + IBTF_DPRINTF_L4("ibdm", "\tget_waittime %llx", wait_time); return (wait_time); } +void +ibdm_ibnex_port_settle_wait(ib_guid_t hca_guid, int dft_wait) +{ + time_t wait_time; + + mutex_enter(&ibdm.ibdm_hl_mutex); + + while ((wait_time = ibdm_get_waittime(hca_guid, dft_wait)) > 0) { + (void) cv_timedwait(&ibdm.ibdm_port_settle_cv, + &ibdm.ibdm_hl_mutex, + ddi_get_lbolt() + drv_usectohz(wait_time * 1000000)); + } + + mutex_exit(&ibdm.ibdm_hl_mutex); +} + /* * ibdm_ibnex_probe_hcaport diff --git a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf.c b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf.c index bc1623c4eb..f8d16348d5 100644 --- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf.c +++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file implements the client interfaces of the IBMF. 
*/ @@ -232,6 +229,7 @@ ibmf_unregister(ibmf_handle_t *ibmf_handlep, uint_t flags) boolean_t error = B_FALSE; int status = IBMF_SUCCESS; char errmsg[128]; + int secs; clientp = (ibmf_client_t *)*ibmf_handlep; @@ -266,10 +264,19 @@ ibmf_unregister(ibmf_handle_t *ibmf_handlep, uint_t flags) } /* - * verify the client does not have a receive callback registered and/or - * doesn't have any messages allocated + * Verify the client does not have a receive callback registered. + * If there are messages, give some time for the messages to be + * cleaned up. */ + secs = 60; mutex_enter(&clientp->ic_mutex); + while (clientp->ic_recv_cb == NULL && clientp->ic_msgs_alloced != 0 && + secs > 0) { + mutex_exit(&clientp->ic_mutex); + delay(drv_usectohz(1000000)); /* one second delay */ + secs--; + mutex_enter(&clientp->ic_mutex); + } if (clientp->ic_recv_cb != NULL || clientp->ic_msgs_alloced != 0) { IBMF_TRACE_4(IBMF_TNF_NODEBUG, DPRINT_L1, diff --git a/usr/src/uts/common/os/sunmdi.c b/usr/src/uts/common/os/sunmdi.c index d2af7a3c03..42b0a9025e 100644 --- a/usr/src/uts/common/os/sunmdi.c +++ b/usr/src/uts/common/os/sunmdi.c @@ -375,7 +375,20 @@ mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, mdi_vhci_t *vh = NULL; ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); - ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); +#ifdef DEBUG + /* + * IB nexus driver is loaded only when IB hardware is present. + * In order to be able to do this there is a need to drive the loading + * and attaching of the IB nexus driver (especially when an IB hardware + * is dynamically plugged in) when an IB HCA driver (PHCI) + * is being attached. Unfortunately this gets into the limitations + * of devfs as there seems to be no clean way to drive configuration + * of a subtree from another subtree of a devfs. Hence, do not ASSERT + * for IB. 
+ */ + if (strcmp(class, MDI_HCI_CLASS_IB) != 0) + ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); +#endif i_mdi_init(); diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h index a2bf77aed0..03d4cf9d68 100644 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -75,8 +75,6 @@ #ifndef _RDSIB_EP_H #define _RDSIB_EP_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -174,12 +172,13 @@ typedef enum rds_session_state_s { RDS_SESSION_STATE_FAILED = 1, RDS_SESSION_STATE_INIT = 2, RDS_SESSION_STATE_CONNECTED = 3, - RDS_SESSION_STATE_ERROR = 4, - RDS_SESSION_STATE_ACTIVE_CLOSING = 5, - RDS_SESSION_STATE_PASSIVE_CLOSING = 6, - RDS_SESSION_STATE_CLOSED = 7, - RDS_SESSION_STATE_FINI = 8, - RDS_SESSION_STATE_DESTROY = 9 + RDS_SESSION_STATE_HCA_CLOSING = 4, + RDS_SESSION_STATE_ERROR = 5, + RDS_SESSION_STATE_ACTIVE_CLOSING = 6, + RDS_SESSION_STATE_PASSIVE_CLOSING = 7, + RDS_SESSION_STATE_CLOSED = 8, + RDS_SESSION_STATE_FINI = 9, + RDS_SESSION_STATE_DESTROY = 10 } rds_session_state_t; #define RDS_SESSION_TRANSITION(sp, state) \ @@ -292,6 +291,7 @@ typedef struct rds_session_s { struct rds_session_s *session_nextp; ipaddr_t session_remip; ipaddr_t session_myip; + ib_guid_t session_hca_guid; ib_gid_t session_lgid; ib_gid_t session_rgid; krwlock_t session_lock; diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h index e5716cb241..937d3a27e7 100644 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h @@ -75,8 +75,6 @@ #ifndef _RDSIB_IB_H #define _RDSIB_IB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus 
extern "C" { #endif @@ -130,11 +128,20 @@ extern uint_t rds_rx_pkts_pending_hwm; /* readonly */ #define RDS_RDMAW_WRID 0xdabadaba #define RDS_NUM_ACKS 4 /* only 1 is used */ +typedef enum rds_hca_state_s { + RDS_HCA_STATE_ADDED = 0, + RDS_HCA_STATE_OPEN = 1, + RDS_HCA_STATE_MEM_REGISTERED = 2, + RDS_HCA_STATE_STOPPING = 3, + RDS_HCA_STATE_REMOVED = 4 +} rds_hca_state_t; + /* * There is one of this structure for each HCA in the system. * This holds all the information about the HCA. * * hca_nextp - Points to the next hca in the system. + * hca_state - State of the hca (only modified on HCA attach/detach) * hca_guid - HCA Guid * hca_nports - Number of ports on the HCA * hca_hdl - HCA hdl obtained after opening the HCA @@ -147,6 +154,7 @@ extern uint_t rds_rx_pkts_pending_hwm; /* readonly */ */ typedef struct rds_hca_s { struct rds_hca_s *hca_nextp; + rds_hca_state_t hca_state; ib_guid_t hca_guid; uint_t hca_nports; ibt_hca_hdl_t hca_hdl; @@ -154,6 +162,7 @@ typedef struct rds_hca_s { ibt_mr_hdl_t hca_mrhdl; ibt_lkey_t hca_lkey; ibt_rkey_t hca_rkey; + ibt_sbind_hdl_t hca_bindhdl[4]; ibt_hca_attr_t hca_attr; ibt_hca_portinfo_t *hca_pinfop; uint_t hca_pinfo_sz; @@ -182,7 +191,6 @@ typedef struct rds_state_s { uint_t rds_nhcas; rds_hca_t *rds_hcalistp; ibt_srv_hdl_t rds_srvhdl; - ibt_srv_hdl_t rds_old_srvhdl; ib_svc_id_t rds_service_id; } rds_state_t; diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h index 19731c3d95..f4ae992d54 100644 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* @@ -75,8 +75,6 @@ #ifndef _RDSIB_PROTOCOL_H #define _RDSIB_PROTOCOL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -199,6 +197,7 @@ typedef struct rds_data_hdr_s { #define RDS_CTRL_CODE_STALL_PORTS 3 #define RDS_CTRL_CODE_UNSTALL_PORTS 4 #define RDS_CTRL_CODE_HEARTBEAT 5 +#define RDS_CTRL_CODE_CLOSE_SESSION 6 /* * RDS ctrl packet diff --git a/usr/src/uts/common/sys/ib/ibnex/ibnex.h b/usr/src/uts/common/sys/ib/ibnex/ibnex.h index 301d247331..30b917cc49 100644 --- a/usr/src/uts/common/sys/ib/ibnex/ibnex.h +++ b/usr/src/uts/common/sys/ib/ibnex/ibnex.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_IB_IBNEX_IBNEX_H #define _SYS_IB_IBNEX_IBNEX_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ibnex.h * This file contains defines and structures used within the IB Nexus @@ -49,6 +46,8 @@ typedef enum { IBNEX_INVALID_NODE = -4 } ibnex_rval_t; +#define IBNEX_IOC_GUID_LEN 33 +#define IBNEX_PHCI_GUID_LEN 66 /* IOC device node specific data */ typedef struct ibnex_ioc_node_s { @@ -58,6 +57,8 @@ typedef struct ibnex_ioc_node_s { uint32_t ioc_ngids; /* This field will be non NULL only for diconnected IOCs */ ib_dm_ioc_ctrl_profile_t *ioc_profile; + char ioc_guid_str[IBNEX_IOC_GUID_LEN]; + char ioc_phci_guid[IBNEX_PHCI_GUID_LEN]; } ibnex_ioc_node_t; /* DLPI device node specific data */ @@ -67,6 +68,7 @@ typedef struct ibnex_port_node_s { ib_guid_t port_guid; ib_guid_t port_hcaguid; ib_pkey_t port_pkey; + dev_info_t *port_pdip; } ibnex_port_node_t; /* Pseudo device node specific data */ @@ -75,7 +77,6 @@ typedef struct ibnex_pseudo_node_s { char *pseudo_unit_addr; /* unit addr of drvr */ int pseudo_unit_addr_len; /* unit addr len */ char *pseudo_devi_name; /* name of driver */ - int pseudo_new_node; /* new node */ int pseudo_merge_node; /* merge node */ } ibnex_pseudo_node_t; @@ -127,6 +128,20 @@ typedef enum ibnex_node_state_e { * Device reprobes triggered by ibt_reprobe_dev will result in an DDI * event, even though no prepoerties have changed. */ + +/* + * Defines for node_ap_state: + * IBNEX_NODE_AP_CONFIGURED + * this node was not unconfigured by cfgadm. + * IBNEX_NODE_AP_UNCONFIGURED + * this node has been unconfigured by cfgadm. 
+ * IBNEX_NODE_AP_CONFIGURING + * this node is being configured by cfgadm + */ +#define IBNEX_NODE_AP_CONFIGURED 0x0 +#define IBNEX_NODE_AP_UNCONFIGURED 0x1 +#define IBNEX_NODE_AP_CONFIGURING 0x2 + #define IBNEX_NODE_REPROBE_NOTIFY_ON_UPDATE 0x01 #define IBNEX_NODE_REPROBE_NOTIFY_ALWAYS 0x02 #define IBNEX_NODE_REPROBE_IOC_WAIT 0x04 @@ -144,6 +159,7 @@ typedef struct ibnex_node_data_s { ibnex_node_type_t node_type; ibnex_node_state_t node_state; int node_reprobe_state; /* Node reprobe flag */ + unsigned int node_ap_state; } ibnex_node_data_t; /* @@ -201,6 +217,7 @@ typedef struct ibnex_s { /* Pseudo nodes inited from ibnex_get_snapshot? */ int ibnex_pseudo_inited; + ibdm_ioc_info_t *ibnex_ioc_list; } ibnex_t; /* diff --git a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_ibnex.h b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_ibnex.h index 5ac0fc5b49..bfe91475ca 100644 --- a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_ibnex.h +++ b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_ibnex.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_IB_IBTL_IMPL_IBTL_IBNEX_H #define _SYS_IB_IBTL_IMPL_IBTL_IBNEX_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ibtl_ibnex.h * @@ -200,6 +197,35 @@ ibt_status_t ibtl_ibnex_get_hca_verbose_data(ib_guid_t, char **, size_t *); */ ibt_status_t ibtl_ibnex_valid_hca_parent(dev_info_t *); +/* + * Function: + * ibtl_ibnex_phci_register + * Input: + * hca_dip - The HCA dip + * Output: + * NONE + * Returns: + * IBT_SUCCESS/IBT_FAILURE + * Description: + * Register the HCA dip as the MPxIO PHCI. + */ +ibt_status_t ibtl_ibnex_phci_register(dev_info_t *hca_dip); + +/* + * Function: + * ibtl_ibnex_phci_unregister + * Input: + * hca_dip - The HCA dip + * Output: + * NONE + * Returns: + * IBT_SUCCESS/IBT_FAILURE + * Description: + * Free up any pending MPxIO Pathinfos and unregister the HCA dip as the + * MPxIO PHCI. + */ +ibt_status_t ibtl_ibnex_phci_unregister(dev_info_t *hca_dip); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_ibnex.h b/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_ibnex.h index 14ec58bec4..9714d65981 100644 --- a/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_ibnex.h +++ b/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_ibnex.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_IB_MGT_IBDM_IBDM_IBNEX_H #define _SYS_IB_MGT_IBDM_IBDM_IBNEX_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file contains the definitions of private interfaces * and data structures used between IB nexus and IBDM. @@ -401,13 +398,13 @@ void ibdm_ibnex_free_hca_list(ibdm_hca_list_t *); void ibdm_ibnex_update_pkey_tbls(void); /* - * ibdm_ibnex_get_waittime - * Figure out how much to delay for a given port to come up + * ibdm_ibnex_port_settle_wait + * Wait until the ports come up * - * Arguments : IOC/PORT GUID - * Return Values : default wait time from IB nexus + * Arguments + * HCA GUID and the maximum wait time since the hca instance attach */ -time_t ibdm_ibnex_get_waittime(ib_guid_t, int *); +void ibdm_ibnex_port_settle_wait(ib_guid_t, int); #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h b/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h index 536d4bcb1f..06b7e0ccfb 100644 --- a/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h +++ b/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h @@ -26,8 +26,6 @@ #ifndef _SYS_IB_MGT_IBDM_IBDM_IMPL_H #define _SYS_IB_MGT_IBDM_IBDM_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ibdm_impl.h * @@ -239,6 +237,7 @@ typedef struct ibdm_s { kcondvar_t ibdm_probe_cv; kcondvar_t ibdm_busy_cv; + kcondvar_t ibdm_port_settle_cv; uint32_t ibdm_ngid_probes_in_progress; uint64_t ibdm_transactionID; uint32_t ibdm_ngids; |