diff options
Diffstat (limited to 'usr/src')
27 files changed, 3862 insertions, 1169 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 30b66a4499..38ea6bfc4b 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -526,7 +526,7 @@ ATUN_OBJS += atun.o RDS_OBJS += rdsddi.o rdssubr.o rds_opt.o rds_ioctl.o RDSIB_OBJS += rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \ - rdsib_arp.o rdsib_arp_link.o rdsib_debug.o rdsib_sc.o + rdsib_debug.o rdsib_sc.o UDP_OBJS += udpddi.o @@ -570,7 +570,8 @@ NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \ IB_OBJS += ibnex.o ibnex_ioctl.o -IBCM_OBJS += ibcm_impl.o ibcm_sm.o ibcm_ti.o ibcm_utils.o ibcm_path.o +IBCM_OBJS += ibcm_impl.o ibcm_sm.o ibcm_ti.o ibcm_utils.o ibcm_path.o \ + ibcm_arp.o ibcm_arp_link.o IBDM_OBJS += ibdm.o diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_arp.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_arp.c deleted file mode 100644 index 68d73da269..0000000000 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_arp.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - - -#include <sys/types.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/stropts.h> -#include <sys/stream.h> -#include <sys/strsun.h> -#include <sys/strsubr.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <inet/common.h> -#include <net/if_arp.h> -#include <sys/file.h> -#include <sys/sockio.h> -#include <sys/pathname.h> -#include <inet/arp.h> -#include <sys/modctl.h> -#include <sys/ib/ib_types.h> -#include <sys/ib/clients/rds/rdsib_ib.h> -#include <sys/ib/clients/rds/rdsib_arp.h> -#include <sys/ib/clients/rds/rdsib_debug.h> - -extern int rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, - rds_ipx_addr_t *src_addr, uint8_t localroute, uint32_t bound_dev_if, - rds_pr_comp_func_t func); -extern void rds_pr_arp_ack(mblk_t *mp); -extern void rds_prwqn_delete(rds_prwqn_t *wqnp); - -/* - * rds_get_ibaddr_complete - */ -static int -rds_get_ibaddr_complete(void *arg, int status) -{ - rds_prwqn_t *wqnp = (rds_prwqn_t *)arg; - rds_streams_t *rdss = (rds_streams_t *)wqnp->arg; - - RDS_DPRINTF4("rds_get_ibaddr_complete", "Enter: rdss: 0x%p wqnp: 0x%p", - rdss, wqnp); - - mutex_enter(&rdss->lock); - rdss->status = status; - - RDS_DPRINTF2(LABEL, "sgid: %llx:%llx dgid: %llx:%llx", - wqnp->sgid.gid_prefix, wqnp->sgid.gid_guid, wqnp->dgid.gid_prefix, - wqnp->dgid.gid_guid); - - /* - * lock is held by the caller and is released after - * this function returns - */ - cv_signal(&rdss->cv); - mutex_exit(&rdss->lock); - - RDS_DPRINTF4("rds_get_ibaddr_complete", "Return"); - - return (0); -} - -/* - * Lower read service procedure (messages coming back from arp/ip). - * Process messages based on queue type. 
- */ -static int -rds_lrsrv(queue_t *q) -{ - mblk_t *mp; - rds_streams_t *rdss = q->q_ptr; - - RDS_DPRINTF4("rds_lrsrv", "Enter: 0x%p 0x%p", q, rdss); - - if (WR(q) == rdss->arpqueue) { - while (mp = getq(q)) { - rds_pr_arp_ack(mp); - } - } else { - freemsg(mp); - } - - RDS_DPRINTF4("rds_lrsrv", "Return: 0x%p", q); - - return (0); -} - -/* - * Lower write service procedure. - * Used when lower streams are flow controlled. - */ -static int -rds_lwsrv(queue_t *q) -{ - mblk_t *mp; - - RDS_DPRINTF4("rds_lwsrv", "Enter: 0x%p", q); - - while (mp = getq(q)) { - if (canputnext(q)) { - putnext(q, mp); - } else { - (void) putbq(q, mp); - qenable(q); - break; - } - } - - RDS_DPRINTF4("rds_lwsrv", "Return: 0x%p", q); - return (0); -} - -/* - * Lower read put procedure. Arp/ip messages come here. - */ -static int -rds_lrput(queue_t *q, mblk_t *mp) -{ - RDS_DPRINTF4("rds_lrput", "Enter: 0x%p, db_type: %d", q, DB_TYPE(mp)); - - switch (DB_TYPE(mp)) { - case M_FLUSH: - /* - * Turn around - */ - if (*mp->b_rptr & FLUSHW) { - *mp->b_rptr &= ~FLUSHR; - qreply(q, mp); - return (0); - } - freemsg(mp); - break; - case M_IOCACK: - case M_IOCNAK: - case M_DATA: - /* - * This could be in interrupt context. - * Some of the ibt calls cannot be called in - * interrupt context, so - * put it in the queue and the message will be - * processed by service proccedure - */ - (void) putq(q, mp); - qenable(q); - break; - default: - RDS_DPRINTF1(LABEL, "lrput: got unknown msg <0x%x>\n", - mp->b_datap->db_type); - ASSERT(0); - break; - } - - RDS_DPRINTF4("rds_lrput", "Return: 0x%p", q); - - return (0); -} - -/* - * Streams write queue module info - */ -static struct module_info rds_winfo = { - 99, /* module ID number */ - "rds", /* module name */ - 0, /* min packet size */ - INFPSZ, - 49152, /* STREAM queue high water mark -- 49152 */ - 12 /* STREAM queue low water mark -- 12 */ -}; - -/* - * Streams lower write queue, for rds/ip requests. 
- */ -static struct qinit rds_lwinit = { - NULL, /* qi_putp */ - rds_lwsrv, /* qi_srvp */ - NULL, /* qi_qopen */ - NULL, /* qi_qclose */ - NULL, /* qi_qadmin */ - &rds_winfo, /* module info */ - NULL, /* module statistics struct */ - NULL, - NULL, - STRUIOT_NONE /* stream uio type is standard uiomove() */ -}; - -/* - * Streams lower read queue: read reply messages from rds/ip. - */ -static struct qinit rds_lrinit = { - rds_lrput, /* qi_putp */ - rds_lrsrv, /* qi_srvp */ - NULL, /* qi_qopen */ - NULL, /* qi_qclose */ - NULL, /* qi_qadmin */ - &rds_winfo, /* module info */ - NULL, /* module statistics struct */ - NULL, - NULL, - STRUIOT_NONE /* stream uio type is standard uiomove() */ -}; - - -int -rds_link_driver(rds_streams_t *rdss, char *path, queue_t **q, vnode_t **dev_vp) -{ - struct stdata *dev_stp; - vnode_t *vp; - int error; - queue_t *rq; - - RDS_DPRINTF4("rds_link_driver", "Enter: %s", path); - - /* open the driver from inside the kernel */ - error = vn_open(path, UIO_SYSSPACE, FREAD|FWRITE, 0, &vp, - 0, NULL); - if (error) { - RDS_DPRINTF1(LABEL, "rds_link_driver: vn_open('%s') failed\n", - path); - return (error); - } - *dev_vp = vp; - - dev_stp = vp->v_stream; - *q = dev_stp->sd_wrq; - - mutex_enter(&vp->v_lock); - vp->v_count++; - mutex_exit(&vp->v_lock); - - rq = RD(dev_stp->sd_wrq); - RD(rq)->q_ptr = WR(rq)->q_ptr = rdss; - setq(rq, &rds_lrinit, &rds_lwinit, NULL, QMTSAFE, - SQ_CI|SQ_CO, B_FALSE); - - RDS_DPRINTF4("rds_link_driver", "Return: %s", path); - - return (0); -} - -extern struct qinit strdata; -extern struct qinit stwdata; - -/* - * Unlink ip, rds, icmp6 drivers - */ -/* ARGSUSED */ -int -rds_unlink_driver(queue_t **q, vnode_t **dev_vp) -{ - vnode_t *vp = *dev_vp; - struct stdata *dev_stp = vp->v_stream; - queue_t *wrq, *rq; - int rc; - - RDS_DPRINTF4("rds_unlink_driver", "Enter: 0x%p", q); - - wrq = dev_stp->sd_wrq; - rq = RD(wrq); - - disable_svc(rq); - wait_svc(rq); - flushq(rq, FLUSHALL); - flushq(WR(rq), FLUSHALL); - - rq->q_ptr = 
wrq->q_ptr = dev_stp; - - setq(rq, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_TRUE); - - if ((rc = VOP_CLOSE(vp, FREAD, 1, (offset_t)0, 0)) != 0) { - RDS_DPRINTF1(LABEL, "VOP_CLOSE failed %d\n", rc); - } - vn_rele(vp); - - RDS_DPRINTF4("rds_unlink_driver", "Return: 0x%p", q); - - return (0); -} - -int -rds_unlink_drivers(rds_streams_t *rdss) -{ - RDS_DPRINTF4("rds_unlink_drivers", "Enter"); - - if (rdss->arpqueue) { - (void) rds_unlink_driver(&rdss->arpqueue, &rdss->arp_vp); - } - - RDS_DPRINTF4("rds_unlink_drivers", "Return"); - - return (0); -} - -/* - * Link ip, rds drivers below rds - */ -int -rds_link_drivers(rds_streams_t *rdss) -{ - int rc; - - RDS_DPRINTF4("rds_link_drivers", "Enter"); - - if ((rc = rds_link_driver(rdss, "/dev/arp", &rdss->arpqueue, - &rdss->arp_vp)) != 0) { - RDS_DPRINTF1(LABEL, "rds_link_drivers: rds failed\n"); - return (rc); - } - - RDS_DPRINTF4("rds_link_drivers", "Return"); - - return (0); -} - -typedef struct rds_get_ibaddr_args_s { - int ret; - ipaddr_t srcip; - ipaddr_t destip; - ib_gid_t sgid; - ib_gid_t dgid; - kmutex_t lock; - kcondvar_t cv; -} rds_get_ibaddr_args_t; - -void -rds_get_ibaddr_impl(void *arg) -{ - rds_get_ibaddr_args_t *argsp = (rds_get_ibaddr_args_t *)arg; - rds_streams_t *rdss; - rds_ipx_addr_t srcaddr, destaddr; - int ret; - - RDS_DPRINTF4("rds_get_ibaddr_impl", "Enter: src: 0x%x dest: 0x%x", - argsp->srcip, argsp->destip); - - rdss = (rds_streams_t *)kmem_zalloc(sizeof (rds_streams_t), KM_SLEEP); - - mutex_init(&rdss->lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&rdss->cv, NULL, CV_DRIVER, NULL); - - ret = rds_link_drivers(rdss); - if (ret != 0) { - RDS_DPRINTF2(LABEL, "rds_link_drivers failed %d", ret); - argsp->ret = ret; - mutex_enter(&argsp->lock); - cv_signal(&argsp->cv); - mutex_exit(&argsp->lock); - return; - } - - destaddr.family = AF_INET_OFFLOAD; - destaddr.un.ip4addr = htonl(argsp->destip); - srcaddr.family = AF_INET_OFFLOAD; - srcaddr.un.ip4addr = htonl(argsp->srcip); - - ret = 
rds_pr_lookup(rdss, &destaddr, &srcaddr, 0, NULL, - rds_get_ibaddr_complete); - - if (ret == 0) { - mutex_enter(&rdss->lock); - cv_wait(&rdss->cv, &rdss->lock); - mutex_exit(&rdss->lock); - } - - (void) rds_unlink_drivers(rdss); - - argsp->ret = rdss->status; - if (argsp->ret == 0) { - argsp->sgid = rdss->wqnp->sgid; - argsp->dgid = rdss->wqnp->dgid; - rds_prwqn_delete(rdss->wqnp); - } - - mutex_destroy(&rdss->lock); - cv_destroy(&rdss->cv); - kmem_free(rdss, sizeof (rds_streams_t)); - - mutex_enter(&argsp->lock); - cv_signal(&argsp->cv); - mutex_exit(&argsp->lock); - - RDS_DPRINTF4("rds_get_ibaddr_impl", "Return"); -} - -/* - * Return 0 for SUCCESS - * Return NON-ZERO for FAILURE - */ -int -rds_get_ibaddr(ipaddr_t srcip, ipaddr_t destip, ib_gid_t *sgid, ib_gid_t *dgid) -{ - rds_get_ibaddr_args_t *argsp; - ibt_path_info_t pinfo; - ibt_path_attr_t pattr; - int ret; - - RDS_DPRINTF4("rds_get_ibaddr", "Enter: src: 0x%x dest: 0x%x", srcip, - destip); - - argsp = (rds_get_ibaddr_args_t *)kmem_zalloc( - sizeof (rds_get_ibaddr_args_t), KM_SLEEP); - argsp->srcip = srcip; - argsp->destip = destip; - mutex_init(&argsp->lock, NULL, MUTEX_DRIVER, NULL); - cv_init(&argsp->cv, NULL, CV_DRIVER, NULL); - - /* - * To prevent cv_signal to be called before cv_wait grab the lock - * before taskq_dispatch - */ - mutex_enter(&argsp->lock); - - ret = ddi_taskq_dispatch(rds_taskq, rds_get_ibaddr_impl, - (void *)argsp, DDI_NOSLEEP); - if (ret != DDI_SUCCESS) { - RDS_DPRINTF1(LABEL, "Taskq dispatch failed"); - mutex_exit(&argsp->lock); - cv_destroy(&argsp->cv); - mutex_destroy(&argsp->lock); - kmem_free(argsp, sizeof (rds_get_ibaddr_args_t)); - return (ret); - } - - /* wait here for rds_get_ibaddr_impl to complete */ - cv_wait(&argsp->cv, &argsp->lock); - mutex_exit(&argsp->lock); - - ret = argsp->ret; - *sgid = argsp->sgid; - *dgid = argsp->dgid; - - if (ret == 0) { - /* - * Sometimes arp returns the gids from the cache even when - * the port is down. 
So, check here if there is actually an - * available path to the destination - */ - bzero(&pattr, sizeof (ibt_path_attr_t)); - pattr.pa_dgids = &argsp->dgid; - pattr.pa_sgid = argsp->sgid; - pattr.pa_num_dgids = 1; - ret = ibt_get_paths(rdsib_statep->rds_ibhdl, IBT_PATH_NO_FLAGS, - &pattr, 1, &pinfo, NULL); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2("rds_get_ibaddr", - "ibt_get_paths failed: %d", ret); - } - } - - cv_destroy(&argsp->cv); - mutex_destroy(&argsp->lock); - kmem_free(argsp, sizeof (rds_get_ibaddr_args_t)); - - return (ret); -} diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c index a4bc86e733..4786fb598d 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c @@ -114,6 +114,14 @@ rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); + /* validate service id */ + if (reqp->req_service_id == RDS_SERVICE_ID) { + RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system " + "(GUID: 0x%llx) is running an older version of RDS", + rgid.gid_guid); + return (IBT_CM_REJECT); + } + /* * CM private data brings IP information * Private data received is a stream of bytes and may not be properly @@ -126,7 +134,7 @@ rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, cmp.cmp_localip, cmp.cmp_remip, cmp.cmp_eptype); if (cmp.cmp_version != RDS_VERSION) { - RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d " + RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d " "Remote version: %d", RDS_VERSION, cmp.cmp_version); return (IBT_CM_REJECT); } @@ -561,6 +569,12 @@ rds_handle_cm_event_failure(ibt_cm_event_t *evp) evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, evp->cm_event.failed.cf_reason); + if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { + RDS_DPRINTF0(LABEL, + "Received REJ with 
reason IBT_CM_INVALID_SID: " + "The remote system could be running an older RDS version"); + } + if (evp->cm_channel == NULL) { return (IBT_CM_ACCEPT); } @@ -665,6 +679,9 @@ rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, return (ret); } +/* This is based on OFED Linux RDS */ +#define RDS_PORT_NUM 6556 + /* * Register the wellknown service with service id: RDS_SERVICE_ID * Incoming connection requests should arrive on this service id. @@ -682,11 +699,31 @@ rds_register_service(ibt_clnt_hdl_t rds_ibhdl) srvdesc.sd_handler = rds_cm_handler; srvdesc.sd_flags = IBT_SRV_NO_FLAGS; + /* + * Register the old service id for backward compatibility + * REQs received on this service id would be rejected + */ ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, - 1, &srvhdl, NULL); + 1, &rdsib_statep->rds_old_srvhdl, NULL); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2(LABEL, + "RDS Service (0x%llx) Registration Failed: %d", + RDS_SERVICE_ID, ret); + return (NULL); + } + + /* + * This is the new service id as per: + * Annex A11: RDMA IP CM Service + */ + rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, + RDS_PORT_NUM); + ret = ibt_register_service(rds_ibhdl, &srvdesc, + rdsib_statep->rds_service_id, 1, &srvhdl, NULL); if (ret != IBT_SUCCESS) { - RDS_DPRINTF2(LABEL, "RDS Service Registration Failed: %d", - ret); + RDS_DPRINTF2(LABEL, + "RDS Service (0x%llx) Registration Failed: %d", + rdsib_statep->rds_service_id, ret); return (NULL); } @@ -737,6 +774,17 @@ rds_bind_service(rds_state_t *statep) } nbinds++; + + /* bind the old service, ignore if it fails */ + ret = ibt_bind_service(statep->rds_old_srvhdl, gid, + NULL, statep, NULL); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2(LABEL, "Bind service for " + "HCA: 0x%llx Port: %d gid %llx:%llx " + "failed: %d", hcap->hca_guid, + hcap->hca_pinfop[jx].p_port_num, + gid.gid_prefix, gid.gid_guid, ret); + } } hcap = hcap->hca_nextp; } @@ -766,12 +814,27 @@ rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 
rds_cm_private_data_t cmp; uint8_t hca_port; ibt_channel_hdl_t hdl; - int ret = 0; + ibt_status_t ret = 0; + ibt_ip_cm_info_t ipcm_info; RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); sp = ep->ep_sp; + bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); + ipcm_info.src_addr.family = AF_INET; + ipcm_info.src_addr.un.ip4addr = sp->session_myip; + ipcm_info.dst_addr.family = AF_INET; + ipcm_info.dst_addr.un.ip4addr = sp->session_remip; + ipcm_info.src_port = 6556; /* based on OFED RDS */ + ret = ibt_format_ip_private_data(&ipcm_info, + sizeof (rds_cm_private_data_t), &cmp); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " + "failed: %d", sp, ep, ret); + return (-1); + } + hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; hdl = rds_ep_alloc_rc_channel(ep, hca_port); diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c index 3ff8100bb6..beecced14a 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c @@ -88,19 +88,15 @@ * This File contains the endpoint related calls */ -extern int rds_get_ibaddr(ipaddr_t, ipaddr_t, ib_gid_t *, ib_gid_t *); extern boolean_t rds_islocal(ipaddr_t addr); extern uint_t rds_wc_signal; -#define RDS_GET_IBADDR_SUCCESS(ret, lgid, rgid) \ - ((ret == 0) && (lgid.gid_prefix != 0) && \ - (lgid.gid_guid != 0) && (rgid.gid_prefix != 0) && \ - (rgid.gid_guid != 0)) - #define RDS_LOOPBACK 0 #define RDS_LOCAL 1 #define RDS_REMOTE 2 +#define IBT_IPADDR 1 + static uint8_t rds_is_port_marked(rds_session_t *sp, in_port_t port, uint_t qualifier) { @@ -561,34 +557,16 @@ rds_session_connect(rds_session_t *sp) { ibt_channel_hdl_t ctrlchan, datachan; rds_ep_t *ep; - ibt_path_info_t pinfo; - ibt_path_attr_t pattr; - ib_gid_t lgid, rgid; int ret; RDS_DPRINTF2("rds_session_connect", "Enter SP(%p)", sp); - rw_enter(&sp->session_lock, RW_READER); - rgid = sp->session_rgid; - lgid = 
sp->session_lgid; - rw_exit(&sp->session_lock); - - /* get paths to the destination */ - bzero(&pattr, sizeof (ibt_path_attr_t)); - pattr.pa_dgids = &rgid; - pattr.pa_sgid = lgid; - pattr.pa_num_dgids = 1; - ret = ibt_get_paths(rdsib_statep->rds_ibhdl, IBT_PATH_NO_FLAGS, - &pattr, 1, &pinfo, NULL); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2(LABEL, "ibt_get_paths failed: %d", ret); - return (-1); - } - pinfo.pi_sid = RDS_SERVICE_ID; + sp->session_pinfo.pi_sid = rdsib_statep->rds_service_id; /* Override the packet life time based on the conf file */ if (IBPktLifeTime != 0) { - pinfo.pi_prim_cep_path.cep_cm_opaque1 = IBPktLifeTime; + sp->session_pinfo.pi_prim_cep_path.cep_cm_opaque1 = + IBPktLifeTime; } /* Session type may change if we run into peer-to-peer case. */ @@ -607,7 +585,8 @@ rds_session_connect(rds_session_t *sp) if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; mutex_exit(&ep->ep_lock); - ret = rds_open_rc_channel(ep, &pinfo, IBT_BLOCKING, &datachan); + ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING, + &datachan); if (ret != IBT_SUCCESS) { RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " "failed: %d", ep, ret); @@ -629,7 +608,8 @@ rds_session_connect(rds_session_t *sp) if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; mutex_exit(&ep->ep_lock); - ret = rds_open_rc_channel(ep, &pinfo, IBT_BLOCKING, &ctrlchan); + ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING, + &ctrlchan); if (ret != IBT_SUCCESS) { RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " "failed: %d", ep, ret); @@ -701,8 +681,8 @@ rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait) } if (ep->ep_state == RDS_EP_STATE_CONNECTED) { - mutex_exit(&ep->ep_lock); ep->ep_state = RDS_EP_STATE_CLOSING; + mutex_exit(&ep->ep_lock); (void) rds_close_rc_channel(ep->ep_chanhdl, mode); mutex_enter(&ep->ep_lock); } @@ -817,6 +797,9 @@ rds_failover_session(void *arg) 
delay(drv_usectohz(1000000)); do { + ibt_ip_path_attr_t ipattr; + ibt_ip_addr_t dstip; + /* The ipaddr should be in the network order */ myip = sp->session_myip; remip = sp->session_remip; @@ -830,21 +813,36 @@ rds_failover_session(void *arg) lgid.gid_guid = 0; rgid.gid_prefix = 0; rgid.gid_guid = 0; - ret = rds_get_ibaddr(htonl(myip), htonl(remip), &lgid, &rgid); - if (RDS_GET_IBADDR_SUCCESS(ret, lgid, rgid)) { + + bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); + dstip.family = AF_INET; + dstip.un.ip4addr = htonl(remip); + ipattr.ipa_dst_ip = &dstip; + ipattr.ipa_src_ip.family = AF_INET; + ipattr.ipa_src_ip.un.ip4addr = htonl(myip); + ipattr.ipa_ndst = 1; + ipattr.ipa_max_paths = 1; + RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", + myip, remip); + ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, + IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, NULL, NULL); + if (ret == IBT_SUCCESS) { + RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); + lgid = sp->session_pinfo. + pi_prim_cep_path.cep_adds_vect.av_sgid; + rgid = sp->session_pinfo. 
+ pi_prim_cep_path.cep_adds_vect.av_dgid; break; } - RDS_DPRINTF1(LABEL, "rds_get_ibaddr failed, ret: %d " - "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix, - lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid); + RDS_DPRINTF1(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret); /* wait 1 sec before re-trying */ delay(drv_usectohz(1000000)); cnt++; } while (cnt < 5); - if (!RDS_GET_IBADDR_SUCCESS(ret, lgid, rgid)) { + if (ret != IBT_SUCCESS) { rw_enter(&sp->session_lock, RW_WRITER); if (sp->session_type == RDS_SESSION_ACTIVE) { rds_session_fini(sp); @@ -1226,6 +1224,8 @@ rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip, if (type == RDS_SESSION_ACTIVE) { ipaddr_t localip1, remip1; + ibt_ip_path_attr_t ipattr; + ibt_ip_addr_t dstip; /* The ipaddr should be in the network order */ localip1 = localip; @@ -1241,16 +1241,33 @@ rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip, lgid.gid_guid = 0; rgid.gid_prefix = 0; rgid.gid_guid = 0; - ret = rds_get_ibaddr(ntohl(localip1), ntohl(remip1), - &lgid, &rgid); - if (!RDS_GET_IBADDR_SUCCESS(ret, lgid, rgid)) { - RDS_DPRINTF1(LABEL, "rds_get_ibaddr failed, ret: %d " + + bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); + dstip.family = AF_INET; + dstip.un.ip4addr = ntohl(remip1); + ipattr.ipa_dst_ip = &dstip; + ipattr.ipa_src_ip.family = AF_INET; + ipattr.ipa_src_ip.un.ip4addr = ntohl(localip1); + ipattr.ipa_ndst = 1; + ipattr.ipa_max_paths = 1; + RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", + localip1, remip1); + ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, + IBT_PATH_NO_FLAGS, &ipattr, &newp->session_pinfo, + NULL, NULL); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF1(LABEL, "ibt_get_ip_paths failed, ret: %d " "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid); RDS_SESSION_TRANSITION(newp, RDS_SESSION_STATE_FAILED); return (NULL); } + RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); + lgid = + 
newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_sgid; + rgid = + newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_dgid; RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, @@ -1948,6 +1965,9 @@ rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, rw_enter(&sp->session_lock, RW_WRITER); if ((sp->session_state == RDS_SESSION_STATE_FAILED) || (sp->session_state == RDS_SESSION_STATE_FINI)) { + ibt_ip_path_attr_t ipattr; + ibt_ip_addr_t dstip; + sp->session_state = RDS_SESSION_STATE_CREATED; sp->session_type = RDS_SESSION_ACTIVE; RDS_DPRINTF3("rds_sendmsg", "SP(%p) State " @@ -1969,14 +1989,23 @@ rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, lgid.gid_guid = 0; rgid.gid_prefix = 0; rgid.gid_guid = 0; - ret = rds_get_ibaddr(htonl(sendip1), htonl(recvip1), - &lgid, &rgid); - if (!RDS_GET_IBADDR_SUCCESS(ret, lgid, rgid)) { + + bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); + dstip.family = AF_INET; + dstip.un.ip4addr = htonl(recvip1); + ipattr.ipa_dst_ip = &dstip; + ipattr.ipa_src_ip.family = AF_INET; + ipattr.ipa_src_ip.un.ip4addr = htonl(sendip1); + ipattr.ipa_ndst = 1; + ipattr.ipa_max_paths = 1; + RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", + sendip1, recvip1); + ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, + IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, + NULL, NULL); + if (ret != IBT_SUCCESS) { RDS_DPRINTF1("rds_sendmsg", - "rds_get_ibaddr failed, ret: %d " - "lgid: %llx:%llx rgid: %llx:%llx", - lgid.gid_prefix, lgid.gid_guid, - rgid.gid_prefix, rgid.gid_guid); + "ibt_get_ip_paths failed, ret: %d ", ret); rw_enter(&sp->session_lock, RW_WRITER); if (sp->session_type == RDS_SESSION_ACTIVE) { @@ -1992,6 +2021,11 @@ rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, return (ENOMEM); } } + RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); + lgid = sp->session_pinfo. 
+ pi_prim_cep_path.cep_adds_vect.av_sgid; + rgid = sp->session_pinfo. + pi_prim_cep_path.cep_adds_vect.av_dgid; RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c b/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c index ee61ad3667..59f024ebee 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c @@ -329,6 +329,11 @@ rdsib_close_ib() (void) ibt_unbind_all_services(rdsib_statep->rds_srvhdl); (void) ibt_deregister_service(rdsib_statep->rds_ibhdl, rdsib_statep->rds_srvhdl); + (void) ibt_release_ip_sid(rdsib_statep->rds_service_id); + + (void) ibt_unbind_all_services(rdsib_statep->rds_old_srvhdl); + (void) ibt_deregister_service(rdsib_statep->rds_ibhdl, + rdsib_statep->rds_old_srvhdl); } /* close and destroy all the sessions */ diff --git a/usr/src/uts/common/io/ib/ibtl/ibtl_cm.c b/usr/src/uts/common/io/ib/ibtl/ibtl_cm.c index f90c58f83b..c473f11029 100644 --- a/usr/src/uts/common/io/ib/ibtl/ibtl_cm.c +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_cm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -353,6 +353,15 @@ ibtl_cm_get_cnt(ibt_path_attr_t *attr, ibt_path_flags_t flags, for (j = 0; j < pinfop->p_sgid_tbl_sz; j++) { gid = pinfop->p_sgid_tbl[j]; if (gid.gid_prefix && gid.gid_guid) { + if (!(flags & IBT_PATH_APM) && + attr->pa_sgid.gid_prefix && + attr->pa_sgid.gid_guid) { + if ((attr->pa_sgid.gid_prefix != + gid.gid_prefix) || + (attr->pa_sgid.gid_guid != + gid.gid_guid)) + continue; + } pcount++; if (plistp) { plistp->p_hca_guid = hca_guid; diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c new file mode 100644 index 0000000000..a69ecc8fea --- /dev/null +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c @@ -0,0 +1,709 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/stropts.h> +#include <sys/stream.h> +#include <sys/strsun.h> +#include <sys/strsubr.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <net/if_arp.h> +#include <sys/file.h> +#include <sys/sockio.h> +#include <sys/pathname.h> +#include <inet/arp.h> +#include <sys/modctl.h> + +#include <sys/ib/mgt/ibcm/ibcm_arp.h> + +#include <sys/kstr.h> +#include <sys/tiuser.h> +#include <sys/t_kuser.h> + +extern char cmlog[]; + +extern int ibcm_arp_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, + ibt_ip_addr_t *src_addr, uint8_t localroute, uint32_t bound_dev_if, + ibcm_arp_pr_comp_func_t func); +extern void ibcm_arp_pr_arp_ack(mblk_t *mp); +extern void ibcm_arp_prwqn_delete(ibcm_arp_prwqn_t *wqnp); + +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", datab)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_addr_s)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_ip_t)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_ibd_insts_t)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_prwqn_t)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", iocblk)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", msgb)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", queue)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", sockaddr_in)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", sockaddr_in6)) + +/* + * ibcm_arp_get_ibaddr_cb + */ +static int +ibcm_arp_get_ibaddr_cb(void *arg, int status) +{ + ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg; + ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr_cb(ib_s: %p wqnp: %p)", + ib_s, wqnp); + + mutex_enter(&ib_s->lock); + ib_s->status = status; + + IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr_cb: SGID %llX:%llX " + "DGID: %llX:%llX", wqnp->sgid.gid_prefix, wqnp->sgid.gid_guid, + wqnp->dgid.gid_prefix, wqnp->dgid.gid_guid); + + /* lock is held by the 
caller. */ + cv_signal(&ib_s->cv); + mutex_exit(&ib_s->lock); + return (0); +} + +/* + * Lower read service procedure (messages coming back from arp/ip). + * Process messages based on queue type. + */ +static int +ibcm_arp_lrsrv(queue_t *q) +{ + mblk_t *mp; + ibcm_arp_streams_t *ib_s = q->q_ptr; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lrsrv(%p, ibd_s: 0x%p)", q, ib_s); + + if (WR(q) == ib_s->arpqueue) { + while (mp = getq(q)) { + ibcm_arp_pr_arp_ack(mp); + } + } else { + freemsg(mp); + } + + return (0); +} + +/* + * Lower write service procedure. + * Used when lower streams are flow controlled. + */ +static int +ibcm_arp_lwsrv(queue_t *q) +{ + mblk_t *mp; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lwsrv(%p)", q); + + while (mp = getq(q)) { + if (canputnext(q)) { + putnext(q, mp); + } else { + (void) putbq(q, mp); + qenable(q); + break; + } + } + + return (0); +} + +/* + * Lower read put procedure. Arp/ip messages come here. + */ +static int +ibcm_arp_lrput(queue_t *q, mblk_t *mp) +{ + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lrput(0x%p, db_type: %d)", + q, DB_TYPE(mp)); + + switch (DB_TYPE(mp)) { + case M_FLUSH: + /* + * Turn around + */ + if (*mp->b_rptr & FLUSHW) { + *mp->b_rptr &= ~FLUSHR; + qreply(q, mp); + return (0); + } + freemsg(mp); + break; + case M_IOCACK: + case M_IOCNAK: + case M_DATA: + /* + * This could be in interrupt context. 
+ * Some of the ibt calls cannot be called in + * interrupt context, so + * put it in the queue and the message will be + * processed by service proccedure + */ + (void) putq(q, mp); + qenable(q); + break; + default: + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_lrput: " + "got unknown msg <0x%x>\n", mp->b_datap->db_type); + ASSERT(0); + break; + } + + return (0); +} + +/* + * Streams write queue module info + */ +static struct module_info ibcm_arp_winfo = { + 0, /* module ID number */ + "ibcm", /* module name */ + 0, /* min packet size */ + INFPSZ, + 49152, /* STREAM queue high water mark -- 49152 */ + 12 /* STREAM queue low water mark -- 12 */ +}; + +/* + * Streams lower write queue, for ibcm/ip requests. + */ +static struct qinit ibcm_arp_lwinit = { + NULL, /* qi_putp */ + ibcm_arp_lwsrv, /* qi_srvp */ + NULL, /* qi_qopen */ + NULL, /* qi_qclose */ + NULL, /* qi_qadmin */ + &ibcm_arp_winfo, /* module info */ + NULL, /* module statistics struct */ + NULL, + NULL, + STRUIOT_NONE /* stream uio type is standard uiomove() */ +}; + +/* + * Streams lower read queue: read reply messages from ibcm/ip. 
+ */ +static struct qinit ibcm_arp_lrinit = { + ibcm_arp_lrput, /* qi_putp */ + ibcm_arp_lrsrv, /* qi_srvp */ + NULL, /* qi_qopen */ + NULL, /* qi_qclose */ + NULL, /* qi_qadmin */ + &ibcm_arp_winfo, /* module info */ + NULL, /* module statistics struct */ + NULL, + NULL, + STRUIOT_NONE /* stream uio type is standard uiomove() */ +}; + + +static int +ibcm_arp_link_driver(ibcm_arp_streams_t *ib_s, char *path, queue_t **q, + vnode_t **dev_vp) +{ + struct stdata *dev_stp; + vnode_t *vp; + int error; + queue_t *rq; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_link_driver: Enter: %s", path); + + /* open the driver from inside the kernel */ + error = vn_open(path, UIO_SYSSPACE, FREAD|FWRITE, 0, &vp, + 0, NULL); + if (error) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_link_driver: " + "vn_open('%s') failed\n", path); + return (error); + } + *dev_vp = vp; + + dev_stp = vp->v_stream; + *q = dev_stp->sd_wrq; + + VN_HOLD(vp); + + rq = RD(dev_stp->sd_wrq); + RD(rq)->q_ptr = WR(rq)->q_ptr = ib_s; + setq(rq, &ibcm_arp_lrinit, &ibcm_arp_lwinit, NULL, QMTSAFE, + SQ_CI|SQ_CO, B_FALSE); + + return (0); +} + +extern struct qinit strdata; +extern struct qinit stwdata; + +/* + * Unlink ip, ibcm, icmp6 drivers + */ +/* ARGSUSED */ +static int +ibcm_arp_unlink_driver(queue_t **q, vnode_t **dev_vp) +{ + vnode_t *vp = *dev_vp; + struct stdata *dev_stp = vp->v_stream; + queue_t *wrq, *rq; + int rc; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_unlink_driver: Enter: 0x%p", q); + + wrq = dev_stp->sd_wrq; + rq = RD(wrq); + + disable_svc(rq); + wait_svc(rq); + flushq(rq, FLUSHALL); + flushq(WR(rq), FLUSHALL); + + rq->q_ptr = wrq->q_ptr = dev_stp; + + setq(rq, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_TRUE); + + if ((rc = VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED())) != 0) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_unlink_driver: VOP_CLOSE " + "failed %d\n", rc); + } + vn_rele(vp); + + return (0); +} + +static int +ibcm_arp_unlink_drivers(ibcm_arp_streams_t *ib_s) +{ + IBTF_DPRINTF_L4(cmlog, 
"ibcm_arp_unlink_drivers(%p)", ib_s); + + if (ib_s->arpqueue) { + (void) ibcm_arp_unlink_driver(&ib_s->arpqueue, &ib_s->arp_vp); + } + + return (0); +} + +/* + * Link ip, ibtl drivers below ibtl + */ +static int +ibcm_arp_link_drivers(ibcm_arp_streams_t *ib_s) +{ + int rc; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_link_drivers(%p)", ib_s); + + if ((rc = ibcm_arp_link_driver(ib_s, "/dev/arp", &ib_s->arpqueue, + &ib_s->arp_vp)) != 0) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_link_drivers: " + "ibcm_arp_link_driver failed: %d\n", rc); + return (rc); + } + + return (0); +} + +ibt_status_t +ibcm_arp_get_ibaddr(ipaddr_t srcip, ipaddr_t destip, ib_gid_t *sgid, + ib_gid_t *dgid) +{ + ibcm_arp_streams_t *ib_s; + ibt_ip_addr_t srcaddr, destaddr; + int ret; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr(%lX, %lX, %p, %p)", + htonl(srcip), htonl(destip), sgid, dgid); + + ib_s = (ibcm_arp_streams_t *)kmem_zalloc(sizeof (ibcm_arp_streams_t), + KM_SLEEP); + + mutex_init(&ib_s->lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ib_s->cv, NULL, CV_DRIVER, NULL); + + ret = ibcm_arp_link_drivers(ib_s); + if (ret != 0) { + IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: " + "ibcm_arp_link_drivers failed %d", ret); + return (IBT_FAILURE); + } + + bzero(&destaddr, sizeof (ibt_ip_addr_t)); + bzero(&srcaddr, sizeof (ibt_ip_addr_t)); + + destaddr.family = AF_INET_OFFLOAD; + destaddr.un.ip4addr = htonl(destip); + srcaddr.family = AF_INET_OFFLOAD; + srcaddr.un.ip4addr = htonl(srcip); + + IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: SrcIP %lX, DstIP %lX", + srcaddr.un.ip4addr, destaddr.un.ip4addr); + ret = ibcm_arp_pr_lookup(ib_s, &destaddr, &srcaddr, 0, NULL, + ibcm_arp_get_ibaddr_cb); + + IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: ibcm_arp_pr_lookup " + "returned: %d", ret); + if (ret == 0) { + mutex_enter(&ib_s->lock); + cv_wait(&ib_s->cv, &ib_s->lock); + mutex_exit(&ib_s->lock); + } + + (void) ibcm_arp_unlink_drivers(ib_s); + ret = ib_s->status; + if (ret == 0) { + if (sgid) + *sgid = ib_s->wqnp->sgid; + 
if (dgid) + *dgid = ib_s->wqnp->dgid; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr: SGID: %llX:%llX" + " DGID: %llX:%llX", + ib_s->wqnp->sgid.gid_prefix, ib_s->wqnp->sgid.gid_guid, + ib_s->wqnp->dgid.gid_prefix, ib_s->wqnp->dgid.gid_guid); + + ibcm_arp_prwqn_delete(ib_s->wqnp); + } + + mutex_destroy(&ib_s->lock); + cv_destroy(&ib_s->cv); + kmem_free(ib_s, sizeof (ibcm_arp_streams_t)); + + if (ret) + return (IBT_FAILURE); + else + return (IBT_SUCCESS); +} + + +/* + * Routine to get list of "local" IP-ADDR to GID/P_KEY mapping information. + * Optionally, if "gid" and/or "p_key" info are specified, then retrieve the + * IP-ADDR info for that attribute only. + */ + +static ibcm_arp_ip_t * +ibcm_arp_ibd_gid2mac(ib_gid_t *gid, ib_pkey_t pkey, ibcm_arp_ibd_insts_t *ibdp) +{ + ibcm_arp_ip_t *ipp; + int i; + + for (i = 0, ipp = ibdp->ibcm_arp_ip; i < ibdp->ibcm_arp_ibd_cnt; + i++, ipp++) { + if ((ipp->ip_port_gid.gid_prefix == gid->gid_prefix) && + (ipp->ip_port_gid.gid_guid == gid->gid_guid)) { + if (pkey) { + if (ipp->ip_pkey == pkey) + return (ipp); + else + continue; + } + return (ipp); + } + } + return (NULL); +} + +static ibt_status_t +ibcm_arp_ibd_mac2gid(ibcm_arp_ibd_insts_t *ibdp, ipaddr_t srcip, + ib_gid_t *sgid) +{ + ibcm_arp_ip_t *ipp; + int i; + + for (i = 0, ipp = ibdp->ibcm_arp_ip; i < ibdp->ibcm_arp_ibd_cnt; + i++, ipp++) { + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_ibd_mac2gid: Is %lX == %lX " + "GID %llX:%llX", srcip, ipp->ip_cm_sin.sin_addr, + ipp->ip_port_gid.gid_prefix, ipp->ip_port_gid.gid_guid); + + if (bcmp(&srcip, &ipp->ip_cm_sin.sin_addr, sizeof (in_addr_t)) + == 0) { + *sgid = ipp->ip_port_gid; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_ibd_mac2gid: Found " + "GID %llX:%llX", sgid->gid_prefix, sgid->gid_guid); + return (IBT_SUCCESS); + } + } + IBTF_DPRINTF_L3(cmlog, "ibcm_arp_ibd_mac2gid: Matching SRC info " + "NOT Found"); + return (IBT_SRC_IP_NOT_FOUND); +} + +static int +ibcm_arp_get_ibd_insts_cb(dev_info_t *dip, void *arg) +{ + ibcm_arp_ibd_insts_t 
*ibds = (ibcm_arp_ibd_insts_t *)arg; + ibcm_arp_ip_t *ipp; + ib_pkey_t pkey; + uint8_t port; + ib_guid_t hca_guid; + ib_gid_t port_gid; + + if (i_ddi_devi_attached(dip) && + (strcmp(ddi_node_name(dip), "ibport") == 0) && + (strstr(ddi_get_name_addr(dip), "ipib") != NULL)) { + + if (ibds->ibcm_arp_ibd_cnt >= ibds->ibcm_arp_ibd_alloc) { + ibcm_arp_ip_t *tmp = NULL; + int new_count = 0; + + new_count = ibds->ibcm_arp_ibd_alloc + + IBCM_ARP_IBD_INSTANCES; + + tmp = (ibcm_arp_ip_t *)kmem_zalloc( + new_count * sizeof (ibcm_arp_ip_t), KM_SLEEP); + bcopy(ibds->ibcm_arp_ip, tmp, + ibds->ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t)); + kmem_free(ibds->ibcm_arp_ip, + ibds->ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t)); + ibds->ibcm_arp_ibd_alloc = new_count; + ibds->ibcm_arp_ip = tmp; + } + + if (((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0, + "hca-guid", 0)) == 0) || + ((port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, + "port-number", 0)) == 0) || + (ibt_get_port_state_byguid(hca_guid, port, &port_gid, + NULL) != IBT_SUCCESS) || + ((pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, + "port-pkey", IB_PKEY_INVALID_LIMITED)) <= + IB_PKEY_INVALID_FULL)) { + return (DDI_WALK_CONTINUE); + } + + ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt]; + ipp->ip_inst = ddi_get_instance(dip); + ipp->ip_pkey = pkey; + ipp->ip_hca_guid = hca_guid; + ipp->ip_port_gid = port_gid; + ibds->ibcm_arp_ibd_cnt++; + } + return (DDI_WALK_CONTINUE); +} + +static void +ibcm_arp_get_ibd_insts(ibcm_arp_ibd_insts_t *ibds) +{ + ddi_walk_devs(ddi_root_node(), ibcm_arp_get_ibd_insts_cb, ibds); +} + +/* + * Return ibd interfaces and ibd instances. 
+ */ +static int +ibcm_arp_get_ibd_ipaddr(ibcm_arp_ibd_insts_t *ibds) +{ + TIUSER *tiptr; + vnode_t *kvp; + vnode_t *vp = NULL; + struct strioctl iocb; + struct lifreq lif_req; + int k, ip_cnt; + ibcm_arp_ip_t *ipp; + + if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kvp) == 0) { + if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE, + &tiptr, CRED()) == 0) { + vp = tiptr->fp->f_vnode; + } else { + VN_RELE(kvp); + } + } + + if (vp == NULL) + return (-1); + + /* Get ibd ip's */ + ip_cnt = 0; + for (k = 0, ipp = ibds->ibcm_arp_ip; k < ibds->ibcm_arp_ibd_cnt; + k++, ipp++) { + + (void) bzero((void *)&lif_req, sizeof (struct lifreq)); + (void) snprintf(lif_req.lifr_name, sizeof (lif_req.lifr_name), + "%s%d", IBCM_ARP_IBD_NAME, ipp->ip_inst); + + (void) bzero((void *)&iocb, sizeof (struct strioctl)); + iocb.ic_cmd = SIOCGLIFADDR; + iocb.ic_timout = 0; + iocb.ic_len = sizeof (struct lifreq); + iocb.ic_dp = (caddr_t)&lif_req; + + if (kstr_ioctl(vp, I_STR, (intptr_t)&iocb) == 0) { + ipp->ip_inet_family = AF_INET; + bcopy(&lif_req.lifr_addr, &ipp->ip_cm_sin, + sizeof (struct sockaddr_in)); + ip_cnt++; + continue; + } + } + + (void) t_kclose(tiptr, 0); + VN_RELE(kvp); + + if (ip_cnt == 0) + return (-1); + else + return (0); +} + +ibt_status_t +ibcm_arp_get_ibds(ibcm_arp_ibd_insts_t *ibdp) +{ + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds(%p)", ibdp); + + ibcm_arp_get_ibd_insts(ibdp); + + IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibds: Found %d ibd instances", + ibdp->ibcm_arp_ibd_cnt); + + if (ibdp->ibcm_arp_ibd_cnt == 0) + return (IBT_SRC_IP_NOT_FOUND); + + /* Get the IP addresses of active ports. 
*/ + if (ibcm_arp_get_ibd_ipaddr(ibdp) != 0) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: failed to get " + "ibd instance: IBT_SRC_IP_NOT_FOUND"); + return (IBT_SRC_IP_NOT_FOUND); + } + + return (IBT_SUCCESS); +} + +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibtl_cm_port_list_t)) + +ibt_status_t +ibcm_arp_get_srcip_plist(ibt_ip_path_attr_t *ipattr, ibt_path_flags_t flags, + ibtl_cm_port_list_t **port_list_p) +{ + ibt_path_attr_t attr; + ibt_status_t ret; + ibcm_arp_ibd_insts_t ibds; + ibcm_arp_ip_t *ipp; + ibtl_cm_port_list_t *plistp; + ib_gid_t sgid; + + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_srcip_plist(%p, %llX)", + ipattr, flags); + + sgid.gid_prefix = sgid.gid_guid = 0; + bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t)); + ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES; + ibds.ibcm_arp_ibd_cnt = 0; + ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc( + ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP); + + ret = ibcm_arp_get_ibds(&ibds); + if (ret != IBT_SUCCESS) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_srcip_plist: " + "ibcm_arp_get_ibds failed : 0x%x", ret); + goto srcip_plist_end; + } + + if (ipattr->ipa_src_ip.family != AF_UNSPEC) { + ret = ibcm_arp_ibd_mac2gid(&ibds, + htonl(ipattr->ipa_src_ip.un.ip4addr), &sgid); + if (ret != IBT_SUCCESS) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_srcip_plist: " + "SGID for the specified SRCIP Not found %X", ret); + goto srcip_plist_end; + } + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_srcip_plist: SGID " + "%llX:%llX", sgid.gid_prefix, sgid.gid_guid); + } + + bzero(&attr, sizeof (ibt_path_attr_t)); + attr.pa_hca_guid = ipattr->ipa_hca_guid; + attr.pa_hca_port_num = ipattr->ipa_hca_port_num; + attr.pa_sgid = sgid; + bcopy(&ipattr->ipa_mtu, &attr.pa_mtu, sizeof (ibt_mtu_req_t)); + bcopy(&ipattr->ipa_srate, &attr.pa_srate, sizeof (ibt_srate_req_t)); + bcopy(&ipattr->ipa_pkt_lt, &attr.pa_pkt_lt, sizeof (ibt_pkt_lt_req_t)); + + ret = ibtl_cm_get_active_plist(&attr, flags, port_list_p); + if (ret == IBT_SUCCESS) { + int i; + + 
plistp = port_list_p[0]; + for (i = 0; i < plistp->p_count; i++, plistp++) { + ipp = ibcm_arp_ibd_gid2mac(&plistp->p_sgid, 0, &ibds); + if (ipp == NULL) + plistp->p_src_ip.family = AF_UNSPEC; + else { + IBTF_DPRINTF_L4(cmlog, + "ibcm_arp_get_srcip_plist: GID %llX:%llX", + plistp->p_sgid.gid_prefix, + plistp->p_sgid.gid_guid); + if (ipp->ip_inet_family == AF_INET) { + plistp->p_src_ip.family = AF_INET; + bcopy(&ipp->ip_cm_sin.sin_addr, + &plistp->p_src_ip.un.ip4addr, + sizeof (in_addr_t)); + + IBTF_DPRINTF_L4(cmlog, + "ibcm_arp_get_srcip_plist: SrcIP: " + "%lX", plistp->p_src_ip.un.ip4addr); + } else if (ipp->ip_inet_family == AF_INET6) { + plistp->p_src_ip.family = AF_INET6; + bcopy(&ipp->ip_cm_sin6.sin6_addr, + &plistp->p_src_ip.un.ip6addr, + sizeof (in6_addr_t)); + } + } + } + } + +srcip_plist_end: + if (ibds.ibcm_arp_ip) + kmem_free(ibds.ibcm_arp_ip, ibds.ibcm_arp_ibd_alloc * + sizeof (ibcm_arp_ip_t)); + + return (ret); +} +/* Routines for warlock */ + +/* ARGSUSED */ +static int +ibcm_arp_dummy_ibaddr_hdl(void *arg, int status) +{ + ibcm_arp_prwqn_t dummy_wqn1; + ibcm_arp_prwqn_t dummy_wqn2; + + dummy_wqn1.func = ibcm_arp_get_ibaddr_cb; + dummy_wqn2.func = ibcm_arp_dummy_ibaddr_hdl; + + IBTF_DPRINTF_L5(cmlog, "ibcm_arp_dummy_ibaddr_hdl: " + "dummy_wqn1.func %p %p", dummy_wqn1.func, dummy_wqn2.func); + + return (0); +} diff --git a/usr/src/uts/common/io/ib/clients/rds/rdsib_arp_link.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c index e883d2b347..640133b33a 100644 --- a/usr/src/uts/common/io/ib/clients/rds/rdsib_arp_link.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c @@ -54,18 +54,10 @@ #include <inet/ip_ire.h> #include <inet/ip_rts.h> #include <inet/ip_if.h> -#include <sys/ib/clients/ibd/ibd.h> -#include <sys/ib/clients/rds/rdsib_arp.h> -#include <sys/ib/clients/rds/rdsib_debug.h> +#include <sys/ib/mgt/ibcm/ibcm_arp.h> #include <inet/ip_ftable.h> -extern int rds_pr_cache; - -#define RDS_RTM_LEN 0x158 -#define RDS_ARP_XMIT_COUNT 6 -#define 
RDS_ARP_XMIT_INTERVAL 1000 - -static areq_t rds_areq_template = { +static areq_t ibcm_arp_areq_template = { AR_ENTRY_QUERY, /* cmd */ sizeof (areq_t) + (2 * IP_ADDR_LEN), /* name offset */ sizeof (areq_t), /* name len */ @@ -75,15 +67,15 @@ static areq_t rds_areq_template = { 0, /* flags */ sizeof (areq_t) + IP_ADDR_LEN, /* sender addr offset */ IP_ADDR_LEN, /* sender addr length */ - RDS_ARP_XMIT_COUNT, /* xmit_count */ - RDS_ARP_XMIT_INTERVAL, /* (re)xmit_interval in milliseconds */ + IBCM_ARP_XMIT_COUNT, /* xmit_count */ + IBCM_ARP_XMIT_INTERVAL, /* (re)xmit_interval in milliseconds */ 4 /* max # of requests to buffer */ /* * anything else filled in by the code */ }; -static area_t rds_area_template = { +static area_t ibcm_arp_area_template = { AR_ENTRY_ADD, /* cmd */ sizeof (area_t) + IPOIB_ADDRL + (2 * IP_ADDR_LEN), /* name offset */ sizeof (area_t), /* name len */ @@ -96,19 +88,20 @@ static area_t rds_area_template = { IPOIB_ADDRL /* hw addr length */ }; -static void rds_arp_timeout(void *arg); -#ifdef DEBUG -void print_ib_mac(char *str, uint8_t *mac); -void print_ib_gid(char *str, uint8_t *mac); -#endif -extern int rds_get_hca_info(rds_prwqn_t *wqnp); -void rds_pr_callback(rds_prwqn_t *wqnp, int status); +extern char cmlog[]; + +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", msgb)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", area_t)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t)) + +static void ibcm_arp_timeout(void *arg); +void ibcm_arp_pr_callback(ibcm_arp_prwqn_t *wqnp, int status); /* * issue a AR_ENTRY_QUERY to arp driver and schedule a timeout. 
*/ int -rds_query_arp(rds_prwqn_t *wqnp) +ibcm_arp_query_arp(ibcm_arp_prwqn_t *wqnp) { int len; int name_len; @@ -117,12 +110,12 @@ rds_query_arp(rds_prwqn_t *wqnp) mblk_t *mp; mblk_t *mp1; areq_t *areqp; - rds_streams_t *rdss = (rds_streams_t *)wqnp->arg; + ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg; - RDS_DPRINTF4("rds_query_arp", "Enter: rdss: 0x%p wqnp: 0x%p", rdss, - wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_query_arp(ib_s: %p wqnp: %p)", + ib_s, wqnp); - name_offset = rds_areq_template.areq_name_offset; + name_offset = ibcm_arp_areq_template.areq_name_offset; /* * allocate mblk for AR_ENTRY_QUERY @@ -148,7 +141,7 @@ rds_query_arp(rds_prwqn_t *wqnp) *(uintptr_t *)mp1->b_rptr = (uintptr_t)wqnp; /* store wqnp */ cp = (char *)mp->b_rptr; - bcopy(&rds_areq_template, cp, sizeof (areq_t)); + bcopy(&ibcm_arp_areq_template, cp, sizeof (areq_t)); /* LINTED */ areqp = (areq_t *)cp; areqp->areq_name_length = name_len; @@ -170,17 +163,16 @@ rds_query_arp(rds_prwqn_t *wqnp) /* * issue the request to arp */ - wqnp->flags |= RDS_PR_ARP_PENDING; - wqnp->timeout_id = timeout(rds_arp_timeout, wqnp, - drv_usectohz(RDS_ARP_TIMEOUT * 1000)); - if (canputnext(rdss->arpqueue)) { - putnext(rdss->arpqueue, mp); + wqnp->flags |= IBCM_ARP_PR_ARP_PENDING; + wqnp->timeout_id = timeout(ibcm_arp_timeout, wqnp, + drv_usectohz(IBCM_ARP_TIMEOUT * 1000)); + if (canputnext(ib_s->arpqueue)) { + putnext(ib_s->arpqueue, mp); } else { - (void) putq(rdss->arpqueue, mp); - qenable(rdss->arpqueue); + (void) putq(ib_s->arpqueue, mp); + qenable(ib_s->arpqueue); } - RDS_DPRINTF4("rds_query_arp", "Return: 0x%p", wqnp); return (0); } @@ -188,7 +180,7 @@ rds_query_arp(rds_prwqn_t *wqnp) * issue AR_ENTRY_SQUERY to arp driver */ int -rds_squery_arp(rds_prwqn_t *wqnp) +ibcm_arp_squery_arp(ibcm_arp_prwqn_t *wqnp) { int len; int name_len; @@ -198,17 +190,17 @@ rds_squery_arp(rds_prwqn_t *wqnp) area_t *areap; uint32_t proto_mask = 0xffffffff; struct iocblk *ioc; - rds_streams_t *rdss = 
(rds_streams_t *)wqnp->arg; + ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg; - RDS_DPRINTF4("rds_squery_arp", "Enter: rdss: 0x%p wqnp: 0x%p", rdss, - wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_squery_arp(ib_s: %p wqnp: %p)", + ib_s, wqnp); /* * allocate mblk for AR_ENTRY_SQUERY */ name_len = strlen(wqnp->ifname) + 1; - len = rds_area_template.area_name_offset + - name_len + sizeof (uintptr_t); + len = ibcm_arp_area_template.area_name_offset + name_len + + sizeof (uintptr_t); if ((mp = allocb(len, BPRI_HI)) == NULL) { return (ENOMEM); } @@ -221,7 +213,7 @@ rds_squery_arp(rds_prwqn_t *wqnp) cp = (char *)mp->b_rptr; - bcopy(&rds_area_template, cp, sizeof (area_t)); + bcopy(&ibcm_arp_area_template, cp, sizeof (area_t)); /* LINTED */ areap = (area_t *)cp; @@ -252,13 +244,12 @@ rds_squery_arp(rds_prwqn_t *wqnp) DB_TYPE(mp1) = M_IOCTL; - if (canputnext(rdss->arpqueue)) { - putnext(rdss->arpqueue, mp1); + if (canputnext(ib_s->arpqueue)) { + putnext(ib_s->arpqueue, mp1); } else { - (void) putq(rdss->arpqueue, mp1); - qenable(rdss->arpqueue); + (void) putq(ib_s->arpqueue, mp1); + qenable(ib_s->arpqueue); } - RDS_DPRINTF4("rds_squery_arp", "Return: 0x%p", wqnp); return (0); } @@ -267,7 +258,7 @@ rds_squery_arp(rds_prwqn_t *wqnp) * This is required as arp driver does not maintain a cache. 
*/ int -rds_arp_add(rds_prwqn_t *wqnp) +ibcm_arp_add(ibcm_arp_prwqn_t *wqnp) { int len; int name_len; @@ -275,17 +266,16 @@ rds_arp_add(rds_prwqn_t *wqnp) mblk_t *mp; area_t *areap; uint32_t proto_mask = 0xffffffff; - rds_streams_t *rdss = (rds_streams_t *)wqnp->arg; + ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg; - RDS_DPRINTF4("rds_arp_add", "Enter: rdss: 0x%p wqnp: 0x%p", rdss, - wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_add(ib_s: %p wqnp: %p)", ib_s, wqnp); /* * allocate mblk for AR_ENTRY_ADD */ name_len = strlen(wqnp->ifname) + 1; - len = rds_area_template.area_name_offset + name_len; + len = ibcm_arp_area_template.area_name_offset + name_len; if ((mp = allocb(len, BPRI_HI)) == NULL) { return (ENOMEM); } @@ -293,7 +283,7 @@ rds_arp_add(rds_prwqn_t *wqnp) mp->b_wptr += len; cp = (char *)mp->b_rptr; - bcopy(&rds_area_template, cp, sizeof (area_t)); + bcopy(&ibcm_arp_area_template, cp, sizeof (area_t)); /* LINTED */ areap = (area_t *)mp->b_rptr; @@ -312,13 +302,12 @@ rds_arp_add(rds_prwqn_t *wqnp) DB_TYPE(mp) = M_PROTO; - if (canputnext(rdss->arpqueue)) { - putnext(rdss->arpqueue, mp); + if (canputnext(ib_s->arpqueue)) { + putnext(ib_s->arpqueue, mp); } else { - (void) putq(rdss->arpqueue, mp); - qenable(rdss->arpqueue); + (void) putq(ib_s->arpqueue, mp); + qenable(ib_s->arpqueue); } - RDS_DPRINTF4("rds_arp_add", "Return: 0x%p", wqnp); return (0); } @@ -327,20 +316,18 @@ rds_arp_add(rds_prwqn_t *wqnp) * timeout routine when there is no response to AR_ENTRY_QUERY */ static void -rds_arp_timeout(void *arg) +ibcm_arp_timeout(void *arg) { - rds_prwqn_t *wqnp = (rds_prwqn_t *)arg; - rds_streams_t *rdss = (rds_streams_t *)wqnp->arg; + ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg; + ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg; - RDS_DPRINTF4("rds_arp_timeout", "Enter: rdss: 0x%p wqnp: 0x%p", rdss, - wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_timeout(ib_s: %p wqnp: %p)", + ib_s, wqnp); /* * indicate to user */ - rds_pr_callback(wqnp, 
EHOSTUNREACH); - - RDS_DPRINTF4("rds_arp_timeout", "Return: 0x%p", wqnp); + ibcm_arp_pr_callback(wqnp, EHOSTUNREACH); } /* @@ -348,51 +335,50 @@ rds_arp_timeout(void *arg) * assumes mutex is acquired */ void -rds_prwqn_delete(rds_prwqn_t *wqnp) +ibcm_arp_prwqn_delete(ibcm_arp_prwqn_t *wqnp) { - rds_streams_t *rdss; - - RDS_DPRINTF4("rds_prwqn_delete", "Enter: 0x%p", wqnp); + ibcm_arp_streams_t *ib_s; - rdss = (rds_streams_t *)wqnp->arg; - rdss->wqnp = NULL; - kmem_free(wqnp, sizeof (rds_prwqn_t)); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_prwqn_delete(%p)", wqnp); - RDS_DPRINTF4("rds_prwqn_delete", "Return: 0x%p", wqnp); + ib_s = (ibcm_arp_streams_t *)wqnp->arg; + ib_s->wqnp = NULL; + kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t)); } /* * allocate a wait queue node, and insert it in the list */ -rds_prwqn_t * -rds_create_prwqn(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, - rds_ipx_addr_t *src_addr, uint32_t localroute, uint32_t bound_dev_if, - rds_pr_comp_func_t func) +ibcm_arp_prwqn_t * +ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, + ibt_ip_addr_t *src_addr, uint32_t localroute, uint32_t bound_dev_if, + ibcm_arp_pr_comp_func_t func) { - rds_prwqn_t *wqnp; + ibcm_arp_prwqn_t *wqnp; - RDS_DPRINTF4("rds_create_prwqn", "Enter: rdss: 0x%p", rdss); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s); if (dst_addr == NULL) { return (NULL); } - if ((wqnp = kmem_zalloc(sizeof (rds_prwqn_t), KM_NOSLEEP)) == NULL) { + if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) == + NULL) { return (NULL); } + wqnp->dst_addr = *dst_addr; if (src_addr) { wqnp->usrc_addr = *src_addr; } - wqnp->dst_addr = *dst_addr; wqnp->func = func; - wqnp->arg = rdss; + wqnp->arg = ib_s; wqnp->localroute = localroute; wqnp->bound_dev_if = bound_dev_if; wqnp->ifproto = ETHERTYPE_IP; - rdss->wqnp = wqnp; + ib_s->wqnp = wqnp; - RDS_DPRINTF4("rds_create_prwqn", "Return: wqnp: 0x%p", wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", 
wqnp); return (wqnp); } @@ -402,17 +388,15 @@ rds_create_prwqn(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, * called with lock held */ void -rds_pr_callback(rds_prwqn_t *wqnp, int status) +ibcm_arp_pr_callback(ibcm_arp_prwqn_t *wqnp, int status) { - RDS_DPRINTF4("rds_pr_callback", "Enter: 0x%p", wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_callback(%p, %d)", wqnp, status); wqnp->func((void *)wqnp, status); - - RDS_DPRINTF4("rds_pr_callback", "Return: 0x%p", wqnp); } static int -rds_check_interface(rds_prwqn_t *wqnp, int length) +ibcm_arp_check_interface(ibcm_arp_prwqn_t *wqnp, int length) { /* * if the i/f is not ib or lo device, fail the request @@ -428,12 +412,14 @@ rds_check_interface(rds_prwqn_t *wqnp, int length) return (0); } +#define IBTL_IPV4_ADDR(a) (a->un.ip4addr) + int -rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, - rds_ipx_addr_t *src_addr, uint8_t localroute, uint32_t bound_dev_if, - rds_pr_comp_func_t func) +ibcm_arp_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, + ibt_ip_addr_t *src_addr, uint8_t localroute, uint32_t bound_dev_if, + ibcm_arp_pr_comp_func_t func) { - rds_prwqn_t *wqnp; + ibcm_arp_prwqn_t *wqnp; ire_t *ire; ire_t *src_ire; ipif_t *ipif; @@ -441,19 +427,19 @@ rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, int length; ip_stack_t *ipst; - - - RDS_DPRINTF4("rds_pr_lookup", "Enter: src 0x%x dest 0x%x", src_addr, - dst_addr); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_lookup(src %p dest %p)", + src_addr, dst_addr); if (dst_addr->family != AF_INET_OFFLOAD) { - rdss->status = EAFNOSUPPORT; + ib_s->status = EAFNOSUPPORT; return (1); } - if ((wqnp = rds_create_prwqn(rdss, dst_addr, - src_addr, localroute, bound_dev_if, func)) == NULL) { - rdss->status = ENOMEM; + if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, + src_addr, localroute, bound_dev_if, func)) == NULL) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: " + "ibcm_arp_create_prwqn failed"); + ib_s->status = ENOMEM; return (1); } @@ -461,15 +447,17 
@@ rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, /* * Get the ire for the local address */ + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_lookup: srcip %lX destip %lX", + IBTL_IPV4_ADDR(src_addr), IBTL_IPV4_ADDR(dst_addr)); - src_ire = ire_ctable_lookup(RDS_IPV4_ADDR(src_addr), NULL, + src_ire = ire_ctable_lookup(IBTL_IPV4_ADDR(src_addr), NULL, IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst); - - if (src_ire == NULL) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: " + "ire_ctable_lookup failed"); netstack_rele(ipst->ips_netstack); - rds_prwqn_delete(wqnp); - rdss->status = EFAULT; + ibcm_arp_prwqn_delete(wqnp); + ib_s->status = EFAULT; return (1); } @@ -478,16 +466,18 @@ rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, * get an ire for the destination adress with the matching source * address */ - ire = ire_ftable_lookup(RDS_IPV4_ADDR(dst_addr), 0, 0, 0, + ire = ire_ftable_lookup(IBTL_IPV4_ADDR(dst_addr), 0, 0, 0, src_ire->ire_ipif, 0, src_ire->ire_zoneid, 0, NULL, MATCH_IRE_SRC, ipst); netstack_rele(ipst->ips_netstack); if (ire == NULL) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: " + "ire_ftable_lookup failed"); IRE_REFRELE(src_ire); - rds_prwqn_delete(wqnp); - rdss->status = EFAULT; + ibcm_arp_prwqn_delete(wqnp); + ib_s->status = EFAULT; return (1); } @@ -505,24 +495,29 @@ rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, IRE_REFRELE(ire); IRE_REFRELE(src_ire); - rdss->status = rds_check_interface(wqnp, ill->ill_phys_addr_length); - if (rdss->status) { - rds_prwqn_delete(wqnp); + ib_s->status = + ibcm_arp_check_interface(wqnp, ill->ill_phys_addr_length); + if (ib_s->status) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: " + "ibcm_arp_check_interface failed"); + ibcm_arp_prwqn_delete(wqnp); return (1); } - rdss->status = rds_squery_arp(wqnp); - if (rdss->status) { - rds_prwqn_delete(wqnp); + ib_s->status = ibcm_arp_squery_arp(wqnp); + if (ib_s->status) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: " + 
"ibcm_arp_squery_arp failed"); + ibcm_arp_prwqn_delete(wqnp); return (1); } - RDS_DPRINTF4("rds_pr_lookup", "Return: 0x%p", wqnp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_lookup: Return: 0x%p", wqnp); return (0); } -#define H2N_GID(gid) \ +#define IBCM_H2N_GID(gid) \ { \ uint32_t *ptr; \ ptr = (uint32_t *)&gid.gid_prefix; \ @@ -539,15 +534,15 @@ rds_pr_lookup(rds_streams_t *rdss, rds_ipx_addr_t *dst_addr, * the message should be M_DATA -->> dl_unitdata_req */ void -rds_pr_arp_query_ack(mblk_t *mp) +ibcm_arp_pr_arp_query_ack(mblk_t *mp) { - rds_prwqn_t *wqnp; + ibcm_arp_prwqn_t *wqnp; dl_unitdata_req_t *dlreq; - rds_streams_t *rdss; + ibcm_arp_streams_t *ib_s; char *cp; int rc; - RDS_DPRINTF4("rds_pr_arp_query_ack", "Enter: 0x%p", mp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_arp_query_ack(%p)", mp); /* * the first mblk contains the wqnp pointer for the request @@ -559,10 +554,10 @@ rds_pr_arp_query_ack(mblk_t *mp) } /* LINTED */ - wqnp = *(rds_prwqn_t **)mp->b_rptr; /* retrieve wqnp */ - rdss = (rds_streams_t *)wqnp->arg; + wqnp = *(ibcm_arp_prwqn_t **)mp->b_rptr; /* retrieve wqnp */ + ib_s = (ibcm_arp_streams_t *)wqnp->arg; - mutex_enter(&rdss->lock); + mutex_enter(&ib_s->lock); /* * cancel the timeout for this request @@ -573,29 +568,28 @@ rds_pr_arp_query_ack(mblk_t *mp) * sanity checks on the dl_unitdata_req block */ if (!mp->b_cont) { - RDS_DPRINTF1(LABEL, "areq_ack: b_cont = NULL\n"); + IBTF_DPRINTF_L2(cmlog, "areq_ack: b_cont = NULL\n"); rc = EPROTO; goto user_callback; } /* LINTED */ if (MBLKL(mp->b_cont) < (sizeof (dl_unitdata_req_t) + IPOIB_ADDRL)) { - RDS_DPRINTF1(LABEL, "areq_ack: invalid len in " - "dl_unitdatareq_t block\n"); + IBTF_DPRINTF_L2(cmlog, "areq_ack: invalid len in " + "dl_unitdatareq_t block\n"); rc = EPROTO; goto user_callback; } /* LINTED */ dlreq = (dl_unitdata_req_t *)mp->b_cont->b_rptr; if (dlreq->dl_primitive != DL_UNITDATA_REQ) { - RDS_DPRINTF1(LABEL, "areq_ack: invalid dl_primitive in " - "dl_unitdatareq_t block\n"); + 
IBTF_DPRINTF_L2(cmlog, "areq_ack: invalid dl_primitive " + "in dl_unitdatareq_t block\n"); rc = EPROTO; goto user_callback; } if (dlreq->dl_dest_addr_length != (IPOIB_ADDRL + 2)) { - RDS_DPRINTF1(LABEL, "areq_ack: invalid hw len in " - "dl_unitdatareq_t block %d\n", - dlreq->dl_dest_addr_length); + IBTF_DPRINTF_L2(cmlog, "areq_ack: invalid hw len in " + "dl_unitdatareq_t block %d\n", dlreq->dl_dest_addr_length); rc = EPROTO; goto user_callback; } @@ -610,25 +604,23 @@ rds_pr_arp_query_ack(mblk_t *mp) bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t)); freemsg(mp); - H2N_GID(wqnp->sgid); - H2N_GID(wqnp->dgid); - - (void) rds_arp_add(wqnp); + IBCM_H2N_GID(wqnp->sgid); + IBCM_H2N_GID(wqnp->dgid); - mutex_exit(&rdss->lock); - rds_pr_callback(wqnp, 0); + (void) ibcm_arp_add(wqnp); - RDS_DPRINTF4("rds_pr_arp_query_ack", "Return: 0x%p", mp); + mutex_exit(&ib_s->lock); + ibcm_arp_pr_callback(wqnp, 0); return; user_callback: freemsg(mp); - mutex_exit(&rdss->lock); + mutex_exit(&ib_s->lock); /* * indicate to user */ - rds_pr_callback(wqnp, rc); + ibcm_arp_pr_callback(wqnp, rc); } /* @@ -636,16 +628,16 @@ user_callback: * the message should be M_IOCACK -->> area_t */ void -rds_pr_arp_squery_ack(mblk_t *mp) +ibcm_arp_pr_arp_squery_ack(mblk_t *mp) { struct iocblk *ioc; mblk_t *mp1; - rds_prwqn_t *wqnp; - rds_streams_t *rdss; + ibcm_arp_prwqn_t *wqnp; + ibcm_arp_streams_t *ib_s; area_t *areap; char *cp; - RDS_DPRINTF4("rds_pr_arp_squery_ack", "Enter: 0x%p", mp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_arp_squery_ack(%p)", mp); /* LINTED */ if (MBLKL(mp) < sizeof (struct iocblk)) { @@ -662,10 +654,11 @@ rds_pr_arp_squery_ack(mblk_t *mp) mp1 = mp->b_cont; - wqnp = *(rds_prwqn_t **)((uintptr_t)mp1->b_rptr - sizeof (uintptr_t)); - rdss = (rds_streams_t *)wqnp->arg; + wqnp = *(ibcm_arp_prwqn_t **)((uintptr_t)mp1->b_rptr - + sizeof (uintptr_t)); + ib_s = (ibcm_arp_streams_t *)wqnp->arg; - mutex_enter(&rdss->lock); + mutex_enter(&ib_s->lock); /* If the entry was not in 
arp cache, ioc_error is set */ if (ioc->ioc_error) { @@ -674,11 +667,11 @@ rds_pr_arp_squery_ack(mblk_t *mp) * send out AR_ENTRY_QUERY which would send * arp-request on wire */ - RDS_DPRINTF3(LABEL, "Sending a Query_ARP"); + IBTF_DPRINTF_L3(cmlog, "Sending a Query_ARP"); - (void) rds_query_arp(wqnp); + (void) ibcm_arp_query_arp(wqnp); freemsg(mp); - mutex_exit(&rdss->lock); + mutex_exit(&ib_s->lock); return; } @@ -695,71 +688,28 @@ rds_pr_arp_squery_ack(mblk_t *mp) bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t)); freemsg(mp); - H2N_GID(wqnp->sgid); - H2N_GID(wqnp->dgid); - - mutex_exit(&rdss->lock); - rds_pr_callback(wqnp, 0); + IBCM_H2N_GID(wqnp->sgid); + IBCM_H2N_GID(wqnp->dgid); - RDS_DPRINTF4("rds_pr_arp_squery_ack", "Return: 0x%p", mp); + mutex_exit(&ib_s->lock); + ibcm_arp_pr_callback(wqnp, 0); } /* * Process arp ack's. */ void -rds_pr_arp_ack(mblk_t *mp) +ibcm_arp_pr_arp_ack(mblk_t *mp) { - RDS_DPRINTF4("rds_pr_arp_ack", "Enter: 0x%p", mp); + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_arp_ack(0x%p, DB_TYPE %lX)", + mp, DB_TYPE(mp)); if (DB_TYPE(mp) == M_DATA) { - rds_pr_arp_query_ack(mp); + ibcm_arp_pr_arp_query_ack(mp); } else if ((DB_TYPE(mp) == M_IOCACK) || (DB_TYPE(mp) == M_IOCNAK)) { - rds_pr_arp_squery_ack(mp); + ibcm_arp_pr_arp_squery_ack(mp); } else { freemsg(mp); } - - RDS_DPRINTF4("rds_pr_arp_ack", "Return: 0x%p", mp); -} - -#ifdef DEBUG -void -print_ib_mac(char *str, uint8_t *mac) -{ - cmn_err(CE_CONT, "%s:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:\n", - str, - mac[0] & 0xff, mac[1] & 0xff, - mac[2] & 0xff, mac[3] & 0xff, - mac[4] & 0xff, mac[5] & 0xff, - mac[6] & 0xff, mac[7] & 0xff, - mac[8] & 0xff, mac[9] & 0xff, - mac[10] & 0xff, mac[11] & 0xff, - mac[12] & 0xff, mac[13] & 0xff, - mac[14] & 0xff, mac[15] & 0xff, - mac[16] & 0xff, mac[17] & 0xff, mac[18] & 0xff, mac[19] & 0xff); -} - -void -print_ib_gid(char 
*str, uint8_t *mac) -{ - cmn_err(CE_CONT, "%s:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:" - "%02x:" "%02x:" "%02x:" "%02x:" "%02x:" - "%02x:\n", - str, - mac[0] & 0xff, mac[1] & 0xff, - mac[2] & 0xff, mac[3] & 0xff, - mac[4] & 0xff, mac[5] & 0xff, - mac[6] & 0xff, mac[7] & 0xff, - mac[8] & 0xff, mac[9] & 0xff, - mac[10] & 0xff, mac[11] & 0xff, - mac[12] & 0xff, mac[13] & 0xff, mac[14] & 0xff, mac[15] & 0xff); } -#endif diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c index 2f24dc660d..798403c044 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c @@ -26,7 +26,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" #include <sys/ib/mgt/ibcm/ibcm_impl.h> -#include <sys/ib/ibtl/ibti.h> +#include <sys/ib/mgt/ibcm/ibcm_arp.h> /* * ibcm_path.c @@ -34,9 +34,6 @@ * ibt_get_paths() implement the Path Informations related functionality. */ -/* Externs. */ -extern char cmlog[]; - /* ibcm_saa_service_rec() fills in ServiceID and DGID. */ typedef struct ibcm_dest_s { ib_gid_t d_gid; @@ -105,7 +102,6 @@ static ibt_status_t ibcm_process_get_paths(void *tq_arg); static ibt_status_t ibcm_get_comp_pgids(ib_gid_t, ib_gid_t, ib_guid_t, ib_gid_t **, uint_t *); - /* * Function: * ibt_aget_paths @@ -219,7 +215,7 @@ ibcm_path_cache_init(void) timeout_in_hz = drv_usectohz(ibcm_path_cache_timeout * 1000000); path_cachep = kmem_zalloc(cache_size * sizeof (*path_cachep), KM_SLEEP); - mutex_init(&ibcm_path_cache_mutex, NULL, MUTEX_DRIVER, NULL); + mutex_init(&ibcm_path_cache_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_enter(&ibcm_path_cache_mutex); ibcm_path_cache_size = cache_size; ibcm_path_cachep = path_cachep; @@ -754,105 +750,19 @@ ibcm_process_get_paths(void *tq_arg) _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*p_arg)) - /* Checkout whether user has specified SGID. 
*/ - if (p_arg->attr.pa_sgid.gid_prefix && p_arg->attr.pa_sgid.gid_guid) { - ibtl_cm_hca_port_t hport; - - IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_paths: SGID %llX:%llX", - p_arg->attr.pa_sgid.gid_prefix, - p_arg->attr.pa_sgid.gid_guid); - - /* For the specified SGID, get HCA information. */ - retval = ibtl_cm_get_hca_port(p_arg->attr.pa_sgid, - p_arg->attr.pa_hca_guid, &hport); - if (retval != IBT_SUCCESS) { - IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths: " - "Get HCA Port Failed: %d", retval); - goto path_error; - } - - if ((p_arg->attr.pa_hca_port_num != 0) && - (p_arg->attr.pa_hca_port_num != hport.hp_port)) { - IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths: " - "Mis-match input HCA PortNum v/s SGID"); - retval = IBT_HCA_PORT_INVALID; - goto path_error; - } - - /* - * If a specific MTU is desired, then first check out whether - * this source port is capable of this MTU. - */ - if (p_arg->attr.pa_mtu.r_mtu) { - if ((p_arg->attr.pa_mtu.r_selector == IBT_GT) && - (p_arg->attr.pa_mtu.r_mtu >= hport.hp_mtu)) { - - IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths:" - " Required MTU not available on this Port. 
" - "Requested IBT_GT 0x%x but avail 0x%x", - p_arg->attr.pa_mtu.r_mtu, hport.hp_mtu); - - retval = IBT_INVALID_PARAM; - goto path_error; - } else if ((p_arg->attr.pa_mtu.r_selector == IBT_EQU) && - (p_arg->attr.pa_mtu.r_mtu > hport.hp_mtu)) { - IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths:" - " Required MTU not available on this Port " - "Requested IBT_EQU to 0x%x but avail 0x%x", - p_arg->attr.pa_mtu.r_mtu, hport.hp_mtu); - - retval = IBT_INVALID_PARAM; - goto path_error; - } - } - - if (p_arg->flags & IBT_PATH_APM) { - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*p_arg)) - p_arg->attr.pa_hca_guid = hport.hp_hca_guid; - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*p_arg)) + /* + * Get list of active HCA<->Port list, that matches input specified attr + */ + IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_paths: Get Paths from \n HCA " + "(%llX:%d), SGID %llX:%llX", p_arg->attr.pa_hca_guid, + p_arg->attr.pa_hca_port_num, p_arg->attr.pa_sgid.gid_prefix, + p_arg->attr.pa_sgid.gid_guid); - retval = ibtl_cm_get_active_plist(&p_arg->attr, - p_arg->flags, &slistp); - if (retval != IBT_SUCCESS) { - IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths:" - " ibtl_cm_get_active_plist returned error " - " %d", retval); - goto path_error; - } - } else { - slistp = kmem_zalloc(sizeof (ibtl_cm_port_list_t), - KM_SLEEP); - - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*slistp)) - slistp->p_hca_guid = hport.hp_hca_guid; - slistp->p_mtu = hport.hp_mtu; - slistp->p_port_num = hport.hp_port; - slistp->p_base_lid = hport.hp_base_lid; - slistp->p_sgid = p_arg->attr.pa_sgid; - slistp->p_sgid_ix = hport.hp_sgid_ix; - slistp->p_count = 1; - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*slistp)) - } - } else { - /* Source GID is not specified, but so let's find them. 
*/ - if (p_arg->attr.pa_hca_guid) - IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_paths: Get " - "Paths from HCA (%llX)", p_arg->attr.pa_hca_guid); - else - IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_paths: SRC " - "Point not specified, flags(%X)", p_arg->flags); - /* - * Get list of active HCA<->Port list, that matches input - * specified attr. - */ - retval = ibtl_cm_get_active_plist(&p_arg->attr, p_arg->flags, - &slistp); - if (retval != IBT_SUCCESS) { - IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths: " - "HCA capable of requested source attributes NOT " - "available."); - goto path_error; - } + retval = ibtl_cm_get_active_plist(&p_arg->attr, p_arg->flags, &slistp); + if (retval != IBT_SUCCESS) { + IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_paths: HCA capable of " + "requested source attributes NOT available."); + goto path_error; } IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_paths: HCA (%llX, %d)", @@ -1392,8 +1302,8 @@ ibcm_get_single_pathrec(ibcm_path_tqargs_t *p_arg, ibtl_cm_port_list_t *sl, } IBTF_DPRINTF_L3(cmlog, "ibcm_get_single_pathrec: " - "Get %d Path(s) between\n\tSGID(%llX:%llX) " - "DGID(%llX:%llX)", pathrec_req.NumbPath, + "Get %d Path(s) between\nSGID %llX:%llX " + "DGID %llX:%llX", pathrec_req.NumbPath, pathrec_req.SGID.gid_prefix, pathrec_req.SGID.gid_guid, pathrec_req.DGID.gid_prefix, @@ -2023,8 +1933,8 @@ ibcm_fillin_loopbackinfo(ibtl_cm_port_list_t *sl, uint8_t index, paths->pi_prim_cep_path.cep_adds_vect.av_dlid = sl->p_base_lid; paths->pi_prim_cep_path.cep_adds_vect.av_src_path = 0; paths->pi_prim_cep_path.cep_adds_vect.av_sgid_ix = sl->p_sgid_ix; - paths->pi_prim_cep_path.cep_adds_vect.av_port_num = - paths->pi_prim_cep_path.cep_hca_port_num = sl->p_port_num; + paths->pi_prim_cep_path.cep_adds_vect.av_port_num = sl->p_port_num; + paths->pi_prim_cep_path.cep_hca_port_num = sl->p_port_num; paths->pi_prim_cep_path.cep_timeout = 0; /* To be filled in by CM. */ paths->pi_path_mtu = sl->p_mtu; /* MTU */ paths->pi_prim_pkt_lt = 0; /* Packet Life Time. 
*/ @@ -2471,8 +2381,8 @@ ibcm_saa_service_rec(ibcm_path_tqargs_t *p_arg, ibtl_cm_port_list_t *sl, /* Rec not found for Alt. */ for (j = 0; j < n_gids; j++) { if (gidp[j].gid_prefix - == - p_gid.gid_prefix) { + == p_gid. + gid_prefix) { a_gid = gidp[j]; break; } @@ -3211,6 +3121,1804 @@ get_alt_path_done: } + +/* + * IP Path API + */ + +typedef struct ibcm_ip_path_tqargs_s { + ibt_ip_path_attr_t attr; + ibt_path_info_t *paths; + ibt_path_ip_src_t *src_ip_p; + uint8_t *num_paths_p; + ibt_ip_path_handler_t func; + void *arg; + ibt_path_flags_t flags; + ibt_clnt_hdl_t ibt_hdl; + kmutex_t ip_lock; + kcondvar_t ip_cv; + ibt_status_t retval; + uint_t len; +} ibcm_ip_path_tqargs_t; + +typedef struct ibcm_ip_dest_s { + ib_gid_t d_gid; + uint_t d_tag; /* 0 = Unicast, 2 = LoopBack */ + ibt_ip_addr_t d_ip; +} ibcm_ip_dest_t; + +/* Holds destination information needed to fill in ibt_path_info_t. */ +typedef struct ibcm_ip_dinfo_s { + uint8_t num_dest; + ibcm_ip_dest_t dest[1]; +} ibcm_ip_dinfo_t; + +_NOTE(SCHEME_PROTECTS_DATA("Temporary path storage", ibcm_ip_dinfo_s)) + +/* Prototype Declarations. */ +static void ibcm_process_get_ip_paths(void *tq_arg); +static ibt_status_t ibcm_get_ip_spr(ibcm_ip_path_tqargs_t *, + ibtl_cm_port_list_t *, ibcm_ip_dinfo_t *, uint8_t *, ibt_path_info_t *); +static ibt_status_t ibcm_get_ip_mpr(ibcm_ip_path_tqargs_t *, + ibtl_cm_port_list_t *, ibcm_ip_dinfo_t *dinfo, + uint8_t *, ibt_path_info_t *); +static ibt_status_t ibcm_fillin_ip_lbpr(ibtl_cm_port_list_t *, uint8_t index, + ibcm_ip_dinfo_t *, ibt_path_info_t *); + +/* + * Perform SA Access to retrieve Path Records. 
+ */ +static ibt_status_t +ibcm_saa_ip_pr(ibcm_ip_path_tqargs_t *p_arg, ibtl_cm_port_list_t *sl, + ibcm_ip_dinfo_t *dinfo, uint8_t *max_count) +{ + uint8_t num_path = *max_count; + uint8_t num_path_plus; + uint_t extra, idx, rec_found = 0; + ibt_status_t retval = IBT_SUCCESS; + int dgid_present = 0; + uint8_t i, j; + + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr(%p, %p, %p, 0x%X, %d)", + p_arg, sl, dinfo, p_arg->flags, *max_count); + + if ((dinfo->num_dest == 0) || (num_path == 0) || (sl == NULL)) { + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr: Invalid Counters"); + return (IBT_INVALID_PARAM); + } + + /* + * Of the total needed "X" number of paths to "Y" number of destination + * we need to get X/Y plus X%Y extra paths to each destination, + * We do this so that we can choose the required number of path records + * for the specific destination. + */ + num_path /= dinfo->num_dest; + extra = (*max_count % dinfo->num_dest); + + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr: numpath %d extra %d dest %d", + num_path, extra, dinfo->num_dest); + + /* + * Find out whether we need to get PathRecord that qualifies for a + * LoopBack. + */ + for (idx = 0; idx < dinfo->num_dest; idx++) { + ib_gid_t dgid = dinfo->dest[idx].d_gid; + + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr: DGID[%d]: %llX:%llX", + idx, dgid.gid_prefix, dgid.gid_guid); + + /* + * For loop-back path record, we should NOT contact SA Access. + * But instead we need to "synthesize" a loop back path record. + */ + for (i = 0; i < sl->p_count; i++) { + if ((sl[i].p_sgid.gid_prefix == dgid.gid_prefix) && + (sl[i].p_sgid.gid_guid == dgid.gid_guid)) { + + dinfo->dest[idx].d_tag = 2; + + /* Yes, it's loop back case. */ + retval = ibcm_fillin_ip_lbpr(&sl[i], idx, + dinfo, &p_arg->paths[rec_found]); + if (retval != IBT_SUCCESS) + break; + + /* + * We update only one record for loop-back case. 
+ */ + rec_found++; + if (rec_found == *max_count) + break; + } + } + if (rec_found == *max_count) + break; + } + + for (i = 0; i < dinfo->num_dest; i++) + if (dinfo->dest[i].d_tag == 0) + dgid_present++; + + num_path_plus = *max_count - rec_found; + + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr: Recfound: %d, need to find " + "%d, GID present %d", rec_found, num_path_plus, dgid_present); + + if ((dgid_present != 0) && (num_path_plus > 0)) { + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr: MultiSM=%X, #SRC=%d, " + "Dest=%d", sl->p_multi, sl->p_count, dgid_present); + + if ((sl->p_multi != IBTL_CM_SIMPLE_SETUP) || + ((dgid_present == 1) && (sl->p_count == 1))) { + /* + * Use SinglePathRec if we are dealing w/ MultiSM or + * request is for one SGID to one DGID. + */ + retval = ibcm_get_ip_spr(p_arg, sl, dinfo, + &num_path_plus, &p_arg->paths[rec_found]); + } else { + /* MultiPathRec will be used for other queries. */ + retval = ibcm_get_ip_mpr(p_arg, sl, dinfo, + &num_path_plus, &p_arg->paths[rec_found]); + } + + if ((retval != IBT_SUCCESS) && (retval != IBT_INSUFF_DATA)) + IBTF_DPRINTF_L2(cmlog, "ibcm_saa_ip_pr: " + "Failed to get PathRec: Status %d", retval); + else + rec_found += num_path_plus; + } + + if (rec_found == 0) { + if (retval == IBT_SUCCESS) + retval = IBT_PATH_RECORDS_NOT_FOUND; + } else if (rec_found != *max_count) + retval = IBT_INSUFF_DATA; + else if (rec_found != 0) + retval = IBT_SUCCESS; + + if ((p_arg->src_ip_p != NULL) && (rec_found != 0)) { + for (i = 0; i < rec_found; i++) { + for (j = 0; j < sl->p_count; j++) { + if (sl[j].p_sgid.gid_guid == p_arg->paths[i]. + pi_prim_cep_path.cep_adds_vect. + av_sgid.gid_guid) { + bcopy(&sl[j].p_src_ip, + &p_arg->src_ip_p[i].ip_primary, + sizeof (ibt_ip_addr_t)); + } + /* Is Alt Path present */ + if (p_arg->paths[i].pi_alt_cep_path. + cep_hca_port_num) { + if (sl[j].p_sgid.gid_guid == + p_arg->paths[i].pi_alt_cep_path. + cep_adds_vect.av_sgid.gid_guid) { + bcopy(&sl[j].p_src_ip, + &p_arg->src_ip_p[i]. 
+ ip_alternate, + sizeof (ibt_ip_addr_t)); + } + } + } + } + } + IBTF_DPRINTF_L3(cmlog, "ibcm_saa_ip_pr: done. Status = %d, " + "Found %d/%d Paths", retval, rec_found, *max_count); + + *max_count = rec_found; /* Update the return count. */ + + return (retval); +} + +static ibt_status_t +ibcm_ip_update_pri(sa_path_record_t *pr_resp, ibtl_cm_port_list_t *sl, + ibt_path_info_t *paths) +{ + ibt_status_t retval = IBT_SUCCESS; + int s; + + retval = ibcm_update_cep_info(pr_resp, sl, NULL, + &paths->pi_prim_cep_path); + if (retval != IBT_SUCCESS) + return (retval); + + /* Update some leftovers */ + paths->pi_prim_pkt_lt = pr_resp->PacketLifeTime; + paths->pi_path_mtu = pr_resp->Mtu; + + for (s = 0; s < sl->p_count; s++) { + if (pr_resp->SGID.gid_guid == sl[s].p_sgid.gid_guid) + paths->pi_hca_guid = sl[s].p_hca_guid; + } + + /* Set Alternate Path to invalid state. */ + paths->pi_alt_cep_path.cep_hca_port_num = 0; + paths->pi_alt_cep_path.cep_adds_vect.av_dlid = 0; + + IBTF_DPRINTF_L5(cmlog, "ibcm_ip_update_pri: Path HCA GUID 0x%llX", + paths->pi_hca_guid); + + return (retval); +} + + +static ibt_status_t +ibcm_get_ip_spr(ibcm_ip_path_tqargs_t *p_arg, ibtl_cm_port_list_t *sl, + ibcm_ip_dinfo_t *dinfo, uint8_t *num_path, ibt_path_info_t *paths) +{ + sa_path_record_t pathrec_req; + sa_path_record_t *pr_resp; + ibmf_saa_access_args_t access_args; + uint64_t c_mask = 0; + void *results_p; + uint8_t num_rec; + size_t length; + ibt_status_t retval; + int i, j, k; + int found, p_fnd; + ibt_ip_path_attr_t *attrp = &p_arg->attr; + ibmf_saa_handle_t saa_handle; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr(%p, %p, %p, %d)", + p_arg, sl, dinfo, *num_path); + + bzero(&pathrec_req, sizeof (sa_path_record_t)); + + /* Is Flow Label Specified. */ + if (attrp->ipa_flow) { + pathrec_req.FlowLabel = attrp->ipa_flow; + c_mask |= SA_PR_COMPMASK_FLOWLABEL; + } + + /* Is HopLimit Specified. 
*/ + if (p_arg->flags & IBT_PATH_HOP) { + pathrec_req.HopLimit = attrp->ipa_hop; + c_mask |= SA_PR_COMPMASK_HOPLIMIT; + } + + /* Is TClass Specified. */ + if (attrp->ipa_tclass) { + pathrec_req.TClass = attrp->ipa_tclass; + c_mask |= SA_PR_COMPMASK_TCLASS; + } + + /* Is SL specified. */ + if (attrp->ipa_sl) { + pathrec_req.SL = attrp->ipa_sl; + c_mask |= SA_PR_COMPMASK_SL; + } + + /* If IBT_PATH_PERF is set, then mark all selectors to BEST. */ + if (p_arg->flags & IBT_PATH_PERF) { + pathrec_req.PacketLifeTimeSelector = IBT_BEST; + pathrec_req.MtuSelector = IBT_BEST; + pathrec_req.RateSelector = IBT_BEST; + + c_mask |= SA_PR_COMPMASK_PKTLTSELECTOR | + SA_PR_COMPMASK_RATESELECTOR | SA_PR_COMPMASK_MTUSELECTOR; + } else { + if (attrp->ipa_pkt_lt.p_selector == IBT_BEST) { + pathrec_req.PacketLifeTimeSelector = IBT_BEST; + c_mask |= SA_PR_COMPMASK_PKTLTSELECTOR; + } + + if (attrp->ipa_srate.r_selector == IBT_BEST) { + pathrec_req.RateSelector = IBT_BEST; + c_mask |= SA_PR_COMPMASK_RATESELECTOR; + } + + if (attrp->ipa_mtu.r_selector == IBT_BEST) { + pathrec_req.MtuSelector = IBT_BEST; + c_mask |= SA_PR_COMPMASK_MTUSELECTOR; + } + } + + /* + * Honor individual selection of these attributes, + * even if IBT_PATH_PERF is set. + */ + /* Check out whether Packet Life Time is specified. */ + if (attrp->ipa_pkt_lt.p_pkt_lt) { + pathrec_req.PacketLifeTime = + ibt_usec2ib(attrp->ipa_pkt_lt.p_pkt_lt); + pathrec_req.PacketLifeTimeSelector = + attrp->ipa_pkt_lt.p_selector; + + c_mask |= SA_PR_COMPMASK_PKTLT | SA_PR_COMPMASK_PKTLTSELECTOR; + } + + /* Is SRATE specified. */ + if (attrp->ipa_srate.r_srate) { + pathrec_req.Rate = attrp->ipa_srate.r_srate; + pathrec_req.RateSelector = attrp->ipa_srate.r_selector; + + c_mask |= SA_PR_COMPMASK_RATE | SA_PR_COMPMASK_RATESELECTOR; + } + + /* Is MTU specified. 
*/ + if (attrp->ipa_mtu.r_mtu) { + pathrec_req.Mtu = attrp->ipa_mtu.r_mtu; + pathrec_req.MtuSelector = attrp->ipa_mtu.r_selector; + + c_mask |= SA_PR_COMPMASK_MTU | SA_PR_COMPMASK_MTUSELECTOR; + } + + /* We always get REVERSIBLE paths. */ + pathrec_req.Reversible = 1; + c_mask |= SA_PR_COMPMASK_REVERSIBLE; + + pathrec_req.NumbPath = *num_path; + c_mask |= SA_PR_COMPMASK_NUMBPATH; + + p_fnd = found = 0; + + for (i = 0; i < sl->p_count; i++) { + /* SGID */ + pathrec_req.SGID = sl[i].p_sgid; + c_mask |= SA_PR_COMPMASK_SGID; + saa_handle = sl[i].p_saa_hdl; + + for (k = 0; k < dinfo->num_dest; k++) { + if (dinfo->dest[k].d_tag != 0) + continue; + + if (pathrec_req.SGID.gid_prefix != + dinfo->dest[k].d_gid.gid_prefix) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr: " + "SGID_pfx=%llX DGID_pfx=%llX doesn't match", + pathrec_req.SGID.gid_prefix, + dinfo->dest[k].d_gid.gid_prefix); + continue; + } else if (pathrec_req.SGID.gid_guid == + pathrec_req.DGID.gid_guid) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr: Why " + "LoopBack request came here! GID %llX:%llX", + pathrec_req.SGID.gid_prefix, + pathrec_req.SGID.gid_guid); + continue; + } + + pathrec_req.DGID = dinfo->dest[k].d_gid; + c_mask |= SA_PR_COMPMASK_DGID; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr: " + "Get %d Path(s) between\n SGID %llX:%llX " + "DGID %llX:%llX", pathrec_req.NumbPath, + pathrec_req.SGID.gid_prefix, + pathrec_req.SGID.gid_guid, + pathrec_req.DGID.gid_prefix, + pathrec_req.DGID.gid_guid); + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr: CMask=0x%llX, " + "PKey=0x%X", c_mask, pathrec_req.P_Key); + + /* Contact SA Access to retrieve Path Records. 
*/ + access_args.sq_attr_id = SA_PATHRECORD_ATTRID; + access_args.sq_template = &pathrec_req; + access_args.sq_access_type = IBMF_SAA_RETRIEVE; + access_args.sq_template_length = + sizeof (sa_path_record_t); + access_args.sq_component_mask = c_mask; + access_args.sq_callback = NULL; + access_args.sq_callback_arg = NULL; + + retval = ibcm_contact_sa_access(saa_handle, + &access_args, &length, &results_p); + if (retval != IBT_SUCCESS) { + *num_path = 0; + return (retval); + } + + num_rec = length / sizeof (sa_path_record_t); + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr: " + "FOUND %d/%d path requested", num_rec, *num_path); + + if ((results_p == NULL) || (num_rec == 0)) + continue; + + /* Update the PathInfo from the response. */ + pr_resp = (sa_path_record_t *)results_p; + for (j = 0; j < num_rec; j++, pr_resp++) { + if ((p_fnd != 0) && + (p_arg->flags & IBT_PATH_APM)) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr" + ": Fill Alternate Path"); + retval = ibcm_update_cep_info(pr_resp, + sl, NULL, + &paths[found - 1].pi_alt_cep_path); + if (retval != IBT_SUCCESS) + continue; + + /* Update some leftovers */ + paths[found - 1].pi_alt_pkt_lt = + pr_resp->PacketLifeTime; + p_fnd = 0; + } else { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr" + ": Fill Primary Path"); + + if (found == *num_path) + break; + + retval = ibcm_ip_update_pri(pr_resp, sl, + &paths[found]); + if (retval != IBT_SUCCESS) + continue; + p_fnd = 1; + found++; + } + + } + /* Deallocate the memory for results_p. */ + kmem_free(results_p, length); + } + } + + if (found == 0) + retval = IBT_PATH_RECORDS_NOT_FOUND; + else if (found != *num_path) + retval = IBT_INSUFF_DATA; + else + retval = IBT_SUCCESS; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_spr: done. 
Status %d, " + "Found %d/%d Paths", retval, found, *num_path); + + *num_path = found; + + return (retval); +} + + +static ibt_status_t +ibcm_get_ip_mpr(ibcm_ip_path_tqargs_t *p_arg, ibtl_cm_port_list_t *sl, + ibcm_ip_dinfo_t *dinfo, uint8_t *num_path, ibt_path_info_t *paths) +{ + sa_multipath_record_t *mpr_req; + sa_path_record_t *pr_resp; + ibmf_saa_access_args_t access_args; + void *results_p; + uint64_t c_mask = 0; + ib_gid_t *gid_ptr, *gid_s_ptr; + size_t length; + int template_len, found, num_rec; + int i; + ibt_status_t retval; + uint8_t sgid_cnt, dgid_cnt; + ibt_ip_path_attr_t *attrp = &p_arg->attr; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr(%p, %p, %p, %d)", + attrp, sl, dinfo, *num_path); + + for (i = 0, dgid_cnt = 0; i < dinfo->num_dest; i++) { + if (dinfo->dest[i].d_tag == 0) + dgid_cnt++; + } + + sgid_cnt = sl->p_count; + + if ((sgid_cnt == 0) || (dgid_cnt == 0)) { + IBTF_DPRINTF_L2(cmlog, "ibcm_get_ip_mpr: sgid_cnt(%d) or" + " dgid_cnt(%d) is zero", sgid_cnt, dgid_cnt); + return (IBT_INVALID_PARAM); + } + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: Get %d records between " + "%d Src(s) <=> %d Dest(s)", *num_path, sgid_cnt, dgid_cnt); + + /* + * Calculate the size for multi-path records template, which includes + * constant portion of the multipath record, plus variable size for + * SGID (sgid_cnt) and DGID (dgid_cnt). + */ + template_len = ((dgid_cnt + sgid_cnt) * sizeof (ib_gid_t)) + + sizeof (sa_multipath_record_t); + + mpr_req = kmem_zalloc(template_len, KM_SLEEP); + + ASSERT(mpr_req != NULL); + + gid_ptr = (ib_gid_t *)(((uchar_t *)mpr_req) + + sizeof (sa_multipath_record_t)); + + /* Get the starting pointer where GIDs are stored. 
*/ + gid_s_ptr = gid_ptr; + + /* SGID */ + for (i = 0; i < sl->p_count; i++) { + *gid_ptr = sl[i].p_sgid; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: SGID[%d] = %llX:%llX", + i, gid_ptr->gid_prefix, gid_ptr->gid_guid); + + gid_ptr++; + } + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpr_req)) + + mpr_req->SGIDCount = sgid_cnt; + c_mask = SA_MPR_COMPMASK_SGIDCOUNT; + + /* DGIDs */ + for (i = 0; i < dinfo->num_dest; i++) { + if (dinfo->dest[i].d_tag == 0) { + *gid_ptr = dinfo->dest[i].d_gid; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: DGID[%d] = " + "%llX:%llX", i, gid_ptr->gid_prefix, + gid_ptr->gid_guid); + gid_ptr++; + } + } + + mpr_req->DGIDCount = dgid_cnt; + c_mask |= SA_MPR_COMPMASK_DGIDCOUNT; + + /* Is Flow Label Specified. */ + if (attrp->ipa_flow) { + mpr_req->FlowLabel = attrp->ipa_flow; + c_mask |= SA_MPR_COMPMASK_FLOWLABEL; + } + + /* Is HopLimit Specified. */ + if (p_arg->flags & IBT_PATH_HOP) { + mpr_req->HopLimit = attrp->ipa_hop; + c_mask |= SA_MPR_COMPMASK_HOPLIMIT; + } + + /* Is TClass Specified. */ + if (attrp->ipa_tclass) { + mpr_req->TClass = attrp->ipa_tclass; + c_mask |= SA_MPR_COMPMASK_TCLASS; + } + + /* Is SL specified. 
*/ + if (attrp->ipa_sl) { + mpr_req->SL = attrp->ipa_sl; + c_mask |= SA_MPR_COMPMASK_SL; + } + + if (p_arg->flags & IBT_PATH_PERF) { + mpr_req->PacketLifeTimeSelector = IBT_BEST; + mpr_req->RateSelector = IBT_BEST; + mpr_req->MtuSelector = IBT_BEST; + + c_mask |= SA_MPR_COMPMASK_PKTLTSELECTOR | + SA_MPR_COMPMASK_RATESELECTOR | SA_MPR_COMPMASK_MTUSELECTOR; + } else { + if (attrp->ipa_pkt_lt.p_selector == IBT_BEST) { + mpr_req->PacketLifeTimeSelector = IBT_BEST; + c_mask |= SA_MPR_COMPMASK_PKTLTSELECTOR; + } + + if (attrp->ipa_srate.r_selector == IBT_BEST) { + mpr_req->RateSelector = IBT_BEST; + c_mask |= SA_MPR_COMPMASK_RATESELECTOR; + } + + if (attrp->ipa_mtu.r_selector == IBT_BEST) { + mpr_req->MtuSelector = IBT_BEST; + c_mask |= SA_MPR_COMPMASK_MTUSELECTOR; + } + } + + /* + * Honor individual selection of these attributes, + * even if IBT_PATH_PERF is set. + */ + /* Check out whether Packet Life Time is specified. */ + if (attrp->ipa_pkt_lt.p_pkt_lt) { + mpr_req->PacketLifeTime = + ibt_usec2ib(attrp->ipa_pkt_lt.p_pkt_lt); + mpr_req->PacketLifeTimeSelector = + attrp->ipa_pkt_lt.p_selector; + + c_mask |= SA_MPR_COMPMASK_PKTLT | + SA_MPR_COMPMASK_PKTLTSELECTOR; + } + + /* Is SRATE specified. */ + if (attrp->ipa_srate.r_srate) { + mpr_req->Rate = attrp->ipa_srate.r_srate; + mpr_req->RateSelector = attrp->ipa_srate.r_selector; + + c_mask |= SA_MPR_COMPMASK_RATE | + SA_MPR_COMPMASK_RATESELECTOR; + } + + /* Is MTU specified. */ + if (attrp->ipa_mtu.r_mtu) { + mpr_req->Mtu = attrp->ipa_mtu.r_mtu; + mpr_req->MtuSelector = attrp->ipa_mtu.r_selector; + + c_mask |= SA_MPR_COMPMASK_MTU | + SA_MPR_COMPMASK_MTUSELECTOR; + } + + /* We always get REVERSIBLE paths. */ + mpr_req->Reversible = 1; + c_mask |= SA_MPR_COMPMASK_REVERSIBLE; + + if (p_arg->flags & IBT_PATH_AVAIL) { + mpr_req->IndependenceSelector = 1; + c_mask |= SA_MPR_COMPMASK_INDEPSEL; + } + + /* we will not specify how many records we want. 
*/ + + _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mpr_req)) + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: CMask: %llX Pkey: %X", + c_mask, mpr_req->P_Key); + + /* Contact SA Access to retrieve Path Records. */ + access_args.sq_attr_id = SA_MULTIPATHRECORD_ATTRID; + access_args.sq_access_type = IBMF_SAA_RETRIEVE; + access_args.sq_component_mask = c_mask; + access_args.sq_template = mpr_req; + access_args.sq_template_length = sizeof (sa_multipath_record_t); + access_args.sq_callback = NULL; + access_args.sq_callback_arg = NULL; + + retval = ibcm_contact_sa_access(sl->p_saa_hdl, &access_args, &length, + &results_p); + if (retval != IBT_SUCCESS) { + *num_path = 0; /* Update the return count. */ + kmem_free(mpr_req, template_len); + return (retval); + } + + num_rec = length / sizeof (sa_path_record_t); + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: Found %d Paths", num_rec); + + found = 0; + if ((results_p != NULL) && (num_rec > 0)) { + /* Update the PathInfo with the response Path Records */ + pr_resp = (sa_path_record_t *)results_p; + + for (i = 0; i < num_rec; i++) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: " + "P[%d]: SG %llX, DG %llX", i, + pr_resp[i].SGID.gid_guid, pr_resp[i].DGID.gid_guid); + } + + if (p_arg->flags & IBT_PATH_APM) { + sa_path_record_t *p_resp = NULL, *a_resp = NULL; + int p_found = 0, a_found = 0; + ib_gid_t p_sg, a_sg, p_dg, a_dg; + int s_spec; + + s_spec = + p_arg->attr.ipa_src_ip.family != AF_UNSPEC ? 1 : 0; + + p_sg = gid_s_ptr[0]; + if (sgid_cnt > 1) + a_sg = gid_s_ptr[1]; + else + a_sg = p_sg; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: P_SG: %llX, " + "A_SG: %llX", p_sg.gid_guid, a_sg.gid_guid); + + p_dg = gid_s_ptr[sgid_cnt]; + if (dgid_cnt > 1) + a_dg = gid_s_ptr[sgid_cnt + 1]; + else + a_dg = p_dg; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: P_DG: %llX, " + "A_DG: %llX", p_dg.gid_guid, a_dg.gid_guid); + + /* + * If SGID and/or DGID is specified by user, make sure + * he gets his primary-path on those node points. 
+ */ + for (i = 0; i < num_rec; i++, pr_resp++) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: " + "PF %d, AF %d,\n\t\t P[%d] = SG: %llX, " + "DG: %llX", p_found, a_found, i, + pr_resp->SGID.gid_guid, + pr_resp->DGID.gid_guid); + + if ((!p_found) && + (p_dg.gid_guid == pr_resp->DGID.gid_guid)) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr" + ": Pri DGID Match.. "); + if ((s_spec == 0) || (p_sg.gid_guid == + pr_resp->SGID.gid_guid)) { + p_found = 1; + p_resp = pr_resp; + IBTF_DPRINTF_L3(cmlog, + "ibcm_get_ip_mpr: " + "Primary Path Found"); + + if (a_found) + break; + else + continue; + } + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr" + ": Pri SGID Don't Match.. "); + } + + if ((!a_found) && + (a_dg.gid_guid == pr_resp->DGID.gid_guid)) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr" + ": Alt DGID Match.. "); + if ((s_spec == 0) || (a_sg.gid_guid == + pr_resp->SGID.gid_guid)) { + a_found = 1; + a_resp = pr_resp; + + IBTF_DPRINTF_L3(cmlog, + "ibcm_get_ip_mpr:" + "Alternate Path Found "); + + if (p_found) + break; + else + continue; + } + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr" + ": Alt SGID Don't Match.. "); + } + } + + if ((p_found == 0) && (a_found == 0)) { + IBTF_DPRINTF_L2(cmlog, "ibcm_get_ip_mpr: Path " + "to desired node points NOT Available."); + retval = IBT_PATH_RECORDS_NOT_FOUND; + goto get_ip_mpr_end; + } + + if ((p_resp == NULL) && (a_resp != NULL)) { + p_resp = a_resp; + a_resp = NULL; + } + + /* Fill in Primary Path */ + retval = ibcm_ip_update_pri(p_resp, sl, &paths[found]); + if (retval != IBT_SUCCESS) + goto get_ip_mpr_end; + + /* Fill in Alternate Path */ + if (a_resp != NULL) { + /* a_resp will point to AltPathInfo buffer. 
*/ + retval = ibcm_update_cep_info(a_resp, sl, + NULL, &paths[found].pi_alt_cep_path); + if (retval != IBT_SUCCESS) + goto get_ip_mpr_end; + + /* Update some leftovers */ + paths[found].pi_alt_pkt_lt = + a_resp->PacketLifeTime; + } else { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: " + "Alternate Path NOT Available."); + retval = IBT_INSUFF_DATA; + } + found++; + } else { /* If NOT APM */ + for (i = 0; i < num_rec; i++, pr_resp++) { + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: " + "DGID(%llX)", pr_resp->DGID.gid_guid); + + /* Fill in Primary Path */ + retval = ibcm_ip_update_pri(pr_resp, sl, + &paths[found]); + if (retval != IBT_SUCCESS) + continue; + + if (++found == *num_path) + break; + } + } +get_ip_mpr_end: + kmem_free(results_p, length); + } + kmem_free(mpr_req, template_len); + + if (found == 0) + retval = IBT_PATH_RECORDS_NOT_FOUND; + else if (found != *num_path) + retval = IBT_INSUFF_DATA; + else + retval = IBT_SUCCESS; + + IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_mpr: Done (status %d). " + "Found %d/%d Paths", retval, found, *num_path); + + *num_path = found; /* Update the return count. */ + + return (retval); +} + + +/* + * Here we "synthesize" loop back path record information. + * + * Currently the synthesize values are assumed as follows: + * SLID, DLID = Base LID from Query HCA Port. + * FlowLabel, HopLimit, TClass = 0, as GRH is False. + * RawTraffic = 0. + * P_Key = first valid one in P_Key table as obtained from Query HCA Port. + * SL = as from Query HCA Port. + * MTU = from Query HCA Port. + * Rate = 2 (arbitrary). + * PacketLifeTime = 0 (4.096 usec). + */ +static ibt_status_t +ibcm_fillin_ip_lbpr(ibtl_cm_port_list_t *sl, uint8_t idx, + ibcm_ip_dinfo_t *dinfo, ibt_path_info_t *paths) +{ + IBTF_DPRINTF_L3(cmlog, "ibcm_fillin_ip_lbpr(%p, %p)", sl, dinfo); + + /* Synthesize path record with appropriate loop back information. 
*/ + paths->pi_prim_cep_path.cep_pkey_ix = + ibtl_cm_get_1st_full_pkey_ix(sl->p_hca_guid, sl->p_port_num); + paths->pi_hca_guid = sl->p_hca_guid; + paths->pi_prim_cep_path.cep_adds_vect.av_dgid = dinfo->dest[idx].d_gid; + paths->pi_prim_cep_path.cep_adds_vect.av_sgid = sl->p_sgid; + paths->pi_prim_cep_path.cep_adds_vect.av_srate = IBT_SRATE_1X; + paths->pi_prim_cep_path.cep_adds_vect.av_srvl = 0; /* SL */ + + paths->pi_prim_cep_path.cep_adds_vect.av_send_grh = B_FALSE; + paths->pi_prim_cep_path.cep_adds_vect.av_flow = 0; + paths->pi_prim_cep_path.cep_adds_vect.av_tclass = 0; + paths->pi_prim_cep_path.cep_adds_vect.av_hop = 0; + + /* SLID and DLID will be equal to BLID. */ + paths->pi_prim_cep_path.cep_adds_vect.av_dlid = sl->p_base_lid; + paths->pi_prim_cep_path.cep_adds_vect.av_src_path = 0; + paths->pi_prim_cep_path.cep_adds_vect.av_sgid_ix = sl->p_sgid_ix; + paths->pi_prim_cep_path.cep_adds_vect.av_port_num = sl->p_port_num; + paths->pi_prim_cep_path.cep_hca_port_num = sl->p_port_num; + paths->pi_prim_cep_path.cep_timeout = 0; /* To be filled in by CM. */ + paths->pi_path_mtu = sl->p_mtu; /* MTU */ + paths->pi_prim_pkt_lt = 0; /* Packet Life Time. */ + paths->pi_alt_pkt_lt = 0; /* Packet Life Time. */ + + IBTF_DPRINTF_L3(cmlog, "ibcm_fillin_ip_lbpr: HCA %llX:%d \n " + "SGID %llX:%llX DGID %llX:%llX", paths->pi_hca_guid, + paths->pi_prim_cep_path.cep_hca_port_num, sl->p_sgid.gid_prefix, + sl->p_sgid.gid_guid, dinfo->dest[idx].d_gid.gid_prefix, + dinfo->dest[idx].d_gid.gid_guid); + + /* Set Alternate Path to invalid state. 
*/
+	paths->pi_alt_cep_path.cep_hca_port_num = 0;
+	paths->pi_alt_cep_path.cep_adds_vect.av_dlid = 0;
+
+	return (IBT_SUCCESS);
+}
+
+/*
+ * ibcm_process_get_ip_paths()
+ *	Taskq worker that services one "get IP paths" request.
+ *
+ *	tq_arg is the ibcm_ip_path_tqargs_t built by ibcm_get_ip_path().
+ *	The routine resolves the source/destination IP addresses to GIDs,
+ *	builds the list of candidate source ports and destinations, obtains
+ *	an SA access handle for every source port and finally asks
+ *	ibcm_saa_ip_pr() for the path records.
+ *
+ *	Completion:
+ *	  - Async request (p_arg->func != NULL): the path and source-IP
+ *	    buffers are allocated here, the client handler is invoked with
+ *	    the result, and p_arg itself is destroyed and freed here.
+ *	  - Blocking request: the status is stored in p_arg->retval and the
+ *	    waiter is woken through p_arg->ip_cv; the waiter owns p_arg.
+ */
+static void
+ibcm_process_get_ip_paths(void *tq_arg)
+{
+	ibcm_ip_path_tqargs_t	*p_arg = (ibcm_ip_path_tqargs_t *)tq_arg;
+	ibcm_ip_dinfo_t		*dinfo = NULL;
+	int			len = 0;
+	uint8_t			max_paths, num_path;
+	ib_gid_t		*d_gids_p = NULL;
+	ib_gid_t		sgid, dgid1, dgid2;
+	ibt_status_t		retval = IBT_SUCCESS;
+	ibtl_cm_port_list_t	*sl = NULL;
+	uint_t			dnum = 0;
+	uint_t			i;
+	ibcm_hca_info_t		*hcap;
+	ibmf_saa_handle_t	saa_handle;
+
+	IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_ip_paths(%p, 0x%X) ",
+	    p_arg, p_arg->flags);
+
+	max_paths = num_path = p_arg->attr.ipa_max_paths;
+
+	/*
+	 * Prepare the Source and Destination GID list based on the input
+	 * attributes.  We contact ARP module to perform IP to MAC
+	 * i.e. GID conversion.  We use this GID for path look-up.
+	 *
+	 * If APM is requested and if multiple Dest IPs are specified, check
+	 * out whether they are companion to each other.  But, if only one
+	 * Dest IP is specified, then it is beyond our scope to verify that
+	 * the companion port GID obtained has IP-Service enabled.
+	 */
+	dgid1.gid_prefix = dgid1.gid_guid = 0;
+	sgid.gid_prefix = sgid.gid_guid = 0;
+	if ((p_arg->attr.ipa_src_ip.family != AF_UNSPEC) &&
+	    (!(p_arg->flags & IBT_PATH_APM))) {
+		ibt_path_attr_t		attr;
+
+		retval = ibcm_arp_get_ibaddr(p_arg->attr.ipa_src_ip.un.ip4addr,
+		    p_arg->attr.ipa_dst_ip[0].un.ip4addr, &sgid, &dgid1);
+		if (retval) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: "
+			    "ibcm_arp_get_ibaddr() failed: %d", retval);
+			goto ippath_error;
+		}
+
+		bzero(&attr, sizeof (ibt_path_attr_t));
+		attr.pa_hca_guid = p_arg->attr.ipa_hca_guid;
+		attr.pa_hca_port_num = p_arg->attr.ipa_hca_port_num;
+		attr.pa_sgid = sgid;
+		bcopy(&p_arg->attr.ipa_mtu, &attr.pa_mtu,
+		    sizeof (ibt_mtu_req_t));
+		bcopy(&p_arg->attr.ipa_srate, &attr.pa_srate,
+		    sizeof (ibt_srate_req_t));
+		bcopy(&p_arg->attr.ipa_pkt_lt, &attr.pa_pkt_lt,
+		    sizeof (ibt_pkt_lt_req_t));
+		retval = ibtl_cm_get_active_plist(&attr, p_arg->flags, &sl);
+		if (retval == IBT_SUCCESS) {
+			bcopy(&p_arg->attr.ipa_src_ip, &sl->p_src_ip,
+			    sizeof (ibt_ip_addr_t));
+		} else {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: "
+			    "ibtl_cm_get_active_plist: Failed %d", retval);
+			goto ippath_error;
+		}
+	} else {
+		/*
+		 * Get list of active HCA-Port list, that matches input
+		 * specified attr.
+		 */
+		retval = ibcm_arp_get_srcip_plist(&p_arg->attr, p_arg->flags,
+		    &sl);
+		if (retval != IBT_SUCCESS) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: "
+			    "ibcm_arp_get_srcip_plist: Failed %d", retval);
+			goto ippath_error;
+		}
+
+		sl->p_src_ip.un.ip4addr = htonl(sl->p_src_ip.un.ip4addr);
+		/*
+		 * Accumulate all destination information.
+		 * Get GID info for the specified input ip-addr.
+		 */
+		retval = ibcm_arp_get_ibaddr(sl->p_src_ip.un.ip4addr,
+		    p_arg->attr.ipa_dst_ip[0].un.ip4addr, NULL, &dgid1);
+		if (retval) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: "
+			    "ibcm_arp_get_ibaddr() failed: %d", retval);
+			goto ippath_error1;
+		}
+	}
+	IBTF_DPRINTF_L4(cmlog, "ibcm_process_get_ip_paths: SrcIP %lX DstIP %lX",
+	    sl->p_src_ip.un.ip4addr,
+	    htonl(p_arg->attr.ipa_dst_ip[0].un.ip4addr));
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_process_get_ip_paths: SGID %llX:%llX, "
+	    "DGID0: %llX:%llX", sl->p_sgid.gid_prefix, sl->p_sgid.gid_guid,
+	    dgid1.gid_prefix, dgid1.gid_guid);
+
+	/* One extra destination slot is reserved for a companion DGID. */
+	len = p_arg->attr.ipa_ndst + 1;
+	len = (len * sizeof (ibcm_ip_dest_t)) + sizeof (ibcm_ip_dinfo_t);
+	dinfo = kmem_zalloc(len, KM_SLEEP);
+
+	dinfo->dest[0].d_gid = dgid1;
+	bcopy(&p_arg->attr.ipa_dst_ip[0], &dinfo->dest[0].d_ip,
+	    sizeof (ibt_ip_addr_t));
+
+	i = 1;
+	if (p_arg->attr.ipa_ndst > 1) {
+		/* Get DGID for all specified Dest IP Addr */
+		for (; i < p_arg->attr.ipa_ndst; i++) {
+			retval = ibcm_arp_get_ibaddr(sl->p_src_ip.un.ip4addr,
+			    p_arg->attr.ipa_dst_ip[i].un.ip4addr, NULL, &dgid2);
+			if (retval) {
+				IBTF_DPRINTF_L2(cmlog,
+				    "ibcm_process_get_ip_paths: "
+				    "ibcm_arp_get_ibaddr failed: %d", retval);
+				goto ippath_error2;
+			}
+			dinfo->dest[i].d_gid = dgid2;
+
+			IBTF_DPRINTF_L4(cmlog, "ibcm_process_get_ip_paths: "
+			    "DGID%d: %llX:%llX", i, dgid2.gid_prefix,
+			    dgid2.gid_guid);
+			bcopy(&p_arg->attr.ipa_dst_ip[i], &dinfo->dest[i].d_ip,
+			    sizeof (ibt_ip_addr_t));
+		}
+
+		if (p_arg->flags & IBT_PATH_APM) {
+			dgid2 = dinfo->dest[1].d_gid;
+
+			retval = ibcm_get_comp_pgids(dgid1, dgid2, 0,
+			    &d_gids_p, &dnum);
+			if ((retval != IBT_SUCCESS) &&
+			    (retval != IBT_GIDS_NOT_FOUND)) {
+				IBTF_DPRINTF_L2(cmlog,
+				    "ibcm_process_get_ip_paths: "
+				    "Invalid DGIDs specified w/ APM Flag");
+				goto ippath_error2;
+			}
+			IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_ip_paths: "
+			    "Found %d Comp DGID", dnum);
+
+			if (dnum) {
+				dinfo->dest[i].d_gid = d_gids_p[0];
+				dinfo->dest[i].d_ip.family = AF_UNSPEC;
+				i++;
+			}
+		}
+	}
+
+	/* "i" will get us num_dest count. */
+	dinfo->num_dest = i;
+
+	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*p_arg))
+
+	/*
+	 * IBTF allocates memory for path_info & src_ip in case of
+	 * Async Get IP Paths
+	 */
+	if (p_arg->func) {   /* Do these only for Async Get Paths */
+		p_arg->paths = kmem_zalloc(sizeof (ibt_path_info_t) * max_paths,
+		    KM_SLEEP);
+		if (p_arg->src_ip_p == NULL)
+			p_arg->src_ip_p = kmem_zalloc(
+			    sizeof (ibt_path_ip_src_t) * max_paths, KM_SLEEP);
+	}
+
+	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*p_arg))
+
+	IBTF_DPRINTF_L3(cmlog, "ibcm_process_get_ip_paths: HCA (%llX, %d)",
+	    sl->p_hca_guid, sl->p_port_num);
+
+	hcap = ibcm_find_hca_entry(sl->p_hca_guid);
+	if (hcap == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: "
+		    "NO HCA found");
+		retval = IBT_HCA_BUSY_DETACHING;
+		goto ippath_error2;
+	}
+
+	/* Get SA Access Handle. */
+	for (i = 0; i < sl->p_count; i++) {
+		if (i == 0) {
+			/* Validate whether this HCA supports APM */
+			if ((p_arg->flags & IBT_PATH_APM) &&
+			    (!(hcap->hca_caps & IBT_HCA_AUTO_PATH_MIG))) {
+				IBTF_DPRINTF_L2(cmlog,
+				    "ibcm_process_get_ip_paths: HCA (%llX): "
+				    "APM NOT SUPPORTED", sl[i].p_hca_guid);
+				retval = IBT_APM_NOT_SUPPORTED;
+				goto ippath_error3;
+			}
+		}
+
+		saa_handle = ibcm_get_saa_handle(hcap, sl[i].p_port_num);
+		if (saa_handle == NULL) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: "
+			    "SAA HDL NULL, HCA (%llX:%d) NOT ACTIVE",
+			    sl[i].p_hca_guid, sl[i].p_port_num);
+			retval = IBT_HCA_PORT_NOT_ACTIVE;
+			goto ippath_error3;
+		}
+		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sl))
+		sl[i].p_saa_hdl = saa_handle;
+		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sl))
+	}
+
+	/* Get Path Records. */
+	retval = ibcm_saa_ip_pr(p_arg, sl, dinfo, &num_path);
+
+ippath_error3:
+	ibcm_dec_hca_acc_cnt(hcap);
+
+ippath_error2:
+	/*
+	 * Release the companion-GID array handed back by
+	 * ibcm_get_comp_pgids(); only its first entry was copied into dinfo.
+	 */
+	if ((dnum) && (d_gids_p))
+		kmem_free(d_gids_p, dnum * sizeof (ib_gid_t));
+
+	if (dinfo && len)
+		kmem_free(dinfo, len);
+
+ippath_error1:
+	if (sl)
+		ibtl_cm_free_active_plist(sl);
+
+ippath_error:
+	if ((retval != IBT_SUCCESS) && (retval != IBT_INSUFF_DATA))
+		num_path = 0;
+
+	if (p_arg->num_paths_p != NULL)
+		*p_arg->num_paths_p = num_path;
+
+	if (p_arg->func) {   /* Do these only for Async Get Paths */
+		ibt_path_info_t		*tmp_path_p;
+		ibt_path_ip_src_t	*tmp_src_ip_p = p_arg->src_ip_p;
+
+		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*p_arg))
+		p_arg->retval = retval;
+		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*p_arg))
+
+		if (retval == IBT_INSUFF_DATA) {
+			/*
+			 * We allocated earlier memory based on "max_paths",
+			 * but we got lesser path-records, so re-adjust that
+			 * buffer so that caller can free the correct memory.
+			 */
+			tmp_path_p = kmem_alloc(
+			    sizeof (ibt_path_info_t) * num_path, KM_SLEEP);
+
+			bcopy(p_arg->paths, tmp_path_p,
+			    num_path * sizeof (ibt_path_info_t));
+
+			kmem_free(p_arg->paths,
+			    sizeof (ibt_path_info_t) * max_paths);
+		} else if (retval != IBT_SUCCESS) {
+			if (p_arg->paths)
+				kmem_free(p_arg->paths,
+				    sizeof (ibt_path_info_t) * max_paths);
+			if (tmp_src_ip_p) {
+				kmem_free(tmp_src_ip_p,
+				    sizeof (ibt_path_ip_src_t) * max_paths);
+				/* Never hand a freed buffer to the client. */
+				tmp_src_ip_p = NULL;
+			}
+			tmp_path_p = NULL;
+		} else {
+			tmp_path_p = p_arg->paths;
+		}
+		(*(p_arg->func))(p_arg->arg, retval, tmp_path_p, num_path,
+		    tmp_src_ip_p);
+
+		/* The async request owns p_arg; tear it down here. */
+		cv_destroy(&p_arg->ip_cv);
+		mutex_destroy(&p_arg->ip_lock);
+		len = p_arg->len;
+		if (len)
+			kmem_free(p_arg, len);
+	} else {
+		mutex_enter(&p_arg->ip_lock);
+		p_arg->retval = retval;
+		cv_signal(&p_arg->ip_cv);
+		mutex_exit(&p_arg->ip_lock);
+	}
+
+	IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: done: status %d, "
+	    "Found %d/%d Path Records", retval, num_path, max_paths);
+}
+
+
+static ibt_status_t
+ibcm_val_ipattr(ibt_ip_path_attr_t *attrp, ibt_path_flags_t flags)
+{
+	uint_t	i;
+
+	if (attrp == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: IP Path Attr
is NULL");
+		return (IBT_INVALID_PARAM);
+	}
+
+	IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: Inputs are: HCA %llX:%d, "
+	    "Maxpath= %d, Flags= 0x%X, #Dest %d", attrp->ipa_hca_guid,
+	    attrp->ipa_hca_port_num, attrp->ipa_max_paths, flags,
+	    attrp->ipa_ndst);
+
+	/*
+	 * Validate Path Flags.
+	 * IBT_PATH_AVAIL & IBT_PATH_PERF are mutually exclusive.
+	 */
+	if ((flags & IBT_PATH_AVAIL) && (flags & IBT_PATH_PERF)) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: Invalid Flags: 0x%X,"
+		    "\n\t AVAIL and PERF flags specified together", flags);
+		return (IBT_INVALID_PARAM);
+	}
+
+	/*
+	 * Validate number of records requested.
+	 *
+	 * Max_paths of "0" is invalid.
+	 * Max_paths <= IBT_MAX_SPECIAL_PATHS, if AVAIL or PERF is set.
+	 */
+	if (attrp->ipa_max_paths == 0) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: Invalid max_paths %d",
+		    attrp->ipa_max_paths);
+		return (IBT_INVALID_PARAM);
+	}
+
+	if ((flags & (IBT_PATH_AVAIL | IBT_PATH_PERF)) &&
+	    (attrp->ipa_max_paths > IBT_MAX_SPECIAL_PATHS)) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: MaxPaths that can be "
+		    "requested is <%d> \n when IBT_PATH_AVAIL or IBT_PATH_PERF"
+		    " flag is specified.", IBT_MAX_SPECIAL_PATHS);
+		return (IBT_INVALID_PARAM);
+	}
+
+	/* Only 2 destinations can be specified w/ APM flag. */
+	if ((flags & IBT_PATH_APM) && (attrp->ipa_ndst > 2)) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: Max #Dest is 2, with "
+		    "APM flag");
+		return (IBT_INVALID_PARAM);
+	}
+
+	/*
+	 * Validate the destination info: there must be at least one
+	 * destination and the destination array itself must be present,
+	 * since it is dereferenced by the loop below.
+	 */
+	if ((attrp->ipa_ndst == 0) || (attrp->ipa_dst_ip == NULL)) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: DstIP Not provided "
+		    "dst_ip %p, ndst %d", attrp->ipa_dst_ip, attrp->ipa_ndst);
+		return (IBT_INVALID_PARAM);
+	}
+
+	/* Validate destination IP */
+	for (i = 0; i < attrp->ipa_ndst; i++) {
+		ibt_ip_addr_t	dst_ip = attrp->ipa_dst_ip[i];
+
+		IBTF_DPRINTF_L3(cmlog, "ibcm_val_ipattr: DstIP[%d]:= family %d "
+		    "IP %lX", i, dst_ip.family, htonl(dst_ip.un.ip4addr));
+
+		if (dst_ip.family == AF_UNSPEC) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_val_ipattr: ERROR: "
+			    "Invalid DstIP specified");
+			return (IBT_INVALID_PARAM);
+		}
+	}
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_val_ipattr: SrcIP: family %d, IP %lX",
+	    attrp->ipa_src_ip.family, htonl(attrp->ipa_src_ip.un.ip4addr));
+
+	return (IBT_SUCCESS);
+}
+
+
+static ibt_status_t
+ibcm_get_ip_path(ibt_clnt_hdl_t ibt_hdl, ibt_path_flags_t flags,
+    ibt_ip_path_attr_t *attrp, ibt_path_info_t *paths, uint8_t *num_path_p,
+    ibt_path_ip_src_t *src_ip_p, ibt_ip_path_handler_t func, void *arg)
+{
+	ibcm_ip_path_tqargs_t	*path_tq;
+	int		sleep_flag = ((func == NULL) ?
KM_SLEEP : KM_NOSLEEP);
+	uint_t		len, ret;
+	ibt_status_t	retval;
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_get_ip_path(%p, %X, %p, %p, %p %p %p %p)",
+	    ibt_hdl, flags, attrp, paths, num_path_p, src_ip_p, func, arg);
+
+	retval = ibcm_val_ipattr(attrp, flags);
+	if (retval != IBT_SUCCESS)
+		return (retval);
+
+	/*
+	 * The request and a private copy of the destination IP array are
+	 * carved out of a single allocation; the array follows the header.
+	 */
+	len = (attrp->ipa_ndst * sizeof (ibt_ip_addr_t)) +
+	    sizeof (ibcm_ip_path_tqargs_t);
+	path_tq = kmem_zalloc(len, sleep_flag);
+	if (path_tq == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_get_ip_path: "
+		    "Unable to allocate memory for local usage.");
+		return (IBT_INSUFF_KERNEL_RESOURCE);
+	}
+
+	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path_tq))
+	mutex_init(&path_tq->ip_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&path_tq->ip_cv, NULL, CV_DRIVER, NULL);
+	bcopy(attrp, &path_tq->attr, sizeof (ibt_ip_path_attr_t));
+
+	path_tq->attr.ipa_dst_ip = (ibt_ip_addr_t *)(((uchar_t *)path_tq) +
+	    sizeof (ibcm_ip_path_tqargs_t));
+	bcopy(attrp->ipa_dst_ip, path_tq->attr.ipa_dst_ip,
+	    sizeof (ibt_ip_addr_t) * attrp->ipa_ndst);
+
+	/* Ignore IBT_PATH_AVAIL flag, if only one path is requested. */
+	if ((flags & IBT_PATH_AVAIL) && (attrp->ipa_max_paths == 1)) {
+		flags &= ~IBT_PATH_AVAIL;
+
+		IBTF_DPRINTF_L4(cmlog, "ibcm_get_ip_path: Ignoring "
+		    "IBT_PATH_AVAIL flag, as only ONE path info is requested.");
+	}
+
+	path_tq->flags = flags;
+	path_tq->ibt_hdl = ibt_hdl;
+	path_tq->paths = paths;
+	path_tq->src_ip_p = src_ip_p;
+	path_tq->num_paths_p = num_path_p;
+	path_tq->func = func;
+	path_tq->arg = arg;
+	path_tq->len = len;
+
+	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*path_tq))
+
+	sleep_flag = ((func == NULL) ? TQ_SLEEP : TQ_NOSLEEP);
+
+	/*
+	 * Only the blocking caller takes ip_lock: holding it across the
+	 * dispatch guarantees the task cannot signal ip_cv before we are
+	 * waiting on it.  For a non-blocking request the task destroys
+	 * ip_lock and frees path_tq once the handler has run, so this thread
+	 * must not hold the lock over the dispatch and must not touch
+	 * path_tq after the dispatch has succeeded.
+	 */
+	if (func == NULL)
+		mutex_enter(&path_tq->ip_lock);
+	ret = taskq_dispatch(ibcm_taskq, ibcm_process_get_ip_paths, path_tq,
+	    sleep_flag);
+	if (ret == 0) {
+		IBTF_DPRINTF_L2(cmlog, "ibcm_get_ip_path: Failed to dispatch "
+		    "the TaskQ");
+		if (func == NULL)
+			mutex_exit(&path_tq->ip_lock);
+		cv_destroy(&path_tq->ip_cv);
+		mutex_destroy(&path_tq->ip_lock);
+		kmem_free(path_tq, len);
+		retval = IBT_INSUFF_KERNEL_RESOURCE;
+	} else if (func != NULL) {		/* Non-Blocking */
+		IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_path: NonBlocking");
+		retval = IBT_SUCCESS;
+	} else {				/* Blocking */
+		IBTF_DPRINTF_L3(cmlog, "ibcm_get_ip_path: Blocking");
+		cv_wait(&path_tq->ip_cv, &path_tq->ip_lock);
+		retval = path_tq->retval;
+		mutex_exit(&path_tq->ip_lock);
+		cv_destroy(&path_tq->ip_cv);
+		mutex_destroy(&path_tq->ip_lock);
+		kmem_free(path_tq, len);
+	}
+
+	return (retval);
+}
+
+
+/*
+ * ibt_aget_ip_paths()
+ *	Asynchronous form of the IP path lookup.  A completion handler is
+ *	mandatory; the path buffers are not supplied by the caller but are
+ *	allocated while the request is processed and delivered through the
+ *	handler.
+ */
+ibt_status_t
+ibt_aget_ip_paths(ibt_clnt_hdl_t ibt_hdl, ibt_path_flags_t flags,
+    ibt_ip_path_attr_t *attrp, ibt_ip_path_handler_t func, void  *arg)
+{
+	IBTF_DPRINTF_L3(cmlog, "ibt_aget_ip_paths(%p, 0x%X, %p, %p, %p)",
+	    ibt_hdl, flags, attrp, func, arg);
+
+	if (func == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_aget_ip_paths: Function Pointer is "
+		    "NULL - ERROR ");
+		return (IBT_INVALID_PARAM);
+	}
+
+	/* path info will be allocated in ibcm_process_get_ip_paths() */
+	return (ibcm_get_ip_path(ibt_hdl, flags, attrp, NULL, NULL,
+	    NULL, func, arg));
+}
+
+
+/*
+ * ibt_get_ip_paths()
+ *	Blocking form of the IP path lookup.  The caller supplies the
+ *	"paths" array (required) and, optionally, "num_paths_p" and
+ *	"src_ip_p"; *num_paths_p is cleared before the lookup starts.
+ */
+ibt_status_t
+ibt_get_ip_paths(ibt_clnt_hdl_t ibt_hdl, ibt_path_flags_t flags,
+    ibt_ip_path_attr_t *attrp, ibt_path_info_t *paths, uint8_t *num_paths_p,
+    ibt_path_ip_src_t *src_ip_p)
+{
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_paths(%p, 0x%X, %p, %p, %p, %p)",
+	    ibt_hdl, flags, attrp, paths, num_paths_p, src_ip_p);
+
+	if (paths == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_paths: Path Info Pointer is "
+		    "NULL - ERROR ");
+		return (IBT_INVALID_PARAM);
+	}
+
+	if (num_paths_p != NULL)
+		*num_paths_p = 0;
+
+	return
(ibcm_get_ip_path(ibt_hdl, flags, attrp, paths, num_paths_p,
+	    src_ip_p, NULL, NULL));
+}
+
+
+/*
+ * ibt_get_ip_alt_path()
+ *	Find an alternate path for an established RC channel.
+ *
+ *	rc_chan	- channel whose primary path is already set up.
+ *	flags	- path flags; IBT_PATH_HOP and IBT_PATH_PERF are honoured.
+ *	attrp	- optional; when given, its source/destination IP addresses
+ *		  are resolved to the GIDs to use and its remaining fields
+ *		  narrow the SA query.
+ *	api_p	- filled in with the alternate path on success.
+ *
+ *	When no GIDs are specified, companion port GIDs of the current
+ *	SGID/DGID are tried.  A MultiPathRecord query is sent to the SA and
+ *	the first returned record whose MTU is not smaller than the primary
+ *	path MTU (and which differs from the primary path when nothing was
+ *	specified) is used.
+ *
+ *	Returns IBT_SUCCESS, IBT_PATH_RECORDS_NOT_FOUND when no record
+ *	qualifies, or the error reported by the failing step.
+ */
+ibt_status_t
+ibt_get_ip_alt_path(ibt_channel_hdl_t rc_chan, ibt_path_flags_t flags,
+    ibt_alt_ip_path_attr_t *attrp, ibt_alt_path_info_t *api_p)
+{
+	sa_multipath_record_t	*mpr_req = NULL;
+	sa_path_record_t	*pr_resp;
+	ibmf_saa_access_args_t	access_args;
+	ibt_qp_query_attr_t	qp_attr;
+	ibtl_cm_hca_port_t	c_hp, n_hp;
+	ibcm_hca_info_t		*hcap;
+	void			*results_p;
+	uint64_t		c_mask = 0;
+	ib_gid_t		*gid_ptr = NULL;
+	ib_gid_t		*sgids_p = NULL,  *dgids_p = NULL;
+	ib_gid_t		cur_dgid, cur_sgid;
+	ib_gid_t		new_dgid, new_sgid;
+	ibmf_saa_handle_t	saa_handle;
+	size_t			length;
+	int			i, j, template_len = 0, rec_found;
+	uint_t			snum = 0, dnum = 0, num_rec;
+	ibt_status_t		retval;
+	ib_mtu_t		prim_mtu;
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path(%p, %x, %p, %p)",
+	    rc_chan, flags, attrp, api_p);
+
+	/* validate channel */
+	if (IBCM_INVALID_CHANNEL(rc_chan)) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: invalid channel");
+		return (IBT_CHAN_HDL_INVALID);
+	}
+
+	if (api_p == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: invalid attribute:"
+		    " AltPathInfo can't be NULL");
+		return (IBT_INVALID_PARAM);
+	}
+
+	retval = ibt_query_qp(rc_chan, &qp_attr);
+	if (retval != IBT_SUCCESS) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: ibt_query_qp(%p) "
+		    "failed %d", rc_chan, retval);
+		return (retval);
+	}
+
+	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: "
+		    "Invalid Channel type: Applicable only to RC Channel");
+		return (IBT_CHAN_SRV_TYPE_INVALID);
+	}
+
+	cur_dgid =
+	    qp_attr.qp_info.qp_transport.rc.rc_path.cep_adds_vect.av_dgid;
+	cur_sgid =
+	    qp_attr.qp_info.qp_transport.rc.rc_path.cep_adds_vect.av_sgid;
+	prim_mtu = qp_attr.qp_info.qp_transport.rc.rc_path_mtu;
+
+	/* If optional attributes are specified, validate them. */
+	if (attrp) {
+		/* Get SGID and DGID for the specified input ip-addr */
+		retval = ibcm_arp_get_ibaddr(attrp->apa_src_ip.un.ip4addr,
+		    attrp->apa_dst_ip.un.ip4addr, &new_sgid, &new_dgid);
+		if (retval) {
+			IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: "
+			    "ibcm_arp_get_ibaddr() failed: %d", retval);
+			return (retval);
+		}
+	} else {
+		new_dgid.gid_prefix = 0;
+		new_dgid.gid_guid = 0;
+		new_sgid.gid_prefix = 0;
+		new_sgid.gid_guid = 0;
+	}
+
+	if ((new_dgid.gid_prefix != 0) && (new_sgid.gid_prefix != 0) &&
+	    (new_dgid.gid_prefix != new_sgid.gid_prefix)) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: Specified SGID's "
+		    "SNprefix (%llX) doesn't match with \n specified DGID's "
+		    "SNprefix: %llX", new_sgid.gid_prefix, new_dgid.gid_prefix);
+		return (IBT_INVALID_PARAM);
+	}
+
+	/* For the specified SGID, get HCA information. */
+	retval = ibtl_cm_get_hca_port(cur_sgid, 0, &c_hp);
+	if (retval != IBT_SUCCESS) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: "
+		    "Get HCA Port Failed: %d", retval);
+		return (retval);
+	}
+
+	hcap = ibcm_find_hca_entry(c_hp.hp_hca_guid);
+	if (hcap == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: NO HCA found");
+		return (IBT_HCA_BUSY_DETACHING);
+	}
+
+	/* Validate whether this HCA support APM */
+	if (!(hcap->hca_caps & IBT_HCA_AUTO_PATH_MIG)) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: "
+		    "HCA (%llX) - APM NOT SUPPORTED ", c_hp.hp_hca_guid);
+		retval = IBT_APM_NOT_SUPPORTED;
+		goto get_ip_alt_path_done;
+	}
+
+	/* Get Companion Port GID of the current Channel's SGID */
+	if ((new_sgid.gid_guid == 0) || ((new_sgid.gid_guid != 0) &&
+	    (new_sgid.gid_guid != cur_sgid.gid_guid))) {
+		IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: SRC: "
+		    "Get Companion PortGids for - %llX:%llX",
+		    cur_sgid.gid_prefix, cur_sgid.gid_guid);
+
+		retval = ibcm_get_comp_pgids(cur_sgid, new_sgid,
+		    c_hp.hp_hca_guid, &sgids_p, &snum);
+		if (retval != IBT_SUCCESS)
+			goto get_ip_alt_path_done;
+	}
+
+	/* Get Companion Port GID of the current Channel's DGID */
+	if ((new_dgid.gid_guid == 0) || ((new_dgid.gid_guid != 0) &&
+	    (new_dgid.gid_guid != cur_dgid.gid_guid))) {
+
+		IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: DEST: "
+		    "Get Companion PortGids for - %llX:%llX",
+		    cur_dgid.gid_prefix, cur_dgid.gid_guid);
+
+		retval = ibcm_get_comp_pgids(cur_dgid, new_dgid, 0, &dgids_p,
+		    &dnum);
+		if (retval != IBT_SUCCESS)
+			goto get_ip_alt_path_done;
+	}
+
+	/*
+	 * Pick the unspecified GID(s) from the companion lists, pairing
+	 * source and destination GIDs that share a subnet prefix.
+	 */
+	if ((new_dgid.gid_guid == 0) || (new_sgid.gid_guid == 0)) {
+		if (new_sgid.gid_guid == 0) {
+			for (i = 0; i < snum; i++) {
+				if (new_dgid.gid_guid == 0) {
+					for (j = 0; j < dnum; j++) {
+						if (sgids_p[i].gid_prefix ==
+						    dgids_p[j].gid_prefix) {
+							new_dgid = dgids_p[j];
+							new_sgid = sgids_p[i];
+
+							goto get_ip_alt_proceed;
+						}
+					}
+					/*  Current DGID */
+					if (sgids_p[i].gid_prefix ==
+					    cur_dgid.gid_prefix) {
+						new_sgid = sgids_p[i];
+						goto get_ip_alt_proceed;
+					}
+				} else {
+					if (sgids_p[i].gid_prefix ==
+					    new_dgid.gid_prefix) {
+						new_sgid = sgids_p[i];
+						goto get_ip_alt_proceed;
+					}
+				}
+			}
+			/* Current SGID */
+			if (new_dgid.gid_guid == 0) {
+				for (j = 0; j < dnum; j++) {
+					if (cur_sgid.gid_prefix ==
+					    dgids_p[j].gid_prefix) {
+						new_dgid = dgids_p[j];
+
+						goto get_ip_alt_proceed;
+					}
+				}
+			}
+		} else if (new_dgid.gid_guid == 0) {
+			for (i = 0; i < dnum; i++) {
+				if (dgids_p[i].gid_prefix ==
+				    new_sgid.gid_prefix) {
+					new_dgid = dgids_p[i];
+					goto get_ip_alt_proceed;
+				}
+			}
+			/* Current DGID */
+			if (cur_dgid.gid_prefix == new_sgid.gid_prefix) {
+				goto get_ip_alt_proceed;
+			}
+		}
+		/*
+		 * hmm... No Companion Ports available.
+		 * so we will be using current or specified attributes only.
+		 */
+	}
+
+get_ip_alt_proceed:
+	if (new_sgid.gid_guid != 0) {
+		retval = ibtl_cm_get_hca_port(new_sgid, 0, &n_hp);
+		if (retval != IBT_SUCCESS) {
+			IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: "
+			    "Get HCA Port Failed: %d", retval);
+			goto get_ip_alt_path_done;
+		}
+	}
+
+	/* Calculate the size for multi-path records template */
+	template_len = (2 * sizeof (ib_gid_t)) + sizeof (sa_multipath_record_t);
+
+	mpr_req = kmem_zalloc(template_len, KM_SLEEP);
+
+	ASSERT(mpr_req != NULL);
+
+	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpr_req))
+
+	/* The SGID and DGID follow the fixed part of the template. */
+	gid_ptr = (ib_gid_t *)(((uchar_t *)mpr_req) +
+	    sizeof (sa_multipath_record_t));
+
+	/* SGID */
+	if (new_sgid.gid_guid == 0)
+		*gid_ptr = cur_sgid;
+	else
+		*gid_ptr = new_sgid;
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: Get Path Between "
+	    " SGID : %llX:%llX", gid_ptr->gid_prefix, gid_ptr->gid_guid);
+
+	gid_ptr++;
+
+	/* DGID */
+	if (new_dgid.gid_guid == 0)
+		*gid_ptr = cur_dgid;
+	else
+		*gid_ptr = new_dgid;
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path:\t\t    DGID : %llX:%llX",
+	    gid_ptr->gid_prefix, gid_ptr->gid_guid);
+
+	mpr_req->SGIDCount = 1;
+	c_mask = SA_MPR_COMPMASK_SGIDCOUNT;
+
+	mpr_req->DGIDCount = 1;
+	c_mask |= SA_MPR_COMPMASK_DGIDCOUNT;
+
+	/* Is Flow Label Specified. */
+	if (attrp) {
+		if (attrp->apa_flow) {
+			mpr_req->FlowLabel = attrp->apa_flow;
+			c_mask |= SA_MPR_COMPMASK_FLOWLABEL;
+		}
+
+		/* Is HopLimit Specified. */
+		if (flags & IBT_PATH_HOP) {
+			mpr_req->HopLimit = attrp->apa_hop;
+			c_mask |= SA_MPR_COMPMASK_HOPLIMIT;
+		}
+
+		/* Is TClass Specified. */
+		if (attrp->apa_tclass) {
+			mpr_req->TClass = attrp->apa_tclass;
+			c_mask |= SA_MPR_COMPMASK_TCLASS;
+		}
+
+		/* Is SL specified. */
+		if (attrp->apa_sl) {
+			mpr_req->SL = attrp->apa_sl;
+			c_mask |= SA_MPR_COMPMASK_SL;
+		}
+
+		if (flags & IBT_PATH_PERF) {
+			mpr_req->PacketLifeTimeSelector = IBT_BEST;
+			mpr_req->RateSelector = IBT_BEST;
+
+			c_mask |= SA_MPR_COMPMASK_PKTLTSELECTOR |
+			    SA_MPR_COMPMASK_RATESELECTOR;
+		} else {
+			if (attrp->apa_pkt_lt.p_selector == IBT_BEST) {
+				mpr_req->PacketLifeTimeSelector = IBT_BEST;
+				c_mask |= SA_MPR_COMPMASK_PKTLTSELECTOR;
+			}
+
+			if (attrp->apa_srate.r_selector == IBT_BEST) {
+				mpr_req->RateSelector = IBT_BEST;
+				c_mask |= SA_MPR_COMPMASK_RATESELECTOR;
+			}
+		}
+
+		/*
+		 * Honor individual selection of these attributes,
+		 * even if IBT_PATH_PERF is set.
+		 */
+		/* Check out whether Packet Life Time is specified. */
+		if (attrp->apa_pkt_lt.p_pkt_lt) {
+			mpr_req->PacketLifeTime =
+			    ibt_usec2ib(attrp->apa_pkt_lt.p_pkt_lt);
+			mpr_req->PacketLifeTimeSelector =
+			    attrp->apa_pkt_lt.p_selector;
+
+			c_mask |= SA_MPR_COMPMASK_PKTLT |
+			    SA_MPR_COMPMASK_PKTLTSELECTOR;
+		}
+
+		/* Is SRATE specified. */
+		if (attrp->apa_srate.r_srate) {
+			mpr_req->Rate = attrp->apa_srate.r_srate;
+			mpr_req->RateSelector = attrp->apa_srate.r_selector;
+
+			c_mask |= SA_MPR_COMPMASK_RATE |
+			    SA_MPR_COMPMASK_RATESELECTOR;
+		}
+	}
+
+	/* Alt PathMTU can be GT or EQU to current channel's Pri PathMTU */
+
+	/* P_Key must be same as that of primary path */
+	retval = ibt_index2pkey_byguid(c_hp.hp_hca_guid, c_hp.hp_port,
+	    qp_attr.qp_info.qp_transport.rc.rc_path.cep_pkey_ix,
+	    &mpr_req->P_Key);
+	if (retval != IBT_SUCCESS) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: PKeyIdx2Pkey "
+		    "Failed: %d", retval);
+		goto get_ip_alt_path_done;
+	}
+	c_mask |= SA_MPR_COMPMASK_PKEY;
+
+	mpr_req->Reversible = 1;  /* We always get REVERSIBLE paths. */
+	mpr_req->IndependenceSelector = 1;
+	c_mask |= SA_MPR_COMPMASK_REVERSIBLE | SA_MPR_COMPMASK_INDEPSEL;
+
+	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mpr_req))
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: CMask: 0x%llX", c_mask);
+
+	/* NOTE: We will **NOT** specify how many records we want. */
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: Primary: MTU %d, PKey[%d]="
+	    "0x%X\n\tSGID = %llX:%llX, DGID = %llX:%llX", prim_mtu,
+	    qp_attr.qp_info.qp_transport.rc.rc_path.cep_pkey_ix, mpr_req->P_Key,
+	    cur_sgid.gid_prefix, cur_sgid.gid_guid, cur_dgid.gid_prefix,
+	    cur_dgid.gid_guid);
+
+	/* Get SA Access Handle. */
+	if (new_sgid.gid_guid != 0)
+		saa_handle = ibcm_get_saa_handle(hcap, n_hp.hp_port);
+	else
+		saa_handle = ibcm_get_saa_handle(hcap, c_hp.hp_port);
+	if (saa_handle == NULL) {
+		retval = IBT_HCA_PORT_NOT_ACTIVE;
+		goto get_ip_alt_path_done;
+	}
+
+	/* Contact SA Access to retrieve Path Records. */
+	access_args.sq_attr_id = SA_MULTIPATHRECORD_ATTRID;
+	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
+	access_args.sq_component_mask = c_mask;
+	access_args.sq_template = mpr_req;
+	access_args.sq_template_length = sizeof (sa_multipath_record_t);
+	access_args.sq_callback = NULL;
+	access_args.sq_callback_arg = NULL;
+
+	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
+	    &results_p);
+	if (retval != IBT_SUCCESS) {
+		goto get_ip_alt_path_done;
+	}
+
+	num_rec = length / sizeof (sa_path_record_t);
+
+	/* The template is no longer needed once the query has completed. */
+	kmem_free(mpr_req, template_len);
+	mpr_req = NULL;
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: Found %d Paths", num_rec);
+
+	rec_found = 0;
+	if ((results_p != NULL) && (num_rec > 0)) {
+		/* Update the PathInfo with the response Path Records */
+		pr_resp = (sa_path_record_t *)results_p;
+		for (i = 0; i < num_rec; i++, pr_resp++) {
+			if (prim_mtu > pr_resp->Mtu) {
+				IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: "
+				    "Alt PathMTU(%d) must be GT or EQU to Pri "
+				    "PathMTU(%d). Ignore this rec",
+				    pr_resp->Mtu, prim_mtu);
+				continue;
+			}
+
+			if ((new_sgid.gid_guid == 0) &&
+			    (new_dgid.gid_guid == 0)) {
+				/* Reject PathRec if it same as Primary Path. */
+				if (ibcm_compare_paths(pr_resp,
+				    &qp_attr.qp_info.qp_transport.rc.rc_path,
+				    &c_hp) == B_TRUE) {
+					IBTF_DPRINTF_L3(cmlog,
+					    "ibt_get_ip_alt_path: PathRec "
+					    "obtained is similar to Prim Path, "
+					    "ignore this record");
+					continue;
+				}
+			}
+
+			if (new_sgid.gid_guid == 0) {
+				retval = ibcm_update_cep_info(pr_resp, NULL,
+				    &c_hp, &api_p->ap_alt_cep_path);
+			} else {
+				retval = ibcm_update_cep_info(pr_resp, NULL,
+				    &n_hp, &api_p->ap_alt_cep_path);
+			}
+			if (retval != IBT_SUCCESS)
+				continue;
+
+			/* Update some leftovers */
+			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*api_p))
+
+			api_p->ap_alt_pkt_lt = pr_resp->PacketLifeTime;
+
+			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*api_p))
+
+			rec_found = 1;
+			break;
+		}
+		kmem_free(results_p, length);
+	}
+
+	if (rec_found == 0) {
+		IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: AltPath cannot"
+		    " be established");
+		retval = IBT_PATH_RECORDS_NOT_FOUND;
+	} else
+		retval = IBT_SUCCESS;
+
+get_ip_alt_path_done:
+	/* Error exits taken after the template was allocated land here. */
+	if (mpr_req != NULL)
+		kmem_free(mpr_req, template_len);
+
+	if ((snum) && (sgids_p))
+		kmem_free(sgids_p, snum * sizeof (ib_gid_t));
+
+	if ((dnum) && (dgids_p))
+		kmem_free(dgids_p, dnum * sizeof (ib_gid_t));
+
+	ibcm_dec_hca_acc_cnt(hcap);
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_alt_path: Done (status %d)", retval);
+
+	return (retval);
+}
+
+
 /* Routines for warlock */
 
 /* ARGSUSED */
@@ -3225,3 +4933,16 @@ ibcm_dummy_path_handler(void *arg, ibt_status_t retval, ibt_path_info_t *paths,
 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_path_handler: "
 	    "dummy_path.func %p", dummy_path.func);
 }
+
+/* ARGSUSED */
+static void
+ibcm_dummy_ip_path_handler(void *arg, ibt_status_t retval,
+    ibt_path_info_t *paths, uint8_t num_path, ibt_path_ip_src_t *src_ip)
+{
+	ibcm_ip_path_tqargs_t	dummy_path;
+
+	dummy_path.func = ibcm_dummy_ip_path_handler;
+
+	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ip_path_handler: "
+	    "dummy_path.func %p",
dummy_path.func); +} diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c index a68fa0a581..9c0e056678 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -953,8 +953,8 @@ ibcm_process_req_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_status_t state_lookup_status; ibcm_status_t comid_lookup_status; ibcm_status_t response; - ibcm_req_msg_t *req_msgp = (ibcm_req_msg_t *) - &input_madp[IBCM_MAD_HDR_SIZE]; + ibcm_req_msg_t *req_msgp = + (ibcm_req_msg_t *)&input_madp[IBCM_MAD_HDR_SIZE]; ibt_cm_reason_t reject_reason = IBT_CM_SUCCESS; ibcm_state_data_t *statep; ibcm_state_data_t *stale_statep = NULL; @@ -1843,11 +1843,11 @@ ibcm_return_open_data(ibcm_state_data_t *statep, ibcm_rep_msg_t *rep_msgp, statep->open_return_data->rc_priv_data, statep->open_return_data->rc_priv_data_len); statep->open_return_data->rc_rdma_ra_in = - rep_msgp->rep_resp_resources; + rep_msgp->rep_resp_resources; statep->open_return_data->rc_rdma_ra_out = - rep_msgp->rep_initiator_depth; + rep_msgp->rep_initiator_depth; statep->open_return_data->rc_failover_status = - rep_msgp->rep_target_delay_plus >> 1 & 3; + rep_msgp->rep_target_delay_plus >> 1 & 3; statep->open_return_data->rc_status = reject_reason; mutex_enter(&statep->state_mutex); @@ -1882,8 +1882,8 @@ ibcm_process_mra_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) { ibcm_status_t state_lookup_status; - ibcm_mra_msg_t *mra_msgp = (ibcm_mra_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + ibcm_mra_msg_t *mra_msgp = + (ibcm_mra_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_state_data_t *statep = NULL; uint8_t mra_msg; @@ -2065,8 +2065,8 @@ ibcm_process_rtu_msg(ibcm_hca_info_t *hcap, uint8_t 
*input_madp, { timeout_id_t timer_val; ibcm_status_t status; - ibcm_rtu_msg_t *rtu_msg = (ibcm_rtu_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + ibcm_rtu_msg_t *rtu_msg = + (ibcm_rtu_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_state_data_t *statep = NULL; IBTF_DPRINTF_L4(cmlog, "ibcm_process_rtu_msg:"); @@ -2161,8 +2161,8 @@ ibcm_process_rej_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) { ibcm_status_t state_lookup_status; - ibcm_rej_msg_t *rej_msg = (ibcm_rej_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + ibcm_rej_msg_t *rej_msg = + (ibcm_rej_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_state_data_t *statep = NULL; ib_guid_t remote_hca_guid; ibcm_conn_state_t rej_state; @@ -2353,8 +2353,8 @@ ibcm_process_dreq_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, void *priv_data = NULL; ibcm_status_t state_lookup_status; ib_qpn_t local_qpn; - ibcm_dreq_msg_t *dreq_msgp = (ibcm_dreq_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + ibcm_dreq_msg_t *dreq_msgp = + (ibcm_dreq_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_state_data_t *statep = NULL; uint8_t close_event_type; ibt_cm_status_t cb_status; @@ -2730,8 +2730,8 @@ ibcm_process_drep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) { ibcm_status_t state_lookup_status; - ibcm_drep_msg_t *drep_msgp = (ibcm_drep_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + ibcm_drep_msg_t *drep_msgp = + (ibcm_drep_msg_t *)(&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_state_data_t *statep = NULL; IBTF_DPRINTF_L4(cmlog, "ibcm_process_drep_msg:"); @@ -2828,7 +2828,7 @@ ibcm_process_drep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, bcopy(drep_msgp->drep_private_data, statep->close_ret_priv_data, min(*statep->close_ret_priv_data_len, - IBT_DREP_PRIV_DATA_SZ)); + IBT_DREP_PRIV_DATA_SZ)); } mutex_enter(&statep->state_mutex); @@ -3010,8 +3010,8 @@ void ibcm_post_rej_mad(ibcm_state_data_t *statep, ibt_cm_reason_t reject_reason, int which_msg, void *addl_rej_info, ibt_priv_data_len_t 
arej_info_len) { - ibcm_rej_msg_t *rej_msg = (ibcm_rej_msg_t *) - IBCM_OUT_MSGP(statep->stored_msg); + ibcm_rej_msg_t *rej_msg = + (ibcm_rej_msg_t *)IBCM_OUT_MSGP(statep->stored_msg); /* Message printed if connection gets REJed */ IBTF_DPRINTF_L3(cmlog, "ibcm_post_rej_mad: " @@ -3122,8 +3122,8 @@ ibcm_build_n_post_rej_mad(uint8_t *input_madp, ib_com_id_t remote_comid, static void ibcm_post_rej_ver_mismatch(uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) { - ibcm_req_msg_t *req_msgp = (ibcm_req_msg_t *) - &input_madp[IBCM_MAD_HDR_SIZE]; + ibcm_req_msg_t *req_msgp = + (ibcm_req_msg_t *)&input_madp[IBCM_MAD_HDR_SIZE]; ibcm_rej_msg_t *rej_msg; ibmf_msg_t *cm_rej_msg; ibcm_mad_addr_t rej_reply_addr; @@ -3153,7 +3153,7 @@ ibcm_post_rej_ver_mismatch(uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) rej_msg->rej_addl_rej_info[0] = IBCM_MAD_CLASS_VERSION; IBCM_OUT_HDRP(cm_rej_msg)->AttributeID = - h2b16(IBCM_INCOMING_REJ + IBCM_ATTR_BASE_ID); + h2b16(IBCM_INCOMING_REJ + IBCM_ATTR_BASE_ID); IBCM_OUT_HDRP(cm_rej_msg)->Status = h2b16(MAD_STATUS_BAD_VERSION); _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rej_msg)) @@ -3180,8 +3180,8 @@ ibcm_post_rej_ver_mismatch(uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) void ibcm_post_rep_mad(ibcm_state_data_t *statep) { - ibcm_rep_msg_t *rep_msgp = (ibcm_rep_msg_t *) - IBCM_OUT_MSGP(statep->stored_msg); + ibcm_rep_msg_t *rep_msgp = + (ibcm_rep_msg_t *)IBCM_OUT_MSGP(statep->stored_msg); ibmf_msg_t *mra_msg = NULL; boolean_t ret = B_FALSE; @@ -4216,7 +4216,7 @@ ibcm_process_tlist() ibcm_timeout_list_hdr = statep->timeout_next; if (ibcm_timeout_list_hdr == NULL) - ibcm_timeout_list_tail = NULL; + ibcm_timeout_list_tail = NULL; statep->timeout_next = NULL; @@ -4230,7 +4230,7 @@ ibcm_process_tlist() ibcm_ud_timeout_list_hdr = ud_statep->ud_timeout_next; if (ibcm_ud_timeout_list_hdr == NULL) - ibcm_ud_timeout_list_tail = NULL; + ibcm_ud_timeout_list_tail = NULL; ud_statep->ud_timeout_next = NULL; @@ -4466,7 +4466,7 @@ 
ibcm_process_sidr_req_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_svc_bind_t *svc_bindp; ibcm_svc_bind_t *tmp_bindp; ibcm_sidr_req_msg_t *sidr_reqp = (ibcm_sidr_req_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + (&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_ud_state_data_t *ud_statep = NULL; ibcm_sidr_srch_t srch_sidr; ib_pkey_t pkey; @@ -4677,7 +4677,7 @@ ibcm_process_sidr_rep_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_status_t status; ib_svc_id_t tmp_svc_id; ibcm_sidr_rep_msg_t *sidr_repp = (ibcm_sidr_rep_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + (&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_ud_state_data_t *ud_statep = NULL; ibcm_sidr_srch_t srch_sidr; @@ -4946,7 +4946,7 @@ ibcm_sidr_timeout_cb(void *arg) if (ud_statep->ud_return_data != NULL) { ud_statep->ud_return_data->ud_status = - IBT_CM_SREP_TIMEOUT; + IBT_CM_SREP_TIMEOUT; ud_statep->ud_blocking_done = B_TRUE; cv_broadcast(&ud_statep->ud_block_client_cv); } @@ -5231,7 +5231,7 @@ ibcm_decode_classport_info(ibcm_hca_info_t *hcap, uint8_t *input_madp, ibcm_mad_addr_t *cm_mad_addr) { ibcm_classportinfo_msg_t *portinfop = (ibcm_classportinfo_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + (&input_madp[IBCM_MAD_HDR_SIZE]); IBTF_DPRINTF_L5(cmlog, "ibcm_decode_classport_info: (%p, %p, %p)", hcap, input_madp, cm_mad_addr); @@ -5334,24 +5334,24 @@ ibcm_set_primary_adds_vect(ibcm_state_data_t *statep, if (statep->mode == IBCM_PASSIVE_MODE) { adds_vectp->av_dlid = b2h16(msgp->req_primary_l_port_lid); adds_vectp->av_dgid.gid_prefix = - b2h64(msgp->req_primary_l_port_gid.gid_prefix); + b2h64(msgp->req_primary_l_port_gid.gid_prefix); adds_vectp->av_dgid.gid_guid = - b2h64(msgp->req_primary_l_port_gid.gid_guid); + b2h64(msgp->req_primary_l_port_gid.gid_guid); adds_vectp->av_sgid.gid_prefix = - b2h64(msgp->req_primary_r_port_gid.gid_prefix); + b2h64(msgp->req_primary_r_port_gid.gid_prefix); adds_vectp->av_sgid.gid_guid = - b2h64(msgp->req_primary_r_port_gid.gid_guid); + 
b2h64(msgp->req_primary_r_port_gid.gid_guid); adds_vectp->av_srate = flow_label20_res6_rate6 & 0x3f; } else { adds_vectp->av_dlid = b2h16(msgp->req_primary_r_port_lid); adds_vectp->av_dgid.gid_prefix = - b2h64(msgp->req_primary_r_port_gid.gid_prefix); + b2h64(msgp->req_primary_r_port_gid.gid_prefix); adds_vectp->av_dgid.gid_guid = - b2h64(msgp->req_primary_r_port_gid.gid_guid); + b2h64(msgp->req_primary_r_port_gid.gid_guid); adds_vectp->av_sgid.gid_prefix = - b2h64(msgp->req_primary_l_port_gid.gid_prefix); + b2h64(msgp->req_primary_l_port_gid.gid_prefix); adds_vectp->av_sgid.gid_guid = - b2h64(msgp->req_primary_l_port_gid.gid_guid); + b2h64(msgp->req_primary_l_port_gid.gid_guid); adds_vectp->av_srate = statep->local_srate; } @@ -5641,10 +5641,10 @@ ibcm_invoke_qp_modify(ibcm_state_data_t *statep, ibcm_req_msg_t *req_msgp, case IBT_RD_SRV: if (statep->mode == IBCM_ACTIVE_MODE) { /* look at REP msg */ IBCM_QPINFO(qp_info).rd.rd_qkey = - b2h32(rep_msgp->rep_local_qkey); + b2h32(rep_msgp->rep_local_qkey); } else { IBCM_QPINFO(qp_info).rd.rd_qkey = - b2h32(req_msgp->req_local_qkey); + b2h32(req_msgp->req_local_qkey); } break; @@ -5735,6 +5735,7 @@ ibcm_verify_req_gids_and_svcid(ibcm_state_data_t *statep, ib_pkey_t pkey; uint8_t port_num; ib_guid_t hca_guid; + ibcm_ip_pvtdata_t *ip_data; /* Verify LID and GID of primary port */ @@ -5772,8 +5773,8 @@ ibcm_verify_req_gids_and_svcid(ibcm_state_data_t *statep, hca_guid = statep->hcap->hca_guid; } else if (port.hp_base_lid != - (b2h16(cm_req_msgp->req_primary_r_port_lid) & - (~((1 << port.hp_lmc) - 1)))) { + (b2h16(cm_req_msgp->req_primary_r_port_lid) & + (~((1 << port.hp_lmc) - 1)))) { IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids: statep 0x%p " "primary port lid invalid (%x, %x, %x)", statep, port.hp_base_lid, @@ -5883,6 +5884,126 @@ ibcm_verify_req_gids_and_svcid(ibcm_state_data_t *statep, return (IBCM_FAILURE); } + /* + * Check if ServiceID is in RDMA IP CM SID range, if yes, we parse + * the REQ's Private Data and verify 
for it's goodness. + */ + if (((statep->svcid & IB_SID_IPADDR_PREFIX_MASK) == 0) && + (statep->svcid & IB_SID_IPADDR_PREFIX)) { + ibt_ari_ip_t ari_ip; + boolean_t rdma_rej_mad = B_FALSE; + + if (cm_req_msgp->req_private_data == NULL) { + mutex_exit(&ibcm_svc_info_lock); + + IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:" + " RDMA CM IP REQ Priv Data is NULL"); + + /* Send a REJ with CONSUMER REJ */ + ibcm_post_rej_mad(statep, IBT_CM_CONSUMER, + IBT_CM_FAILURE_REQ, NULL, 0); + return (IBCM_FAILURE); + } + ip_data = (ibcm_ip_pvtdata_t *)cm_req_msgp->req_private_data; + + bzero(&ari_ip, sizeof (ibt_ari_ip_t)); + + /* RDMA IP CM Layer Rejects this */ + if (ip_data->ip_MajV != IBT_CM_IP_MAJ_VER) { + IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:" + "IP MajorVer mis-match %d", ip_data->ip_MajV); + ari_ip.ip_reason = IBT_ARI_IP_MAJOR_VERSION; + ari_ip.ip_suggested_version = IBT_CM_IP_MAJ_VER; + ari_ip.ip_suggested = B_TRUE; + rdma_rej_mad = B_TRUE; + } else if (ip_data->ip_MinV != IBT_CM_IP_MIN_VER) { + IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:" + "IP MinorVer mis-match %d", ip_data->ip_MinV); + ari_ip.ip_reason = IBT_ARI_IP_MINOR_VERSION; + ari_ip.ip_suggested_version = IBT_CM_IP_MIN_VER; + ari_ip.ip_suggested = B_TRUE; + rdma_rej_mad = B_TRUE; + } else if ((ip_data->ip_ipv != IBT_CM_IP_IPV_V4) && + (ip_data->ip_ipv != IBT_CM_IP_IPV_V6)) { + IBTF_DPRINTF_L2(cmlog, "ibcm_verify_req_gids_and_svcid:" + " Invalid IPV specified %d", ip_data->ip_ipv); + ari_ip.ip_reason = IBT_ARI_IP_IPV; + ari_ip.ip_suggested_version = IBT_CM_IP_IPV_V4; + ari_ip.ip_suggested = B_TRUE; + rdma_rej_mad = B_TRUE; + } else { + /* + * Validate whether ip_addr specified are non-NULL. + * + * NOTE: + * RDMA ULP which is servicing this SID, should validate + * the correctness of srcip/dstip and accordingly post + * REJ related to ibt_ari_ip_reason_t of + * IBT_ARI_IP_SRC_ADDR, IBT_ARI_IP_DST_ADDR and + * IBT_ARI_IP_UNKNOWN_ADDR. 
+ */ + if (ip_data->ip_ipv == IBT_CM_IP_IPV_V4) { + if (ip_data->ip_srcv4 == 0) { + IBTF_DPRINTF_L2(cmlog, + "ibcm_verify_req_gids_and_svcid: " + "Invalid NULL V4 SrcIp specified"); + rdma_rej_mad = B_TRUE; + ari_ip.ip_reason = IBT_ARI_IP_SRC_ADDR; + ari_ip.ip_suggested = B_TRUE; + ari_ip.ip_suggested_version = + IBT_CM_IP_IPV_V4; + } else if (ip_data->ip_dstv4 == 0) { + IBTF_DPRINTF_L2(cmlog, + "ibcm_verify_req_gids_and_svcid: " + "Invalid NULL V4 DstIp specified"); + rdma_rej_mad = B_TRUE; + ari_ip.ip_reason = IBT_ARI_IP_DST_ADDR; + ari_ip.ip_suggested = B_TRUE; + ari_ip.ip_suggested_version = + IBT_CM_IP_IPV_V4; + } + } else if (ip_data->ip_ipv == IBT_CM_IP_IPV_V6) { + if (IN6_IS_ADDR_UNSPECIFIED( + &ip_data->ip_srcv6)) { + IBTF_DPRINTF_L2(cmlog, + "ibcm_verify_req_gids_and_svcid: " + "Invalid NULL V6 SrcIp specified"); + rdma_rej_mad = B_TRUE; + ari_ip.ip_reason = IBT_ARI_IP_SRC_ADDR; + ari_ip.ip_suggested = B_TRUE; + ari_ip.ip_suggested_version = + IBT_CM_IP_IPV_V6; + } else if (IN6_IS_ADDR_UNSPECIFIED( + &ip_data->ip_dstv6)) { + IBTF_DPRINTF_L2(cmlog, + "ibcm_verify_req_gids_and_svcid: " + "Invalid NULL V6 DstIp specified"); + rdma_rej_mad = B_TRUE; + ari_ip.ip_reason = IBT_ARI_IP_DST_ADDR; + ari_ip.ip_suggested = B_TRUE; + ari_ip.ip_suggested_version = + IBT_CM_IP_IPV_V6; + } + } + /* TBD: IBT_ARI_IP_UNKNOWN_ADDR */ + } + if (rdma_rej_mad == B_TRUE) { + ibt_ari_con_t cons_rej; + + mutex_exit(&ibcm_svc_info_lock); + + cons_rej.rej_ari_len = 1 + sizeof (ibt_ari_ip_t); + cons_rej.rej_ari[0] = 0; /* Rejected by CM Layer */ + bcopy(&ari_ip, &cons_rej.rej_ari[1], + sizeof (ibt_ari_ip_t)); + /* Send a REJ with CONSUMER REJ */ + ibcm_post_rej_mad(statep, IBT_CM_CONSUMER, + IBT_CM_FAILURE_REQ, &cons_rej, + sizeof (ibt_ari_con_t)); + return (IBCM_FAILURE); + } + } + /* find the best "bind" entry that enables this port */ pkey = b2h16(cm_req_msgp->req_part_key); @@ -6139,8 +6260,8 @@ ibcm_cep_state_req(ibcm_state_data_t *statep, ibcm_req_msg_t *cm_req_msgp, 
clnt_info.priv_data_len = ret_args.cm_ret_len; status = - ibcm_process_cep_req_cm_hdlr(statep, cb_status, - &clnt_info, reject_reason, arej_len, cm_req_msgp); + ibcm_process_cep_req_cm_hdlr(statep, cb_status, + &clnt_info, reject_reason, arej_len, cm_req_msgp); kmem_free(priv_data, IBT_MAX_PRIV_DATA_SZ); return (status); } @@ -6190,7 +6311,7 @@ ibcm_process_cep_req_cm_hdlr(ibcm_state_data_t *statep, /* client handler gave CM ok */ if (cb_status == IBT_CM_ACCEPT) { ibcm_rep_msg_t *rep_msgp = (ibcm_rep_msg_t *) - IBCM_OUT_MSGP(statep->stored_msg); + IBCM_OUT_MSGP(statep->stored_msg); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep)) @@ -6457,6 +6578,22 @@ ibcm_process_cep_req_cm_hdlr(ibcm_state_data_t *statep, IBT_CM_ADDL_REJ_LEN); bcopy(clnt_info->reply_event->rej.ari_consumer.rej_ari, &rej_msgp->rej_addl_rej_info, *arej_len); + + /* + * RDMA IP REQ was passed up to the ULP, the ULP decided to do + * a "normal" consumer REJ, by the returning IBT_CM_REJECT in + * the cm handler. + * CM has to do some extra stuff too, it has to + * a) return REJ code 28 (consumer) and b) put 0x1 in the first + * byte of the ARI data, to indicate that this is a RDMA aware + * ULP that is doing a consumer reject. The ULP should have + * put its consumer specific data into ibt_arej_info_t(9s) at + * byte 1 of the rej_ari[] array. 
+ */ + if (((statep->svcid & IB_SID_IPADDR_PREFIX_MASK) == 0) && + (statep->svcid & IB_SID_IPADDR_PREFIX)) { + rej_msgp->rej_addl_rej_info[0] = 1; + } } rej_msgp->rej_msg_type_plus = IBT_CM_FAILURE_REQ << 6; @@ -6499,9 +6636,9 @@ ibcm_cep_state_rep(ibcm_state_data_t *statep, ibcm_rep_msg_t *cm_rep_msgp, event.cm_session_id = statep; IBCM_EVT_REP(event).rep_rdma_ra_in = - cm_rep_msgp->rep_resp_resources; + cm_rep_msgp->rep_resp_resources; IBCM_EVT_REP(event).rep_rdma_ra_out = - cm_rep_msgp->rep_initiator_depth; + cm_rep_msgp->rep_initiator_depth; IBCM_EVT_REP(event).rep_service_time = ibt_ib2usec( ((uint8_t *)&(((ibcm_req_msg_t *)IBCM_OUT_MSGP( statep->stored_msg))->req_starting_psn_plus))[3] >> 3); @@ -6510,7 +6647,7 @@ ibcm_cep_state_rep(ibcm_state_data_t *statep, ibcm_rep_msg_t *cm_rep_msgp, 2 * statep->pkt_life_time - ibcm_sw_delay; IBCM_EVT_REP(event).rep_failover_status = - cm_rep_msgp->rep_target_delay_plus >> 1 & 3; + cm_rep_msgp->rep_target_delay_plus >> 1 & 3; if (cm_rep_msgp->rep_target_delay_plus & 0x1) IBCM_EVT_REP(event).rep_flags |= IBT_CM_FLOW_CONTROL; @@ -6674,7 +6811,7 @@ ibcm_process_cep_rep_cm_hdlr(ibcm_state_data_t *statep, if (clnt_info->priv_data_len != 0) { ibcm_rtu_msg_t *rtu_msgp; rtu_msgp = (ibcm_rtu_msg_t *) - IBCM_OUT_MSGP(statep->stored_msg); + IBCM_OUT_MSGP(statep->stored_msg); bcopy(clnt_info->priv_data, rtu_msgp->rtu_private_data, min(IBT_RTU_PRIV_DATA_SZ, clnt_info->priv_data_len)); @@ -6821,7 +6958,7 @@ ibcm_cep_state_rtu(ibcm_state_data_t *statep, ibcm_rtu_msg_t *cm_rtu_msgp) ibt_status_t status; ibt_cm_event_t event; ibcm_rep_msg_t *rep_msgp = (ibcm_rep_msg_t *) - IBCM_OUT_MSGP(statep->stored_msg); + IBCM_OUT_MSGP(statep->stored_msg); IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_rtu: statep 0x%p", statep); @@ -7044,7 +7181,7 @@ ibcm_cep_state_rej(ibcm_state_data_t *statep, ibcm_rej_msg_t *rej_msgp, event.cm_event.failed.cf_code = IBT_CM_FAILURE_REJ_RCV; event.cm_event.failed.cf_msg = rej_msgp->rej_msg_type_plus >> 6; 
event.cm_event.failed.cf_reason = - b2h16(rej_msgp->rej_rejection_reason); + b2h16(rej_msgp->rej_rejection_reason); IBTF_DPRINTF_L3(cmlog, "ibcm_cep_state_rej: rej_reason = %d", event.cm_event.failed.cf_reason); @@ -7519,7 +7656,7 @@ ibcm_process_lap_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, { ibcm_status_t state_lookup_status; ibcm_lap_msg_t *lap_msg = (ibcm_lap_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + (&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_apr_msg_t *apr_msg; ibcm_state_data_t *statep = NULL; @@ -7642,7 +7779,7 @@ ibcm_post_stored_apr_mad(ibcm_state_data_t *statep, uint8_t *input_madp) bcopy(apr_msg, IBCM_OUT_MSGP(ibmf_apr_msg), IBCM_MSG_SIZE); IBCM_OUT_HDRP(ibmf_apr_msg)->AttributeID = - h2b16(IBCM_INCOMING_APR + IBCM_ATTR_BASE_ID); + h2b16(IBCM_INCOMING_APR + IBCM_ATTR_BASE_ID); IBCM_OUT_HDRP(ibmf_apr_msg)->TransactionID = ((ib_mad_hdr_t *)(input_madp))->TransactionID; @@ -7989,7 +8126,7 @@ ibcm_process_apr_msg(ibcm_hca_info_t *hcap, uint8_t *input_madp, { ibcm_status_t state_lookup_status; ibcm_apr_msg_t *apr_msg = (ibcm_apr_msg_t *) - (&input_madp[IBCM_MAD_HDR_SIZE]); + (&input_madp[IBCM_MAD_HDR_SIZE]); ibcm_state_data_t *statep = NULL; IBTF_DPRINTF_L4(cmlog, "ibcm_process_apr_msg:"); diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c index e0995f6974..f13ae8f43f 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -27,6 +27,7 @@ #include <sys/ib/mgt/ibcm/ibcm_impl.h> #include <sys/ib/ibtl/ibti.h> +#include <sys/ib/mgt/ibcm/ibcm_arp.h> /* * ibcm_ti.c @@ -1259,7 +1260,7 @@ ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode, mutex_enter(&statep->state_mutex); if (statep->dreq_msg == NULL) { - IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p " + IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p " "Fatal Error: dreq_msg is NULL", channel); IBCM_RELEASE_CHAN_PRIVATE(channel); mutex_exit(&statep->state_mutex); @@ -1549,9 +1550,7 @@ ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep, IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: " "NOCALLBACKS on in statep = %p", statep); } - mutex_exit(&statep->state_mutex); - mutex_enter(&statep->state_mutex); if (statep->state != IBCM_STATE_ESTABLISHED) { goto lost_race; } @@ -2385,7 +2384,7 @@ ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id, mutex_exit(&statep->state_mutex); IBCM_OUT_HDRP(statep->mra_msg)->TransactionID = - IBCM_OUT_HDRP(statep->stored_msg)->TransactionID; + IBCM_OUT_HDRP(statep->stored_msg)->TransactionID; /* post the MRA mad in blocking mode, as no timers involved */ ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete, @@ -3244,9 +3243,9 @@ ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp) /* verify GID/pkey is valid for a local port, etc. 
*/ hcap = NULL; if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port)) - != IBT_SUCCESS || + != IBT_SUCCESS || (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port, - arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS || + arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS || (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) { cv_destroy(&new->ar_cv); ibcm_ar_list = new->ar_link; @@ -6065,6 +6064,236 @@ get_comp_pgid_exit: return (retval); } +/* RDMA IP CM Support routines */ +ibt_status_t +ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, ibt_ip_addr_t *src_ip) +{ + ibcm_arp_ip_t *ipp; + ibcm_arp_ibd_insts_t ibds; + int i; + boolean_t found = B_FALSE; + ibt_status_t retval = IBT_SUCCESS; + + IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%llX:%llX, %X, %p)", + gid.gid_prefix, gid.gid_guid, pkey, src_ip); + + if (gid.gid_prefix == 0 || gid.gid_guid == 0) { + IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID."); + return (IBT_INVALID_PARAM); + } + + if (src_ip == NULL) { + IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: ERROR: src_ip NULL"); + return (IBT_INVALID_PARAM); + } + + bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t)); + ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES; + ibds.ibcm_arp_ibd_cnt = 0; + ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc( + ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP); + + retval = ibcm_arp_get_ibds(&ibds); + if (retval != IBT_SUCCESS) { + IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds " + "failed to get IBD Instances: ret 0x%x", retval); + goto get_src_ip_end; + } + + for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt; + i++, ipp++) { + if (ipp->ip_port_gid.gid_prefix == gid.gid_prefix && + ipp->ip_port_gid.gid_guid == gid.gid_guid) { + if (pkey) { + if (ipp->ip_pkey == pkey) { + found = B_TRUE; + break; + } else + continue; + } + found = B_TRUE; + break; + } + } + + if (found == B_FALSE) { + retval = IBT_SRC_IP_NOT_FOUND; + } else { + src_ip->family = ipp->ip_inet_family; + if (src_ip->family == AF_INET) { + 
bcopy(&ipp->ip_cm_sin.sin_addr, &src_ip->un.ip4addr, + sizeof (in_addr_t)); + IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip: Got %lX", + src_ip->un.ip4addr); + } else if (src_ip->family == AF_INET6) { + bcopy(&ipp->ip_cm_sin6.sin6_addr, &src_ip->un.ip6addr, + sizeof (in6_addr_t)); + } + } + +get_src_ip_end: + if (ibds.ibcm_arp_ip) + kmem_free(ibds.ibcm_arp_ip, + ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t)); + + return (retval); +} + +ib_svc_id_t +ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port) +{ + ib_svc_id_t sid; + + IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_sid(%X, %lX)", protocol_num, + dst_port); + + /* + * If protocol_num is non-zero, then formulate the SID and return it. + * If protocol_num is zero, then we need to assign a locally generated + * IP SID with IB_SID_IPADDR_PREFIX. + */ + if (protocol_num) { + sid = IB_SID_IPADDR_PREFIX | protocol_num << 16 | dst_port; + } else { + sid = ibcm_alloc_ip_sid(); + } + + IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_sid: SID: 0x%016llX", sid); + return (sid); +} + +ibt_status_t +ibt_release_ip_sid(ib_svc_id_t ip_sid) +{ + IBTF_DPRINTF_L4(cmlog, "ibt_release_ip_sid(%llX)", ip_sid); + + if (((ip_sid & IB_SID_IPADDR_PREFIX_MASK) != 0) || + (!(ip_sid & IB_SID_IPADDR_PREFIX))) { + IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: " + "Called for Non-RDMA IP SID", ip_sid); + return (IBT_INVALID_PARAM); + } + + /* + * If protocol_num in ip_sid are all ZEROs, then this SID is allocated + * by IBTF. If not, then the specified ip_sid is invalid. 
+ */ + if (ip_sid & IB_SID_IPADDR_IPNUM_MASK) { + IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: " + "Called for Non-IBTF assigned RDMA IP SID", ip_sid); + return (IBT_INVALID_PARAM); + } + + ibcm_free_ip_sid(ip_sid); + + return (IBT_SUCCESS); +} + + +uint8_t +ibt_get_ip_protocol_num(ib_svc_id_t sid) +{ + return ((sid & IB_SID_IPADDR_IPNUM_MASK) >> 16); +} + +in_port_t +ibt_get_ip_dst_port(ib_svc_id_t sid) +{ + return (sid & IB_SID_IPADDR_PORTNUM_MASK); +} + +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_cm_info_t)) +_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_ip_pvtdata_t)) + +ibt_status_t +ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info, + ibt_priv_data_len_t priv_data_len, void *priv_data_p) +{ + ibcm_ip_pvtdata_t *ip_data; + + IBTF_DPRINTF_L4(cmlog, "ibt_format_ip_private_data(%p, %d, %p)", + ip_cm_info, priv_data_len, priv_data_p); + + if ((ip_cm_info == NULL) || (priv_data_p == NULL) || + (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) { + IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR " + "Invalid Inputs."); + return (IBT_INVALID_PARAM); + } + + /* bzero'ing just IP_HDR part */ + bzero(priv_data_p, IBT_IP_HDR_PRIV_DATA_SZ); + ip_data = (ibcm_ip_pvtdata_t *)priv_data_p; + ip_data->ip_srcport = b2h16(ip_cm_info->src_port); /* Source Port */ + + /* IPV = 0x4, if IP-Addr are IPv4 format, else 0x6 for IPv6 */ + if (ip_cm_info->src_addr.family == AF_INET) { + ip_data->ip_ipv = IBT_CM_IP_IPV_V4; + ip_data->ip_srcv4 = ntohl(ip_cm_info->src_addr.un.ip4addr); + ip_data->ip_dstv4 = ntohl(ip_cm_info->dst_addr.un.ip4addr); + } else if (ip_cm_info->src_addr.family == AF_INET6) { + ip_data->ip_ipv = IBT_CM_IP_IPV_V6; + bcopy(&ip_cm_info->src_addr.un.ip6addr, + &ip_data->ip_srcv6, sizeof (in6_addr_t)); + bcopy(&ip_cm_info->dst_addr.un.ip6addr, + &ip_data->ip_dstv6, sizeof (in6_addr_t)); + } else { + IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR " + "IP Addr needs to be either AF_INET or AF_INET6 family."); + return 
(IBT_INVALID_PARAM); + } + + ip_data->ip_MajV = IBT_CM_IP_MAJ_VER; + ip_data->ip_MinV = IBT_CM_IP_MIN_VER; + + return (IBT_SUCCESS); +} + + +ibt_status_t +ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, void *priv_data, + ibt_ip_cm_info_t *ip_cm_infop) +{ + ibcm_ip_pvtdata_t *ip_data; + + IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_data(%d, %p, %p)", + priv_data_len, priv_data, ip_cm_infop); + + if ((ip_cm_infop == NULL) || (priv_data == NULL) || + (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) { + IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR Invalid Inputs"); + return (IBT_INVALID_PARAM); + } + + bzero(ip_cm_infop, sizeof (ibt_ip_cm_info_t)); + + ip_data = (ibcm_ip_pvtdata_t *)priv_data; + ip_cm_infop->src_port = b2h16(ip_data->ip_srcport); /* Source Port */ + + /* IPV = 0x4, if IP Address are IPv4 format, else 0x6 for IPv6 */ + if (ip_data->ip_ipv == IBT_CM_IP_IPV_V4) { + /* Copy IPv4 Addr */ + ip_cm_infop->src_addr.family = AF_INET; + ip_cm_infop->src_addr.un.ip4addr = ntohl(ip_data->ip_srcv4); + ip_cm_infop->dst_addr.family = AF_INET; + ip_cm_infop->dst_addr.un.ip4addr = ntohl(ip_data->ip_dstv4); + } else if (ip_data->ip_ipv == IBT_CM_IP_IPV_V6) { + /* Copy IPv6 Addr */ + ip_cm_infop->src_addr.family = AF_INET6; + bcopy(&ip_data->ip_srcv6, &ip_cm_infop->src_addr.un.ip6addr, + sizeof (in6_addr_t)); + ip_cm_infop->dst_addr.family = AF_INET6; + bcopy(&ip_data->ip_dstv6, &ip_cm_infop->dst_addr.un.ip6addr, + sizeof (in6_addr_t)); + } else { + IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR: IP Addr needs" + " to be either AF_INET or AF_INET6 family."); + return (IBT_INVALID_PARAM); + } + + return (IBT_SUCCESS); +} + /* Routines for warlock */ diff --git a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_utils.c b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_utils.c index 14a2a149ae..e89a0fc7a7 100644 --- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_utils.c +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_utils.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the 
terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -43,10 +42,11 @@ /* statics */ static vmem_t *ibcm_local_sid_arena; +static vmem_t *ibcm_ip_sid_arena; static ib_svc_id_t ibcm_local_sid_seed; static ib_com_id_t ibcm_local_cid_seed; _NOTE(READ_ONLY_DATA({ibcm_local_sid_arena ibcm_local_sid_seed - ibcm_local_cid_seed})) + ibcm_ip_sid_arena ibcm_local_cid_seed})) static void ibcm_delete_state_from_avl(ibcm_state_data_t *statep); static void ibcm_init_conn_trace(ibcm_state_data_t *statep); static void ibcm_fini_conn_trace(ibcm_state_data_t *statep); @@ -354,8 +354,8 @@ ibcm_delete_state_from_avl(ibcm_state_data_t *statep) avl_index_t p_where = 0; avl_index_t pcomid_where = 0; ibcm_hca_info_t *hcap; - ibcm_state_data_t *active_nodep, *passive_nodep, - *passive_comid_nodep; + ibcm_state_data_t *active_nodep, *passive_nodep; + ibcm_state_data_t *passive_comid_nodep; ibcm_passive_node_info_t info; ibcm_passive_comid_node_info_t info_comid; @@ -584,10 +584,10 @@ ibcm_find_sidr_entry(ibcm_sidr_srch_t *srch_param, ibcm_hca_info_t *hcap, if ((usp->ud_sidr_req_lid == srch_param->srch_lid) && ((srch_param->srch_gid.gid_prefix == 0) || (srch_param->srch_gid.gid_prefix == - usp->ud_sidr_req_gid.gid_prefix)) && + usp->ud_sidr_req_gid.gid_prefix)) && ((srch_param->srch_gid.gid_guid == 0) || (srch_param->srch_gid.gid_guid == - usp->ud_sidr_req_gid.gid_guid)) && + usp->ud_sidr_req_gid.gid_guid)) && 
(srch_param->srch_req_id == usp->ud_req_id) && (usp->ud_grh_exists == srch_param->srch_grh_exists) && (usp->ud_mode == srch_param->srch_mode)) { /* found match */ @@ -825,6 +825,7 @@ ibcm_init_ids(void) timespec_t tv; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_local_sid_arena)) + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ip_sid_arena)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_local_sid_seed)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_local_cid_seed)) @@ -835,14 +836,23 @@ ibcm_init_ids(void) if (!ibcm_local_sid_arena) return (IBCM_FAILURE); + ibcm_ip_sid_arena = vmem_create("ibcm_ip_sid", (void *)IBCM_INITIAL_SID, + IBCM_MAX_IP_SIDS, 1, NULL, NULL, NULL, 0, + VM_SLEEP | VMC_IDENTIFIER); + + if (!ibcm_ip_sid_arena) + return (IBCM_FAILURE); + /* create a random starting value for local service ids */ gethrestime(&tv); ibcm_local_sid_seed = ((uint64_t)tv.tv_sec << 20) & 0x007FFFFFFFF00000; ASSERT((ibcm_local_sid_seed & IB_SID_AGN_MASK) == 0); ibcm_local_sid_seed |= IB_SID_AGN_LOCAL; + ibcm_local_cid_seed = (ib_com_id_t)tv.tv_sec; _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_local_sid_arena)) _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_local_sid_seed)) + _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ip_sid_arena)) _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_local_cid_seed)) return (IBCM_SUCCESS); @@ -894,6 +904,7 @@ ibcm_fini_ids(void) { /* All arenas shall be valid */ vmem_destroy(ibcm_local_sid_arena); + vmem_destroy(ibcm_ip_sid_arena); } /* @@ -1024,6 +1035,40 @@ ibcm_free_local_sids(ib_svc_id_t service_id, int num_sids) (void *)(uintptr_t)service_id, num_sids); } +/* + * ibcm_alloc_ip_sid: + * Allocate a local IP SID. 
+ */ +ib_svc_id_t +ibcm_alloc_ip_sid() +{ + ib_svc_id_t sid; + + sid = (ib_svc_id_t)(uintptr_t)vmem_alloc(ibcm_ip_sid_arena, 1, + VM_SLEEP | VM_NEXTFIT); + if (sid == 0) { + IBTF_DPRINTF_L2(cmlog, "ibcm_alloc_ip_sid: no more RDMA IP " + "SIDs available"); + } else { + sid += IB_SID_IPADDR_PREFIX; + IBTF_DPRINTF_L4(cmlog, "ibcm_alloc_ip_sid: Success: RDMA IP SID" + " allocated : 0x%016llX", sid); + } + return (sid); +} + +/* + * ibcm_free_ip_sid: + * Releases the given IP Service ID + */ +void +ibcm_free_ip_sid(ib_svc_id_t sid) +{ + sid -= IB_SID_IPADDR_PREFIX; + vmem_free(ibcm_ip_sid_arena, (void *)(uintptr_t)sid, 1); +} + + /* Allocate and free request id routines for SIDR */ /* diff --git a/usr/src/uts/common/io/warlock/ibcm.wlcmd b/usr/src/uts/common/io/warlock/ibcm.wlcmd index 71002ea295..ae1004b1c4 100644 --- a/usr/src/uts/common/io/warlock/ibcm.wlcmd +++ b/usr/src/uts/common/io/warlock/ibcm.wlcmd @@ -18,7 +18,7 @@ # # CDDL HEADER END # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" @@ -53,6 +53,23 @@ root ibt_get_companion_port_gids root ibt_get_paths root ibt_get_alt_path root ibt_aget_paths +root ibt_get_ip_paths +root ibt_aget_ip_paths +root ibt_get_ip_alt_path +root ibt_format_ip_private_data +root ibt_get_ip_data +root ibt_get_ip_dst_port +root ibt_get_ip_protocol_num +root ibt_get_ip_sid +root ibt_release_ip_sid +root ibt_get_src_ip + +root ibcm_arp_timeout +root ibcm_arp_get_srcip_plist +root ibcm_arp_lrput +root ibcm_arp_lwsrv +root ibcm_arp_lrsrv +root ibcm_arp_get_ibd_insts_cb # callback entry points from ibmf root ibcm_recv_cb @@ -99,6 +116,7 @@ root ibcm_recv_task root ibcm_init_saa root ibcm_process_abort_via_taskq root ibcm_process_async_get_paths +root ibcm_process_get_ip_paths root ibcm_service_record_rewrite_task # kernel callbacks to ibcm diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_arp.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_arp.h deleted file mode 100644 index b102697d35..0000000000 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_arp.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _RDSIB_ARP_H -#define _RDSIB_ARP_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/ib/ibtl/ibti.h> -#include <sys/ib/ibtl/ibtl_types.h> -#include <sys/ib/ib_pkt_hdrs.h> -#include <sys/modhash.h> -#include <sys/ib/clients/ibd/ibd.h> -#include <sys/strsun.h> -#include <sys/strsubr.h> -#include <sys/socket.h> -#include <sys/stat.h> /* for S_IFCHR */ -#include <inet/common.h> -#include <inet/ip.h> -#include <inet/ip_if.h> -#include <inet/ip_ire.h> -#include <inet/ip_rts.h> -#include <sys/dlpi.h> -#include <net/route.h> - -/* - * Place holder for ipv4 or ipv6 address - */ -typedef struct { - sa_family_t family; - union { - in_addr_t ip4addr; - in6_addr_t ip6addr; - } un; -} rds_ipx_addr_t; - -/* - * IPoIB addr lookup completion function - */ -typedef int (*rds_pr_comp_func_t) (void *usr_arg, int status); - -/* - * Path record cache node definition - */ -typedef struct rds_prcn { - rds_ipx_addr_t dst_addr; /* requested address */ - rds_ipx_addr_t src_addr; - rds_ipx_addr_t gateway; /* gateway to use */ - clock_t last_used_time; /* last used */ - uint32_t hw_port; /* source port */ - ibt_hca_hdl_t hca_hdl; /* hca handle */ - uint16_t pkey; - ibt_path_info_t path_info; - ibt_path_attr_t path_attr; - struct rds_prcn *next; - struct rds_prcn **p_next; -} rds_prcn_t; - -#define RDS_MAX_IFNAME_LEN 24 -#define RDS_MAX_IP6_RETRIES 6 - -#define RDS_ARP_XMIT_COUNT 6 -#define RDS_ARP_XMIT_INTERVAL 1000 /* timeout in milliseconds */ -#define RDS_ARP_TIMEOUT \ - ((RDS_ARP_XMIT_COUNT + 1) * RDS_ARP_XMIT_INTERVAL) -#define RDS_IP6_TIMEOUT 1000000 /* timeout in microseconds */ -#define RDS_PR_CACHE_REAPING_AGE 10 /* in seconds */ -#define RDS_PR_CACHE_REAPING_AGE_USECS (RDS_PR_CACHE_REAPING_AGE * 1000000) - -enum { - RDS_PR_RT_PENDING = 0x01, - RDS_PR_ARP_PENDING = 0x02 -}; - -typedef struct { - ib_guid_t hca_guid; - ibt_hca_hdl_t hca_hdl; - uint8_t nports; - int opened; -} 
rds_hca_info_t; - -/* - * Path record wait queue node definition - */ -typedef struct rds_prwqn { - rds_pr_comp_func_t func; /* user callback function */ - void *arg; /* callback function arg */ - timeout_id_t timeout_id; - uint8_t flags; - rds_ipx_addr_t usrc_addr; /* user supplied src address */ - rds_ipx_addr_t dst_addr; /* user supplied dest address */ - - rds_ipx_addr_t src_addr; /* rts's view of source address */ - char ifname[RDS_MAX_IFNAME_LEN]; - int ibd_instance; - uint16_t ifproto; - ipoib_mac_t src_mac; - ipoib_mac_t dst_mac; - uint32_t localroute; /* user option */ - uint32_t bound_dev_if; /* user option */ - ib_gid_t sgid; - ib_gid_t dgid; - uint8_t hw_port; - uint16_t pkey; - int retries; /* no. of ND retries for ipv6 */ -} rds_prwqn_t; - -typedef struct rds_streams_s { - kmutex_t lock; - kcondvar_t cv; - queue_t *arpqueue; - vnode_t *arp_vp; - int status; - rds_prwqn_t *wqnp; -} rds_streams_t; - -#define RDS_IPV4_ADDR(a) (a->un.ip4addr) -#define RDS_IPV6_ADDR(a) (a->un.ip6addr) -#define RDS_IS_V4_ADDR(a) ((a)->family == AF_INET) - -/* - * #define RDS_IS_V4_ADDR(a) ((a)->family == AF_RDS) - */ -#define RDS_IS_V6_ADDR(a) ((a)->family == AF_INET6) - -#define RDS_IOCTL ((('P' & 0xff) << 8) | (('R' & 0xff) << 16)) - -#define RDS_PR_LOOKUP (RDS_IOCTL + 1) -#define IB_HW_LEN 20 - -typedef struct { - int family; - union { - in_addr_t ip4addr; - in6_addr_t ip6addr; - } un; - - uint8_t hwaddr[IB_HW_LEN]; -} rds_prreq_t; - -#ifdef __cplusplus -} -#endif - -#endif /* _RDSIB_ARP_H */ diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h index 4d1626bbf8..a2bf77aed0 100644 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h @@ -304,6 +304,7 @@ typedef struct rds_session_s { krwlock_t session_remote_portmap_lock; uint8_t session_local_portmap[RDS_PORT_MAP_SIZE]; uint8_t session_remote_portmap[RDS_PORT_MAP_SIZE]; + ibt_path_info_t session_pinfo; } 
rds_session_t; /* defined in rds_ep.c */ diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h index 86e00a828e..193fa66e66 100644 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ib.h @@ -182,6 +182,8 @@ typedef struct rds_state_s { uint_t rds_nhcas; rds_hca_t *rds_hcalistp; ibt_srv_hdl_t rds_srvhdl; + ibt_srv_hdl_t rds_old_srvhdl; + ib_svc_id_t rds_service_id; } rds_state_t; extern rds_state_t *rdsib_statep; /* global */ diff --git a/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h b/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h index febf055930..5f91741572 100644 --- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_protocol.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -83,7 +83,7 @@ extern "C" { #include <netinet/in.h> -#define RDS_VERSION 2 +#define RDS_VERSION 3 /* * RDS Well known service id @@ -146,6 +146,7 @@ extern "C" { * same size on the both active and passive nodes. */ typedef struct rds_cm_private_data_s { + uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ]; uint8_t cmp_version; uint8_t cmp_arch; uint8_t cmp_eptype; diff --git a/usr/src/uts/common/sys/ib/ib_types.h b/usr/src/uts/common/sys/ib/ib_types.h index 8ebacce3e6..30d00c198a 100644 --- a/usr/src/uts/common/sys/ib/ib_types.h +++ b/usr/src/uts/common/sys/ib/ib_types.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -199,6 +198,11 @@ typedef uint8_t ib_time_t; /* 6 bits of timeout exponent */ #define IB_SID_AGN_IETF 0x0100000000000000 #define IB_SID_AGN_LOCAL 0x0200000000000000 +#define IB_SID_IPADDR_PREFIX 0x0000000001000000 /* Byte 4 */ +#define IB_SID_IPADDR_PREFIX_MASK 0xFFFFFFFFFE000000 +#define IB_SID_IPADDR_IPNUM_MASK 0x0000000000FF0000 /* Byte 5 */ +#define IB_SID_IPADDR_PORTNUM_MASK 0x000000000000FFFF /* Byte 6 & 7 */ + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/ib/ibtl/ibti_cm.h b/usr/src/uts/common/sys/ib/ibtl/ibti_cm.h index 3b46e392eb..fe7cfaf062 100644 --- a/usr/src/uts/common/sys/ib/ibtl/ibti_cm.h +++ b/usr/src/uts/common/sys/ib/ibtl/ibti_cm.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -55,6 +54,7 @@ extern "C" { #define IBT_REQ_PRIV_DATA_SZ 92 #define IBT_SIDR_REQ_PRIV_DATA_SZ 216 #define IBT_SIDR_REP_PRIV_DATA_SZ 136 +#define IBT_IP_HDR_PRIV_DATA_SZ 36 #define IBT_CM_ADDL_REJ_LEN 72 /* Additional Rej Info len */ /* This is the max consumer addl */ @@ -68,7 +68,30 @@ typedef uint8_t ibt_priv_data_len_t; * CM channel handler reject reasons. * * Refer to InfiniBand Architecture Release Volume 1 Rev 1.0a: - * Section 12.6.7.2 Rejection Reason + * Section 12.6.7.2 Rejection Reason, and RDMA IP CM Service Annex + * + * Note: + * When a REJ happens for an RDMA-aware ULP, a consumer reject code + * indicating an IP CM Service reject or a RDMA-Aware ULP reject can + * be returned. In the IBTA spec both use the consumer reject code, but + * are distinguished by the REJ layer byte (table 3 of the annex 11). + * The IBTF CM can thus tell what type of reject has been returned. When + * a RDMA ULP issues a consumer REJ to an RDMA REQ then the CM will + * return an IBT_CM_CONSUMER ibt_cm_reason_t. The ARI data is returned + * in an ibt_ari_con_t struct accessed by the 'ari_consumer' member of + * the ibt_arej_info_t. However the consumer reject data begins at + * 'ari_consumer.rej_ari[1]', and is of length + * 'ari_consumer.rej_ari_len - 1' (the first byte is the REJ layer byte), + * where as for a non RDMA-aware ULP consumer REJ, the ARI data begins + * at 'ari_consumer.rej_ari[0]' and is of length 'ari_consumer.rej_ari_len' + * + * If an RDMA-aware ULP REQ is rejected by the IP CM Service layer, the + * CM will return the new IBT_CM_RDMA_IP ibt_cm_reason_t, and the + * private data is returned in an ibt_ari_ip_t struct accessed via the + * 'ari_ip' member of the ibt_arej_info_t struct. + * + * If an RDMA IP CM REQ is sent to a non RDMA-aware ULP consumer, then + * the REQ is Rejected with an IBT_CM_INVALID_SID ibt_cm_reason_t. 
*/ typedef enum ibt_cm_reason_e { IBT_CM_SUCCESS = 0, /* Success */ @@ -118,9 +141,9 @@ typedef enum ibt_cm_reason_e { IBT_CM_ABORT = 1001, /* Connection aborted */ IBT_CM_CI_FAILURE = 1002, /* A call to CI failed, could be */ /* query/modify channel */ - IBT_CM_CHAN_INVALID_STATE = 1003 /* Passive's QP is not in Init */ + IBT_CM_CHAN_INVALID_STATE = 1003, /* Passive's QP is not in Init */ /* state */ - + IBT_CM_RDMA_IP = 1004 /* RDMA IP CM reject */ } ibt_cm_reason_t; /* @@ -269,6 +292,11 @@ typedef struct ibt_cm_lap_rcv_s { /* return to the CM */ } ibt_cm_lap_rcv_t; +#define IBT_CM_IP_MAJ_VER 0 +#define IBT_CM_IP_MIN_VER 0 +#define IBT_CM_IP_IPV_V4 0x4 +#define IBT_CM_IP_IPV_V6 0x6 + /* * Consumer defined Additional reject information. */ @@ -278,6 +306,30 @@ typedef struct ibt_ari_con_s { } ibt_ari_con_t; /* + * Consumer defined Additional reject information. + * For RDMA IP CM Service. + */ +typedef uint8_t ibt_ari_ip_reason_t; +#define IBT_ARI_IP_UNSPECIFIED 0x0 +#define IBT_ARI_IP_MAJOR_VERSION 0x1 +#define IBT_ARI_IP_MINOR_VERSION 0x2 +#define IBT_ARI_IP_IPV 0x3 +#define IBT_ARI_IP_SRC_ADDR 0x4 +#define IBT_ARI_IP_DST_ADDR 0x5 +#define IBT_ARI_IP_UNKNOWN_ADDR 0x6 + +typedef struct ibt_ari_ip_s { + ibt_ip_addr_t ip_suggested_addr; /* IP_UNKNOWN_ADDR */ + boolean_t ip_suggested; /* suggested valid */ + ibt_ari_ip_reason_t ip_reason; + uint8_t ip_suggested_version:4; /* IP_MAJOR_VERSION */ + /* IP_MINOR_VERSION */ + /* IP_IPV, */ + /* IP_SRC_ADDR, */ + /* IP_DST_ADDR */ +} ibt_ari_ip_t; + +/* * Additional reject information. 
*/ typedef union ibt_arej_info_u { @@ -299,6 +351,7 @@ typedef union ibt_arej_info_u { /* IBT_CM_INVALID_ALT_RATE */ ib_mtu_t ari_mtu; /* IBT_CM_INVALID_MTU */ ibt_redirect_info_t ari_redirect; /* IBT_CM_REDIRECT_CM */ + ibt_ari_ip_t ari_ip; /* IBT_CM_RDMA_IP */ } ibt_arej_info_t; /* diff --git a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h index 86a1fd0e8f..6df7deefbe 100644 --- a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h +++ b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -1615,6 +1614,230 @@ ibt_failure_type_t ibt_check_failure(ibt_status_t status, uint64_t *reserved_p); */ int ibt_hw_is_present(); +/* + * Fast Memory Registration (FMR). + * + * ibt_create_fmr_pool + * Not fast-path. + * ibt_create_fmr_pool() verifies that the HCA supports FMR and allocates + * and initializes an "FMR pool". This pool contains state specific to + * this registration, including the watermark setting to determine when + * to sync, and the total number of FMR regions available within this pool. + * + * ibt_destroy_fmr_pool + * ibt_destroy_fmr_pool() deallocates all of the FMR regions in a specific + * pool. All state and information regarding the pool are destroyed and + * returned as free space once again. 
No more use of FMR regions in this + * pool are possible without a subsequent call to ibt_create_fmr_pool(). + * + * ibt_flush_fmr_pool + * ibt_flush_fmr_pool forces a flush to occur. At the client's request, + * any unmapped FMR regions (See 'ibt_deregister_mr())') are returned to + * a free state. This function allows for an asynchronous cleanup of + * formerly used FMR regions. Sync operation is also performed internally + * by HCA driver, when 'watermark' settings for the number of free FMR + * regions left in the "pool" is reached. + * + * ibt_register_physical_fmr + * ibt_register_physical_fmr() assigns a "free" entry from the FMR Pool. + * It first consults the "FMR cache" to see if this is a duplicate memory + * registration to something already in use. If not, then a free entry + * in the "pool" is marked used. + * + * ibt_deregister_fmr + * The ibt_deregister_fmr un-maps the resources reserved from the FMR + * pool by ibt_register_physical_fmr(). The ibt_deregister_fmr() will + * mark the region as free in the FMR Pool. + */ +ibt_status_t ibt_create_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd, + ibt_fmr_pool_attr_t *fmr_params, ibt_fmr_pool_hdl_t *fmr_pool_p); + +ibt_status_t ibt_destroy_fmr_pool(ibt_hca_hdl_t hca_hdl, + ibt_fmr_pool_hdl_t fmr_pool); + +ibt_status_t ibt_flush_fmr_pool(ibt_hca_hdl_t hca_hdl, + ibt_fmr_pool_hdl_t fmr_pool); + +ibt_status_t ibt_register_physical_fmr(ibt_hca_hdl_t hca_hdl, + ibt_fmr_pool_hdl_t fmr_pool, ibt_pmr_attr_t *mem_pattr, + ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p); + +ibt_status_t ibt_deregister_fmr(ibt_hca_hdl_t hca, ibt_mr_hdl_t mr_hdl); + +/* + * IP SUPPORT + */ + +/* + * IP get_paths + * Returns an array (or single) of paths and source IP addresses. In the + * simplest form just the destination IP address is specified, and one path + * is requested, then one ibt_path_info_t struct and one source IP. 
+ * + * More than one path can be requested to a single destination, in which case + * the requested number of ibt_path_info_t's are returned, and the same + * number of SRC IP addresses, with the first SRC IP address corresponding + * to the first ibt_path_info_t, etc. + * + * Restrictions on the source end point can be specified, in the form of a + * source IP address (this implicitly defines the HCA, HCA port and Pkey), + * HCA, HCA port, and sgid (implicitly defines HCA and HCA port). + * Combinations are allowed but they must be consistent. + * + * Path attributes can also be specified, these can also affect local HCA + * selection. + * + * ibt_get_ip_paths() internally does (among other things): + * + * o ibt_get_list_of_ibd_ipaddr_and_macaddr( OUT list_ipaddr_macaddr) + * + * o extract_pkey_and_sgid(IN list_ipaddr_macaddr, OUT list_pkey_and_sgid) + * + * o map_dst_ip_addr(IN dst_ip_addr, OUT dst_pkey, OUT dgid) - See Note + * + * o filter_by_pkey(IN list_pkey_and_sgid, IN dst_pkey, OUT list_of_sgid) + * + * o do_multipath_query(IN list_of_sgid, IN dst_pkey, IN dgid, OUT path_list) + * + * o pick_a_good_path(IN path_list, OUT the_path) + * + * o find_matching_src_ip(IN the_path, IN list_ipaddr_macaddr, OUT src_ip) + * + * The ibd instance which got the ARP response is only on one P_Key; + * knowing the ibd instance (or which IPonIB MCG) got the ARP response + * determines the P_Key associated with a dgid. If the proposed "ip2mac()" + * API is used to get an IP to GID translation, then the returned 'sockaddr_dl' + * contains the interface name and index. 
+ * + * + * Example: + * ip_path_attr.ipa_dst_ip = dst_ip_addr; + * ip_path_attr.ipa_ndst = 1; + * ip_path_attr.ipa_max_paths = 1; + * + * status = ibt_get_ip_paths(clnt_hdl, flags, &ip_path_attr, &paths, + * &num_paths_p, &src_ip); + * + * sid = ibt_get_ip_sid(protocol_num, dst_port); + * path_info->sid = sid; + * + * ip_cm_info.src_addr = src_ip; + * ip_cm_info.dst_addr = dst_ip_addr; + * ip_cm_info.src_port = src_port + * + * ibt_format_ip_private_data(ip_cm_info, priv_data_len, &priv_data); + * ibt_open_rc_channel(chan, private_data); + */ +typedef struct ibt_ip_path_attr_s { + ibt_ip_addr_t *ipa_dst_ip; /* Required */ + ibt_ip_addr_t ipa_src_ip; /* Optional */ + ib_guid_t ipa_hca_guid; /* Optional */ + uint8_t ipa_hca_port_num; /* Optional */ + uint8_t ipa_max_paths; /* Required */ + uint8_t ipa_ndst; /* Required */ + uint8_t ipa_sl:4; /* Optional */ + ibt_mtu_req_t ipa_mtu; /* Optional */ + ibt_srate_req_t ipa_srate; /* Optional */ + ibt_pkt_lt_req_t ipa_pkt_lt; /* Optional */ + uint_t ipa_flow:20; /* Optional */ + uint8_t ipa_hop; /* Optional */ + uint8_t ipa_tclass; /* Optional */ +} ibt_ip_path_attr_t; + +/* + * Path SRC IP addresses + */ +typedef struct ibt_path_ip_src_s { + ibt_ip_addr_t ip_primary; + ibt_ip_addr_t ip_alternate; +} ibt_path_ip_src_t; + + +ibt_status_t ibt_get_ip_paths(ibt_clnt_hdl_t ibt_hdl, ibt_path_flags_t flags, + ibt_ip_path_attr_t *attr, ibt_path_info_t *paths_p, uint8_t *num_paths_p, + ibt_path_ip_src_t *src_ip_p); + +ibt_status_t ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, + ibt_ip_addr_t *src_ip); + +/* + * Callback function that can be used in ibt_aget_ip_paths(), a Non-Blocking + * version of ibt_get_ip_paths(). + */ +typedef void (*ibt_ip_path_handler_t)(void *arg, ibt_status_t retval, + ibt_path_info_t *paths_p, uint8_t num_paths, ibt_path_ip_src_t *src_ip_p); + +/* + * Find path(s) to a given destination or service asynchronously. + * ibt_aget_ip_paths() is a Non-Blocking version of ibt_get_ip_paths(). 
+ */ +ibt_status_t ibt_aget_ip_paths(ibt_clnt_hdl_t ibt_hdl, ibt_path_flags_t flags, + ibt_ip_path_attr_t *attr, ibt_ip_path_handler_t func, void *arg); + +/* + * IP RDMA protocol functions + */ + +/* + * IBTF manages the port number space for non well known ports. If a ULP + * is not using TCP/UDP and a well known port, then ibt_get_ip_sid() returns + * an sid based on the IP protocol number '0' (reserved) and an IBTF assigned + * port number. ibt_release_ip_sid() should be used to release the hold + * of SID created by ibt_get_ip_sid(). + */ +ib_svc_id_t ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port); +ibt_status_t ibt_release_ip_sid(ib_svc_id_t sid); + +uint8_t ibt_get_ip_protocol_num(ib_svc_id_t sid); +in_port_t ibt_get_ip_dst_port(ib_svc_id_t sid); + +/* + * Functions to format/extract the RDMA IP CM private data + */ +typedef struct ibt_ip_cm_info_s { + ibt_ip_addr_t src_addr; + ibt_ip_addr_t dst_addr; + in_port_t src_port; +} ibt_ip_cm_info_t; + +/* + * If a ULP is using IP addressing as defined by the RDMA IP CM Service IBTA + * Annex 11, then it must always allocate a private data buffer for use in + * the ibt_open_rc_channel(9F) call. The minimum size of the buffer is + * IBT_IP_HDR_PRIV_DATA_SZ, if the ULP has no ULP specific private data. + * This allows ibt_format_ip_private_data() to place the RDMA IP CM service + * hello message in the private data of the REQ. If the ULP has some ULP + * specific private data then it should allocate a buffer big enough to + * contain that data plus an additional IBT_IP_HDR_PRIV_DATA_SZ bytes. + * The ULP should place its ULP specific private data at offset + * IBT_IP_HDR_PRIV_DATA_SZ in the allocated buffer before calling + * ibt_format_ip_private_data(). 
+ */ +ibt_status_t ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info, + ibt_priv_data_len_t priv_data_len, void *priv_data_p); +ibt_status_t ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, + void *priv_data, ibt_ip_cm_info_t *ip_info_p); + +/* + * The ibt_alt_ip_path_attr_t structure is used to specify additional optional + * attributes when requesting an alternate path for an existing channel. + * + * Attributes that are don't care should be set to NULL or '0'. + */ +typedef struct ibt_alt_ip_path_attr_s { + ibt_ip_addr_t apa_dst_ip; + ibt_ip_addr_t apa_src_ip; + ibt_srate_req_t apa_srate; + ibt_pkt_lt_req_t apa_pkt_lt; /* Packet Life Time Request */ + uint_t apa_flow:20; + uint8_t apa_sl:4; + uint8_t apa_hop; + uint8_t apa_tclass; +} ibt_alt_ip_path_attr_t; + +ibt_status_t ibt_get_ip_alt_path(ibt_channel_hdl_t rc_chan, + ibt_path_flags_t flags, ibt_alt_ip_path_attr_t *attr, + ibt_alt_path_info_t *alt_path); /* * CONTRACT PRIVATE ONLY INTERFACES @@ -1680,57 +1903,6 @@ ibt_status_t ibt_get_port_state(ibt_hca_hdl_t hca_hdl, uint8_t port, ibt_status_t ibt_get_port_state_byguid(ib_guid_t hca_guid, uint8_t port, ib_gid_t *sgid_p, ib_lid_t *base_lid_p); - -/* - * Fast Memory Registration (FMR). - * - * ibt_create_fmr_pool - * Not fast-path. - * ibt_create_fmr_pool() verifies that the HCA supports FMR and allocates - * and initializes an "FMR pool". This pool contains state specific to - * this registration, including the watermark setting to determine when - * to sync, and the total number of FMR regions available within this pool. - * - * ibt_destroy_fmr_pool - * ibt_destroy_fmr_pool() deallocates all of the FMR regions in a specific - * pool. All state and information regarding the pool are destroyed and - * returned as free space once again. No more use of FMR regions in this - * pool are possible without a subsequent call to ibt_create_fmr_pool(). - * - * ibt_flush_fmr_pool - * ibt_flush_fmr_pool forces a flush to occur. 
At the client's request, - * any unmapped FMR regions (See 'ibt_deregister_mr())') are returned to - * a free state. This function allows for an asynchronous cleanup of - * formerly used FMR regions. Sync operation is also performed internally - * by HCA driver, when 'watermark' settings for the number of free FMR - * regions left in the "pool" is reached. - * - * ibt_register_physical_fmr - * ibt_register_physical_fmr() assigns a "free" entry from the FMR Pool. - * It first consults the "FMR cache" to see if this is a duplicate memory - * registration to something already in use. If not, then a free entry - * in the "pool" is marked used. - * - * ibt_deregister_fmr - * The ibt_deregister_fmr un-maps the resources reserved from the FMR - * pool by ibt_register_physical_fmr(). The ibt_deregister_fmr() will - * mark the region as free in the FMR Pool. - */ -ibt_status_t ibt_create_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd, - ibt_fmr_pool_attr_t *fmr_params, ibt_fmr_pool_hdl_t *fmr_pool_p); - -ibt_status_t ibt_destroy_fmr_pool(ibt_hca_hdl_t hca_hdl, - ibt_fmr_pool_hdl_t fmr_pool); - -ibt_status_t ibt_flush_fmr_pool(ibt_hca_hdl_t hca_hdl, - ibt_fmr_pool_hdl_t fmr_pool); - -ibt_status_t ibt_register_physical_fmr(ibt_hca_hdl_t hca_hdl, - ibt_fmr_pool_hdl_t fmr_pool, ibt_pmr_attr_t *mem_pattr, - ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p); - -ibt_status_t ibt_deregister_fmr(ibt_hca_hdl_t hca, ibt_mr_hdl_t mr_hdl); - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h index 41c7396254..fe7ba92c11 100644 --- a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -123,6 +122,8 @@ typedef enum ibt_status_e { /* what should be a unique */ /* query result. One of the */ /* records was returned. */ + IBT_DEST_IP_GID_NOT_FOUND = 25, /* No IP to GID Mapping */ + IBT_SRC_IP_NOT_FOUND = 26, /* SRC IP Endpoint not found */ /* * Resource Errors diff --git a/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h b/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h index 0dd90c5c67..0e786a5ae4 100644 --- a/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h @@ -39,6 +39,8 @@ #include <sys/sunddi.h> #include <sys/ib/ib_types.h> #include <sys/ib/ibtl/ibtl_status.h> +#include <sys/socket.h> + #ifdef __cplusplus extern "C" { @@ -1340,7 +1342,18 @@ typedef enum ibt_failure_type_e { IBT_FAILURE_IBSM } ibt_failure_type_t; -#ifdef __cplusplus +/* + * RDMA IP CM service Annex definitions + */ +typedef struct ibt_ip_addr_s { + sa_family_t family; /* AF_INET or AF_INET6 */ + union { + in_addr_t ip4addr; + in6_addr_t ip6addr; + } un; +} ibt_ip_addr_t; + +#ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_cm.h b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_cm.h index ff8fa14b45..40d4b98602 100644 --- a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_cm.h +++ b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_cm.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -203,11 +202,11 @@ typedef struct ibtl_cm_port_list_s { uint8_t p_count; uint8_t p_multi; void *p_saa_hdl; + ibt_ip_addr_t p_src_ip; } ibtl_cm_port_list_t; ibt_status_t ibtl_cm_get_active_plist(ibt_path_attr_t *attr, ibt_path_flags_t flags, ibtl_cm_port_list_t **port_list_p); - void ibtl_cm_free_active_plist(ibtl_cm_port_list_t *port_list); /* diff --git a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h new file mode 100644 index 0000000000..cc1b85bd7b --- /dev/null +++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_IB_MGT_IBCM_IBCM_ARP_H +#define _SYS_IB_MGT_IBCM_IBCM_ARP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + + +#include <sys/ib/mgt/ibcm/ibcm_impl.h> +#include <sys/modhash.h> +#include <sys/ib/clients/ibd/ibd.h> +#include <sys/strsun.h> +#include <sys/strsubr.h> +#include <sys/socket.h> +#include <sys/stat.h> /* for S_IFCHR */ +#include <inet/common.h> +#include <inet/ip.h> +#include <inet/ip_if.h> +#include <inet/ip_ire.h> +#include <inet/ip_rts.h> +#include <sys/dlpi.h> +#include <net/route.h> + +/* + * IPoIB addr lookup completion function + */ +typedef int (*ibcm_arp_pr_comp_func_t) (void *usr_arg, int status); + +#define IBCM_ARP_MAX_IFNAME_LEN 24 +#define IBCM_ARP_RTM_LEN 0x158 +#define IBCM_ARP_XMIT_COUNT 6 +#define IBCM_ARP_XMIT_INTERVAL 1000 /* timeout in milliseconds */ +#define IBCM_ARP_TIMEOUT \ + ((IBCM_ARP_XMIT_COUNT + 1) * IBCM_ARP_XMIT_INTERVAL) +#define IBCM_ARP_IP6_TIMEOUT 1000000 /* timeout in microseconds */ + +enum { + IBCM_ARP_PR_RT_PENDING = 0x01, + IBCM_ARP_PR_ARP_PENDING = 0x02 +}; + +/* + * Path record wait queue node definition + */ +typedef struct ibcm_arp_prwqn { + ibcm_arp_pr_comp_func_t func; /* user callback function */ + void *arg; /* callback function arg */ + timeout_id_t timeout_id; + uint8_t flags; + ibt_ip_addr_t usrc_addr; /* user supplied src address */ + ibt_ip_addr_t dst_addr; /* user supplied dest address */ + ibt_ip_addr_t src_addr; /* rts's view of src address */ + char ifname[IBCM_ARP_MAX_IFNAME_LEN]; + int ibd_instance; + uint16_t ifproto; + ipoib_mac_t src_mac; + ipoib_mac_t dst_mac; + uint32_t localroute; /* user option */ 
+ uint32_t bound_dev_if; /* user option */ + ib_gid_t sgid; + ib_gid_t dgid; + uint8_t hw_port; + uint16_t pkey; + int retries; /* no. of ND retries for ipv6 */ +} ibcm_arp_prwqn_t; + +typedef struct ibcm_arp_streams_s { + kmutex_t lock; + kcondvar_t cv; + queue_t *arpqueue; + vnode_t *arp_vp; + int status; + ibcm_arp_prwqn_t *wqnp; +} ibcm_arp_streams_t; + +/* GID to IP-Addr and Ip-Addr to GID look-up functions. */ + +#define IBCM_ARP_IBD_NAME "ibd" +#define IBCM_ARP_IBD_INSTANCES 4 + +typedef struct ibcm_arp_ip_s { + uint8_t ip_inst; + ib_pkey_t ip_pkey; + ib_guid_t ip_hca_guid; + ib_gid_t ip_port_gid; + sa_family_t ip_inet_family; + union { + struct sockaddr_in ip_sockaddr; + struct sockaddr_in6 ip_sockaddr6; + } ip_sin; +#define ip_cm_sin ip_sin.ip_sockaddr +#define ip_cm_sin6 ip_sin.ip_sockaddr6 +} ibcm_arp_ip_t; + +typedef struct ibcm_arp_ibd_insts_s { + uint8_t ibcm_arp_ibd_alloc; + uint8_t ibcm_arp_ibd_cnt; + ibcm_arp_ip_t *ibcm_arp_ip; +} ibcm_arp_ibd_insts_t; + +ibt_status_t ibcm_arp_get_ibaddr(ipaddr_t srcip, ipaddr_t destip, + ib_gid_t *sgid, ib_gid_t *dgid); +ibt_status_t ibcm_arp_get_srcip_plist(ibt_ip_path_attr_t *attr, + ibt_path_flags_t flags, ibtl_cm_port_list_t **list_p); +ibt_status_t ibcm_arp_get_ibds(ibcm_arp_ibd_insts_t *ibdp); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_IB_MGT_IBCM_IBCM_ARP_H */ diff --git a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h index 0ac8454fcb..832d457569 100644 --- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h +++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -99,6 +99,7 @@ #include <sys/ib/ibtl/impl/ibtl_util.h> #include <sys/ib/mgt/ibmf/ibmf.h> #include <sys/ib/mgt/ibcm/ibcm_trace.h> +#include <inet/ip.h> #ifdef __cplusplus extern "C" { @@ -225,6 +226,7 @@ typedef enum ibcm_event_type_e { #define IBCM_MAX_COMIDS (0x01000000 - 2) #define IBCM_MAX_REQIDS 0xFFFFFFFF #define IBCM_MAX_LOCAL_SIDS 0xFFFFFFFF +#define IBCM_MAX_IP_SIDS 0xFFFF typedef uint32_t ib_com_id_t; /* CM Communication ID */ @@ -1878,6 +1880,9 @@ void ibcm_free_reqid(ibcm_hca_info_t *hcap, uint32_t reqid); ib_svc_id_t ibcm_alloc_local_sids(int num_sids); void ibcm_free_local_sids(ib_svc_id_t service_id, int num_sids); +ib_svc_id_t ibcm_alloc_ip_sid(); +void ibcm_free_ip_sid(ib_svc_id_t sid); + uint64_t ibcm_generate_tranid(uint8_t event, uint32_t id, uint32_t cm_tran_priv); @@ -2106,6 +2111,38 @@ _NOTE(READ_ONLY_DATA(ibcm_local_processing_time ibcm_remote_response_time #define IBCM_CPINFO_CAP_UC 0x1000 /* UC supported */ #define IBCM_CPINFO_CAP_SIDR 0x2000 /* SIDR supported */ +#define IBCM_V4_PART_OF_V6(v6) v6.s6_addr32[3] +/* RDMA CM IP Service's Private Data Format. */ +#ifdef _BIG_ENDIAN +typedef struct ibcm_ip_pvtdata_s { + uint8_t ip_MajV:4, + ip_MinV:4; + uint8_t ip_ipv:4, + ip_rsvd:4; /* 0-3: rsvd, 4-7: ipv */ + uint16_t ip_srcport; /* Source Port */ + in6_addr_t ip_srcip; /* Source IP address. */ + in6_addr_t ip_dstip; /* Remote IP address. */ +#define ip_srcv4 IBCM_V4_PART_OF_V6(ip_srcip) +#define ip_dstv4 IBCM_V4_PART_OF_V6(ip_dstip) +#define ip_srcv6 ip_srcip +#define ip_dstv6 ip_dstip +} ibcm_ip_pvtdata_t; +#else +typedef struct ibcm_ip_pvtdata_s { + uint8_t ip_MinV:4, + ip_MajV:4; + uint8_t ip_rsvd:4, + ip_ipv:4; /* 0-3: rsvd, 4-7: ipv */ + uint16_t ip_srcport; /* Source Port */ + in6_addr_t ip_srcip; /* Source IP address. */ + in6_addr_t ip_dstip; /* Remote IP address. 
*/ +#define ip_srcv4 IBCM_V4_PART_OF_V6(ip_srcip) +#define ip_dstv4 IBCM_V4_PART_OF_V6(ip_dstip) +#define ip_srcv6 ip_srcip +#define ip_dstv6 ip_dstip +} ibcm_ip_pvtdata_t; +#endif + /* * for debug purposes */ diff --git a/usr/src/uts/intel/ibcm/Makefile b/usr/src/uts/intel/ibcm/Makefile index e84ff874d5..013cd7637d 100644 --- a/usr/src/uts/intel/ibcm/Makefile +++ b/usr/src/uts/intel/ibcm/Makefile @@ -70,7 +70,7 @@ CFLAGS += $(CCVERBOSE) # # depends on misc/ibtl and misc/ibmf # -LDFLAGS += -dy -Nmisc/ibtl -Nmisc/ibmf +LDFLAGS += -dy -Nmisc/ibtl -Nmisc/ibmf -Ndrv/ip # # For now, disable these lint checks; maintainers should endeavor diff --git a/usr/src/uts/sparc/ibcm/Makefile b/usr/src/uts/sparc/ibcm/Makefile index 615e08985c..62e2fb4a3a 100644 --- a/usr/src/uts/sparc/ibcm/Makefile +++ b/usr/src/uts/sparc/ibcm/Makefile @@ -72,7 +72,7 @@ CFLAGS += $(CCVERBOSE) # # depends on misc/ibtl and misc/ibmf # -LDFLAGS += -dy -Nmisc/ibtl -Nmisc/ibmf +LDFLAGS += -dy -Nmisc/ibtl -Nmisc/ibmf -Ndrv/ip # # For now, disable these lint checks; maintainers should endeavor |