summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/rpc/rpcib.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/rpc/rpcib.c')
-rw-r--r--usr/src/uts/common/rpc/rpcib.c392
1 files changed, 141 insertions, 251 deletions
diff --git a/usr/src/uts/common/rpc/rpcib.c b/usr/src/uts/common/rpc/rpcib.c
index d0edb2e8f0..aba7803131 100644
--- a/usr/src/uts/common/rpc/rpcib.c
+++ b/usr/src/uts/common/rpc/rpcib.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -56,7 +56,6 @@
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
-#include <sys/systm.h>
#include <sys/pathname.h>
#include <sys/kstat.h>
#include <sys/t_lock.h>
@@ -67,47 +66,43 @@
#include <sys/callb.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
-#include <sys/sunldi.h>
#include <sys/sdt.h>
-#include <sys/dlpi.h>
#include <sys/ib/ibtl/ibti.h>
#include <rpc/rpc.h>
#include <rpc/ib.h>
-
#include <sys/modctl.h>
-
-#include <sys/pathname.h>
#include <sys/kstr.h>
#include <sys/sockio.h>
#include <sys/vnode.h>
#include <sys/tiuser.h>
#include <net/if.h>
+#include <net/if_types.h>
#include <sys/cred.h>
#include <rpc/rpc_rdma.h>
-
#include <nfs/nfs.h>
-#include <sys/kstat.h>
#include <sys/atomic.h>
#define NFS_RDMA_PORT 2050
-extern char *inet_ntop(int, const void *, char *, int);
-
+/*
+ * Convenience structure used by rpcib_get_ib_addresses()
+ */
+typedef struct rpcib_ipaddrs {
+ void *ri_list; /* pointer to list of addresses */
+ uint_t ri_count; /* number of addresses in list */
+ uint_t ri_size; /* size of ri_list in bytes */
+} rpcib_ipaddrs_t;
/*
* Prototype declarations for driver ops
*/
-
static int rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
static int rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
void *, void **);
static int rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
-static int rpcib_is_ib_interface(char *);
-static int rpcib_dl_info(ldi_handle_t, dl_info_ack_t *);
-static int rpcib_do_ip_ioctl(int, int, caddr_t);
-static boolean_t rpcib_get_ib_addresses(struct sockaddr_in *,
- struct sockaddr_in6 *, uint_t *, uint_t *);
-static uint_t rpcib_get_number_interfaces(void);
+static boolean_t rpcib_rdma_capable_interface(struct lifreq *);
+static int rpcib_do_ip_ioctl(int, int, void *);
+static boolean_t rpcib_get_ib_addresses(rpcib_ipaddrs_t *, rpcib_ipaddrs_t *);
static int rpcib_cache_kstat_update(kstat_t *, int);
static void rib_force_cleanup(void *);
@@ -147,9 +142,6 @@ static struct cb_ops rpcib_cbops = {
nodev /* int (*cb_awrite)() */
};
-
-
-
/*
* Device options
*/
@@ -205,8 +197,7 @@ typedef struct cache_struct {
avl_node_t avl_link;
} cache_avl_struct_t;
-
-static uint64_t rib_total_buffers = 0;
+static uint64_t rib_total_buffers = 0;
uint64_t cache_limit = 100 * 1024 * 1024;
static volatile uint64_t cache_allocation = 0;
static uint64_t cache_watermark = 80 * 1024 * 1024;
@@ -409,12 +400,10 @@ rpcib_t rpcib;
*/
int rib_debug = 0;
-
int
_init(void)
{
- int error;
- int ret;
+ int error;
error = mod_install((struct modlinkage *)&rib_modlinkage);
if (error != 0) {
@@ -423,11 +412,7 @@ _init(void)
*/
return (error);
}
- ret = ldi_ident_from_mod(&rib_modlinkage, &rpcib_li);
- if (ret != 0)
- rpcib_li = NULL;
mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
-
return (0);
}
@@ -448,7 +433,6 @@ _fini()
return (status);
}
mutex_destroy(&plugin_state_lock);
- ldi_ident_release(rpcib_li);
return (0);
}
@@ -458,7 +442,6 @@ _info(struct modinfo *modinfop)
return (mod_info(&rib_modlinkage, modinfop));
}
-
/*
* rpcib_getinfo()
* Given the device number, return the devinfo pointer or the
@@ -1822,124 +1805,100 @@ refresh:
rdma_stat
rib_ping_srv(int addr_type, struct netbuf *raddr, rib_hca_t **hca)
{
- struct sockaddr_in *sin4, *sin4arr;
- struct sockaddr_in6 *sin6, *sin6arr;
- uint_t nif, nif4, nif6, i;
+ uint_t i;
ibt_path_info_t path;
ibt_status_t ibt_status;
uint8_t num_paths_p;
ibt_ip_path_attr_t ipattr;
ibt_ip_addr_t dstip;
ibt_path_ip_src_t srcip;
-
+ rpcib_ipaddrs_t addrs4;
+ rpcib_ipaddrs_t addrs6;
+ struct sockaddr_in *sinp;
+ struct sockaddr_in6 *sin6p;
+ rdma_stat retval = RDMA_SUCCESS;
*hca = NULL;
-
ASSERT(raddr->buf != NULL);
bzero(&path, sizeof (ibt_path_info_t));
bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
bzero(&srcip, sizeof (ibt_path_ip_src_t));
- /* Obtain the source IP addresses for the system */
- nif = rpcib_get_number_interfaces();
- sin4arr = (struct sockaddr_in *)
- kmem_zalloc(sizeof (struct sockaddr_in) * nif, KM_SLEEP);
- sin6arr = (struct sockaddr_in6 *)
- kmem_zalloc(sizeof (struct sockaddr_in6) * nif, KM_SLEEP);
-
- (void) rpcib_get_ib_addresses(sin4arr, sin6arr, &nif4, &nif6);
-
- /* Are there really any IB interfaces available */
- if (nif4 == 0 && nif6 == 0) {
- kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif);
- kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif);
- return (RDMA_FAILED);
+ if (!rpcib_get_ib_addresses(&addrs4, &addrs6) ||
+ (addrs4.ri_count == 0 && addrs6.ri_count == 0)) {
+ retval = RDMA_FAILED;
+ goto done;
}
/* Prep the destination address */
switch (addr_type) {
case AF_INET:
- sin4 = (struct sockaddr_in *)raddr->buf;
+ sinp = (struct sockaddr_in *)raddr->buf;
dstip.family = AF_INET;
- dstip.un.ip4addr = sin4->sin_addr.s_addr;
+ dstip.un.ip4addr = sinp->sin_addr.s_addr;
+ sinp = addrs4.ri_list;
- for (i = 0; i < nif4; i++) {
+ for (i = 0; i < addrs4.ri_count; i++) {
num_paths_p = 0;
ipattr.ipa_dst_ip = &dstip;
ipattr.ipa_hca_guid = rib_stat->hca->hca_guid;
ipattr.ipa_ndst = 1;
ipattr.ipa_max_paths = 1;
ipattr.ipa_src_ip.family = dstip.family;
- ipattr.ipa_src_ip.un.ip4addr =
- sin4arr[i].sin_addr.s_addr;
+ ipattr.ipa_src_ip.un.ip4addr = sinp[i].sin_addr.s_addr;
ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
- IBT_PATH_NO_FLAGS,
- &ipattr,
- &path,
- &num_paths_p,
+ IBT_PATH_NO_FLAGS, &ipattr, &path, &num_paths_p,
&srcip);
if (ibt_status == IBT_SUCCESS &&
num_paths_p != 0 &&
path.pi_hca_guid == rib_stat->hca->hca_guid) {
*hca = rib_stat->hca;
-
- kmem_free(sin4arr,
- sizeof (struct sockaddr_in) * nif);
- kmem_free(sin6arr,
- sizeof (struct sockaddr_in6) * nif);
-
- return (RDMA_SUCCESS);
+ goto done;
}
}
+ retval = RDMA_FAILED;
break;
case AF_INET6:
- sin6 = (struct sockaddr_in6 *)raddr->buf;
+ sin6p = (struct sockaddr_in6 *)raddr->buf;
dstip.family = AF_INET6;
- dstip.un.ip6addr = sin6->sin6_addr;
+ dstip.un.ip6addr = sin6p->sin6_addr;
+ sin6p = addrs6.ri_list;
- for (i = 0; i < nif6; i++) {
+ for (i = 0; i < addrs6.ri_count; i++) {
num_paths_p = 0;
ipattr.ipa_dst_ip = &dstip;
ipattr.ipa_hca_guid = rib_stat->hca->hca_guid;
ipattr.ipa_ndst = 1;
ipattr.ipa_max_paths = 1;
ipattr.ipa_src_ip.family = dstip.family;
- ipattr.ipa_src_ip.un.ip6addr = sin6arr[i].sin6_addr;
+ ipattr.ipa_src_ip.un.ip6addr = sin6p[i].sin6_addr;
ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
- IBT_PATH_NO_FLAGS,
- &ipattr,
- &path,
- &num_paths_p,
+ IBT_PATH_NO_FLAGS, &ipattr, &path, &num_paths_p,
&srcip);
if (ibt_status == IBT_SUCCESS &&
num_paths_p != 0 &&
path.pi_hca_guid == rib_stat->hca->hca_guid) {
*hca = rib_stat->hca;
-
- kmem_free(sin4arr,
- sizeof (struct sockaddr_in) * nif);
- kmem_free(sin6arr,
- sizeof (struct sockaddr_in6) * nif);
-
- return (RDMA_SUCCESS);
+ goto done;
}
}
-
+ retval = RDMA_FAILED;
break;
default:
- kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif);
- kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif);
- return (RDMA_INVAL);
+ retval = RDMA_INVAL;
+ break;
}
-
- kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif);
- kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif);
- return (RDMA_FAILED);
+done:
+ if (addrs4.ri_size > 0)
+ kmem_free(addrs4.ri_list, addrs4.ri_size);
+ if (addrs6.ri_size > 0)
+ kmem_free(addrs6.ri_list, addrs6.ri_size);
+ return (retval);
}
/*
@@ -4668,123 +4627,31 @@ rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
return (RDMA_SUCCESS);
}
-
/*
- * Return 0 if the interface is IB.
- * Return error (>0) if any error is encountered during processing.
- * Return -1 if the interface is not IB and no error.
+ * Check if the IP interface named by `lifrp' is RDMA-capable.
*/
-#define isalpha(ch) (((ch) >= 'a' && (ch) <= 'z') || \
- ((ch) >= 'A' && (ch) <= 'Z'))
-static int
-rpcib_is_ib_interface(char *name)
+static boolean_t
+rpcib_rdma_capable_interface(struct lifreq *lifrp)
{
+ char ifname[LIFNAMSIZ];
+ char *cp;
- char dev_path[MAXPATHLEN];
- char devname[MAXNAMELEN];
- ldi_handle_t lh;
- dl_info_ack_t info;
- int ret = 0;
- int i;
+ if (lifrp->lifr_type == IFT_IB)
+ return (B_TRUE);
/*
- * ibd devices are only style 2 devices
- * so we will open only style 2 devices
- * by ignoring the ppa
+ * Strip off the logical interface portion before getting
+ * intimate with the name.
*/
+ (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
+ if ((cp = strchr(ifname, ':')) != NULL)
+ *cp = '\0';
- i = strlen(name) - 1;
- while ((i >= 0) && (!isalpha(name[i]))) i--;
-
- if (i < 0) {
- /* Invalid interface name, no alphabet */
- return (-1);
- }
-
- (void) strncpy(devname, name, i + 1);
- devname[i + 1] = '\0';
-
- if (strcmp("lo", devname) == 0) {
- /*
- * loopback interface not rpc/rdma capable
- */
- return (-1);
- }
-
- (void) strncpy(dev_path, "/dev/", MAXPATHLEN);
- if (strlcat(dev_path, devname, MAXPATHLEN) >= MAXPATHLEN) {
- /* string overflow */
- return (-1);
- }
-
- ret = ldi_open_by_name(dev_path, FREAD|FWRITE, kcred, &lh, rpcib_li);
- if (ret != 0) {
- return (ret);
- }
- ret = rpcib_dl_info(lh, &info);
- (void) ldi_close(lh, FREAD|FWRITE, kcred);
- if (ret != 0) {
- return (ret);
- }
-
- if (info.dl_mac_type != DL_IB) {
- return (-1);
- }
-
- return (0);
+ return (strcmp("lo0", ifname) == 0);
}
static int
-rpcib_dl_info(ldi_handle_t lh, dl_info_ack_t *info)
-{
- dl_info_req_t *info_req;
- union DL_primitives *dl_prim;
- mblk_t *mp;
- k_sigset_t smask;
- int error;
-
- if ((mp = allocb(sizeof (dl_info_req_t), BPRI_MED)) == NULL) {
- return (ENOMEM);
- }
-
- mp->b_datap->db_type = M_PROTO;
-
- info_req = (dl_info_req_t *)(uintptr_t)mp->b_wptr;
- mp->b_wptr += sizeof (dl_info_req_t);
- info_req->dl_primitive = DL_INFO_REQ;
-
- sigintr(&smask, 0);
- if ((error = ldi_putmsg(lh, mp)) != 0) {
- sigunintr(&smask);
- return (error);
- }
- if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) {
- sigunintr(&smask);
- return (error);
- }
- sigunintr(&smask);
-
- dl_prim = (union DL_primitives *)(uintptr_t)mp->b_rptr;
- switch (dl_prim->dl_primitive) {
- case DL_INFO_ACK:
- if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) <
- sizeof (dl_info_ack_t)) {
- error = -1;
- } else {
- *info = *(dl_info_ack_t *)(uintptr_t)mp->b_rptr;
- error = 0;
- }
- break;
- default:
- error = -1;
- break;
- }
-
- freemsg(mp);
- return (error);
-}
-static int
-rpcib_do_ip_ioctl(int cmd, int len, caddr_t arg)
+rpcib_do_ip_ioctl(int cmd, int len, void *arg)
{
vnode_t *kvp, *vp;
TIUSER *tiptr;
@@ -4792,23 +4659,22 @@ rpcib_do_ip_ioctl(int cmd, int len, caddr_t arg)
k_sigset_t smask;
int err = 0;
- if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP,
- &kvp) == 0) {
- if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE,
+ if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kvp) == 0) {
+ if (t_kopen(NULL, kvp->v_rdev, FREAD|FWRITE,
&tiptr, CRED()) == 0) {
- vp = tiptr->fp->f_vnode;
- } else {
- VN_RELE(kvp);
- return (EPROTO);
+ vp = tiptr->fp->f_vnode;
+ } else {
+ VN_RELE(kvp);
+ return (EPROTO);
}
} else {
- return (EPROTO);
+ return (EPROTO);
}
iocb.ic_cmd = cmd;
iocb.ic_timout = 0;
iocb.ic_len = len;
- iocb.ic_dp = arg;
+ iocb.ic_dp = (caddr_t)arg;
sigintr(&smask, 0);
err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
sigunintr(&smask);
@@ -4817,65 +4683,89 @@ rpcib_do_ip_ioctl(int cmd, int len, caddr_t arg)
return (err);
}
-static uint_t rpcib_get_number_interfaces(void) {
-uint_t numifs;
- if (rpcib_do_ip_ioctl(SIOCGIFNUM, sizeof (uint_t), (caddr_t)&numifs)) {
- return (0);
+/*
+ * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'.
+ * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes.
+ */
+static int
+rpcib_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep)
+{
+ int err;
+ struct lifnum lifn;
+
+ bzero(&lifn, sizeof (struct lifnum));
+ lifn.lifn_family = AF_UNSPEC;
+
+ err = rpcib_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn);
+ if (err != 0)
+ return (err);
+
+ /*
+ * Pad the interface count to account for additional interfaces that
+ * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF.
+ */
+ lifn.lifn_count += 4;
+
+ bzero(lifcp, sizeof (struct lifconf));
+ lifcp->lifc_family = AF_UNSPEC;
+ lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq);
+ lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
+
+ err = rpcib_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp);
+ if (err != 0) {
+ kmem_free(lifcp->lifc_buf, *bufsizep);
+ return (err);
}
- return (numifs);
+ return (0);
}
static boolean_t
-rpcib_get_ib_addresses(
- struct sockaddr_in *saddr4,
- struct sockaddr_in6 *saddr6,
- uint_t *number4,
- uint_t *number6)
+rpcib_get_ib_addresses(rpcib_ipaddrs_t *addrs4, rpcib_ipaddrs_t *addrs6)
{
- int numifs;
- struct ifconf kifc;
- struct ifreq *ifr;
- boolean_t ret = B_FALSE;
+ uint_t i, nifs;
+ uint_t bufsize;
+ struct lifconf lifc;
+ struct lifreq *lifrp;
+ struct sockaddr_in *sinp;
+ struct sockaddr_in6 *sin6p;
- *number4 = 0;
- *number6 = 0;
+ bzero(addrs4, sizeof (rpcib_ipaddrs_t));
+ bzero(addrs6, sizeof (rpcib_ipaddrs_t));
- if (rpcib_do_ip_ioctl(SIOCGIFNUM, sizeof (int), (caddr_t)&numifs)) {
- return (ret);
+ if (rpcib_do_lifconf(&lifc, &bufsize) != 0)
+ return (B_FALSE);
+
+ if ((nifs = lifc.lifc_len / sizeof (struct lifreq)) == 0) {
+ kmem_free(lifc.lifc_buf, bufsize);
+ return (B_FALSE);
}
- kifc.ifc_len = numifs * sizeof (struct ifreq);
- kifc.ifc_buf = kmem_zalloc(kifc.ifc_len, KM_SLEEP);
+ /*
+ * Worst case is that all of the addresses are IB-capable and have
+ * the same address family, so size our buffers accordingly.
+ */
+ addrs4->ri_size = nifs * sizeof (struct sockaddr_in);
+ addrs4->ri_list = kmem_zalloc(addrs4->ri_size, KM_SLEEP);
+ addrs6->ri_size = nifs * sizeof (struct sockaddr_in6);
+ addrs6->ri_list = kmem_zalloc(addrs6->ri_size, KM_SLEEP);
- if (rpcib_do_ip_ioctl(SIOCGIFCONF, sizeof (struct ifconf),
- (caddr_t)&kifc)) {
- goto done;
- }
+ for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
+ if (!rpcib_rdma_capable_interface(lifrp))
+ continue;
- ifr = kifc.ifc_req;
- for (numifs = kifc.ifc_len / sizeof (struct ifreq);
- numifs > 0; numifs--, ifr++) {
- struct sockaddr_in *sin4;
- struct sockaddr_in6 *sin6;
-
- if ((rpcib_is_ib_interface(ifr->ifr_name) == 0)) {
- sin4 = (struct sockaddr_in *)(uintptr_t)&ifr->ifr_addr;
- sin6 = (struct sockaddr_in6 *)(uintptr_t)&ifr->ifr_addr;
- if (sin4->sin_family == AF_INET) {
- saddr4[*number4] = *(struct sockaddr_in *)
- (uintptr_t)&ifr->ifr_addr;
- *number4 = *number4 + 1;
- } else if (sin6->sin6_family == AF_INET6) {
- saddr6[*number6] = *(struct sockaddr_in6 *)
- (uintptr_t)&ifr->ifr_addr;
- *number6 = *number6 + 1;
- }
+ if (lifrp->lifr_addr.ss_family == AF_INET) {
+ sinp = addrs4->ri_list;
+ bcopy(&lifrp->lifr_addr, &sinp[addrs4->ri_count++],
+ sizeof (struct sockaddr_in));
+ } else if (lifrp->lifr_addr.ss_family == AF_INET6) {
+ sin6p = addrs6->ri_list;
+ bcopy(&lifrp->lifr_addr, &sin6p[addrs6->ri_count++],
+ sizeof (struct sockaddr_in6));
}
}
- ret = B_TRUE;
-done:
- kmem_free(kifc.ifc_buf, kifc.ifc_len);
- return (ret);
+
+ kmem_free(lifc.lifc_buf, bufsize);
+ return (B_TRUE);
}
/* ARGSUSED */