summary | refs | log | tree | commit | diff
path: root/usr/src/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd')
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c 16
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c 7
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c 206
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h 9
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c 9
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c 7
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c 22
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c 5
-rw-r--r-- usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c 263
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile 45
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h 14
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c 1013
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c 1210
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c 1331
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h 130
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c 236
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c 66
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c 183
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h 11
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c 10
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/Makefile 18
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c 717
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile 5
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h 8
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c 1265
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h 5
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c 27
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h 13
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c 35
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile 48
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c 1498
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl 106
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types 10
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c 53
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c 4
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c 76
-rw-r--r-- usr/src/cmd/devfsadm/misc_link.c 4
-rw-r--r-- usr/src/cmd/mdb/common/modules/ip/ip.c 7
-rw-r--r-- usr/src/cmd/rcm_daemon/Makefile.com 4
-rw-r--r-- usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c 14
-rw-r--r-- usr/src/cmd/rcm_daemon/common/ip_rcm.c 1798
-rw-r--r-- usr/src/cmd/svc/milestone/net-init 13
-rw-r--r-- usr/src/cmd/svc/milestone/net-loopback 11
-rw-r--r-- usr/src/cmd/svc/milestone/net-physical 147
-rw-r--r-- usr/src/cmd/svc/shell/net_include.sh 510
-rw-r--r-- usr/src/cmd/truss/codes.c 9
-rw-r--r-- usr/src/cmd/truss/print.c 22
-rw-r--r-- usr/src/cmd/zoneadmd/vplat.c 26
48 files changed, 6262 insertions, 4984 deletions
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
index 34bb772632..5a4779cfa5 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -133,6 +133,7 @@ main(int argc, char **argv)
boolean_t is_verbose;
int ipc_fd;
int c;
+ int aware = RTAW_UNDER_IPMP;
struct rlimit rl;
debug_level = df_get_int("", B_FALSE, DF_DEBUG_LEVEL);
@@ -301,6 +302,17 @@ main(int argc, char **argv)
dhcpmsg(MSG_ERR, "cannot open routing socket");
return (EXIT_FAILURE);
}
+
+ /*
+ * We're IPMP-aware and can manage IPMP test addresses, so issue
+ * RT_AWARE to get routing socket messages for interfaces under IPMP.
+ */
+ if (setsockopt(rtsock_fd, SOL_ROUTE, RT_AWARE, &aware,
+ sizeof (aware)) == -1) {
+ dhcpmsg(MSG_ERR, "cannot set RT_AWARE on routing socket");
+ return (EXIT_FAILURE);
+ }
+
if (iu_register_event(eh, rtsock_fd, POLLIN, rtsock_event, 0) == -1) {
dhcpmsg(MSG_ERR, "cannot register routing socket for messages");
return (EXIT_FAILURE);
@@ -1182,7 +1194,7 @@ check_lif(dhcp_lif_t *lif, const struct ifa_msghdr *ifam, int msglen)
lif->lif_name);
lif_mark_decline(lif, "duplicate address");
close_ip_lif(lif);
- (void) open_ip_lif(lif, INADDR_ANY);
+ (void) open_ip_lif(lif, INADDR_ANY, B_TRUE);
}
dad_wait = lif->lif_dad_wait;
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
index 4637ecc346..6cfce9f0a9 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* BOUND state of the DHCP client state machine.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/socket.h>
#include <sys/types.h>
#include <string.h>
@@ -358,7 +356,8 @@ dhcp_bound_complete(dhcp_smach_t *dsmp)
lif = dsmp->dsm_lif;
if (router_list != NULL &&
(router_list->len % sizeof (ipaddr_t)) == 0 &&
- strchr(lif->lif_name, ':') == NULL) {
+ strchr(lif->lif_name, ':') == NULL &&
+ !lif->lif_pif->pif_under_ipmp) {
dsmp->dsm_nrouters = router_list->len / sizeof (ipaddr_t);
dsmp->dsm_routers = malloc(router_list->len);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
index 0cfdad40e3..5d2d5fb99e 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -76,6 +76,7 @@ insert_pif(const char *pname, boolean_t isv6, int *error)
{
dhcp_pif_t *pif;
struct lifreq lifr;
+ lifgroupinfo_t lifgr;
dlpi_handle_t dh = NULL;
int fd = isv6 ? v6_sock_fd : v4_sock_fd;
@@ -127,12 +128,60 @@ insert_pif(const char *pname, boolean_t isv6, int *error)
}
/*
- * For IPv4, use DLPI to determine the hardware type, hardware
- * address, and hardware address length.
+ * Check if the pif is in an IPMP group. Interfaces using IPMP don't
+ * have dedicated hardware addresses, and get their hardware type from
+ * the SIOCGLIFGROUPINFO ioctl rather than DLPI.
*/
- if (!isv6) {
- int rc;
- dlpi_info_t dlinfo;
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) == -1) {
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_pif: SIOCGLIFGROUPNAME for %s", pname);
+ goto failure;
+ }
+
+ if (lifr.lifr_groupname[0] != '\0') {
+ (void) strlcpy(lifgr.gi_grname, lifr.lifr_groupname,
+ LIFGRNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPINFO, &lifgr) == -1) {
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_pif: SIOCGLIFGROUPINFO for %s",
+ lifgr.gi_grname);
+ goto failure;
+ }
+
+ pif->pif_hwtype = dlpi_arptype(lifgr.gi_mactype);
+ pif->pif_under_ipmp = (strcmp(pname, lifgr.gi_grifname) != 0);
+ (void) strlcpy(pif->pif_grifname, lifgr.gi_grifname, LIFNAMSIZ);
+
+ /*
+ * For IPMP underlying interfaces, stash the interface index
+ * of the IPMP meta-interface; we'll use it to send/receive
+ * traffic. This is both necessary (since IP_BOUND_IF for
+ * non-unicast traffic won't work on underlying interfaces)
+ * and preferred (since a test address lease will be able to
+ * be maintained as long as another interface in the group is
+ * still functioning).
+ */
+ if (pif->pif_under_ipmp) {
+ (void) strlcpy(lifr.lifr_name, pif->pif_grifname,
+ LIFNAMSIZ);
+
+ if (ioctl(fd, SIOCGLIFINDEX, &lifr) == -1) {
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_pif: SIOCGLIFINDEX "
+ "for %s", lifr.lifr_name);
+ goto failure;
+ }
+ pif->pif_grindex = lifr.lifr_index;
+ }
+ }
+
+ /*
+ * For IPv4, if the hardware type is still unknown, use DLPI to
+ * determine it, the hardware address, and hardware address length.
+ */
+ if (!isv6 && pif->pif_hwtype == 0) {
+ int rc;
+ dlpi_info_t dlinfo;
if ((rc = dlpi_open(pname, &dh, 0)) != DLPI_SUCCESS) {
dhcpmsg(MSG_ERROR, "insert_pif: dlpi_open: %s",
@@ -661,11 +710,12 @@ verify_lif(const dhcp_lif_t *lif)
boolean_t isv6;
int fd;
struct lifreq lifr;
+ dhcp_pif_t *pif = lif->lif_pif;
(void) memset(&lifr, 0, sizeof (struct lifreq));
(void) strlcpy(lifr.lifr_name, lif->lif_name, LIFNAMSIZ);
- isv6 = lif->lif_pif->pif_isv6;
+ isv6 = pif->pif_isv6;
fd = isv6 ? v6_sock_fd : v4_sock_fd;
if (ioctl(fd, SIOCGLIFFLAGS, &lifr) == -1) {
@@ -689,43 +739,41 @@ verify_lif(const dhcp_lif_t *lif)
}
/*
- * Special case: if the interface has gone down as a duplicate, then
- * this alone does _not_ mean that we're abandoning it just yet. Allow
- * the state machine to handle this normally by trying to get a new
- * lease.
- */
- if ((lifr.lifr_flags & (IFF_UP|IFF_DUPLICATE)) == IFF_DUPLICATE) {
- dhcpmsg(MSG_DEBUG, "verify_lif: duplicate address on %s",
- lif->lif_name);
- return (B_TRUE);
- }
-
- /*
- * If the user has torn down or started up the interface manually, then
- * abandon the lease.
- */
- if ((lif->lif_flags ^ lifr.lifr_flags) & IFF_UP) {
- dhcpmsg(MSG_DEBUG, "verify_lif: user has %s %s",
- lifr.lifr_flags & IFF_UP ? "started up" : "shut down",
- lif->lif_name);
- return (B_FALSE);
- }
-
- /*
* Check for delete and recreate.
*/
if (ioctl(fd, SIOCGLIFINDEX, &lifr) == -1) {
- dhcpmsg(MSG_ERR, "verify_lif: SIOCGLIFINDEX failed on %s",
- lif->lif_name);
+ if (errno != ENXIO) {
+ dhcpmsg(MSG_ERR, "verify_lif: SIOCGLIFINDEX failed "
+ "on %s", lif->lif_name);
+ }
return (B_FALSE);
}
- if (lifr.lifr_index != lif->lif_pif->pif_index) {
+ if (lifr.lifr_index != pif->pif_index) {
dhcpmsg(MSG_DEBUG,
"verify_lif: ifindex on %s changed: %u to %u",
- lif->lif_name, lif->lif_pif->pif_index, lifr.lifr_index);
+ lif->lif_name, pif->pif_index, lifr.lifr_index);
return (B_FALSE);
}
+ if (pif->pif_under_ipmp) {
+ (void) strlcpy(lifr.lifr_name, pif->pif_grifname, LIFNAMSIZ);
+
+ if (ioctl(fd, SIOCGLIFINDEX, &lifr) == -1) {
+ if (errno != ENXIO) {
+ dhcpmsg(MSG_ERR, "verify_lif: SIOCGLIFINDEX "
+ "failed on %s", lifr.lifr_name);
+ }
+ return (B_FALSE);
+ }
+
+ if (lifr.lifr_index != pif->pif_grindex) {
+ dhcpmsg(MSG_DEBUG, "verify_lif: IPMP group ifindex "
+ "on %s changed: %u to %u", lifr.lifr_name,
+ pif->pif_grindex, lifr.lifr_index);
+ return (B_FALSE);
+ }
+ }
+
/*
* If the IP address, netmask, or broadcast address have changed, or
* the interface has been unplumbed, then we act like there has been an
@@ -934,6 +982,13 @@ plumb_lif(dhcp_pif_t *pif, const in6_addr_t *addr)
lifr.lifr_name);
goto failure;
}
+
+ /*
+ * See comment in set_lif_dhcp().
+ */
+ if (pif->pif_under_ipmp && !(lifr.lifr_flags & IFF_NOFAILOVER))
+ lifr.lifr_flags |= IFF_NOFAILOVER | IFF_DEPRECATED;
+
lifr.lifr_flags |= IFF_UP | IFF_DHCPRUNNING;
if (ioctl(v6_sock_fd, SIOCSLIFFLAGS, &lifr) == -1) {
dhcpmsg(MSG_ERR, "plumb_lif: SIOCSLIFFLAGS %s",
@@ -1060,8 +1115,9 @@ set_lif_dhcp(dhcp_lif_t *lif, boolean_t is_adopting)
int fd;
int err;
struct lifreq lifr;
+ dhcp_pif_t *pif = lif->lif_pif;
- fd = lif->lif_pif->pif_isv6 ? v6_sock_fd : v4_sock_fd;
+ fd = pif->pif_isv6 ? v6_sock_fd : v4_sock_fd;
(void) strlcpy(lifr.lifr_name, lif->lif_name, LIFNAMSIZ);
@@ -1098,6 +1154,17 @@ set_lif_dhcp(dhcp_lif_t *lif, boolean_t is_adopting)
"set on %s", lif->lif_name);
}
} else {
+ /*
+ * If the lif is on an interface under IPMP, IFF_NOFAILOVER
+ * must be set or the kernel will prevent us from setting
+ * IFF_DHCPRUNNING (since the subsequent IFF_UP would lead to
+ * migration). We set IFF_DEPRECATED too since the kernel
+ * will set it automatically when setting IFF_NOFAILOVER,
+ * causing our lif_flags value to grow stale.
+ */
+ if (pif->pif_under_ipmp && !(lifr.lifr_flags & IFF_NOFAILOVER))
+ lifr.lifr_flags |= IFF_NOFAILOVER | IFF_DEPRECATED;
+
lifr.lifr_flags |= IFF_DHCPRUNNING;
if (ioctl(fd, SIOCSLIFFLAGS, &lifr) == -1) {
dhcpmsg(MSG_ERR, "set_lif_dhcp: SIOCSLIFFLAGS for %s",
@@ -1207,6 +1274,13 @@ clear_lif_deprecated(dhcp_lif_t *lif)
return (B_FALSE);
}
+ /*
+ * Don't try to clear IFF_DEPRECATED if this is a test address,
+ * since IPMP's use of IFF_DEPRECATED is not compatible with ours.
+ */
+ if (lifr.lifr_flags & IFF_NOFAILOVER)
+ return (B_TRUE);
+
if (!(lifr.lifr_flags & IFF_DEPRECATED))
return (B_TRUE);
@@ -1226,16 +1300,19 @@ clear_lif_deprecated(dhcp_lif_t *lif)
*
* input: dhcp_lif_t *: the logical interface to operate on
* in_addr_t: the address the socket will be bound to (in hbo)
+ * boolean_t: B_TRUE if the address should be brought up (if needed)
* output: boolean_t: B_TRUE if the socket was opened successfully.
*/
boolean_t
-open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
+open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo, boolean_t bringup)
{
const char *errmsg;
struct lifreq lifr;
int on = 1;
uchar_t ttl = 255;
+ uint32_t ifindex;
+ dhcp_pif_t *pif = lif->lif_pif;
if (lif->lif_sock_ip_fd != -1) {
dhcpmsg(MSG_WARNING, "open_ip_lif: socket already open on %s",
@@ -1270,7 +1347,7 @@ open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
}
if (setsockopt(lif->lif_sock_ip_fd, IPPROTO_IP, IP_DHCPINIT_IF,
- &lif->lif_pif->pif_index, sizeof (int)) == -1) {
+ &pif->pif_index, sizeof (int)) == -1) {
errmsg = "cannot set IP_DHCPINIT_IF";
goto failure;
}
@@ -1288,23 +1365,40 @@ open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
goto failure;
}
- if (setsockopt(lif->lif_sock_ip_fd, IPPROTO_IP, IP_BOUND_IF,
- &lif->lif_pif->pif_index, sizeof (int)) == -1) {
+ ifindex = pif->pif_under_ipmp ? pif->pif_grindex : pif->pif_index;
+ if (setsockopt(lif->lif_sock_ip_fd, IPPROTO_IP, IP_BOUND_IF, &ifindex,
+ sizeof (int)) == -1) {
errmsg = "cannot set IP_BOUND_IF";
goto failure;
}
- /*
- * Make sure at least one lif on the interface we used in IP_BOUND_IF
- * is IFF_UP so that we can send and receive IP packets.
- */
(void) strlcpy(lifr.lifr_name, lif->lif_name, LIFNAMSIZ);
if (ioctl(v4_sock_fd, SIOCGLIFFLAGS, &lifr) == -1) {
errmsg = "cannot get interface flags";
goto failure;
}
- if (!(lifr.lifr_flags & IFF_UP)) {
+ /*
+ * If the lif is part of an interface under IPMP, IFF_NOFAILOVER must
+ * be set or the kernel will prevent us from setting IFF_DHCPRUNNING
+ * (since the subsequent IFF_UP would lead to migration). We set
+ * IFF_DEPRECATED too since the kernel will set it automatically when
+ * setting IFF_NOFAILOVER, causing our lif_flags value to grow stale.
+ */
+ if (pif->pif_under_ipmp && !(lifr.lifr_flags & IFF_NOFAILOVER)) {
+ lifr.lifr_flags |= IFF_NOFAILOVER | IFF_DEPRECATED;
+ if (ioctl(v4_sock_fd, SIOCSLIFFLAGS, &lifr) == -1) {
+ errmsg = "cannot set IFF_NOFAILOVER";
+ goto failure;
+ }
+ }
+ lif->lif_flags = lifr.lifr_flags;
+
+ /*
+ * If this is initial bringup, make sure the address we're acquiring a
+ * lease on is IFF_UP.
+ */
+ if (bringup && !(lifr.lifr_flags & IFF_UP)) {
/*
* Start from a clean slate.
*/
@@ -1330,6 +1424,30 @@ open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
((struct sockaddr_in *)&lifr.lifr_addr)->sin_addr.s_addr;
}
+ /*
+ * Usually, bringing up the address we're acquiring a lease on is
+ * sufficient to allow packets to be sent and received via the
+ * IP_BOUND_IF we did earlier. However, if we're acquiring a lease on
+ * an underlying IPMP interface, the group interface will be used for
+ * sending and receiving IP packets via IP_BOUND_IF. Thus, ensure at
+ * least one address on the group interface is IFF_UP.
+ */
+ if (bringup && pif->pif_under_ipmp) {
+ (void) strlcpy(lifr.lifr_name, pif->pif_grifname, LIFNAMSIZ);
+ if (ioctl(v4_sock_fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ errmsg = "cannot get IPMP group interface flags";
+ goto failure;
+ }
+
+ if (!(lifr.lifr_flags & IFF_UP)) {
+ lifr.lifr_flags |= IFF_UP;
+ if (ioctl(v4_sock_fd, SIOCSLIFFLAGS, &lifr) == -1) {
+ errmsg = "cannot bring up IPMP group interface";
+ goto failure;
+ }
+ }
+ }
+
lif->lif_packet_id = iu_register_event(eh, lif->lif_sock_ip_fd, POLLIN,
dhcp_packet_lif, lif);
if (lif->lif_packet_id == -1) {
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
index a59e3ea68d..46cf30bedb 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef INTERFACE_H
#define INTERFACE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Interface.[ch] encapsulate all of the agent's knowledge of network
* interfaces from the DHCP agent's perspective. See interface.c for
@@ -66,6 +64,9 @@ struct dhcp_pif_s {
boolean_t pif_running; /* interface is running */
uint_t pif_hold_count; /* reference count */
char pif_name[LIFNAMSIZ];
+ char pif_grifname[LIFNAMSIZ];
+ uint32_t pif_grindex; /* interface index for pif_grifname */
+ boolean_t pif_under_ipmp; /* is an ipmp underlying interface */
};
struct dhcp_lif_s {
@@ -182,7 +183,7 @@ dhcp_lif_t *attach_lif(const char *, boolean_t, int *);
int set_lif_dhcp(dhcp_lif_t *, boolean_t);
void set_lif_deprecated(dhcp_lif_t *);
boolean_t clear_lif_deprecated(dhcp_lif_t *);
-boolean_t open_ip_lif(dhcp_lif_t *, in_addr_t);
+boolean_t open_ip_lif(dhcp_lif_t *, in_addr_t, boolean_t);
void close_ip_lif(dhcp_lif_t *);
void lif_mark_decline(dhcp_lif_t *, const char *);
boolean_t schedule_lif_timer(dhcp_lif_t *, dhcp_timer_t *,
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c
index 8a32b55ea5..a763530436 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <string.h>
#include <sys/types.h>
#include <stdlib.h>
@@ -970,7 +968,10 @@ send_pkt_internal(dhcp_smach_t *dsmp)
ipi6->ipi6_addr = lif->lif_v6addr;
else
ipi6->ipi6_addr = my_in6addr_any;
- ipi6->ipi6_ifindex = lif->lif_pif->pif_index;
+ if (lif->lif_pif->pif_under_ipmp)
+ ipi6->ipi6_ifindex = lif->lif_pif->pif_grindex;
+ else
+ ipi6->ipi6_ifindex = lif->lif_pif->pif_index;
cmsg->cmsg_len = (char *)(ipi6 + 1) - (char *)cmsg;
/*
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
index a8c05de986..78da07aebf 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* REQUESTING state of the client state machine.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdlib.h>
#include <string.h>
#include <search.h>
@@ -1008,7 +1006,8 @@ dhcp_acknak_global(iu_eh_t *ehp, int fd, short events, iu_event_id_t id,
for (dsmp = lookup_smach_by_xid(xid, NULL, isv6); dsmp != NULL;
dsmp = lookup_smach_by_xid(xid, dsmp, isv6)) {
pif = dsmp->dsm_lif->lif_pif;
- if (pif->pif_index == plp->ifindex)
+ if (pif->pif_index == plp->ifindex ||
+ pif->pif_under_ipmp && pif->pif_grindex == plp->ifindex)
break;
}
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c
index 9ae7fd7aba..852b428551 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* This module contains core functions for managing DHCP state machine
* instances.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdlib.h>
#include <search.h>
#include <string.h>
@@ -151,7 +149,7 @@ insert_smach(dhcp_lif_t *lif, int *error)
/*
* With IPv4 DHCP, we use a socket per lif.
*/
- if (!open_ip_lif(lif, INADDR_ANY)) {
+ if (!open_ip_lif(lif, INADDR_ANY, B_TRUE)) {
dhcpmsg(MSG_ERR, "unable to open socket for %s",
lif->lif_name);
/* This will also dispose of the LIF */
@@ -696,14 +694,15 @@ set_smach_state(dhcp_smach_t *dsmp, DHCPSTATE state)
if (is_bound_state(dsmp->dsm_state)) {
if (!is_bound_state(state)) {
close_ip_lif(lif);
- if (!open_ip_lif(lif, INADDR_ANY))
+ if (!open_ip_lif(lif, INADDR_ANY,
+ B_FALSE))
return (B_FALSE);
}
} else {
if (is_bound_state(state)) {
close_ip_lif(lif);
if (!open_ip_lif(lif,
- ntohl(lif->lif_addr)))
+ ntohl(lif->lif_addr), B_FALSE))
return (B_FALSE);
}
}
@@ -952,11 +951,14 @@ no_specified_id:
* unable to parse it. We need to determine if a Client ID is required
* and, if so, generate one.
*
- * If it's IPv4 and not a logical interface, then we need to preserve
- * backward-compatibility by avoiding new-fangled DUID/IAID
- * construction.
+ * If it's IPv4, not in an IPMP group, and not a logical interface,
+ * then we need to preserve backward-compatibility by avoiding
+ * new-fangled DUID/IAID construction. (Note: even for IPMP test
+ * addresses, we construct a DUID/IAID since we may renew a lease for
+ * an IPMP test address on any functioning IP interface in the group.)
*/
- if (!pif->pif_isv6 && strchr(dsmp->dsm_name, ':') == NULL) {
+ if (!pif->pif_isv6 && pif->pif_grifname[0] == '\0' &&
+ strchr(dsmp->dsm_name, ':') == NULL) {
if (pif->pif_hwtype == ARPHRD_IB) {
/*
* This comes from the DHCP over IPoIB specification.
diff --git a/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c b/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c
index 47e1202b32..d73722cc55 100644
--- a/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c
+++ b/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -8,8 +8,6 @@
* specifies the terms and conditions for redistribution.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Ifparse splits up an ifconfig command line, and was written for use
* with the networking boot scripts; see $SRC/cmd/svc/shell/net_include.sh
@@ -184,6 +182,7 @@ struct cmd {
{ "auto-revarp", 0, AF_INET, PARSEFIXED},
{ "plumb", 0, AF_ANY, PARSENOW },
{ "unplumb", 0, AF_ANY, PARSENOW },
+ { "ipmp", 0, AF_ANY, PARSELOG0 },
{ "subnet", NEXTARG, AF_ANY, 0 },
{ "token", NEXTARG, AF_INET6, PARSELOG0 },
{ "tsrc", NEXTARG, AF_ANY, PARSELOG0 },
diff --git a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
index b9a02b54e7..2d115e221b 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
+++ b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,8 +29,6 @@
* MROUTING Revision 3.5
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* simple netstat based on snmp/mib-2 interface to the TCP/IP stack
*
@@ -221,6 +219,7 @@ static char *plural(int n);
static char *pluraly(int n);
static char *plurales(int n);
static void process_filter(char *arg);
+static char *ifindex2str(uint_t, char *);
static boolean_t family_selected(int family);
static void usage(char *);
@@ -680,8 +679,14 @@ mibget(int sd)
tor->OPT_offset = sizeof (struct T_optmgmt_req);
tor->OPT_length = sizeof (struct opthdr);
tor->MGMT_flags = T_CURRENT;
+
+
+ /*
+ * Note: we use the special level value below so that IP will return
+ * us information concerning IRE_MARK_TESTHIDDEN routes.
+ */
req = (struct opthdr *)&tor[1];
- req->level = MIB2_IP; /* any MIB2_xxx value ok here */
+ req->level = EXPER_IP_AND_TESTHIDDEN;
req->name = 0;
req->len = 0;
@@ -712,7 +717,7 @@ mibget(int sd)
stderr);
i = 0;
for (last_item = first_item; last_item;
- last_item = last_item->next_item)
+ last_item = last_item->next_item)
(void) printf("%d %4d %5d %d\n",
++i,
last_item->group,
@@ -1707,19 +1712,19 @@ mib_get_constants(mib_item_t *item)
ipRouteAttributeSize = ip->ipRouteAttributeSize;
transportMLPSize = ip->transportMLPSize;
assert(IS_P2ALIGNED(ipAddrEntrySize,
- sizeof (mib2_ipAddrEntry_t *)) &&
- IS_P2ALIGNED(ipRouteEntrySize,
- sizeof (mib2_ipRouteEntry_t *)) &&
- IS_P2ALIGNED(ipNetToMediaEntrySize,
- sizeof (mib2_ipNetToMediaEntry_t *)) &&
- IS_P2ALIGNED(ipMemberEntrySize,
- sizeof (ip_member_t *)) &&
- IS_P2ALIGNED(ipGroupSourceEntrySize,
- sizeof (ip_grpsrc_t *)) &&
- IS_P2ALIGNED(ipRouteAttributeSize,
- sizeof (mib2_ipAttributeEntry_t *)) &&
- IS_P2ALIGNED(transportMLPSize,
- sizeof (mib2_transportMLPEntry_t *)));
+ sizeof (mib2_ipAddrEntry_t *)));
+ assert(IS_P2ALIGNED(ipRouteEntrySize,
+ sizeof (mib2_ipRouteEntry_t *)));
+ assert(IS_P2ALIGNED(ipNetToMediaEntrySize,
+ sizeof (mib2_ipNetToMediaEntry_t *)));
+ assert(IS_P2ALIGNED(ipMemberEntrySize,
+ sizeof (ip_member_t *)));
+ assert(IS_P2ALIGNED(ipGroupSourceEntrySize,
+ sizeof (ip_grpsrc_t *)));
+ assert(IS_P2ALIGNED(ipRouteAttributeSize,
+ sizeof (mib2_ipAttributeEntry_t *)));
+ assert(IS_P2ALIGNED(transportMLPSize,
+ sizeof (mib2_transportMLPEntry_t *)));
break;
}
case EXPER_DVMRP: {
@@ -1728,8 +1733,9 @@ mib_get_constants(mib_item_t *item)
vifctlSize = mrts->mrts_vifctlSize;
mfcctlSize = mrts->mrts_mfcctlSize;
assert(IS_P2ALIGNED(vifctlSize,
- sizeof (struct vifclt *)) &&
- IS_P2ALIGNED(mfcctlSize, sizeof (struct mfcctl *)));
+ sizeof (struct vifclt *)));
+ assert(IS_P2ALIGNED(mfcctlSize,
+ sizeof (struct mfcctl *)));
break;
}
case MIB2_IP6: {
@@ -1745,17 +1751,17 @@ mib_get_constants(mib_item_t *item)
ipv6GroupSourceEntrySize =
ip6->ipv6GroupSourceEntrySize;
assert(IS_P2ALIGNED(ipv6IfStatsEntrySize,
- sizeof (mib2_ipv6IfStatsEntry_t *)) &&
- IS_P2ALIGNED(ipv6AddrEntrySize,
- sizeof (mib2_ipv6AddrEntry_t *)) &&
- IS_P2ALIGNED(ipv6RouteEntrySize,
- sizeof (mib2_ipv6RouteEntry_t *)) &&
- IS_P2ALIGNED(ipv6NetToMediaEntrySize,
- sizeof (mib2_ipv6NetToMediaEntry_t *)) &&
- IS_P2ALIGNED(ipv6MemberEntrySize,
- sizeof (ipv6_member_t *)) &&
- IS_P2ALIGNED(ipv6GroupSourceEntrySize,
- sizeof (ipv6_grpsrc_t *)));
+ sizeof (mib2_ipv6IfStatsEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6AddrEntrySize,
+ sizeof (mib2_ipv6AddrEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6RouteEntrySize,
+ sizeof (mib2_ipv6RouteEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6NetToMediaEntrySize,
+ sizeof (mib2_ipv6NetToMediaEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6MemberEntrySize,
+ sizeof (ipv6_member_t *)));
+ assert(IS_P2ALIGNED(ipv6GroupSourceEntrySize,
+ sizeof (ipv6_grpsrc_t *)));
break;
}
case MIB2_ICMP6: {
@@ -1774,9 +1780,9 @@ mib_get_constants(mib_item_t *item)
tcpConnEntrySize = tcp->tcpConnTableSize;
tcp6ConnEntrySize = tcp->tcp6ConnTableSize;
assert(IS_P2ALIGNED(tcpConnEntrySize,
- sizeof (mib2_tcpConnEntry_t *)) &&
- IS_P2ALIGNED(tcp6ConnEntrySize,
- sizeof (mib2_tcp6ConnEntry_t *)));
+ sizeof (mib2_tcpConnEntry_t *)));
+ assert(IS_P2ALIGNED(tcp6ConnEntrySize,
+ sizeof (mib2_tcp6ConnEntry_t *)));
break;
}
case MIB2_UDP: {
@@ -1785,9 +1791,9 @@ mib_get_constants(mib_item_t *item)
udpEntrySize = udp->udpEntrySize;
udp6EntrySize = udp->udp6EntrySize;
assert(IS_P2ALIGNED(udpEntrySize,
- sizeof (mib2_udpEntry_t *)) &&
- IS_P2ALIGNED(udp6EntrySize,
- sizeof (mib2_udp6Entry_t *)));
+ sizeof (mib2_udpEntry_t *)));
+ assert(IS_P2ALIGNED(udp6EntrySize,
+ sizeof (mib2_udp6Entry_t *)));
break;
}
case MIB2_SCTP: {
@@ -1843,7 +1849,6 @@ stat_report(mib_item_t *item)
{
int jtemp = 0;
char ifname[LIFNAMSIZ + 1];
- char *ifnamep;
/* 'for' loop 1: */
for (; item; item = item->next_item) {
@@ -1891,12 +1896,10 @@ stat_report(mib_item_t *item)
bzero(&sum6, sizeof (sum6));
/* 'for' loop 2a: */
for (ip6 = (mib2_ipv6IfStatsEntry_t *)item->valp;
- (char *)ip6 < (char *)item->valp
- + item->length;
+ (char *)ip6 < (char *)item->valp + item->length;
/* LINTED: (note 1) */
ip6 = (mib2_ipv6IfStatsEntry_t *)((char *)ip6 +
ipv6IfStatsEntrySize)) {
-
if (ip6->ipv6IfIndex == 0) {
/*
* The "unknown interface" ip6
@@ -1905,19 +1908,10 @@ stat_report(mib_item_t *item)
sum_ip6_stats(ip6, &sum6);
continue; /* 'for' loop 2a */
}
- ifnamep = if_indextoname(
- ip6->ipv6IfIndex,
- ifname);
- if (ifnamep == NULL) {
- (void) printf(
- "Invalid ifindex %d\n",
- ip6->ipv6IfIndex);
- continue; /* 'for' loop 2a */
- }
-
if (Aflag) {
(void) printf("\nIPv6 for %s\n",
- ifnamep);
+ ifindex2str(ip6->ipv6IfIndex,
+ ifname));
print_ip6_stats(ip6);
}
sum_ip6_stats(ip6, &sum6);
@@ -1935,15 +1929,10 @@ stat_report(mib_item_t *item)
break;
bzero(&sum6, sizeof (sum6));
/* 'for' loop 2b: */
- for (icmp6 =
- (mib2_ipv6IfIcmpEntry_t *)item->valp;
- (char *)icmp6 < (char *)item->valp
- + item->length;
- icmp6 =
- /* LINTED: (note 1) */
- (mib2_ipv6IfIcmpEntry_t *)((char *)icmp6
- + ipv6IfIcmpEntrySize)) {
-
+ for (icmp6 = (mib2_ipv6IfIcmpEntry_t *)item->valp;
+ (char *)icmp6 < (char *)item->valp + item->length;
+ icmp6 = (void *)((char *)icmp6 +
+ ipv6IfIcmpEntrySize)) {
if (icmp6->ipv6IfIcmpIfIndex == 0) {
/*
* The "unknown interface" icmp6
@@ -1952,19 +1941,10 @@ stat_report(mib_item_t *item)
sum_icmp6_stats(icmp6, &sum6);
continue; /* 'for' loop 2b: */
}
- ifnamep = if_indextoname(
- icmp6->ipv6IfIcmpIfIndex, ifname);
- if (ifnamep == NULL) {
- (void) printf(
- "Invalid ifindex %d\n",
- icmp6->ipv6IfIcmpIfIndex);
- continue; /* 'for' loop 2b: */
- }
-
if (Aflag) {
- (void) printf(
- "\nICMPv6 for %s\n",
- ifnamep);
+ (void) printf("\nICMPv6 for %s\n",
+ ifindex2str(
+ icmp6->ipv6IfIcmpIfIndex, ifname));
print_icmp6_stats(icmp6);
}
sum_icmp6_stats(icmp6, &sum6);
@@ -2369,51 +2349,49 @@ print_mrt_stats(struct mrtstat *mrts)
{
(void) puts("DVMRP multicast routing:");
(void) printf(" %10u hit%s - kernel forwarding cache hits\n",
- mrts->mrts_mfc_hits, PLURAL(mrts->mrts_mfc_hits));
+ mrts->mrts_mfc_hits, PLURAL(mrts->mrts_mfc_hits));
(void) printf(" %10u miss%s - kernel forwarding cache misses\n",
- mrts->mrts_mfc_misses, PLURALES(mrts->mrts_mfc_misses));
+ mrts->mrts_mfc_misses, PLURALES(mrts->mrts_mfc_misses));
(void) printf(" %10u packet%s potentially forwarded\n",
- mrts->mrts_fwd_in, PLURAL(mrts->mrts_fwd_in));
+ mrts->mrts_fwd_in, PLURAL(mrts->mrts_fwd_in));
(void) printf(" %10u packet%s actually sent out\n",
- mrts->mrts_fwd_out, PLURAL(mrts->mrts_fwd_out));
+ mrts->mrts_fwd_out, PLURAL(mrts->mrts_fwd_out));
(void) printf(" %10u upcall%s - upcalls made to mrouted\n",
- mrts->mrts_upcalls, PLURAL(mrts->mrts_upcalls));
+ mrts->mrts_upcalls, PLURAL(mrts->mrts_upcalls));
(void) printf(" %10u packet%s not sent out due to lack of resources\n",
- mrts->mrts_fwd_drop, PLURAL(mrts->mrts_fwd_drop));
+ mrts->mrts_fwd_drop, PLURAL(mrts->mrts_fwd_drop));
(void) printf(" %10u datagram%s with malformed tunnel options\n",
- mrts->mrts_bad_tunnel, PLURAL(mrts->mrts_bad_tunnel));
+ mrts->mrts_bad_tunnel, PLURAL(mrts->mrts_bad_tunnel));
(void) printf(" %10u datagram%s with no room for tunnel options\n",
- mrts->mrts_cant_tunnel, PLURAL(mrts->mrts_cant_tunnel));
+ mrts->mrts_cant_tunnel, PLURAL(mrts->mrts_cant_tunnel));
(void) printf(" %10u datagram%s arrived on wrong interface\n",
- mrts->mrts_wrong_if, PLURAL(mrts->mrts_wrong_if));
+ mrts->mrts_wrong_if, PLURAL(mrts->mrts_wrong_if));
(void) printf(" %10u datagram%s dropped due to upcall Q overflow\n",
- mrts->mrts_upq_ovflw, PLURAL(mrts->mrts_upq_ovflw));
+ mrts->mrts_upq_ovflw, PLURAL(mrts->mrts_upq_ovflw));
(void) printf(" %10u datagram%s cleaned up by the cache\n",
- mrts->mrts_cache_cleanups, PLURAL(mrts->mrts_cache_cleanups));
+ mrts->mrts_cache_cleanups, PLURAL(mrts->mrts_cache_cleanups));
(void) printf(" %10u datagram%s dropped selectively by ratelimiter\n",
- mrts->mrts_drop_sel, PLURAL(mrts->mrts_drop_sel));
+ mrts->mrts_drop_sel, PLURAL(mrts->mrts_drop_sel));
(void) printf(" %10u datagram%s dropped - bucket Q overflow\n",
- mrts->mrts_q_overflow, PLURAL(mrts->mrts_q_overflow));
+ mrts->mrts_q_overflow, PLURAL(mrts->mrts_q_overflow));
(void) printf(" %10u datagram%s dropped - larger than bkt size\n",
- mrts->mrts_pkt2large, PLURAL(mrts->mrts_pkt2large));
+ mrts->mrts_pkt2large, PLURAL(mrts->mrts_pkt2large));
(void) printf("\nPIM multicast routing:\n");
(void) printf(" %10u datagram%s dropped - bad version number\n",
- mrts->mrts_pim_badversion, PLURAL(mrts->mrts_pim_badversion));
+ mrts->mrts_pim_badversion, PLURAL(mrts->mrts_pim_badversion));
(void) printf(" %10u datagram%s dropped - bad checksum\n",
- mrts->mrts_pim_rcv_badcsum, PLURAL(mrts->mrts_pim_rcv_badcsum));
+ mrts->mrts_pim_rcv_badcsum, PLURAL(mrts->mrts_pim_rcv_badcsum));
(void) printf(" %10u datagram%s dropped - bad register packets\n",
- mrts->mrts_pim_badregisters,
- PLURAL(mrts->mrts_pim_badregisters));
+ mrts->mrts_pim_badregisters, PLURAL(mrts->mrts_pim_badregisters));
(void) printf(
- " %10u datagram%s potentially forwarded - register packets\n",
- mrts->mrts_pim_regforwards, PLURAL(mrts->mrts_pim_regforwards));
+ " %10u datagram%s potentially forwarded - register packets\n",
+ mrts->mrts_pim_regforwards, PLURAL(mrts->mrts_pim_regforwards));
(void) printf(" %10u datagram%s dropped - register send drops\n",
- mrts->mrts_pim_regsend_drops,
- PLURAL(mrts->mrts_pim_regsend_drops));
+ mrts->mrts_pim_regsend_drops, PLURAL(mrts->mrts_pim_regsend_drops));
(void) printf(" %10u datagram%s dropped - packet malformed\n",
- mrts->mrts_pim_malformed, PLURAL(mrts->mrts_pim_malformed));
+ mrts->mrts_pim_malformed, PLURAL(mrts->mrts_pim_malformed));
(void) printf(" %10u datagram%s dropped - no memory to forward\n",
- mrts->mrts_pim_nomemory, PLURAL(mrts->mrts_pim_nomemory));
+ mrts->mrts_pim_nomemory, PLURAL(mrts->mrts_pim_nomemory));
}
static void
@@ -2674,7 +2652,7 @@ if_report(mib_item_t *item, char *matchname,
"Ierrs", "Opkts", "Oerrs",
"Collis", "Queue");
- first = B_FALSE;
+ first = B_FALSE;
}
if_report_ip4(ap, ifname,
logintname, &stat, B_TRUE);
@@ -2717,7 +2695,7 @@ if_report(mib_item_t *item, char *matchname,
+ item->length;
ap++) {
(void) octetstr(&ap->ipAdEntIfIndex,
- 'a', ifname, sizeof (ifname));
+ 'a', ifname, sizeof (ifname));
(void) strtok(ifname, ":");
if (matchname) {
@@ -3387,7 +3365,7 @@ dhcp_walk_interfaces(uint_t flags_on, uint_t flags_off, int af,
*/
(void) memset(&lifn, 0, sizeof (lifn));
lifn.lifn_family = af;
- lifn.lifn_flags = LIFC_ALLZONES | LIFC_NOXMIT;
+ lifn.lifn_flags = LIFC_ALLZONES | LIFC_NOXMIT | LIFC_UNDER_IPMP;
if (ioctl(sock_fd, SIOCGLIFNUM, &lifn) == -1)
n_ifs = LIFN_GUARD_VALUE;
else
@@ -3471,7 +3449,6 @@ group_report(mib_item_t *item)
ip_grpsrc_t *ips;
ipv6_member_t *ipmp6;
ipv6_grpsrc_t *ips6;
- char *ifnamep;
boolean_t first, first_src;
/* 'for' loop 1: */
@@ -3604,7 +3581,7 @@ group_report(mib_item_t *item)
(char *)ipmp6 < (char *)v6grp->valp + v6grp->length;
/* LINTED: (note 1) */
ipmp6 = (ipv6_member_t *)((char *)ipmp6 +
- ipv6MemberEntrySize)) {
+ ipv6MemberEntrySize)) {
if (first) {
(void) puts("Group Memberships: "
"IPv6");
@@ -3615,15 +3592,8 @@ group_report(mib_item_t *item)
first = B_FALSE;
}
- ifnamep = if_indextoname(
- ipmp6->ipv6GroupMemberIfIndex, ifname);
- if (ifnamep == NULL) {
- (void) printf("Invalid ifindex %d\n",
- ipmp6->ipv6GroupMemberIfIndex);
- continue;
- }
(void) printf("%-5s %-27s %5u\n",
- ifnamep,
+ ifindex2str(ipmp6->ipv6GroupMemberIfIndex, ifname),
pr_addr6(&ipmp6->ipv6GroupMemberAddress,
abuf, sizeof (abuf)),
ipmp6->ipv6GroupMemberRefCnt);
@@ -3784,7 +3754,6 @@ ndp_report(mib_item_t *item)
char xbuf[STR_EXPAND * OCTET_LENGTH + 1];
mib2_ipv6NetToMediaEntry_t *np6;
char ifname[LIFNAMSIZ + 1];
- char *ifnamep;
boolean_t first;
if (!(family_selected(AF_INET6)))
@@ -3820,13 +3789,6 @@ ndp_report(mib_item_t *item)
first = B_FALSE;
}
- ifnamep = if_indextoname(np6->ipv6NetToMediaIfIndex,
- ifname);
- if (ifnamep == NULL) {
- (void) printf("Invalid ifindex %d\n",
- np6->ipv6NetToMediaIfIndex);
- continue; /* 'for' loop 2 */
- }
switch (np6->ipv6NetToMediaState) {
case ND_INCOMPLETE:
state = "INCOMPLETE";
@@ -3865,7 +3827,7 @@ ndp_report(mib_item_t *item)
break;
}
(void) printf("%-5s %-17s %-7s %-12s %-27s\n",
- ifnamep,
+ ifindex2str(np6->ipv6NetToMediaIfIndex, ifname),
octetstr(&np6->ipv6NetToMediaPhysAddress, 'h',
xbuf, sizeof (xbuf)),
type,
@@ -4472,7 +4434,7 @@ ire_report_item_v6(const mib2_ipv6RouteEntry_t *rp6, boolean_t first,
(void) printf("%-27s %-27s %-5s %5u%c %5u %3u "
"%-5s %6u %6u %s\n",
pr_prefix6(&rp6->ipv6RouteDest,
- rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
+ rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
IN6_IS_ADDR_UNSPECIFIED(&rp6->ipv6RouteNextHop) ?
" --" :
pr_addr6(&rp6->ipv6RouteNextHop, gwbuf, sizeof (gwbuf)),
@@ -4489,7 +4451,7 @@ ire_report_item_v6(const mib2_ipv6RouteEntry_t *rp6, boolean_t first,
} else {
(void) printf("%-27s %-27s %-5s %3u %7u %-5s %s\n",
pr_prefix6(&rp6->ipv6RouteDest,
- rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
+ rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
IN6_IS_ADDR_UNSPECIFIED(&rp6->ipv6RouteNextHop) ?
" --" :
pr_addr6(&rp6->ipv6RouteNextHop, gwbuf, sizeof (gwbuf)),
@@ -4690,9 +4652,9 @@ tcp_report_item_v4(const mib2_tcpConnEntry_t *tp, boolean_t first,
(void) printf("%-20s\n%-20s %5u %08x %08x %5u %08x %08x "
"%5u %5u %s\n",
pr_ap(tp->tcpConnLocalAddress,
- tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap(tp->tcpConnRemAddress,
- tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
+ tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
tp->tcpConnEntryInfo.ce_swnd,
tp->tcpConnEntryInfo.ce_snxt,
tp->tcpConnEntryInfo.ce_suna,
@@ -4710,9 +4672,9 @@ tcp_report_item_v4(const mib2_tcpConnEntry_t *tp, boolean_t first,
(void) printf("%-20s %-20s %5u %6d %5u %6d %s\n",
pr_ap(tp->tcpConnLocalAddress,
- tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap(tp->tcpConnRemAddress,
- tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
+ tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
tp->tcpConnEntryInfo.ce_swnd,
(sq >= 0) ? sq : 0,
tp->tcpConnEntryInfo.ce_rwnd,
@@ -4756,9 +4718,9 @@ tcp_report_item_v6(const mib2_tcp6ConnEntry_t *tp6, boolean_t first,
(void) printf("%-33s\n%-33s %5u %08x %08x %5u %08x %08x "
"%5u %5u %-11s %s\n",
pr_ap6(&tp6->tcp6ConnLocalAddress,
- tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap6(&tp6->tcp6ConnRemAddress,
- tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
+ tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
tp6->tcp6ConnEntryInfo.ce_swnd,
tp6->tcp6ConnEntryInfo.ce_snxt,
tp6->tcp6ConnEntryInfo.ce_suna,
@@ -4777,9 +4739,9 @@ tcp_report_item_v6(const mib2_tcp6ConnEntry_t *tp6, boolean_t first,
(void) printf("%-33s %-33s %5u %6d %5u %6d %-11s %s\n",
pr_ap6(&tp6->tcp6ConnLocalAddress,
- tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap6(&tp6->tcp6ConnRemAddress,
- tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
+ tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
tp6->tcp6ConnEntryInfo.ce_swnd,
(sq >= 0) ? sq : 0,
tp6->tcp6ConnEntryInfo.ce_rwnd,
@@ -5112,7 +5074,7 @@ sctp_pr_addr(int type, char *name, int namelen, const in6_addr_t *addr,
* displaying.
*/
switch (type) {
- case MIB2_SCTP_ADDR_V4:
+ case MIB2_SCTP_ADDR_V4:
/* v4 */
v6addr = *addr;
@@ -5124,7 +5086,7 @@ sctp_pr_addr(int type, char *name, int namelen, const in6_addr_t *addr,
}
break;
- case MIB2_SCTP_ADDR_V6:
+ case MIB2_SCTP_ADDR_V6:
/* v6 */
if (port > 0) {
(void) pr_ap6(addr, port, "sctp", name, namelen);
@@ -5133,7 +5095,7 @@ sctp_pr_addr(int type, char *name, int namelen, const in6_addr_t *addr,
}
break;
- default:
+ default:
(void) snprintf(name, namelen, "<unknown addr type>");
break;
}
@@ -5379,7 +5341,7 @@ mrt_report(mib_item_t *item)
case EXPER_DVMRP_MRT:
if (Dflag)
(void) printf("%u records for ipMfcTable:\n",
- item->length/sizeof (struct vifctl));
+ item->length/sizeof (struct vifctl));
if (item->length/sizeof (struct vifctl) == 0) {
(void) puts("\nMulticast Forwarding Cache is "
"empty");
@@ -5402,10 +5364,10 @@ mrt_report(mib_item_t *item)
abuf, sizeof (abuf)));
(void) printf("%-15.15s %6s %3u ",
pr_net(mfccp->mfcc_mcastgrp.s_addr,
- mfccp->mfcc_mcastgrp.s_addr,
- abuf, sizeof (abuf)),
+ mfccp->mfcc_mcastgrp.s_addr,
+ abuf, sizeof (abuf)),
pktscale((int)mfccp->mfcc_pkt_cnt),
- mfccp->mfcc_parent);
+ mfccp->mfcc_parent);
for (vifi = 0; vifi < MAXVIFS; ++vifi) {
if (mfccp->mfcc_ttls[vifi]) {
@@ -5468,7 +5430,7 @@ kmem_cache_stats(char *title, char *name, int prefix, int64_t *total_bytes)
strncmp(ksp->ks_name, "streams_dblk", 12) == 0) {
(void) safe_kstat_read(kc, ksp, NULL);
total_buf_inuse -=
- kstat_named_value(ksp, "buf_constructed");
+ kstat_named_value(ksp, "buf_constructed");
continue; /* 'for' loop 1 */
}
@@ -5501,7 +5463,7 @@ kmem_cache_stats(char *title, char *name, int prefix, int64_t *total_bytes)
if (buf_size == 0) {
(void) printf("%-22s [couldn't find statistics for %s]\n",
- title, name);
+ title, name);
return;
}
@@ -5511,7 +5473,7 @@ kmem_cache_stats(char *title, char *name, int prefix, int64_t *total_bytes)
(void) snprintf(buf, sizeof (buf), "%s", title);
(void) printf("%-22s %6d %9d %11lld %11d\n", buf,
- total_buf_inuse, total_buf_max, total_alloc, total_alloc_fail);
+ total_buf_inuse, total_buf_max, total_alloc, total_alloc_fail);
}
static void
@@ -5534,7 +5496,7 @@ m_report(void)
kmem_cache_stats("qband", "qband_cache", 0, &total_bytes);
(void) printf("\n%lld Kbytes allocated for streams data\n",
- total_bytes / 1024);
+ total_bytes / 1024);
(void) putchar('\n');
(void) fflush(stdout);
@@ -5967,7 +5929,7 @@ portname(uint_t port, char *proto, char *dst, uint_t dstlen)
sp = getservbyport(htons(port), proto);
if (sp || port == 0)
(void) snprintf(dst, dstlen, "%.*s", MAXHOSTNAMELEN,
- sp ? sp->s_name : "*");
+ sp ? sp->s_name : "*");
else
(void) snprintf(dst, dstlen, "%d", port);
dst[dstlen - 1] = 0;
@@ -6161,8 +6123,8 @@ process_filter(char *arg)
*/
if (hp->h_addr_list[0] != NULL &&
/* LINTED: (note 1) */
- IN6_IS_ADDR_V4MAPPED((in6_addr_t
- *)hp->h_addr_list[0])) {
+ IN6_IS_ADDR_V4MAPPED((in6_addr_t *)
+ hp->h_addr_list[0])) {
maxv = IP_ABITS;
} else {
maxv = IPV6_ABITS;
@@ -6226,6 +6188,21 @@ family_selected(int family)
}
/*
+ * Convert the interface index to a string using the buffer `ifname', which
+ * must be at least LIFNAMSIZ bytes.  We first try to map it to a name.  If
+ * that fails (e.g., because we're inside a zone that does not have access to
+ * the interface for the index in question), just return "if#<num>".
+ */
+static char *
+ifindex2str(uint_t ifindex, char *ifname)
+{
+	/* ifindex is unsigned, so the fallback name is formatted with %u. */
+	if (if_indextoname(ifindex, ifname) == NULL)
+		(void) snprintf(ifname, LIFNAMSIZ, "if#%u", ifindex);
+	return (ifname);
+}
+
+/*
* print the usage line
*/
static void
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile
index f0c4c03250..f3ce9fae4b 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile
@@ -19,51 +19,58 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
-
-PROG = in.mpathd
-OBJS = mpd_tables.o mpd_main.o mpd_probe.o
-SRCS = $(OBJS:%.o=%.c)
-DEFAULTFILES = mpathd.dfl
+PROG = in.mpathd
+ROOTFS_PROG = $(PROG)
+OBJS = mpd_tables.o mpd_main.o mpd_probe.o
+SRCS = $(OBJS:%.o=%.c)
+DEFAULTFILES = mpathd.dfl
include ../../../Makefile.cmd
-POFILE = $(PROG).po
-POFILES = $(SRCS:%.c=%.po)
+ROOTCMDDIR = $(ROOT)/lib/inet
+
+POFILE = $(PROG).po
+POFILES = $(SRCS:%.c=%.po)
-C99MODE= $(C99_ENABLE)
+C99MODE = $(C99_ENABLE)
#
# We need access to the ancillary data features which are only available
# via the SUS standards. Further, C99 support requires SUSv3 or higher.
#
CPPFLAGS += -D_XOPEN_SOURCE=600 -D__EXTENSIONS__
-LDLIBS += -lsocket -lnsl -lsysevent -lnvpair -lipmp -lc
+LDLIBS += -lsocket -lnsl -lsysevent -lnvpair -lipmp -linetutil -ldlpi
+LINTFLAGS += -erroff=E_INCONS_ARG_DECL2 -erroff=E_INCONS_ARG_USED2
-LINTFLAGS += -erroff=E_FUNC_DECL_VAR_ARG2 -erroff=E_INCONS_VAL_TYPE_DECL2 \
- -erroff=E_FUNC_USED_VAR_ARG2 -erroff=E_INCONS_ARG_DECL2 \
- -erroff=E_NAME_USED_NOT_DEF2 -erroff=E_INCONS_ARG_USED2 \
- -errtags=yes
+#
+# Instrument in.mpathd with CTF data to ease debugging.
+#
+CTFCONVERT_HOOK = && $(CTFCONVERT_O)
+CTFMERGE_HOOK = && $(CTFMERGE) -L VERSION -o $@ $(OBJS)
+$(OBJS) := CFLAGS += $(CTF_FLAGS)
.KEEP_STATE:
all: $(PROG)
$(PROG): $(OBJS)
- $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS) $(CTFMERGE_HOOK)
$(POST_PROCESS)
include ../Makefile.lib
+$(ROOTLIBINETPROG):
+ $(RM) $@; $(SYMLINK) ../../../lib/inet/$(PROG) $@
+
$(ROOTSBINPROG):
- $(RM) $@; $(SYMLINK) ../usr/lib/inet/$(PROG) $@
+ $(RM) $@; $(SYMLINK) ../lib/inet/$(PROG) $@
-install: all $(ROOTLIBINETPROG) $(ROOTSBINPROG) $(ROOTETCDEFAULTFILES)
+install: all $(ROOTLIBINETPROG) $(ROOTSBINPROG) $(ROOTCMD) \
+ $(ROOTETCDEFAULTFILES)
clean:
$(RM) $(OBJS)
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h
index 9b07e2a7a3..e7cb096bf7 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _MPD_DEFS_H
#define _MPD_DEFS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -78,12 +76,13 @@ extern "C" {
#include <locale.h>
#include <deflt.h>
+#include <libdlpi.h>
+#include <libinetutil.h>
#include <libnvpair.h>
#include <libsysevent.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/ipmp.h>
-#include <zone.h>
#include <ipmp_mpathd.h>
#include <ipmp_query_impl.h>
@@ -92,7 +91,7 @@ extern "C" {
/* Debug flags */
#define D_ALL 0xffff /* enable all debug */
#define D_PROBE 0x0001 /* probe mechanism */
-#define D_FAILOVER 0x0002 /* failover mechanism */
+#define D_FAILREP 0x0002 /* failure/repair mechanism */
#define D_PHYINT 0x0004 /* phyint table */
#define D_LOGINT 0x0008 /* logint table */
#define D_TARGET 0x0010 /* target table */
@@ -199,10 +198,8 @@ extern int user_failure_detection_time; /* User specified fdt */
extern int ifsock_v4; /* IPv4 socket for ioctls */
extern int ifsock_v6; /* IPv6 socket for ioctls */
-extern boolean_t full_scan_required; /* Do full scans */
-
extern int debug; /* debug option */
-
+extern boolean_t cleanup_started; /* true if we're shutting down */
extern boolean_t handle_link_notifications;
/*
@@ -212,6 +209,7 @@ extern void timer_schedule(uint_t delay);
extern void logmsg(int pri, const char *fmt, ...);
extern void logperror(const char *str);
extern int poll_add(int fd);
+extern int poll_remove(int fd);
extern uint64_t getcurrentsec(void);
extern uint_t getcurrenttime(void);
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c
index aa6a99fb9c..e1e22e12d4 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mpd_defs.h"
#include "mpd_tables.h"
@@ -46,7 +44,6 @@ static int lsock_v6; /* Listen socket to detect mpathd */
static int mibfd = -1; /* fd to get mib info */
static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */
-boolean_t full_scan_required = _B_FALSE;
static uint_t last_initifs_time; /* Time when initifs was last run */
static char **argv0; /* Saved for re-exec on SIGHUP */
boolean_t handle_link_notifications = _B_TRUE;
@@ -58,10 +55,6 @@ static void check_if_removed(struct phyint_instance *pii);
static void select_test_ifs(void);
static void ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
static void ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
-static void router_add_v4(mib2_ipRouteEntry_t *rp1,
- struct in_addr nexthop_v4);
-static void router_add_v6(mib2_ipv6RouteEntry_t *rp1,
- struct in6_addr nexthop_v6);
static void router_add_common(int af, char *ifname,
struct in6_addr nexthop);
static void init_router_targets();
@@ -74,17 +67,17 @@ static void check_addr_unique(struct phyint_instance *,
static void init_host_targets(void);
static void dup_host_targets(struct phyint_instance *desired_pii);
static void loopback_cmd(int sock, int family);
-static int poll_remove(int fd);
static boolean_t daemonize(void);
static int closefunc(void *, int);
static unsigned int process_cmd(int newfd, union mi_commands *mpi);
static unsigned int process_query(int fd, mi_query_t *miq);
+static unsigned int send_addrinfo(int fd, ipmp_addrinfo_t *adinfop);
static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
static unsigned int send_result(int fd, unsigned int error, int syserror);
-struct local_addr *laddr_list = NULL;
+addrlist_t *localaddrs;
/*
* Return the current time in milliseconds (from an arbitrary reference)
@@ -153,7 +146,7 @@ retry:
/*
* Remove fd from the set being polled. Returns 0 if ok; -1 if failed.
*/
-static int
+int
poll_remove(int fd)
{
int i;
@@ -205,17 +198,11 @@ pii_process(int af, char *name, struct phyint_instance **pii_p)
break;
case PI_GROUP_CHANGED:
- /*
- * The phyint has changed group.
- */
- restore_phyint(pii->pii_phyint);
- /* FALLTHRU */
-
case PI_IFINDEX_CHANGED:
/*
- * Interface index has changed. Delete and
- * recreate the phyint as it is quite likely
- * the interface has been unplumbed and replumbed.
+ * Interface index or group membership has changed.
+ * Delete the old state and recreate based on the new
+ * state (it may no longer be in a group).
*/
pii_other = phyint_inst_other(pii);
if (pii_other != NULL)
@@ -249,51 +236,26 @@ pii_process(int af, char *name, struct phyint_instance **pii_p)
}
/*
- * This phyint is leaving the group. Try to restore the phyint to its
- * initial state. Return the addresses that belong to other group members,
- * to the group, and take back any addresses owned by this phyint
- */
-void
-restore_phyint(struct phyint *pi)
-{
- if (pi->pi_group == phyint_anongroup)
- return;
-
- /*
- * Move everthing to some other member in the group.
- * The phyint has changed group in the kernel. But we
- * have yet to do it in our tables.
- */
- if (!pi->pi_empty)
- (void) try_failover(pi, FAILOVER_TO_ANY);
- /*
- * Move all addresses owned by 'pi' back to pi, from each
- * of the other members of the group
- */
- (void) try_failback(pi);
-}
-
-/*
* Scan all interfaces to detect changes as well as new and deleted interfaces
*/
static void
initifs()
{
- int n;
+ int i, nlifr;
int af;
char *cp;
char *buf;
- int numifs;
+ int sockfd;
+ uint64_t flags;
struct lifnum lifn;
struct lifconf lifc;
+ struct lifreq lifreq;
struct lifreq *lifr;
struct logint *li;
struct phyint_instance *pii;
struct phyint_instance *next_pii;
- char pi_name[LIFNAMSIZ + 1];
- boolean_t exists;
- struct phyint *pi;
- struct local_addr *next;
+ struct phyint_group *pg, *next_pg;
+ char pi_name[LIFNAMSIZ + 1];
if (debug & D_PHYINT)
logdebug("initifs: Scanning interfaces\n");
@@ -301,13 +263,9 @@ initifs()
last_initifs_time = getcurrenttime();
/*
- * Free the laddr_list before collecting the local addresses.
+ * Free the existing local address list; we'll build a new list below.
*/
- while (laddr_list != NULL) {
- next = laddr_list->next;
- free(laddr_list);
- laddr_list = next;
- }
+ addrlist_free(&localaddrs);
/*
* Mark the interfaces so that we can find phyints and logints
@@ -326,122 +284,142 @@ initifs()
}
}
+ /*
+ * As above, mark groups so that we can detect IPMP interfaces which
+ * have been removed from the kernel. Also, delete the group address
+ * list since we'll iteratively recreate it below.
+ */
+ for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
+ pg->pg_in_use = _B_FALSE;
+ addrlist_free(&pg->pg_addrs);
+ }
+
lifn.lifn_family = AF_UNSPEC;
- lifn.lifn_flags = LIFC_ALLZONES;
+ lifn.lifn_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
+again:
if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
- logperror("initifs: ioctl (get interface numbers)");
+ logperror("initifs: ioctl (get interface count)");
return;
}
- numifs = lifn.lifn_count;
+ /*
+ * Pad the interface count to detect when additional interfaces have
+ * been configured between SIOCGLIFNUM and SIOCGLIFCONF.
+ */
+ lifn.lifn_count += 4;
- buf = (char *)calloc(numifs, sizeof (struct lifreq));
- if (buf == NULL) {
+ if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
logperror("initifs: calloc");
return;
}
lifc.lifc_family = AF_UNSPEC;
- lifc.lifc_flags = LIFC_ALLZONES;
- lifc.lifc_len = numifs * sizeof (struct lifreq);
+ lifc.lifc_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
+ lifc.lifc_len = lifn.lifn_count * sizeof (struct lifreq);
lifc.lifc_buf = buf;
if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
- /*
- * EINVAL is commonly encountered, when things change
- * underneath us rapidly, (eg. at boot, when new interfaces
- * are plumbed successively) and the kernel finds the buffer
- * size we passed as too small. We will retry again
- * when we see the next routing socket msg, or at worst after
- * IF_SCAN_INTERVAL ms.
- */
- if (errno != EINVAL) {
- logperror("initifs: ioctl"
- " (get interface configuration)");
- }
+ logperror("initifs: ioctl (get interface configuration)");
free(buf);
return;
}
- lifr = (struct lifreq *)lifc.lifc_req;
-
/*
- * For each lifreq returned by SIOGGLIFCONF, call pii_process()
- * and get the state of the corresponding phyint_instance. If it is
- * successful, then call logint_init_from_k() to get the state of the
- * logint.
+ * If every lifc_req slot is taken, then additional interfaces must
+ * have been plumbed between the SIOCGLIFNUM and the SIOCGLIFCONF.
+ * Recalculate to make sure we didn't miss any interfaces.
*/
- for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifr++) {
- int sockfd;
- struct local_addr *taddr;
- struct sockaddr_in *sin;
- struct sockaddr_in6 *sin6;
- struct lifreq lifreq;
+ nlifr = lifc.lifc_len / sizeof (struct lifreq);
+ if (nlifr >= lifn.lifn_count) {
+ free(buf);
+ goto again;
+ }
+ /*
+ * Walk through the lifreqs returned by SIOCGLIFCONF, and refresh the
+ * global list of addresses, phyint groups, phyints, and logints.
+ */
+ for (lifr = lifc.lifc_req, i = 0; i < nlifr; i++, lifr++) {
af = lifr->lifr_addr.ss_family;
-
- /*
- * Collect all local addresses.
- */
sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
- (void) memset(&lifreq, 0, sizeof (lifreq));
- (void) strlcpy(lifreq.lifr_name, lifr->lifr_name,
- sizeof (lifreq.lifr_name));
+ (void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
if (errno != ENXIO)
logperror("initifs: ioctl (SIOCGLIFFLAGS)");
continue;
}
+ flags = lifreq.lifr_flags;
+
+ /*
+ * If the address is IFF_UP, add it to the local address list.
+ * (We ignore addresses that aren't IFF_UP since another node
+ * might legitimately have that address IFF_UP.)
+ */
+ if (flags & IFF_UP) {
+ (void) addrlist_add(&localaddrs, lifr->lifr_name, flags,
+ &lifr->lifr_addr);
+ }
/*
- * Add the interface address to laddr_list.
- * Another node might have the same IP address which is up.
- * In that case, it is appropriate to use the address as a
- * target, even though it is also configured (but not up) on
- * the local system.
- * Hence,the interface address is not added to laddr_list
- * unless it is IFF_UP.
+ * If this address is on an IPMP meta-interface, update our
+ * phyint_group information (either by recording that group
+ * still exists or creating a new group), and track what
+ * group the address is part of.
*/
- if (lifreq.lifr_flags & IFF_UP) {
- taddr = malloc(sizeof (struct local_addr));
- if (taddr == NULL) {
- logperror("initifs: malloc");
+ if (flags & IFF_IPMP) {
+ if (ioctl(sockfd, SIOCGLIFGROUPNAME, &lifreq) == -1) {
+ if (errno != ENXIO)
+ logperror("initifs: ioctl "
+ "(SIOCGLIFGROUPNAME)");
continue;
}
- if (af == AF_INET) {
- sin = (struct sockaddr_in *)&lifr->lifr_addr;
- IN6_INADDR_TO_V4MAPPED(&sin->sin_addr,
- &taddr->addr);
- } else {
- sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
- taddr->addr = sin6->sin6_addr;
+
+ pg = phyint_group_lookup(lifreq.lifr_groupname);
+ if (pg == NULL) {
+ pg = phyint_group_create(lifreq.lifr_groupname);
+ if (pg == NULL) {
+ logerr("initifs: cannot create group "
+ "%s\n", lifreq.lifr_groupname);
+ continue;
+ }
+ phyint_group_insert(pg);
+ }
+ pg->pg_in_use = _B_TRUE;
+
+ /*
+ * Add this to the group's list of data addresses.
+ */
+ if (!addrlist_add(&pg->pg_addrs, lifr->lifr_name, flags,
+ &lifr->lifr_addr)) {
+ logerr("initifs: insufficient memory to track "
+ "data address information for %s\n",
+ lifr->lifr_name);
}
- taddr->next = laddr_list;
- laddr_list = taddr;
+ continue;
}
/*
- * Need to pass a phyint name to pii_process. Insert the
- * null where the ':' IF_SEPARATOR is found in the logical
- * name.
+ * This isn't an address on an IPMP meta-interface, so it's
+ * either on an underlying interface or not related to any
+ * group. Update our phyint and logint information (via
+ * pii_process() and logint_init_from_k()) -- but first,
+ * convert the logint name to a phyint name so we can call
+ * pii_process().
*/
(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
*cp = '\0';
- exists = pii_process(af, pi_name, &pii);
- if (exists) {
+ if (pii_process(af, pi_name, &pii)) {
/* The phyint is fine. So process the logint */
logint_init_from_k(pii, lifr->lifr_name);
check_addr_unique(pii, &lifr->lifr_addr);
}
-
}
-
free(buf);
/*
- * Scan for phyints and logints that have disappeared from the
+ * Scan for groups, phyints and logints that have disappeared from the
* kernel, and delete them.
*/
for (pii = phyint_instances; pii != NULL; pii = next_pii) {
@@ -449,70 +427,31 @@ initifs()
check_if_removed(pii);
}
+ for (pg = phyint_groups; pg != NULL; pg = next_pg) {
+ next_pg = pg->pg_next;
+ if (!pg->pg_in_use) {
+ phyint_group_delete(pg);
+ continue;
+ }
+ /*
+ * Refresh the group's state. This is necessary since the
+ * group's state is defined by the set of usable interfaces in
+ * the group, and an interface is considered unusable if all
+ * of its addresses are down. When an address goes down/up,
+ * the RTM_DELADDR/RTM_NEWADDR brings us through here.
+ */
+ phyint_group_refresh_state(pg);
+ }
+
/*
* Select a test address for sending probes on each phyint instance
*/
select_test_ifs();
/*
- * Handle link up/down notifications from the NICs.
+ * Handle link up/down notifications.
*/
process_link_state_changes();
-
- for (pi = phyints; pi != NULL; pi = pi->pi_next) {
- /*
- * If this is a case of group failure, we don't have much
- * to do until the group recovers again.
- */
- if (GROUP_FAILED(pi->pi_group))
- continue;
-
- /*
- * Try/Retry any pending failovers / failbacks, that did not
- * not complete, or that could not be initiated previously.
- * This implements the 3 invariants described in the big block
- * comment at the beginning of probe.c
- */
- if (pi->pi_flags & IFF_INACTIVE) {
- if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
- (void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
- } else {
- struct phyint_instance *pii;
-
- /*
- * Skip LINK UP interfaces which are not capable
- * of probing.
- */
- pii = pi->pi_v4;
- if (pii == NULL ||
- (LINK_UP(pi) && !PROBE_CAPABLE(pii))) {
- pii = pi->pi_v6;
- if (pii == NULL ||
- (LINK_UP(pi) && !PROBE_CAPABLE(pii)))
- continue;
- }
-
- /*
- * It is possible that the phyint has started
- * receiving packets, after it has been marked
- * PI_FAILED. Don't initiate failover, if the
- * phyint has started recovering. failure_state()
- * captures this check. A similar logic is used
- * for failback/repair case.
- */
- if (pi->pi_state == PI_FAILED && !pi->pi_empty &&
- (failure_state(pii) == PHYINT_FAILURE)) {
- (void) try_failover(pi, FAILOVER_NORMAL);
- } else if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
- if (try_failback(pi) != IPMP_FAILURE) {
- (void) change_lif_flags(pi, IFF_FAILED,
- _B_FALSE);
- /* Per state diagram */
- pi->pi_empty = 0;
- }
- }
- }
- }
}
/*
@@ -569,7 +508,7 @@ check_addr_unique(struct phyint_instance *ourpii, struct sockaddr_storage *ss)
* The probe socket is closed on each interface instance, and the
* interface state set to PI_OFFLINE.
*/
-static void
+void
stop_probing(struct phyint *pi)
{
struct phyint_instance *pii;
@@ -631,7 +570,6 @@ select_test_ifs(void)
struct logint *li;
struct logint *probe_logint;
boolean_t target_scan_reqd = _B_FALSE;
- struct target *tg;
int rating;
if (debug & D_PHYINT)
@@ -645,8 +583,8 @@ select_test_ifs(void)
probe_logint = NULL;
/*
- * An interface that is offline, should not be probed.
- * Offline interfaces should always in PI_OFFLINE state,
+ * An interface that is offline should not be probed.
+ * IFF_OFFLINE interfaces should always be PI_OFFLINE
* unless some other entity has set the offline flag.
*/
if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
@@ -659,6 +597,15 @@ select_test_ifs(void)
stop_probing(pii->pii_phyint);
}
continue;
+ } else {
+ /*
+ * If something cleared IFF_OFFLINE (e.g., by accident
+ * because the SIOCGLIFFLAGS/SIOCSLIFFLAGS sequence is
+ * inherently racy), the phyint may still be offline.
+ * Just ignore it.
+ */
+ if (pii->pii_phyint->pi_state == PI_OFFLINE)
+ continue;
}
li = pii->pii_probe_logint;
@@ -776,17 +723,6 @@ select_test_ifs(void)
phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
}
- if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
- tg = pii->pii_targets;
- if (tg != NULL)
- target_delete(tg);
- assert(pii->pii_targets == NULL);
- assert(pii->pii_target_next == NULL);
- assert(pii->pii_ntargets == 0);
- target_create(pii, probe_logint->li_dstaddr,
- _B_TRUE);
- }
-
/*
* If no targets are currently known for this phyint
* we need to call init_router_targets. Since
@@ -806,15 +742,16 @@ select_test_ifs(void)
}
/*
- * Check the interface list for any interfaces that are marked
- * PI_FAILED but no longer enabled to send probes, and call
- * phyint_check_for_repair() to see if the link now indicates that the
- * interface should be repaired. Also see the state diagram in
+ * Scan the interface list for any interfaces that are PI_FAILED or
+ * PI_NOTARGETS but no longer enabled to send probes, and call
+ * phyint_check_for_repair() to see if the link state indicates that
+ * the interface should be repaired. Also see the state diagram in
* mpd_probe.c.
*/
for (pi = phyints; pi != NULL; pi = pi->pi_next) {
- if (pi->pi_state == PI_FAILED &&
- !PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) {
+ if ((!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) &&
+ (pi->pi_state == PI_FAILED ||
+ pi->pi_state == PI_NOTARGETS)) {
phyint_check_for_repair(pi);
}
}
@@ -875,15 +812,14 @@ check_testconfig(void)
pi->pi_v6->pii_probe_logint->li_dupaddr)
li = pi->pi_v6->pii_probe_logint;
- if (li != NULL) {
- if (!pi->pi_duptaddrmsg_printed) {
- (void) pr_addr(li->li_phyint_inst->pii_af,
- li->li_addr, abuf, sizeof (abuf));
- logerr("Test address %s is not unique in "
- "group; disabling probe-based failure "
- "detection on %s\n", abuf, pi->pi_name);
- pi->pi_duptaddrmsg_printed = 1;
- }
+ if (li != NULL && li->li_dupaddr) {
+ if (pi->pi_duptaddrmsg_printed)
+ continue;
+ logerr("Test address %s is not unique in group; "
+ "disabling probe-based failure detection on %s\n",
+ pr_addr(li->li_phyint_inst->pii_af,
+ li->li_addr, abuf, sizeof (abuf)), pi->pi_name);
+ pi->pi_duptaddrmsg_printed = 1;
continue;
}
@@ -915,10 +851,10 @@ check_config(void)
boolean_t v6_in_group;
/*
- * All phyints of a group must be homogenous to ensure that
- * failover or failback can be done. If any phyint in a group
- * has IPv4 plumbed, check that all phyints have IPv4 plumbed.
- * Do a similar check for IPv6.
+ * All phyints of a group must be homogeneous to ensure that they can
+ * take over for one another. If any phyint in a group has IPv4
+ * plumbed, check that all phyints have IPv4 plumbed. Do a similar
+ * check for IPv6.
*/
for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
if (pg == phyint_anongroup)
@@ -949,9 +885,9 @@ check_config(void)
if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) {
if (!pi->pi_cfgmsg_printed) {
- logerr("NIC %s of group %s is"
- " not plumbed for IPv4 and may"
- " affect failover capability\n",
+ logerr("IP interface %s in group %s is"
+ " not plumbed for IPv4, affecting"
+ " IPv4 connectivity\n",
pi->pi_name,
pi->pi_group->pg_name);
pi->pi_cfgmsg_printed = 1;
@@ -959,9 +895,9 @@ check_config(void)
} else if (v6_in_group == _B_TRUE &&
pi->pi_v6 == NULL) {
if (!pi->pi_cfgmsg_printed) {
- logerr("NIC %s of group %s is"
- " not plumbed for IPv6 and may"
- " affect failover capability\n",
+ logerr("IP interface %s in group %s is"
+ " not plumbed for IPv6, affecting"
+ " IPv6 connectivity\n",
pi->pi_name,
pi->pi_group->pg_name);
pi->pi_cfgmsg_printed = 1;
@@ -974,10 +910,10 @@ check_config(void)
* error recovery message
*/
if (pi->pi_cfgmsg_printed) {
- logerr("NIC %s is now consistent with "
- "group %s and failover capability "
- "is restored\n", pi->pi_name,
- pi->pi_group->pg_name);
+ logerr("IP interface %s is now"
+ " consistent with group %s "
+ " and connectivity is restored\n",
+ pi->pi_name, pi->pi_group->pg_name);
pi->pi_cfgmsg_printed = 0;
}
}
@@ -1117,8 +1053,8 @@ run_timeouts(void)
static int eventpipe_read = -1; /* Used for synchronous signal delivery */
static int eventpipe_write = -1;
-static boolean_t cleanup_started = _B_FALSE;
- /* Don't write to eventpipe if in cleanup */
+boolean_t cleanup_started = _B_FALSE; /* true if we're going away */
+
/*
* Ensure that signals are processed synchronously with the rest of
* the code by just writing a one character signal number on the pipe.
@@ -1228,7 +1164,7 @@ in_signal(int fd)
"Number of probes sent %lld\n"
"Number of probe acks received %lld\n"
"Number of probes/acks lost %lld\n"
- "Number of valid unacknowled probes %lld\n"
+ "Number of valid unacknowledged probes %lld\n"
"Number of ambiguous probe acks received %lld\n",
AF_STR(pii->pii_af), pii->pii_name,
sent, acked, lost, unacked, unknown);
@@ -1321,12 +1257,20 @@ setup_rtsock(int af)
{
int s;
int flags;
+ int aware = RTAW_UNDER_IPMP;
s = socket(PF_ROUTE, SOCK_RAW, af);
if (s == -1) {
logperror("setup_rtsock: socket PF_ROUTE");
exit(1);
}
+
+ if (setsockopt(s, SOL_ROUTE, RT_AWARE, &aware, sizeof (aware)) == -1) {
+ logperror("setup_rtsock: setsockopt RT_AWARE");
+ (void) close(s);
+ exit(1);
+ }
+
if ((flags = fcntl(s, F_GETFL, 0)) < 0) {
logperror("setup_rtsock: fcntl F_GETFL");
(void) close(s);
@@ -1347,8 +1291,7 @@ setup_rtsock(int af)
/*
* Process an RTM_IFINFO message received on a routing socket.
* The return value indicates whether a full interface scan is required.
- * Link up/down notifications from the NICs are reflected in the
- * IFF_RUNNING flag.
+ * Link up/down notifications are reflected in the IFF_RUNNING flag.
* If just the state of the IFF_RUNNING interface flag has changed, a
* a full interface scan isn't required.
*/
@@ -1400,7 +1343,7 @@ process_rtm_ifinfo(if_msghdr_t *ifm, int type)
/*
* We want to try and avoid doing a full interface scan for
- * link state notifications from the NICs, as indicated
+ * link state notifications from the datalink layer, as indicated
* by the state of the IFF_RUNNING flag. If just the
* IFF_RUNNING flag has changed state, the link state changes
* are processed without a full scan.
@@ -1441,25 +1384,7 @@ process_rtm_ifinfo(if_msghdr_t *ifm, int type)
* types.
*/
if ((old_flags ^ pii->pii_flags) & IFF_STANDBY)
- phyint_newtype(pi);
-
- /*
- * If IFF_INACTIVE has been set, then no data addresses should be
- * hosted on the interface. If IFF_INACTIVE has been cleared, then
- * move previously failed-over addresses back to it, provided it is
- * not failed. For details, see the state diagram in mpd_probe.c.
- */
- if ((old_flags ^ pii->pii_flags) & IFF_INACTIVE) {
- if (pii->pii_flags & IFF_INACTIVE) {
- if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
- (void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
- } else {
- if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
- pi->pi_empty = 0;
- (void) try_failback(pi);
- }
- }
- }
+ phyint_changed(pi);
/* Has just the IFF_RUNNING flag changed state ? */
if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
@@ -1620,22 +1545,24 @@ update_router_list(int fd)
t_scalar_t prim;
tor = (struct T_optmgmt_req *)&buf;
-
tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
tor->OPT_offset = sizeof (struct T_optmgmt_req);
tor->OPT_length = sizeof (struct opthdr);
tor->MGMT_flags = T_CURRENT;
+ /*
+ * Note: we use the special level value below so that IP will return
+ * us information concerning IRE_MARK_TESTHIDDEN routes.
+ */
req = (struct opthdr *)&tor[1];
- req->level = MIB2_IP; /* any MIB2_xxx value ok here */
+ req->level = EXPER_IP_AND_TESTHIDDEN;
req->name = 0;
req->len = 0;
ctlbuf.buf = (char *)&buf;
ctlbuf.len = tor->OPT_length + tor->OPT_offset;
ctlbuf.maxlen = sizeof (buf);
- flags = 0;
- if (putmsg(fd, &ctlbuf, NULL, flags) == -1) {
+ if (putmsg(fd, &ctlbuf, NULL, 0) == -1) {
logperror("update_router_list: putmsg(ctl)");
return (_B_FALSE);
}
@@ -1689,7 +1616,8 @@ update_router_list(int fd)
case T_OPTMGMT_ACK:
toa = &buf.uprim.optmgmt_ack;
optp = (struct opthdr *)&toa[1];
- if (ctlbuf.len < sizeof (struct T_optmgmt_ack)) {
+ if (ctlbuf.len < (sizeof (struct T_optmgmt_ack) +
+ sizeof (struct opthdr))) {
logerr("update_router_list: ctlbuf.len %d\n",
ctlbuf.len);
return (_B_FALSE);
@@ -1707,7 +1635,7 @@ update_router_list(int fd)
return (_B_FALSE);
}
- /* Process the T_OPGMGMT_ACK below */
+ /* Process the T_OPTMGMT_ACK below */
assert(prim == T_OPTMGMT_ACK);
switch (status) {
@@ -1717,9 +1645,8 @@ update_router_list(int fd)
* message. If this is the last message i.e EOD,
* return, else process the next T_OPTMGMT_ACK msg.
*/
- if ((ctlbuf.len == sizeof (struct T_optmgmt_ack) +
- sizeof (struct opthdr)) && optp->len == 0 &&
- optp->name == 0 && optp->level == 0) {
+ if (optp->len == 0 && optp->name == 0 &&
+ optp->level == 0) {
/*
* This is the EOD message. Return
*/
@@ -1747,17 +1674,14 @@ update_router_list(int fd)
databuf.len = 0;
flags = 0;
for (;;) {
- status = getmsg(fd, NULL, &databuf, &flags);
- if (status >= 0) {
+ if (getmsg(fd, NULL, &databuf, &flags) >= 0)
break;
- } else if (errno == EINTR) {
+ if (errno == EINTR)
continue;
- } else {
- logperror("update_router_list:"
- " getmsg(data)");
- free(databuf.buf);
- return (_B_FALSE);
- }
+
+ logperror("update_router_list: getmsg(data)");
+ free(databuf.buf);
+ return (_B_FALSE);
}
if (optp->level == MIB2_IP &&
@@ -1777,18 +1701,35 @@ update_router_list(int fd)
/* NOTREACHED */
}
+
+/*
+ * Convert octet string `octp' to a phyint name and store it in `ifname'.
+ */
+static void
+oct2ifname(const Octet_t *octp, char *ifname, size_t ifsize)
+{
+ char *cp;
+ size_t len = MIN(octp->o_length, ifsize - 1);
+
+ (void) strncpy(ifname, octp->o_bytes, len);
+ ifname[len] = '\0';
+
+ if ((cp = strchr(ifname, IF_SEPARATOR)) != NULL)
+ *cp = '\0';
+}
+
/*
- * Examine the IPv4 routing table, for default routers. For each default
- * router, populate the list of targets of each phyint that is on the same
- * link as the default router
+ * Examine the IPv4 routing table `buf' for possible targets. For each
+ * possible target, if it's on the same subnet as an interface route, pass
+ * it to router_add_common() for further consideration.
*/
static void
ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
{
- mib2_ipRouteEntry_t *rp;
- mib2_ipRouteEntry_t *rp1;
- struct in_addr nexthop_v4;
- mib2_ipRouteEntry_t *endp;
+ char ifname[LIFNAMSIZ];
+ mib2_ipRouteEntry_t *rp, *rp1, *endp;
+ struct in_addr nexthop_v4;
+ struct in6_addr nexthop;
if (len == 0)
return;
@@ -1797,75 +1738,40 @@ ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
endp = buf + (len / sizeof (mib2_ipRouteEntry_t));
/*
- * Loop thru the routing table entries. Process any IRE_DEFAULT,
- * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
- * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
- * This is a potential target for probing, which we try to add
- * to the list of probe targets.
+ * Scan the routing table entries for any IRE_OFFSUBNET entries, and
+ * cross-reference them with the interface routes to determine if
+ * they're possible probe targets.
*/
for (rp = buf; rp < endp; rp++) {
if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
continue;
- /* Get the nexthop address. */
+ /* Get the nexthop address. */
nexthop_v4.s_addr = rp->ipRouteNextHop;
/*
- * Get the nexthop address. Then determine the outgoing
- * interface, by examining all interface IREs, and picking the
- * match. We don't look at the interface specified in the route
- * because we need to add the router target on all matching
- * interfaces anyway; the goal is to avoid falling back to
- * multicast when some interfaces are in the same subnet but
- * not in the same group.
+ * Rescan the routing table looking for interface routes that
+ * are on the same subnet, and try to add them. If they're
+ * not relevant (e.g., the interface route isn't part of an
+ * IPMP group), router_add_common() will discard them.
*/
for (rp1 = buf; rp1 < endp; rp1++) {
- if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE)) {
+ if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE) ||
+ rp1->ipRouteIfIndex.o_length == 0)
continue;
- }
- /*
- * Determine the interface IRE that matches the nexthop.
- * i.e. (IRE addr & IRE mask) == (nexthop & IRE mask)
- */
- if ((rp1->ipRouteDest & rp1->ipRouteMask) ==
- (nexthop_v4.s_addr & rp1->ipRouteMask)) {
- /*
- * We found the interface ire
- */
- router_add_v4(rp1, nexthop_v4);
- }
+ if ((rp1->ipRouteDest & rp1->ipRouteMask) !=
+ (nexthop_v4.s_addr & rp1->ipRouteMask))
+ continue;
+
+ oct2ifname(&rp1->ipRouteIfIndex, ifname, LIFNAMSIZ);
+ IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
+ router_add_common(AF_INET, ifname, nexthop);
}
}
}
void
-router_add_v4(mib2_ipRouteEntry_t *rp1, struct in_addr nexthop_v4)
-{
- char *cp;
- char ifname[LIFNAMSIZ + 1];
- struct in6_addr nexthop;
- int len;
-
- if (debug & D_TARGET)
- logdebug("router_add_v4()\n");
-
- len = MIN(rp1->ipRouteIfIndex.o_length, sizeof (ifname) - 1);
- (void) memcpy(ifname, rp1->ipRouteIfIndex.o_bytes, len);
- ifname[len] = '\0';
-
- if (ifname[0] == '\0')
- return;
-
- cp = strchr(ifname, IF_SEPARATOR);
- if (cp != NULL)
- *cp = '\0';
-
- IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
- router_add_common(AF_INET, ifname, nexthop);
-}
-
-void
router_add_common(int af, char *ifname, struct in6_addr nexthop)
{
struct phyint_instance *pii;
@@ -1906,16 +1812,17 @@ router_add_common(int af, char *ifname, struct in6_addr nexthop)
}
/*
- * Examine the IPv6 routing table, for default routers. For each default
- * router, populate the list of targets of each phyint that is on the same
- * link as the default router
+ * Examine the IPv6 routing table `buf' for possible link-local targets, and
+ * pass any contenders to router_add_common() for further consideration.
*/
static void
ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
{
- mib2_ipv6RouteEntry_t *rp;
- mib2_ipv6RouteEntry_t *endp;
- struct in6_addr nexthop_v6;
+ struct lifreq lifr;
+ char ifname[LIFNAMSIZ];
+ char grname[LIFGRNAMSIZ];
+ mib2_ipv6RouteEntry_t *rp, *rp1, *endp;
+ struct in6_addr nexthop_v6;
if (debug & D_TARGET)
logdebug("ire_process_v6(len %d)\n", len);
@@ -1927,62 +1834,51 @@ ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
endp = buf + (len / sizeof (mib2_ipv6RouteEntry_t));
/*
- * Loop thru the routing table entries. Process any IRE_DEFAULT,
- * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
- * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
- * This is a potential target for probing, which we try to add
- * to the list of probe targets.
+ * Scan the routing table entries for any IRE_OFFSUBNET entries, and
+ * cross-reference them with the interface routes to determine if
+ * they're possible probe targets.
*/
for (rp = buf; rp < endp; rp++) {
- if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET))
+ if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET) ||
+ !IN6_IS_ADDR_LINKLOCAL(&rp->ipv6RouteNextHop))
continue;
- /*
- * We have the outgoing interface in ipv6RouteIfIndex
- * if ipv6RouteIfindex.o_length is non-zero. The outgoing
- * interface must be present for link-local addresses. Since
- * we use only link-local addreses for probing, we don't
- * consider the case when the outgoing interface is not
- * known and we need to scan interface ires
- */
+ /* Get the nexthop address. */
nexthop_v6 = rp->ipv6RouteNextHop;
- if (rp->ipv6RouteIfIndex.o_length != 0) {
- /*
- * We already have the outgoing interface
- * in ipv6RouteIfIndex.
- */
- router_add_v6(rp, nexthop_v6);
- }
- }
-}
-
-void
-router_add_v6(mib2_ipv6RouteEntry_t *rp1, struct in6_addr nexthop_v6)
-{
- char ifname[LIFNAMSIZ + 1];
- char *cp;
- int len;
-
- if (debug & D_TARGET)
- logdebug("router_add_v6()\n");
-
- len = MIN(rp1->ipv6RouteIfIndex.o_length, sizeof (ifname) - 1);
- (void) memcpy(ifname, rp1->ipv6RouteIfIndex.o_bytes, len);
- ifname[len] = '\0';
+ /*
+ * The interface name should always exist for link-locals;
+ * we use it to map this entry to an IPMP group name.
+ */
+ if (rp->ipv6RouteIfIndex.o_length == 0)
+ continue;
- if (ifname[0] == '\0')
- return;
+ oct2ifname(&rp->ipv6RouteIfIndex, lifr.lifr_name, LIFNAMSIZ);
+ if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) == -1 ||
+ strlcpy(grname, lifr.lifr_groupname, LIFGRNAMSIZ) == 0) {
+ continue;
+ }
- cp = strchr(ifname, IF_SEPARATOR);
- if (cp != NULL)
- *cp = '\0';
+ /*
+ * Rescan the list of routes for interface routes, and add the
+ * above target to any interfaces in the same IPMP group.
+ */
+ for (rp1 = buf; rp1 < endp; rp1++) {
+ if (!(rp1->ipv6RouteInfo.re_ire_type & IRE_INTERFACE) ||
+ rp1->ipv6RouteIfIndex.o_length == 0) {
+ continue;
+ }
+ oct2ifname(&rp1->ipv6RouteIfIndex, ifname, LIFNAMSIZ);
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
- router_add_common(AF_INET6, ifname, nexthop_v6);
+ if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) != -1 &&
+ strcmp(lifr.lifr_groupname, grname) == 0) {
+ router_add_common(AF_INET6, ifname, nexthop_v6);
+ }
+ }
+ }
}
-
-
/*
* Build a list of target routers, by scanning the routing tables.
* It is assumed that interface routes exist, to reach the routers.
@@ -2001,11 +1897,9 @@ init_router_targets(void)
for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
pi = pii->pii_phyint;
/*
- * Exclude ptp and host targets. Set tg_in_use to false,
- * only for router targets.
+ * Set tg_in_use to false only for router targets.
*/
- if (!pii->pii_targets_are_routers ||
- (pi->pi_flags & IFF_POINTOPOINT))
+ if (!pii->pii_targets_are_routers)
continue;
for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
@@ -2026,15 +1920,21 @@ init_router_targets(void)
}
for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
- if (!pii->pii_targets_are_routers ||
- (pi->pi_flags & IFF_POINTOPOINT))
+ pi = pii->pii_phyint;
+ if (!pii->pii_targets_are_routers)
continue;
for (tg = pii->pii_targets; tg != NULL; tg = next_tg) {
next_tg = tg->tg_next;
- if (!tg->tg_in_use) {
+ /*
+ * If the group has failed, it's likely the route was
+ * removed by an application affected by that failure.
+ * In that case, we keep the target so that we can
+ * reliably repair, at which point we'll refresh the
+ * target list again.
+ */
+ if (!tg->tg_in_use && !GROUP_FAILED(pi->pi_group))
target_delete(tg);
- }
}
}
}
@@ -2140,7 +2040,7 @@ getdefault(char *name)
* Command line options below
*/
boolean_t failback_enabled = _B_TRUE; /* failback enabled/disabled */
-boolean_t track_all_phyints = _B_FALSE; /* option to track all NICs */
+boolean_t track_all_phyints = _B_FALSE; /* track all IP interfaces */
static boolean_t adopt = _B_FALSE;
static boolean_t foreground = _B_FALSE;
@@ -2149,6 +2049,7 @@ main(int argc, char *argv[])
{
int i;
int c;
+ struct phyint *pi;
struct phyint_instance *pii;
char *value;
@@ -2173,14 +2074,15 @@ main(int argc, char *argv[])
if (user_failure_detection_time <= 0) {
user_failure_detection_time = FAILURE_DETECTION_TIME;
logerr("Invalid failure detection time %s, assuming "
- "default %d\n", value, user_failure_detection_time);
+ "default of %d ms\n", value,
+ user_failure_detection_time);
} else if (user_failure_detection_time <
MIN_FAILURE_DETECTION_TIME) {
user_failure_detection_time =
MIN_FAILURE_DETECTION_TIME;
logerr("Too small failure detection time of %s, "
- "assuming minimum %d\n", value,
+ "assuming minimum of %d ms\n", value,
user_failure_detection_time);
}
free(value);
@@ -2211,9 +2113,9 @@ main(int argc, char *argv[])
*/
value = getdefault("FAILBACK");
if (value != NULL) {
- if (strncasecmp(value, "yes", 3) == 0)
+ if (strcasecmp(value, "yes") == 0)
failback_enabled = _B_TRUE;
- else if (strncasecmp(value, "no", 2) == 0)
+ else if (strcasecmp(value, "no") == 0)
failback_enabled = _B_FALSE;
else
logerr("Invalid value for FAILBACK %s\n", value);
@@ -2229,9 +2131,9 @@ main(int argc, char *argv[])
*/
value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
if (value != NULL) {
- if (strncasecmp(value, "yes", 3) == 0)
+ if (strcasecmp(value, "yes") == 0)
track_all_phyints = _B_FALSE;
- else if (strncasecmp(value, "no", 2) == 0)
+ else if (strcasecmp(value, "no") == 0)
track_all_phyints = _B_TRUE;
else
logerr("Invalid value for "
@@ -2340,12 +2242,6 @@ main(int argc, char *argv[])
initifs();
- /* Inform kernel whether failback is enabled or disabled */
- if (ioctl(ifsock_v4, SIOCSIPMPFAILBACK, (int *)&failback_enabled) < 0) {
- logperror("main: ioctl (SIOCSIPMPFAILBACK)");
- exit(1);
- }
-
/*
* If we're operating in "adopt" mode and no interfaces need to be
* tracked, shut down (ifconfig(1M) will restart us on demand if
@@ -2379,6 +2275,7 @@ main(int argc, char *argv[])
process_rtsock(rtsock_v4, rtsock_v6);
break;
}
+
for (pii = phyint_instances; pii != NULL;
pii = pii->pii_next) {
if (pollfds[i].fd == pii->pii_probe_sock) {
@@ -2389,15 +2286,21 @@ main(int argc, char *argv[])
break;
}
}
+
+ for (pi = phyints; pi != NULL; pi = pi->pi_next) {
+ if (pi->pi_notes != 0 &&
+ pollfds[i].fd == dlpi_fd(pi->pi_dh)) {
+ (void) dlpi_recv(pi->pi_dh, NULL, NULL,
+ NULL, NULL, 0, NULL);
+ break;
+ }
+ }
+
if (pollfds[i].fd == lsock_v4)
loopback_cmd(lsock_v4, AF_INET);
else if (pollfds[i].fd == lsock_v6)
loopback_cmd(lsock_v6, AF_INET6);
}
- if (full_scan_required) {
- initifs();
- full_scan_required = _B_FALSE;
- }
}
/* NOTREACHED */
return (EXIT_SUCCESS);
@@ -2481,29 +2384,23 @@ static struct {
{ "MI_PING", sizeof (uint32_t) },
{ "MI_OFFLINE", sizeof (mi_offline_t) },
{ "MI_UNDO_OFFLINE", sizeof (mi_undo_offline_t) },
- { "MI_SETOINDEX", sizeof (mi_setoindex_t) },
{ "MI_QUERY", sizeof (mi_query_t) }
};
/*
- * Commands received over the loopback interface come here. Currently
- * the agents that send commands are ifconfig, if_mpadm and the RCM IPMP
- * module. ifconfig only makes a connection, and closes it to check if
- * in.mpathd is running.
- * if_mpadm sends commands in the format specified by the mpathd_interface
- * structure.
+ * Commands received over the loopback interface come here (via libipmp).
*/
static void
loopback_cmd(int sock, int family)
{
int newfd;
ssize_t len;
+ boolean_t is_priv = _B_FALSE;
struct sockaddr_storage peer;
struct sockaddr_in *peer_sin;
struct sockaddr_in6 *peer_sin6;
socklen_t peerlen;
union mi_commands mpi;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
char abuf[INET6_ADDRSTRLEN];
uint_t cmd;
int retval;
@@ -2528,10 +2425,11 @@ loopback_cmd(int sock, int family)
return;
}
peer_sin = (struct sockaddr_in *)&peer;
- if ((ntohs(peer_sin->sin_port) >= IPPORT_RESERVED) ||
- (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK)) {
- (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
- abuf, sizeof (abuf));
+ is_priv = ntohs(peer_sin->sin_port) < IPPORT_RESERVED;
+ (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
+ abuf, sizeof (abuf));
+
+ if (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK) {
logerr("Attempt to connect from addr %s port %d\n",
abuf, ntohs(peer_sin->sin_port));
(void) close(newfd);
@@ -2551,11 +2449,10 @@ loopback_cmd(int sock, int family)
* talking to us.
*/
peer_sin6 = (struct sockaddr_in6 *)&peer;
- if ((ntohs(peer_sin6->sin6_port) >= IPPORT_RESERVED) ||
- (!IN6_ARE_ADDR_EQUAL(&peer_sin6->sin6_addr,
- &loopback_addr))) {
- (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
- sizeof (abuf));
+ is_priv = ntohs(peer_sin6->sin6_port) < IPPORT_RESERVED;
+ (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
+ sizeof (abuf));
+ if (!IN6_IS_ADDR_LOOPBACK(&peer_sin6->sin6_addr)) {
logerr("Attempt to connect from addr %s port %d\n",
abuf, ntohs(peer_sin6->sin6_port));
(void) close(newfd);
@@ -2575,15 +2472,6 @@ loopback_cmd(int sock, int family)
len = read(newfd, &mpi, sizeof (mpi));
/*
- * ifconfig does not send any data. Just tests to see if mpathd
- * is already running.
- */
- if (len <= 0) {
- (void) close(newfd);
- return;
- }
-
- /*
* In theory, we can receive any sized message for a stream socket,
* but we don't expect that to happen for a small message over a
* loopback connection.
@@ -2591,6 +2479,8 @@ loopback_cmd(int sock, int family)
if (len < sizeof (uint32_t)) {
logerr("loopback_cmd: bad command format or read returns "
"partial data %d\n", len);
+ (void) close(newfd);
+ return;
}
cmd = mpi.mi_command;
@@ -2600,6 +2490,16 @@ loopback_cmd(int sock, int family)
return;
}
+ /*
+ * Only MI_PING and MI_QUERY can come from unprivileged sources.
+ */
+ if (!is_priv && (cmd != MI_QUERY && cmd != MI_PING)) {
+ logerr("Unprivileged request from %s for privileged "
+ "command %s\n", abuf, commands[cmd].name);
+ (void) close(newfd);
+ return;
+ }
+
if (len < commands[cmd].size) {
logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
commands[cmd].name, commands[cmd].size, len);
@@ -2615,179 +2515,46 @@ loopback_cmd(int sock, int family)
(void) close(newfd);
}
-extern int global_errno; /* set by failover() or failback() */
-
/*
- * Process the offline, undo offline and set original index commands,
- * received from if_mpadm(1M)
+ * Process the commands received via libipmp.
*/
static unsigned int
process_cmd(int newfd, union mi_commands *mpi)
{
- uint_t nif = 0;
- uint32_t cmd;
struct phyint *pi;
- struct phyint *pi2;
- struct phyint_group *pg;
- boolean_t success;
- int error;
struct mi_offline *mio;
struct mi_undo_offline *miu;
- struct lifreq lifr;
- int ifsock;
- struct mi_setoindex *mis;
+ unsigned int retval;
- cmd = mpi->mi_command;
+ switch (mpi->mi_command) {
+ case MI_PING:
+ return (send_result(newfd, IPMP_SUCCESS, 0));
- switch (cmd) {
case MI_OFFLINE:
mio = &mpi->mi_ocmd;
- /*
- * Lookup the interface that needs to be offlined.
- * If it does not exist, return a suitable error.
- */
+
pi = phyint_lookup(mio->mio_ifname);
if (pi == NULL)
- return (send_result(newfd, IPMP_FAILURE, EINVAL));
-
- /*
- * Verify that the minimum redundancy requirements are met.
- * The multipathing group must have at least the specified
- * number of functional interfaces after offlining the
- * requested interface. Otherwise return a suitable error.
- */
- pg = pi->pi_group;
- nif = 0;
- if (pg != phyint_anongroup) {
- for (nif = 0, pi2 = pg->pg_phyint; pi2 != NULL;
- pi2 = pi2->pi_pgnext) {
- if ((pi2->pi_state == PI_RUNNING) ||
- (pg->pg_groupfailed &&
- !(pi2->pi_flags & IFF_OFFLINE)))
- nif++;
- }
- }
- if (nif < mio->mio_min_redundancy)
- return (send_result(newfd, IPMP_EMINRED, 0));
+ return (send_result(newfd, IPMP_EUNKIF, 0));
- /*
- * The order of operation is to set IFF_OFFLINE, followed by
- * failover. Setting IFF_OFFLINE ensures that no new ipif's
- * can be created. Subsequent failover moves everything on
- * the OFFLINE interface to some other functional interface.
- */
- success = change_lif_flags(pi, IFF_OFFLINE, _B_TRUE);
- if (success) {
- if (!pi->pi_empty) {
- error = try_failover(pi, FAILOVER_NORMAL);
- if (error != 0) {
- if (!change_lif_flags(pi, IFF_OFFLINE,
- _B_FALSE)) {
- logerr("process_cmd: couldn't"
- " clear OFFLINE flag on"
- " %s\n", pi->pi_name);
- /*
- * Offline interfaces should
- * not be probed.
- */
- stop_probing(pi);
- }
- return (send_result(newfd, error,
- global_errno));
- }
- }
- } else {
+ retval = phyint_offline(pi, mio->mio_min_redundancy);
+ if (retval == IPMP_FAILURE)
return (send_result(newfd, IPMP_FAILURE, errno));
- }
- /*
- * The interface is now Offline, so stop probing it.
- * Note that if_mpadm(1M) will down the test addresses,
- * after receiving a success reply from us. The routing
- * socket message will then make us close the socket used
- * for sending probes. But it is more logical that an
- * offlined interface must not be probed, even if it has
- * test addresses.
- */
- stop_probing(pi);
- return (send_result(newfd, IPMP_SUCCESS, 0));
+ return (send_result(newfd, retval, 0));
case MI_UNDO_OFFLINE:
miu = &mpi->mi_ucmd;
- /*
- * Undo the offline command. As usual lookup the interface.
- * Send an error if it does not exist or is not offline.
- */
- pi = phyint_lookup(miu->miu_ifname);
- if (pi == NULL || pi->pi_state != PI_OFFLINE)
- return (send_result(newfd, IPMP_FAILURE, EINVAL));
-
- /*
- * Reset the state of the interface based on the current link
- * state; if this phyint subsequently acquires a test address,
- * the state will be updated later as a result of the probes.
- */
- if (LINK_UP(pi))
- phyint_chstate(pi, PI_RUNNING);
- else
- phyint_chstate(pi, PI_FAILED);
-
- if (pi->pi_state == PI_RUNNING) {
- /*
- * Note that the success of MI_UNDO_OFFLINE is not
- * contingent on actually failing back; in the odd
- * case where we cannot do it here, we will try again
- * in initifs() since pi->pi_full will still be zero.
- */
- if (do_failback(pi) != IPMP_SUCCESS) {
- logdebug("process_cmd: cannot failback from "
- "%s during MI_UNDO_OFFLINE\n", pi->pi_name);
- }
- }
-
- /*
- * Clear the IFF_OFFLINE flag. We have to do this last
- * because do_failback() relies on it being set to decide
- * when to display messages.
- */
- (void) change_lif_flags(pi, IFF_OFFLINE, _B_FALSE);
-
- /*
- * Give the requestor time to configure test addresses
- * before complaining that they're missing.
- */
- pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME;
-
- return (send_result(newfd, IPMP_SUCCESS, 0));
-
- case MI_SETOINDEX:
- mis = &mpi->mi_scmd;
- /* Get the socket for doing ioctls */
- ifsock = (mis->mis_iftype == AF_INET) ? ifsock_v4 : ifsock_v6;
-
- /*
- * Get index of new original interface.
- * The index is returned in lifr.lifr_index.
- */
- (void) strlcpy(lifr.lifr_name, mis->mis_new_pifname,
- sizeof (lifr.lifr_name));
+ pi = phyint_lookup(miu->miu_ifname);
+ if (pi == NULL)
+ return (send_result(newfd, IPMP_EUNKIF, 0));
- if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0)
+ retval = phyint_undo_offline(pi);
+ if (retval == IPMP_FAILURE)
return (send_result(newfd, IPMP_FAILURE, errno));
- /*
- * Set new original interface index.
- * The new index was put into lifr.lifr_index by the
- * SIOCGLIFINDEX ioctl.
- */
- (void) strlcpy(lifr.lifr_name, mis->mis_lifname,
- sizeof (lifr.lifr_name));
-
- if (ioctl(ifsock, SIOCSLIFOINDEX, (char *)&lifr) < 0)
- return (send_result(newfd, IPMP_FAILURE, errno));
-
- return (send_result(newfd, IPMP_SUCCESS, 0));
+ return (send_result(newfd, retval, 0));
case MI_QUERY:
return (process_query(newfd, &mpi->mi_qcmd));
@@ -2806,6 +2573,8 @@ process_cmd(int newfd, union mi_commands *mpi)
static unsigned int
process_query(int fd, mi_query_t *miq)
{
+ ipmp_addrinfo_t *adinfop;
+ ipmp_addrinfolist_t *adlp;
ipmp_groupinfo_t *grinfop;
ipmp_groupinfolist_t *grlp;
ipmp_grouplist_t *grlistp;
@@ -2815,6 +2584,19 @@ process_query(int fd, mi_query_t *miq)
unsigned int retval;
switch (miq->miq_inforeq) {
+ case IPMP_ADDRINFO:
+ retval = getgraddrinfo(miq->miq_grname, &miq->miq_addr,
+ &adinfop);
+ if (retval != IPMP_SUCCESS)
+ return (send_result(fd, retval, errno));
+
+ retval = send_result(fd, IPMP_SUCCESS, 0);
+ if (retval == IPMP_SUCCESS)
+ retval = send_addrinfo(fd, adinfop);
+
+ ipmp_freeaddrinfo(adinfop);
+ return (retval);
+
case IPMP_GROUPLIST:
retval = getgrouplist(&grlistp);
if (retval != IPMP_SUCCESS)
@@ -2829,7 +2611,7 @@ process_query(int fd, mi_query_t *miq)
case IPMP_GROUPINFO:
miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
- retval = getgroupinfo(miq->miq_ifname, &grinfop);
+ retval = getgroupinfo(miq->miq_grname, &grinfop);
if (retval != IPMP_SUCCESS)
return (send_result(fd, retval, errno));
@@ -2854,6 +2636,11 @@ process_query(int fd, mi_query_t *miq)
return (retval);
case IPMP_SNAP:
+ /*
+ * Before taking the snapshot, sync with the kernel.
+ */
+ initifs();
+
retval = getsnap(&snap);
if (retval != IPMP_SUCCESS)
return (send_result(fd, retval, errno));
@@ -2883,6 +2670,13 @@ process_query(int fd, mi_query_t *miq)
if (retval != IPMP_SUCCESS)
goto out;
}
+
+ adlp = snap->sn_adinfolistp;
+ for (; adlp != NULL; adlp = adlp->adl_next) {
+ retval = send_addrinfo(fd, adlp->adl_adinfop);
+ if (retval != IPMP_SUCCESS)
+ goto out;
+ }
out:
ipmp_snap_free(snap);
return (retval);
@@ -2902,14 +2696,20 @@ static unsigned int
send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
{
ipmp_iflist_t *iflistp = grinfop->gr_iflistp;
+ ipmp_addrlist_t *adlistp = grinfop->gr_adlistp;
unsigned int retval;
retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop);
if (retval != IPMP_SUCCESS)
return (retval);
- return (ipmp_writetlv(fd, IPMP_IFLIST,
- IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp));
+ retval = ipmp_writetlv(fd, IPMP_IFLIST,
+ IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp);
+ if (retval != IPMP_SUCCESS)
+ return (retval);
+
+ return (ipmp_writetlv(fd, IPMP_ADDRLIST,
+ IPMP_ADDRLIST_SIZE(adlistp->al_naddr), adlistp));
}
/*
@@ -2919,7 +2719,31 @@ send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
static unsigned int
send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
{
- return (ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop));
+ ipmp_addrlist_t *adlist4p = ifinfop->if_targinfo4.it_targlistp;
+ ipmp_addrlist_t *adlist6p = ifinfop->if_targinfo6.it_targlistp;
+ unsigned int retval;
+
+ retval = ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop);
+ if (retval != IPMP_SUCCESS)
+ return (retval);
+
+ retval = ipmp_writetlv(fd, IPMP_ADDRLIST,
+ IPMP_ADDRLIST_SIZE(adlist4p->al_naddr), adlist4p);
+ if (retval != IPMP_SUCCESS)
+ return (retval);
+
+ return (ipmp_writetlv(fd, IPMP_ADDRLIST,
+ IPMP_ADDRLIST_SIZE(adlist6p->al_naddr), adlist6p));
+}
+
+/*
+ * Send the address information pointed to by `adinfop' on file descriptor
+ * `fd'. Returns an IPMP error code.
+ */
+static unsigned int
+send_addrinfo(int fd, ipmp_addrinfo_t *adinfop)
+{
+ return (ipmp_writetlv(fd, IPMP_ADDRINFO, sizeof (*adinfop), adinfop));
}
/*
@@ -3109,3 +2933,32 @@ close_probe_socket(struct phyint_instance *pii, boolean_t polled)
pii->pii_probe_sock = -1;
pii->pii_basetime_inited = 0;
}
+
+boolean_t
+addrlist_add(addrlist_t **addrsp, const char *name, uint64_t flags,
+ struct sockaddr_storage *ssp)
+{
+ addrlist_t *addrp;
+
+ if ((addrp = malloc(sizeof (addrlist_t))) == NULL)
+ return (_B_FALSE);
+
+ (void) strlcpy(addrp->al_name, name, LIFNAMSIZ);
+ addrp->al_flags = flags;
+ addrp->al_addr = *ssp;
+ addrp->al_next = *addrsp;
+ *addrsp = addrp;
+ return (_B_TRUE);
+}
+
+void
+addrlist_free(addrlist_t **addrsp)
+{
+ addrlist_t *addrp, *next_addrp;
+
+ for (addrp = *addrsp; addrp != NULL; addrp = next_addrp) {
+ next_addrp = addrp->al_next;
+ free(addrp);
+ }
+ *addrsp = NULL;
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c
index a2ff76a983..cf327fbaff 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -20,8 +20,6 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mpd_defs.h"
#include "mpd_tables.h"
@@ -45,7 +43,7 @@ struct pr_icmp
uint16_t pr_icmp_cksum; /* checksum field */
uint16_t pr_icmp_id; /* Identification */
uint16_t pr_icmp_seq; /* sequence number */
- uint32_t pr_icmp_timestamp; /* Time stamp */
+ uint64_t pr_icmp_timestamp; /* Time stamp (in ns) */
uint32_t pr_icmp_mtype; /* Message type */
};
@@ -58,11 +56,12 @@ static struct in_addr all_nodes_mcast_v4 = { { { 0xe0, 0x0, 0x0, 0x1 } } };
static hrtime_t last_fdt_bumpup_time; /* When FDT was bumped up last */
-static void *find_ancillary(struct msghdr *msg, int cmsg_type);
-static void pi_set_crtt(struct target *tg, int m,
+static void *find_ancillary(struct msghdr *msg, int cmsg_level,
+ int cmsg_type);
+static void pi_set_crtt(struct target *tg, int64_t m,
boolean_t is_probe_uni);
static void incoming_echo_reply(struct phyint_instance *pii,
- struct pr_icmp *reply, struct in6_addr fromaddr);
+ struct pr_icmp *reply, struct in6_addr fromaddr, struct timeval *recv_tvp);
static void incoming_rtt_reply(struct phyint_instance *pii,
struct pr_icmp *reply, struct in6_addr fromaddr);
static void incoming_mcast_reply(struct phyint_instance *pii,
@@ -78,13 +77,11 @@ static void probe_success_info(struct phyint_instance *pii,
struct target *cur_tg, struct probe_success_count *psinfo);
static boolean_t phyint_repaired(struct phyint *pi);
-static int failover(struct phyint *from, struct phyint *to);
-static int failback(struct phyint *from, struct phyint *to);
-static struct phyint *get_failover_dst(struct phyint *pi, int failover_type);
-
static boolean_t highest_ack_tg(uint16_t seq, struct target *tg);
static int in_cksum(ushort_t *addr, int len);
static void reset_snxt_basetimes(void);
+static int ns2ms(int64_t ns);
+static int64_t tv2ns(struct timeval *);
/*
* CRTT - Conservative Round Trip Time Estimate
@@ -104,7 +101,7 @@ static void reset_snxt_basetimes(void);
* Phyint state diagram
*
* The state of a phyint that is capable of being probed, is completely
- * specified by the 5-tuple <pi_state, pg_groupfailed, I, pi_empty, pi_full>.
+ * specified by the 3-tuple <pi_state, pg_state, I>.
*
* A phyint starts in either PI_RUNNING or PI_FAILED, depending on the state
* of the link (according to the driver). If the phyint is also configured
@@ -117,8 +114,8 @@ static void reset_snxt_basetimes(void);
* state, which indicates that the link is apparently functional but that
* in.mpathd is unable to send probes to verify functionality (in this case,
* in.mpathd makes the optimistic assumption that the interface is working
- * correctly and thus does not perform a failover, but reports the interface
- * as IPMP_IF_UNKNOWN through the async events and query interfaces).
+ * correctly and thus does not mark the interface FAILED, but reports it as
+ * IPMP_IF_UNKNOWN through the async events and query interfaces).
*
* At any point, a phyint may be administratively marked offline via if_mpadm.
* In this case, the interface always transitions to PI_OFFLINE, regardless
@@ -131,8 +128,11 @@ static void reset_snxt_basetimes(void);
* PI_RUNNING: The failure detection logic says the phyint is good.
* PI_FAILED: The failure detection logic says the phyint has failed.
*
- * pg_groupfailed - Group failure, all interfaces in the group have failed.
- * The pi_state may be either PI_FAILED or PI_NOTARGETS.
+ * pg_state - PG_OK, PG_DEGRADED, or PG_FAILED.
+ * PG_OK: All interfaces in the group are OK.
+ * PG_DEGRADED: Some interfaces in the group are unusable.
+ * PG_FAILED: All interfaces in the group are unusable.
+ *
* In the case of router targets, we assume that the current list of
* targets obtained from the routing table, is still valid, so the
* phyint stat is PI_FAILED. In the case of host targets, we delete the
@@ -140,144 +140,46 @@ static void reset_snxt_basetimes(void);
* target list. So the phyints are in the PI_NOTARGETS state.
*
* I - value of (pi_flags & IFF_INACTIVE)
- * IFF_INACTIVE: No failovers have been done to this phyint, from
- * other phyints. This phyint is inactive. Phyint can be a Standby.
- * When failback has been disabled (FAILOVER=no configured),
- * phyint can also be a non-STANDBY. In this case IFF_INACTIVE
- * is set when phyint subsequently recovers after a failure.
- *
- * pi_empty
- * This phyint has failed over successfully to another phyint, and
- * this phyint is currently "empty". It does not host any addresses or
- * multicast membership etc. This is the state of a phyint after a
- * failover from the phyint has completed successfully and no subsequent
- * 'failover to' or 'failback to' has occurred on the phyint.
- * IP guarantees that no new logicals will be hosted nor any multicast
- * joins permitted on the phyint, since the phyint is either failed or
- * inactive. pi_empty is set implies the phyint is either failed or
- * inactive.
- *
- * pi_full
- * The phyint hosts all of its own addresses that it "owns". If the
- * phyint was previously failed or inactive, failbacks to the phyint
- * has completed successfully. i.e. No more failbacks to this phyint
- * can produce any change in system state whatsoever.
- *
- * Not all 32 possible combinations of the above 5-tuple are possible.
- * Furthermore some of the above combinations are transient. They may occur
- * only because the failover or failback did not complete successfully. The
- * failover/failback will be retried and eventually a stable state will be
- * reached.
- *
- * I is tracked by IP. pi_state, pi_empty and pi_full are tracked by mpathd.
- * The following are the state machines. 'from' and 'to' are the src and
- * dst of the failover/failback, below
- *
- * pi_empty state machine
- * ---------------------------------------------------------------------------
- * Event State -> New State
- * ---------------------------------------------------------------------------
- * successful completion from.pi_empty = 0 -> from.pi_empty = 1
- * of failover
+ * IFF_INACTIVE: This phyint will not send or receive packets.
+ * Usually, inactive is tied to standby interfaces that are not yet
+ * needed (e.g., no non-standby interfaces in the group have failed).
+ * When failback has been disabled (FAILBACK=no configured), phyint can
+ * also be a non-STANDBY. In this case IFF_INACTIVE is set when phyint
+ * subsequently recovers after a failure.
*
- * Initiate failover to.pi_empty = X -> to.pi_empty = 0
+ * Not all 9 possible combinations of the above 3-tuple are possible.
*
- * Initiate failback to.pi_empty = X -> to.pi_empty = 0
- *
- * group failure pi_empty = X -> pi_empty = 0
- * ---------------------------------------------------------------------------
- *
- * pi_full state machine
- * ---------------------------------------------------------------------------
- * Event State -> New State
- * ---------------------------------------------------------------------------
- * successful completion to.pi_full = 0 -> to.pi_full = 1
- * of failback from
- * each of the other phyints
- *
- * Initiate failover from.pi_full = X -> from.pi_full = 0
- *
- * group failure pi_full = X -> pi_full = 0
- * ---------------------------------------------------------------------------
+ * I is tracked by IP. pi_state is tracked by mpathd.
*
* pi_state state machine
* ---------------------------------------------------------------------------
* Event State New State
* Action:
* ---------------------------------------------------------------------------
- * NIC failure (PI_RUNNING, I == 0) -> (PI_FAILED, I == 0)
+ * IP interface failure (PI_RUNNING, I == 0) -> (PI_FAILED, I == 0)
* detection : set IFF_FAILED on this phyint
- * : failover from this phyint to another
*
- * NIC failure (PI_RUNNING, I == 1) -> (PI_FAILED, I == 0)
+ * IP interface failure (PI_RUNNING, I == 1) -> (PI_FAILED, I == 0)
* detection : set IFF_FAILED on this phyint
*
- * NIC repair (PI_FAILED, I == 0, FAILBACK=yes)
+ * IP interface repair (PI_FAILED, I == 0, FAILBACK=yes)
* detection -> (PI_RUNNING, I == 0)
- * : to.pi_empty = 0
* : clear IFF_FAILED on this phyint
- * : failback to this phyint if enabled
*
- * NIC repair (PI_FAILED, I == 0, FAILBACK=no)
+ * IP interface repair (PI_FAILED, I == 0, FAILBACK=no)
* detection -> (PI_RUNNING, I == 1)
- * : to.pi_empty = 0
* : clear IFF_FAILED on this phyint
* : if failback is disabled set I == 1
*
* Group failure (perform on all phyints in the group)
* detection PI_RUNNING PI_FAILED
* (Router targets) : set IFF_FAILED
- * : clear pi_empty and pi_full
*
* Group failure (perform on all phyints in the group)
* detection PI_RUNNING PI_NOTARGETS
* (Host targets) : set IFF_FAILED
- * : clear pi_empty and pi_full
* : delete the target list on all phyints
* ---------------------------------------------------------------------------
- *
- * I state machine
- * ---------------------------------------------------------------------------
- * Event State Action:
- * ---------------------------------------------------------------------------
- * Turn on I pi_empty == 0, STANDBY : failover from standby
- *
- * Turn off I PI_RUNNING, STANDBY : pi_empty = 0
- * pi_full == 0 : failback to this if enabled
- * ---------------------------------------------------------------------------
- *
- * Assertions: (Read '==>' as implies)
- *
- * (pi_empty == 1) ==> (I == 1 || pi_state == PI_FAILED)
- * (pi_empty == 1) ==> (pi_full == 0)
- * (pi_full == 1) ==> (pi_empty == 0)
- *
- * Invariants
- *
- * pg_groupfailed = 0 &&
- * 1. (I == 1, pi_empty == 0) ==> initiate failover from standby
- * 2. (I == 0, PI_FAILED, pi_empty == 0) ==> initiate failover from phyint
- * 3. (I == 0, PI_RUNNING, pi_full == 0) ==> initiate failback to phyint
- *
- * 1. says that an inactive standby, that is not empty, has to be failed
- * over. For a standby to be truly inactive, it should not host any
- * addresses. So we move them to some other phyint. Usually we catch the
- * turn on of IFF_INACTIVE, and perform this action. However if the failover
- * did not complete successfully, then subsequently we have lost the edge
- * trigger, and this invariant kicks in and completes the action.
- *
- * 2. says that any failed phyint that is not empty must be failed over.
- * Usually we do the failover when we detect NIC failure. However if the
- * failover does not complete successfully, this invariant kicks in and
- * completes the failover. We exclude inactive standby which is covered by 1.
- *
- * 3. says that any running phyint that is not full must be failed back.
- * Usually we do the failback when we detect NIC repair. However if the
- * failback does not complete successfully, this invariant kicks in and
- * completes the failback. Note that we don't want to failback to an inactive
- * standby.
- *
- * The invariants 1 - 3 and the actions are in initifs().
*/
struct probes_missed probes_missed;
@@ -295,7 +197,7 @@ struct probes_missed probes_missed;
* not less than the current CRTT. pii_probes[] stores data
* about these probes. These packets consume sequence number space.
*
- * PROBE_RTT: This type is used to make only rtt measurments. Normally these
+ * PROBE_RTT: This type is used to make only rtt measurements. Normally these
* are not used. Under heavy network load, the rtt may go up very high,
* due to a spike, or may appear to go high, due to extreme scheduling
* delays. Once the network stress is removed, mpathd takes long time to
@@ -310,17 +212,19 @@ struct probes_missed probes_missed;
* no targets are known. The packet is multicast to the all hosts addr.
*/
static void
-probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
+probe(struct phyint_instance *pii, uint_t probe_type, hrtime_t start_hrtime)
{
+ hrtime_t sent_hrtime;
+ struct timeval sent_tv;
struct pr_icmp probe_pkt; /* Probe packet */
- struct sockaddr_in6 whereto6; /* target address IPv6 */
- struct sockaddr_in whereto; /* target address IPv4 */
+ struct sockaddr_storage targ; /* target address */
+ uint_t targaddrlen; /* target address length */
int pr_ndx; /* probe index in pii->pii_probes[] */
boolean_t sent = _B_TRUE;
if (debug & D_TARGET) {
- logdebug("probe(%s %s %d %u)\n", AF_STR(pii->pii_af),
- pii->pii_name, probe_type, cur_time);
+ logdebug("probe(%s %s %d %lld)\n", AF_STR(pii->pii_af),
+ pii->pii_name, probe_type, start_hrtime);
}
assert(pii->pii_probe_sock != -1);
@@ -339,7 +243,7 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
* network byte order at initialization itself.
*/
probe_pkt.pr_icmp_id = pii->pii_icmpid;
- probe_pkt.pr_icmp_timestamp = htonl(cur_time);
+ probe_pkt.pr_icmp_timestamp = htonll(start_hrtime);
probe_pkt.pr_icmp_mtype = htonl(probe_type);
/*
@@ -349,38 +253,34 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
assert(probe_type == PROBE_MULTI || ((pii->pii_target_next != NULL) &&
pii->pii_rtt_target_next != NULL));
+ bzero(&targ, sizeof (targ));
+ targ.ss_family = pii->pii_af;
+
if (pii->pii_af == AF_INET6) {
- bzero(&whereto6, sizeof (whereto6));
- whereto6.sin6_family = AF_INET6;
+ struct in6_addr *addr6;
+
+ addr6 = &((struct sockaddr_in6 *)&targ)->sin6_addr;
+ targaddrlen = sizeof (struct sockaddr_in6);
if (probe_type == PROBE_MULTI) {
- whereto6.sin6_addr = all_nodes_mcast_v6;
+ *addr6 = all_nodes_mcast_v6;
} else if (probe_type == PROBE_UNI) {
- whereto6.sin6_addr = pii->pii_target_next->tg_address;
- } else {
- /* type is PROBE_RTT */
- whereto6.sin6_addr =
- pii->pii_rtt_target_next->tg_address;
- }
- if (sendto(pii->pii_probe_sock, (char *)&probe_pkt,
- sizeof (probe_pkt), 0, (struct sockaddr *)&whereto6,
- sizeof (whereto6)) != sizeof (probe_pkt)) {
- logperror_pii(pii, "probe: probe sendto");
- sent = _B_FALSE;
+ *addr6 = pii->pii_target_next->tg_address;
+ } else { /* type is PROBE_RTT */
+ *addr6 = pii->pii_rtt_target_next->tg_address;
}
} else {
- bzero(&whereto, sizeof (whereto));
- whereto.sin_family = AF_INET;
+ struct in_addr *addr4;
+
+ addr4 = &((struct sockaddr_in *)&targ)->sin_addr;
+ targaddrlen = sizeof (struct sockaddr_in);
if (probe_type == PROBE_MULTI) {
- whereto.sin_addr = all_nodes_mcast_v4;
+ *addr4 = all_nodes_mcast_v4;
} else if (probe_type == PROBE_UNI) {
IN6_V4MAPPED_TO_INADDR(
- &pii->pii_target_next->tg_address,
- &whereto.sin_addr);
- } else {
- /* type is PROBE_RTT */
+ &pii->pii_target_next->tg_address, addr4);
+ } else { /* type is PROBE_RTT */
IN6_V4MAPPED_TO_INADDR(
- &pii->pii_rtt_target_next->tg_address,
- &whereto.sin_addr);
+ &pii->pii_rtt_target_next->tg_address, addr4);
}
/*
@@ -388,12 +288,18 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
*/
probe_pkt.pr_icmp_cksum =
in_cksum((ushort_t *)&probe_pkt, (int)sizeof (probe_pkt));
- if (sendto(pii->pii_probe_sock, (char *)&probe_pkt,
- sizeof (probe_pkt), 0, (struct sockaddr *)&whereto,
- sizeof (whereto)) != sizeof (probe_pkt)) {
- logperror_pii(pii, "probe: probe sendto");
- sent = _B_FALSE;
- }
+ }
+
+ /*
+ * Use the current time as the time we sent. Not atomic, but the best
+ * we can do from here.
+ */
+ sent_hrtime = gethrtime();
+ (void) gettimeofday(&sent_tv, NULL);
+ if (sendto(pii->pii_probe_sock, &probe_pkt, sizeof (probe_pkt), 0,
+ (struct sockaddr *)&targ, targaddrlen) != sizeof (probe_pkt)) {
+ logperror_pii(pii, "probe: probe sendto");
+ sent = _B_FALSE;
}
/*
@@ -415,9 +321,13 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
pii->pii_cum_stats.acked++;
pii->pii_cum_stats.sent++;
- pii->pii_probes[pr_ndx].pr_status = PR_UNACKED;
+ pii->pii_probes[pr_ndx].pr_id = pii->pii_snxt;
+ pii->pii_probes[pr_ndx].pr_tv_sent = sent_tv;
+ pii->pii_probes[pr_ndx].pr_hrtime_sent = sent_hrtime;
+ pii->pii_probes[pr_ndx].pr_hrtime_start = start_hrtime;
pii->pii_probes[pr_ndx].pr_target = pii->pii_target_next;
- pii->pii_probes[pr_ndx].pr_time_sent = cur_time;
+ probe_chstate(&pii->pii_probes[pr_ndx], pii, PR_UNACKED);
+
pii->pii_probe_next = PROBE_INDEX_NEXT(pii->pii_probe_next);
pii->pii_target_next = target_next(pii->pii_target_next);
assert(pii->pii_target_next != NULL);
@@ -448,33 +358,42 @@ in_data(struct phyint_instance *pii)
{
struct sockaddr_in from;
struct in6_addr fromaddr;
- uint_t fromlen;
- static uint_t in_packet[(IP_MAXPACKET + 1)/4];
+ static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
+ static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
struct ip *ip;
int iphlen;
int len;
char abuf[INET_ADDRSTRLEN];
- struct pr_icmp *reply;
+ struct msghdr msg;
+ struct iovec iov;
+ struct pr_icmp *reply;
+ struct timeval *recv_tvp;
if (debug & D_PROBE) {
logdebug("in_data(%s %s)\n",
AF_STR(pii->pii_af), pii->pii_name);
}
+ iov.iov_base = (char *)in_packet;
+ iov.iov_len = sizeof (in_packet);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_name = (struct sockaddr *)&from;
+ msg.msg_namelen = sizeof (from);
+ msg.msg_control = ancillary_data;
+ msg.msg_controllen = sizeof (ancillary_data);
+
/*
* Poll has already told us that a message is waiting,
* on this socket. Read it now. We should not block.
*/
- fromlen = sizeof (from);
- len = recvfrom(pii->pii_probe_sock, (char *)in_packet,
- sizeof (in_packet), 0, (struct sockaddr *)&from, &fromlen);
- if (len < 0) {
- logperror_pii(pii, "in_data: recvfrom");
+ if ((len = recvmsg(pii->pii_probe_sock, &msg, 0)) < 0) {
+ logperror_pii(pii, "in_data: recvmsg");
return;
}
/*
- * If the NIC has indicated the link is down, don't go
+ * If the datalink has indicated the link is down, don't go
* any further.
*/
if (LINK_DOWN(pii->pii_phyint))
@@ -483,6 +402,15 @@ in_data(struct phyint_instance *pii)
/* Get the printable address for error reporting */
(void) inet_ntop(AF_INET, &from.sin_addr, abuf, sizeof (abuf));
+ /* Ignore packets > 64k or control buffers that don't fit */
+ if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
+ if (debug & D_PKTBAD) {
+ logdebug("Truncated message: msg_flags 0x%x from %s\n",
+ msg.msg_flags, abuf);
+ }
+ return;
+ }
+
/* Make sure packet contains at least minimum ICMP header */
ip = (struct ip *)in_packet;
iphlen = ip->ip_hl << 2;
@@ -528,10 +456,17 @@ in_data(struct phyint_instance *pii)
return;
}
+ recv_tvp = find_ancillary(&msg, SOL_SOCKET, SCM_TIMESTAMP);
+ if (recv_tvp == NULL) {
+ logtrace("message without timestamp from %s on %s\n",
+ abuf, pii->pii_name);
+ return;
+ }
+
IN6_INADDR_TO_V4MAPPED(&from.sin_addr, &fromaddr);
if (reply->pr_icmp_mtype == htonl(PROBE_UNI))
/* Unicast probe reply */
- incoming_echo_reply(pii, reply, fromaddr);
+ incoming_echo_reply(pii, reply, fromaddr, recv_tvp);
else if (reply->pr_icmp_mtype == htonl(PROBE_MULTI)) {
/* Multicast reply */
incoming_mcast_reply(pii, reply, fromaddr);
@@ -543,7 +478,6 @@ in_data(struct phyint_instance *pii)
reply->pr_icmp_mtype, abuf, pii->pii_name);
return;
}
-
}
/*
@@ -559,8 +493,9 @@ in6_data(struct phyint_instance *pii)
char abuf[INET6_ADDRSTRLEN];
struct msghdr msg;
struct iovec iov;
- uchar_t *opt;
+ void *opt;
struct pr_icmp *reply;
+ struct timeval *recv_tvp;
if (debug & D_PROBE) {
logdebug("in6_data(%s %s)\n",
@@ -577,12 +512,12 @@ in6_data(struct phyint_instance *pii)
msg.msg_controllen = sizeof (ancillary_data);
if ((len = recvmsg(pii->pii_probe_sock, &msg, 0)) < 0) {
- logperror_pii(pii, "in6_data: recvfrom");
+ logperror_pii(pii, "in6_data: recvmsg");
return;
}
/*
- * If the NIC has indicated that the link is down, don't go
+ * If the datalink has indicated that the link is down, don't go
* any further.
*/
if (LINK_DOWN(pii->pii_phyint))
@@ -623,13 +558,14 @@ in6_data(struct phyint_instance *pii)
"%s on %s\n", abuf, pii->pii_name);
return;
}
- opt = find_ancillary(&msg, IPV6_RTHDR);
+ opt = find_ancillary(&msg, IPPROTO_IPV6, IPV6_RTHDR);
if (opt != NULL) {
/* Can't allow routing headers in probe replies */
logtrace("message with routing header from %s on %s\n",
abuf, pii->pii_name);
return;
}
+
if (reply->pr_icmp_code != 0) {
logtrace("probe reply code: %d from %s on %s\n",
reply->pr_icmp_code, abuf, pii->pii_name);
@@ -640,8 +576,16 @@ in6_data(struct phyint_instance *pii)
len, abuf, pii->pii_name);
return;
}
+
+ recv_tvp = find_ancillary(&msg, SOL_SOCKET, SCM_TIMESTAMP);
+ if (recv_tvp == NULL) {
+ logtrace("message without timestamp from %s on %s\n",
+ abuf, pii->pii_name);
+ return;
+ }
+
if (reply->pr_icmp_mtype == htonl(PROBE_UNI)) {
- incoming_echo_reply(pii, reply, from.sin6_addr);
+ incoming_echo_reply(pii, reply, from.sin6_addr, recv_tvp);
} else if (reply->pr_icmp_mtype == htonl(PROBE_MULTI)) {
incoming_mcast_reply(pii, reply, from.sin6_addr);
} else if (reply->pr_icmp_mtype == htonl(PROBE_RTT)) {
@@ -663,11 +607,9 @@ static void
incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
struct in6_addr fromaddr)
{
- int m; /* rtt measurment in ms */
- uint32_t cur_time; /* in ms from some arbitrary point */
+ int64_t m; /* rtt measurement in ns */
char abuf[INET6_ADDRSTRLEN];
struct target *target;
- uint32_t pr_icmp_timestamp;
struct phyint_group *pg;
/* Get the printable address for error reporting */
@@ -683,10 +625,7 @@ incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
if (target == NULL)
return;
- pr_icmp_timestamp = ntohl(reply->pr_icmp_timestamp);
- cur_time = getcurrenttime();
- m = (int)(cur_time - pr_icmp_timestamp);
-
+ m = (int64_t)(gethrtime() - ntohll(reply->pr_icmp_timestamp));
/* Invalid rtt. It has wrapped around */
if (m < 0)
return;
@@ -754,29 +693,30 @@ incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
*/
static void
incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
- struct in6_addr fromaddr)
+ struct in6_addr fromaddr, struct timeval *recv_tvp)
{
- int m; /* rtt measurment in ms */
- uint32_t cur_time; /* in ms from some arbitrary point */
+ int64_t m; /* rtt measurement in ns */
+ hrtime_t cur_hrtime; /* in ns from some arbitrary point */
char abuf[INET6_ADDRSTRLEN];
int pr_ndx;
struct target *target;
boolean_t exception;
- uint32_t pr_icmp_timestamp;
+ uint64_t pr_icmp_timestamp;
uint16_t pr_icmp_seq;
+ struct probe_stats *pr_statp;
struct phyint_group *pg = pii->pii_phyint->pi_group;
/* Get the printable address for error reporting */
(void) pr_addr(pii->pii_af, fromaddr, abuf, sizeof (abuf));
if (debug & D_PROBE) {
- logdebug("incoming_echo_reply: %s %s %s seq %u\n",
+ logdebug("incoming_echo_reply: %s %s %s seq %u recv_tvp %lld\n",
AF_STR(pii->pii_af), pii->pii_name, abuf,
- ntohs(reply->pr_icmp_seq));
+ ntohs(reply->pr_icmp_seq), tv2ns(recv_tvp));
}
- pr_icmp_timestamp = ntohl(reply->pr_icmp_timestamp);
- pr_icmp_seq = ntohs(reply->pr_icmp_seq);
+ pr_icmp_timestamp = ntohll(reply->pr_icmp_timestamp);
+ pr_icmp_seq = ntohs(reply->pr_icmp_seq);
/* Reject out of window probe replies */
if (SEQ_GE(pr_icmp_seq, pii->pii_snxt) ||
@@ -786,15 +726,16 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
pii->pii_cum_stats.unknown++;
return;
}
- cur_time = getcurrenttime();
- m = (int)(cur_time - pr_icmp_timestamp);
+
+ cur_hrtime = gethrtime();
+ m = (int64_t)(cur_hrtime - pr_icmp_timestamp);
if (m < 0) {
/*
* This is a ridiculously high value of rtt. rtt has wrapped
* around. Log a message, and ignore the rtt.
*/
- logerr("incoming_echo_reply: rtt wraparound cur_time %u reply "
- "timestamp %u\n", cur_time, pr_icmp_timestamp);
+ logerr("incoming_echo_reply: rtt wraparound cur_hrtime %lld "
+ "reply timestamp %lld\n", cur_hrtime, pr_icmp_timestamp);
}
/*
@@ -868,10 +809,10 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
* debugger, or the system was hung or too busy for a
* substantial time that we didn't get a chance to run.
*/
- if ((m < 0) || (m > PROBE_STATS_COUNT * pg->pg_probeint)) {
+ if ((m < 0) || (ns2ms(m) > PROBE_STATS_COUNT * pg->pg_probeint)) {
/*
- * If the probe corresponding to this receieved response
- * was truly sent 'm' ms. ago, then this response must
+ * If the probe corresponding to this received response
+ * was truly sent 'm' ns. ago, then this response must
* have been rejected by the sequence number checks. The
* fact that it has passed the sequence number checks
* means that the measured rtt is wrong. We were probably
@@ -947,7 +888,7 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
* adjusts pii->pii_target_next
*/
target_delete(target);
- probe(pii, PROBE_MULTI, cur_time);
+ probe(pii, PROBE_MULTI, cur_hrtime);
}
} else {
/*
@@ -999,8 +940,12 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
}
}
out:
- pii->pii_probes[pr_ndx].pr_status = PR_ACKED;
- pii->pii_probes[pr_ndx].pr_time_acked = cur_time;
+ pr_statp = &pii->pii_probes[pr_ndx];
+ pr_statp->pr_hrtime_ackproc = cur_hrtime;
+ pr_statp->pr_hrtime_ackrecv = pr_statp->pr_hrtime_sent +
+ (tv2ns(recv_tvp) - tv2ns(&pr_statp->pr_tv_sent));
+
+ probe_chstate(pr_statp, pii, PR_ACKED);
/*
* Update pii->pii_rack, i.e. the sequence number of the last received
@@ -1240,13 +1185,13 @@ incoming_mcast_reply(struct phyint_instance *pii, struct pr_icmp *reply,
*
* New scaled average and deviation are passed back via sap and svp
*/
-static int
-compute_crtt(int *sap, int *svp, int m)
+static int64_t
+compute_crtt(int64_t *sap, int64_t *svp, int64_t m)
{
- int sa = *sap;
- int sv = *svp;
- int crtt;
- int saved_m = m;
+ int64_t sa = *sap;
+ int64_t sv = *svp;
+ int64_t crtt;
+ int64_t saved_m = m;
assert(*sap >= -1);
assert(*svp >= 0);
@@ -1285,8 +1230,8 @@ compute_crtt(int *sap, int *svp, int m)
crtt = (sa >> 3) + sv;
if (debug & D_PROBE) {
- logdebug("compute_crtt: m = %d sa = %d, sv = %d -> crtt = "
- "%d\n", saved_m, sa, sv, crtt);
+ logerr("compute_crtt: m = %lld sa = %lld, sv = %lld -> "
+ "crtt = %lld\n", saved_m, sa, sv, crtt);
}
*sap = sa;
@@ -1300,22 +1245,22 @@ compute_crtt(int *sap, int *svp, int m)
}
static void
-pi_set_crtt(struct target *tg, int m, boolean_t is_probe_uni)
+pi_set_crtt(struct target *tg, int64_t m, boolean_t is_probe_uni)
{
struct phyint_instance *pii = tg->tg_phyint_inst;
int probe_interval = pii->pii_phyint->pi_group->pg_probeint;
- int sa = tg->tg_rtt_sa;
- int sv = tg->tg_rtt_sd;
+ int64_t sa = tg->tg_rtt_sa;
+ int64_t sv = tg->tg_rtt_sd;
int new_crtt;
int i;
if (debug & D_PROBE)
- logdebug("pi_set_crtt: target - m %d\n", m);
+ logdebug("pi_set_crtt: target - m %lld\n", m);
/* store the round trip time, in case we need to defer computation */
tg->tg_deferred[tg->tg_num_deferred] = m;
- new_crtt = compute_crtt(&sa, &sv, m);
+ new_crtt = ns2ms(compute_crtt(&sa, &sv, m));
/*
* If this probe's round trip time would singlehandedly cause an
@@ -1342,8 +1287,8 @@ pi_set_crtt(struct target *tg, int m, boolean_t is_probe_uni)
}
for (i = 0; i <= tg->tg_num_deferred; i++) {
- tg->tg_crtt = compute_crtt(&tg->tg_rtt_sa,
- &tg->tg_rtt_sd, tg->tg_deferred[i]);
+ tg->tg_crtt = ns2ms(compute_crtt(&tg->tg_rtt_sa,
+ &tg->tg_rtt_sd, tg->tg_deferred[i]));
}
tg->tg_num_deferred = 0;
@@ -1373,13 +1318,13 @@ pi_set_crtt(struct target *tg, int m, boolean_t is_probe_uni)
* If not found return NULL.
*/
static void *
-find_ancillary(struct msghdr *msg, int cmsg_type)
+find_ancillary(struct msghdr *msg, int cmsg_level, int cmsg_type)
{
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
cmsg = CMSG_NXTHDR(msg, cmsg)) {
- if (cmsg->cmsg_level == IPPROTO_IPV6 &&
+ if (cmsg->cmsg_level == cmsg_level &&
cmsg->cmsg_type == cmsg_type) {
return (CMSG_DATA(cmsg));
}
@@ -1388,107 +1333,194 @@ find_ancillary(struct msghdr *msg, int cmsg_type)
}
/*
- * See if a previously failed interface has started working again.
+ * Try to activate another INACTIVE interface in the same group as `pi'.
+ * Prefer STANDBY INACTIVE to just INACTIVE.
*/
void
-phyint_check_for_repair(struct phyint *pi)
+phyint_activate_another(struct phyint *pi)
{
- if (phyint_repaired(pi)) {
- if (pi->pi_group == phyint_anongroup) {
- logerr("NIC repair detected on %s\n", pi->pi_name);
- } else {
- logerr("NIC repair detected on %s of group %s\n",
- pi->pi_name, pi->pi_group->pg_name);
- }
+ struct phyint *pi2;
+ struct phyint *inactivepi = NULL;
- /*
- * If the interface is offline, just clear the FAILED flag,
- * delaying the state change and failback operation until it
- * is brought back online.
- */
- if (pi->pi_state == PI_OFFLINE) {
- (void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
- return;
- }
+ if (pi->pi_group == phyint_anongroup)
+ return;
- if (pi->pi_flags & IFF_STANDBY) {
- (void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
- } else {
- if (try_failback(pi) != IPMP_FAILURE) {
- (void) change_lif_flags(pi,
- IFF_FAILED, _B_FALSE);
- /* Per state diagram */
- pi->pi_empty = 0;
+ for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (pi == pi2 || pi2->pi_state != PI_RUNNING ||
+ !(pi2->pi_flags & IFF_INACTIVE))
+ continue;
+
+ inactivepi = pi2;
+ if (pi2->pi_flags & IFF_STANDBY)
+ break;
+ }
+
+ if (inactivepi != NULL)
+ (void) change_pif_flags(inactivepi, 0, IFF_INACTIVE);
+}
+
+/*
+ * Transition a phyint back to PI_RUNNING (from PI_FAILED or PI_OFFLINE). The
+ * caller must ensure that the transition is appropriate. Clears IFF_OFFLINE
+ * or IFF_FAILED, as appropriate. Also sets IFF_INACTIVE on this or other
+ * interfaces as appropriate (see comment below). Finally, also updates the
+ * phyint's group state to account for the change.
+ */
+void
+phyint_transition_to_running(struct phyint *pi)
+{
+ struct phyint *pi2;
+ struct phyint *actstandbypi = NULL;
+ uint_t nactive = 0, nnonstandby = 0;
+ boolean_t onlining = (pi->pi_state == PI_OFFLINE);
+ uint64_t set, clear;
+
+ /*
+ * The interface is running again, but should it or another interface
+ * in the group end up INACTIVE? There are three cases:
+ *
+ * 1. If it's a STANDBY interface, it should end up INACTIVE if
+ * the group is operating at capacity (i.e., there are at least as
+ * many active interfaces as non-STANDBY interfaces in the group).
+ * No other interfaces should be changed.
+ *
+ * 2. If it's a non-STANDBY interface and we're onlining it or
+ * FAILBACK is enabled, then it should *not* end up INACTIVE.
+ * Further, if the group is above capacity as a result of this
+ * interface, then an active STANDBY interface in the group should
+ * end up INACTIVE.
+ *
+ * 3. If it's a non-STANDBY interface, we're repairing it, and
+ * FAILBACK is disabled, then it should end up INACTIVE *unless*
+ * the group was failed (in which case we have no choice but to
+ * use it). No other interfaces should be changed.
+ */
+ if (pi->pi_group != phyint_anongroup) {
+ pi2 = pi->pi_group->pg_phyint;
+ for (; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (!(pi2->pi_flags & IFF_STANDBY))
+ nnonstandby++;
+
+ if (pi2->pi_state == PI_RUNNING) {
+ if (!(pi2->pi_flags & IFF_INACTIVE)) {
+ nactive++;
+ if (pi2->pi_flags & IFF_STANDBY)
+ actstandbypi = pi2;
+ }
}
}
+ }
- phyint_chstate(pi, PI_RUNNING);
+ set = 0;
+ clear = (onlining ? IFF_OFFLINE : IFF_FAILED);
- if (GROUP_FAILED(pi->pi_group)) {
- /*
- * This is the 1st phyint to receive a response
- * after group failure.
- */
- logerr("At least 1 interface (%s) of group %s has "
- "repaired\n", pi->pi_name, pi->pi_group->pg_name);
- phyint_group_chstate(pi->pi_group, PG_RUNNING);
- }
+ if (pi->pi_flags & IFF_STANDBY) { /* case 1 */
+ if (nactive >= nnonstandby)
+ set |= IFF_INACTIVE;
+ else
+ clear |= IFF_INACTIVE;
+ } else if (onlining || failback_enabled) { /* case 2 */
+ if (nactive >= nnonstandby && actstandbypi != NULL)
+ (void) change_pif_flags(actstandbypi, IFF_INACTIVE, 0);
+ } else if (!GROUP_FAILED(pi->pi_group)) { /* case 3 */
+ set |= IFF_INACTIVE;
+ }
+ (void) change_pif_flags(pi, set, clear);
+
+ phyint_chstate(pi, PI_RUNNING);
+
+ /*
+ * Update the group state to account for the change.
+ */
+ phyint_group_refresh_state(pi->pi_group);
+}
+
+/*
+ * See if a previously failed interface has started working again.
+ */
+void
+phyint_check_for_repair(struct phyint *pi)
+{
+ if (!phyint_repaired(pi))
+ return;
+
+ if (pi->pi_group == phyint_anongroup) {
+ logerr("IP interface repair detected on %s\n", pi->pi_name);
+ } else {
+ logerr("IP interface repair detected on %s of group %s\n",
+ pi->pi_name, pi->pi_group->pg_name);
}
+
+ /*
+ * If the interface is PI_OFFLINE, it can't be made PI_RUNNING yet.
+ * So just clear IFF_FAILED and defer phyint_transition_to_running()
+ * until it is brought back online.
+ */
+ if (pi->pi_state == PI_OFFLINE) {
+ (void) change_pif_flags(pi, 0, IFF_FAILED);
+ return;
+ }
+
+ phyint_transition_to_running(pi); /* calls phyint_chstate() */
}
/*
- * See if a previously functioning interface has failed, or if the
- * whole group of interfaces has failed.
+ * See if an interface has failed, or if the whole group of interfaces has
+ * failed.
*/
static void
phyint_inst_check_for_failure(struct phyint_instance *pii)
{
- struct phyint *pi;
- struct phyint *pi2;
-
- pi = pii->pii_phyint;
+ struct phyint *pi = pii->pii_phyint;
+ struct phyint *pi2;
+ boolean_t was_active;
switch (failure_state(pii)) {
case PHYINT_FAILURE:
- (void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
+ was_active = ((pi->pi_flags & IFF_INACTIVE) == 0);
+
+ (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE);
if (pi->pi_group == phyint_anongroup) {
- logerr("NIC failure detected on %s\n", pii->pii_name);
+ logerr("IP interface failure detected on %s\n",
+ pii->pii_name);
} else {
- logerr("NIC failure detected on %s of group %s\n",
- pii->pii_name, pi->pi_group->pg_name);
+ logerr("IP interface failure detected on %s of group"
+ " %s\n", pii->pii_name, pi->pi_group->pg_name);
}
+
/*
- * Do the failover, unless the interface is offline (in
- * which case we've already failed over).
+ * If the interface is offline, the state change will be
+ * noted when it comes back online.
*/
if (pi->pi_state != PI_OFFLINE) {
+ /*
+ * If the failed interface was active, activate
+ * another INACTIVE interface in the group if
+ * possible. (If the interface is PI_OFFLINE,
+ * we already activated another.)
+ */
+ if (was_active)
+ phyint_activate_another(pi);
+
phyint_chstate(pi, PI_FAILED);
reset_crtt_all(pi);
- if (!(pi->pi_flags & IFF_INACTIVE))
- (void) try_failover(pi, FAILOVER_NORMAL);
}
break;
case GROUP_FAILURE:
- logerr("All Interfaces in group %s have failed\n",
- pi->pi_group->pg_name);
- for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL;
- pi2 = pi2->pi_pgnext) {
- if (pi2->pi_flags & IFF_OFFLINE)
+ pi2 = pi->pi_group->pg_phyint;
+ for (; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ (void) change_pif_flags(pi2, IFF_FAILED, IFF_INACTIVE);
+ if (pi2->pi_state == PI_OFFLINE) /* see comment above */
continue;
- (void) change_lif_flags(pi2, IFF_FAILED, _B_TRUE);
- reset_crtt_all(pi2);
+ reset_crtt_all(pi2);
/*
- * In the case of host targets, we
- * would have flushed the targets,
- * and gone to PI_NOTARGETS state.
+ * In the case of host targets, we would have flushed
+ * the targets, and gone to PI_NOTARGETS state.
*/
if (pi2->pi_state == PI_RUNNING)
phyint_chstate(pi2, PI_FAILED);
-
- pi2->pi_empty = 0;
- pi2->pi_full = 0;
}
break;
@@ -1519,7 +1551,8 @@ phyint_inst_timer(struct phyint_instance *pii)
hrtime_t cur_hrtime;
int probe_interval = pii->pii_phyint->pi_group->pg_probeint;
- cur_time = getcurrenttime();
+ cur_hrtime = gethrtime();
+ cur_time = ns2ms(cur_hrtime);
if (debug & D_TIMER) {
logdebug("phyint_inst_timer(%s %s)\n",
@@ -1621,7 +1654,7 @@ phyint_inst_timer(struct phyint_instance *pii)
* the failure detection (fd) probe timer has not yet fired.
* Need to send only an rtt probe. The probe type is PROBE_RTT.
*/
- probe(pii, PROBE_RTT, cur_time);
+ probe(pii, PROBE_RTT, cur_hrtime);
return (interval);
}
/*
@@ -1651,7 +1684,7 @@ phyint_inst_timer(struct phyint_instance *pii)
* We can have at most, the latest 2 probes that we sent, in
* the PR_UNACKED state. All previous probes sent, are either
* PR_LOST or PR_ACKED. An unacknowledged probe is considered
- * timed out if the probe's time_sent + the CRTT < currenttime.
+ * timed out if the probe's time_start + the CRTT < currenttime.
* For each of the last 2 probes, examine whether it has timed
* out. If so, mark it PR_LOST. The probe stats is a circular array.
*/
@@ -1686,16 +1719,15 @@ phyint_inst_timer(struct phyint_instance *pii)
* not available use group's probe interval,
* which is a worst case estimate.
*/
+ timeout = ns2ms(pr_statp->pr_hrtime_start);
if (cur_tg->tg_crtt != 0) {
- timeout = pr_statp->pr_time_sent +
- cur_tg->tg_crtt;
+ timeout += cur_tg->tg_crtt;
} else {
- timeout = pr_statp->pr_time_sent +
- probe_interval;
+ timeout += probe_interval;
}
if (TIME_LT(timeout, cur_time)) {
- pr_statp->pr_status = PR_LOST;
pr_statp->pr_time_lost = timeout;
+ probe_chstate(pr_statp, pii, PR_LOST);
} else if (i == 1) {
/*
* We are forced to consider this probe
@@ -1711,8 +1743,8 @@ phyint_inst_timer(struct phyint_instance *pii)
* when the timer fires, we find 2 valid
* unacked probes, and they are yet to timeout
*/
- pr_statp->pr_status = PR_LOST;
pr_statp->pr_time_lost = cur_time;
+ probe_chstate(pr_statp, pii, PR_LOST);
} else {
/*
* Only the most recent probe can enter
@@ -1740,16 +1772,15 @@ phyint_inst_timer(struct phyint_instance *pii)
* The timer has fired. Take appropriate action depending
* on the current state of the phyint.
*
- * PI_RUNNING state - Failure detection and failover
- * PI_FAILED state - Repair detection and failback
+ * PI_RUNNING state - Failure detection
+ * PI_FAILED state - Repair detection
*/
switch (pii->pii_phyint->pi_state) {
case PI_FAILED:
/*
* If the most recent probe (excluding unacked probes that
* are yet to time out) has been acked, check whether the
- * phyint is now repaired. If the phyint is repaired, then
- * attempt failback, unless it is an inactive standby.
+ * phyint is now repaired.
*/
if (pii->pii_rack + valid_unack_count + 1 == pii->pii_snxt) {
phyint_check_for_repair(pii->pii_phyint);
@@ -1760,10 +1791,8 @@ phyint_inst_timer(struct phyint_instance *pii)
/*
* It's possible our probes have been lost because of a
* spanning-tree mandated quiet period on the switch. If so,
- * ignore the lost probes and consider the interface to still
- * be functioning.
+ * ignore the lost probes.
*/
- cur_hrtime = gethrtime();
if (pii->pii_fd_hrtime - cur_hrtime > 0)
break;
@@ -1771,8 +1800,7 @@ phyint_inst_timer(struct phyint_instance *pii)
/*
* We have 1 or more failed probes (excluding unacked
* probes that are yet to time out). Determine if the
- * phyint has failed. If so attempt a failover,
- * unless it is an inactive standby
+ * phyint has failed.
*/
phyint_inst_check_for_failure(pii);
}
@@ -1790,16 +1818,16 @@ phyint_inst_timer(struct phyint_instance *pii)
* was called, the target list may be empty.
*/
if (pii->pii_target_next != NULL) {
- probe(pii, PROBE_UNI, cur_time);
+ probe(pii, PROBE_UNI, cur_hrtime);
/*
* If we have just the one probe target, and we're not using
* router targets, try to find another as we presently have
* no resilience.
*/
if (!pii->pii_targets_are_routers && pii->pii_ntargets == 1)
- probe(pii, PROBE_MULTI, cur_time);
+ probe(pii, PROBE_MULTI, cur_hrtime);
} else {
- probe(pii, PROBE_MULTI, cur_time);
+ probe(pii, PROBE_MULTI, cur_hrtime);
}
return (interval);
}
@@ -1859,8 +1887,8 @@ process_link_state_down(struct phyint *pi)
/*
* Clear the probe statistics arrays, we don't want the repair
- * detection logic relying on probes that were succesful prior
- * to the link going down.
+ * detection logic relying on probes that were successful prior
+ * to the link going down.
*/
if (PROBE_CAPABLE(pi->pi_v4))
clear_pii_probe_stats(pi->pi_v4);
@@ -2016,7 +2044,7 @@ phyint_inst_probe_failure_state(struct phyint_instance *pii, uint_t *tff)
pii->pii_target_next = target_next(cur_tg);
} else {
target_delete(cur_tg);
- probe(pii, PROBE_MULTI, getcurrenttime());
+ probe(pii, PROBE_MULTI, gethrtime());
}
return (PHYINT_OK);
}
@@ -2065,13 +2093,13 @@ failure_state(struct phyint_instance *pii)
struct probe_success_count psinfo;
uint_t pi2_tls; /* time last success */
uint_t pi_tff; /* time first fail */
- struct phyint *pi2;
+ struct phyint *pi2;
struct phyint *pi;
struct phyint_instance *pii2;
struct phyint_group *pg;
- boolean_t alone;
+ int retval;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("phyint_failed(%s)\n", pii->pii_name);
pi = pii->pii_phyint;
@@ -2082,24 +2110,13 @@ failure_state(struct phyint_instance *pii)
return (PHYINT_OK);
/*
- * At this point, the link is down, or the phyint is suspect,
- * as it has lost NUM_PROBE_FAILS or more probes. If the phyint
- * does not belong to any group, or is the only member of the
- * group capable of being probed, return PHYINT_FAILURE.
+ * At this point, the link is down, or the phyint is suspect, as it
+ * has lost NUM_PROBE_FAILS or more probes. If the phyint does not
+ * belong to any group, this is a PHYINT_FAILURE. Otherwise, continue
+ * on to determine whether this should be considered a PHYINT_FAILURE
+ * or GROUP_FAILURE.
*/
- alone = _B_TRUE;
- if (pg != phyint_anongroup) {
- for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
- if (pi2 == pi)
- continue;
- if (PROBE_CAPABLE(pi2->pi_v4) ||
- PROBE_CAPABLE(pi2->pi_v6)) {
- alone = _B_FALSE;
- break;
- }
- }
- }
- if (alone)
+ if (pg == phyint_anongroup)
return (PHYINT_FAILURE);
/*
@@ -2116,6 +2133,7 @@ failure_state(struct phyint_instance *pii)
* after it was received, so there is no point looking at the tls
* of other phyints.
*/
+ retval = GROUP_FAILURE;
for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
/* Exclude ourself from comparison */
if (pi2 == pi)
@@ -2123,76 +2141,86 @@ failure_state(struct phyint_instance *pii)
if (LINK_DOWN(pi)) {
/*
- * We use FLAGS_TO_LINK_STATE() to test the
- * flags directly, rather then LINK_UP() or
- * LINK_DOWN(), as we may not have got round
- * to processing the link state for the other
- * phyints in the group yet.
+ * We use FLAGS_TO_LINK_STATE() to test the flags
+ * directly, rather than LINK_UP() or LINK_DOWN(), as
+ * we may not have got round to processing the link
+ * state for the other phyints in the group yet.
*
- * The check for PI_RUNNING and group
- * failure handles the case when the
- * group begins to recover. The first
- * phyint to recover should not trigger
- * a failover from the soon-to-recover
- * other phyints to the first recovered
- * phyint. PI_RUNNING will be set, and
- * pg_groupfailed cleared only after
- * receipt of NUM_PROBE_REPAIRS, by
- * which time the other phyints should
- * have received at least 1 packet,
- * and so will not have NUM_PROBE_FAILS.
+ * The check for PI_RUNNING and group failure handles
+ * the case when the group begins to recover.
+ * PI_RUNNING will be set, and group failure cleared
+ * only after receipt of NUM_PROBE_REPAIRS, by which
+ * time the other phyints should have received at
+ * least 1 packet, and so will not have NUM_PROBE_FAILS.
*/
if ((pi2->pi_state == PI_RUNNING) &&
- !GROUP_FAILED(pg) && FLAGS_TO_LINK_STATE(pi2))
- return (PHYINT_FAILURE);
- } else {
- /*
- * Need to compare against both IPv4 and
- * IPv6 instances.
- */
- pii2 = pi2->pi_v4;
- if (pii2 != NULL) {
- probe_success_info(pii2, NULL, &psinfo);
- if (psinfo.ps_tls_valid) {
- pi2_tls = psinfo.ps_tls;
- /*
- * See comment above regarding check
- * for PI_RUNNING and group failure.
- */
- if (TIME_GT(pi2_tls, pi_tff) &&
- (pi2->pi_state == PI_RUNNING) &&
- !GROUP_FAILED(pg) &&
- FLAGS_TO_LINK_STATE(pi2))
- return (PHYINT_FAILURE);
+ !GROUP_FAILED(pg) && FLAGS_TO_LINK_STATE(pi2)) {
+ retval = PHYINT_FAILURE;
+ break;
+ }
+ continue;
+ }
+
+ if (LINK_DOWN(pi2))
+ continue;
+
+ /*
+ * If there's no probe-based failure detection on this
+ * interface, and its link is still up, then it's still
+ * working and thus the group has not failed.
+ */
+ if (!PROBE_ENABLED(pi2->pi_v4) && !PROBE_ENABLED(pi2->pi_v6)) {
+ retval = PHYINT_FAILURE;
+ break;
+ }
+
+ /*
+ * Need to compare against both IPv4 and IPv6 instances.
+ */
+ pii2 = pi2->pi_v4;
+ if (pii2 != NULL) {
+ probe_success_info(pii2, NULL, &psinfo);
+ if (psinfo.ps_tls_valid) {
+ pi2_tls = psinfo.ps_tls;
+ /*
+ * See comment above regarding check
+ * for PI_RUNNING and group failure.
+ */
+ if (TIME_GT(pi2_tls, pi_tff) &&
+ (pi2->pi_state == PI_RUNNING) &&
+ !GROUP_FAILED(pg) &&
+ FLAGS_TO_LINK_STATE(pi2)) {
+ retval = PHYINT_FAILURE;
+ break;
}
}
+ }
- pii2 = pi2->pi_v6;
- if (pii2 != NULL) {
- probe_success_info(pii2, NULL, &psinfo);
- if (psinfo.ps_tls_valid) {
- pi2_tls = psinfo.ps_tls;
- /*
- * See comment above regarding check
- * for PI_RUNNING and group failure.
- */
- if (TIME_GT(pi2_tls, pi_tff) &&
- (pi2->pi_state == PI_RUNNING) &&
- !GROUP_FAILED(pg) &&
- FLAGS_TO_LINK_STATE(pi2))
- return (PHYINT_FAILURE);
+ pii2 = pi2->pi_v6;
+ if (pii2 != NULL) {
+ probe_success_info(pii2, NULL, &psinfo);
+ if (psinfo.ps_tls_valid) {
+ pi2_tls = psinfo.ps_tls;
+ /*
+ * See comment above regarding check
+ * for PI_RUNNING and group failure.
+ */
+ if (TIME_GT(pi2_tls, pi_tff) &&
+ (pi2->pi_state == PI_RUNNING) &&
+ !GROUP_FAILED(pg) &&
+ FLAGS_TO_LINK_STATE(pi2)) {
+ retval = PHYINT_FAILURE;
+ break;
}
}
}
}
/*
- * Change the group state to PG_FAILED if it's not already.
+ * Update the group state to account for the changes.
*/
- if (!GROUP_FAILED(pg))
- phyint_group_chstate(pg, PG_FAILED);
-
- return (GROUP_FAILURE);
+ phyint_group_refresh_state(pg);
+ return (retval);
}
/*
@@ -2215,7 +2243,7 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
uint_t timeout;
struct target *tg;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("probe_success_info(%s)\n", pii->pii_name);
bzero(psinfo, sizeof (*psinfo));
@@ -2248,10 +2276,11 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
* not available use the value of the group's probe
* interval which is a worst case estimate.
*/
+ timeout = ns2ms(pr_statp->pr_hrtime_start);
if (tg->tg_crtt != 0) {
- timeout = pr_statp->pr_time_sent + tg->tg_crtt;
+ timeout += tg->tg_crtt;
} else {
- timeout = pr_statp->pr_time_sent +
+ timeout +=
pii->pii_phyint->pi_group->pg_probeint;
}
@@ -2261,7 +2290,7 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
* recent consecutive successes.
*/
pr_statp->pr_time_lost = timeout;
- pr_statp->pr_status = PR_LOST;
+ probe_chstate(pr_statp, pii, PR_LOST);
pi_found_failure = _B_TRUE;
if (cur_tg != NULL && tg == cur_tg) {
/*
@@ -2292,7 +2321,8 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
* the most recent probe success.
*/
if (!psinfo->ps_tls_valid) {
- psinfo->ps_tls = pr_statp->pr_time_acked;
+ psinfo->ps_tls =
+ ns2ms(pr_statp->pr_hrtime_ackproc);
psinfo->ps_tls_valid = _B_TRUE;
}
break;
@@ -2339,7 +2369,7 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
uint_t timeout;
struct target *tg;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("probe_fail_info(%s)\n", pii->pii_name);
bzero(pfinfo, sizeof (*pfinfo));
@@ -2377,10 +2407,11 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
* not available use the group's probe interval,
* which is a worst case estimate.
*/
+ timeout = ns2ms(pr_statp->pr_hrtime_start);
if (tg->tg_crtt != 0) {
- timeout = pr_statp->pr_time_sent + tg->tg_crtt;
+ timeout += tg->tg_crtt;
} else {
- timeout = pr_statp->pr_time_sent +
+ timeout +=
pii->pii_phyint->pi_group->pg_probeint;
}
@@ -2388,7 +2419,7 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
break;
pr_statp->pr_time_lost = timeout;
- pr_statp->pr_status = PR_LOST;
+ probe_chstate(pr_statp, pii, PR_LOST);
/* FALLTHRU */
case PR_LOST:
@@ -2421,6 +2452,19 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
}
/*
+ * Change the state of probe `pr' on phyint_instance `pii' to state `state'.
+ */
+void
+probe_chstate(struct probe_stats *pr, struct phyint_instance *pii, int state)
+{
+	/*
+	 * Act only on a genuine transition: record the new status and then
+	 * hand the probe to probe_state_event(), whose result is deliberately
+	 * ignored.  A request for the status the probe already has is a no-op.
+	 */
+	if (pr->pr_status != state) {
+		pr->pr_status = state;
+		(void) probe_state_event(pr, pii);
+	}
+}
+
+/*
* Check if the phyint has been repaired. If no test address has been
* configured, then consider the interface repaired if the link is up (unless
* the link is flapping; see below). Otherwise, look for proof of probes
@@ -2436,7 +2480,7 @@ phyint_repaired(struct phyint *pi)
int pr_ndx;
uint_t cur_time;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("phyint_repaired(%s)\n", pi->pi_name);
if (LINK_DOWN(pi))
@@ -2458,7 +2502,7 @@ phyint_repaired(struct phyint *pi)
}
if (!pi->pi_lfmsg_printed) {
logerr("The link has come up on %s more than %d times "
- "in the last minute; disabling failback until it "
+ "in the last minute; disabling repair until it "
"stabilizes\n", pi->pi_name, LINK_UP_PERMIN);
pi->pi_lfmsg_printed = 1;
}
@@ -2490,354 +2534,41 @@ phyint_repaired(struct phyint *pi)
}
/*
- * Try failover from phyint 'pi' to a suitable destination.
- */
-int
-try_failover(struct phyint *pi, int failover_type)
-{
- struct phyint *dst;
- int err;
-
- if (debug & D_FAILOVER)
- logdebug("try_failover(%s %d)\n", pi->pi_name, failover_type);
-
- /*
- * Attempt to find a failover destination 'dst'.
- * dst will be null if any of the following is true
- * Phyint is not part of a group OR
- * Phyint is the only member of a group OR
- * No suitable failover dst was available
- */
- dst = get_failover_dst(pi, failover_type);
- if (dst == NULL)
- return (IPMP_EMINRED);
-
- dst->pi_empty = 0; /* Per state diagram */
- pi->pi_full = 0; /* Per state diagram */
-
- err = failover(pi, dst);
-
- if (debug & D_FAILOVER) {
- logdebug("failed over from %s to %s ret %d\n",
- pi->pi_name, dst->pi_name, err);
- }
- if (err == 0) {
- pi->pi_empty = 1; /* Per state diagram */
- /*
- * we don't want to print out this message if a
- * phyint is leaving the group, nor for failover from
- * standby
- */
- if (failover_type == FAILOVER_NORMAL) {
- logerr("Successfully failed over from NIC %s to NIC "
- "%s\n", pi->pi_name, dst->pi_name);
- }
- return (0);
- } else {
- /*
- * The failover did not succeed. We must retry the failover
- * only after resyncing our state based on the kernel's.
- * For eg. either the src or the dst might have been unplumbed
- * causing this failure. initifs() will be called again,
- * from main, since full_scan_required has been set to true
- * by failover();
- */
- return (IPMP_FAILURE);
- }
-}
-
-/*
- * global_errno captures the errno value, if failover() or failback()
- * fails. This is sent to if_mpadm(1M).
- */
-int global_errno;
-
-/*
- * Attempt failover from phyint 'from' to phyint 'to'.
- * IP moves everything from phyint 'from' to phyint 'to'.
- */
-static int
-failover(struct phyint *from, struct phyint *to)
-{
- struct lifreq lifr;
- int ret;
-
- if (debug & D_FAILOVER) {
- logdebug("failing over from %s to %s\n",
- from->pi_name, to->pi_name);
- }
-
- /*
- * Perform the failover. Both IPv4 and IPv6 are failed over
- * using a single ioctl by passing in AF_UNSPEC family.
- */
- lifr.lifr_addr.ss_family = AF_UNSPEC;
- (void) strncpy(lifr.lifr_name, from->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_movetoindex = to->pi_ifindex;
-
- ret = ioctl(ifsock_v4, SIOCLIFFAILOVER, (caddr_t)&lifr);
- if (ret < 0) {
- global_errno = errno;
- logperror("failover: ioctl (failover)");
- }
-
- /*
- * Set full_scan_required to true. This will make us read
- * the state from the kernel in initifs() and update our tables,
- * to reflect the current state after the failover. If the
- * failover has failed it will then reissue the failover.
- */
- full_scan_required = _B_TRUE;
- return (ret);
-}
-
-/*
- * phyint 'pi' has recovered. Attempt failback from every phyint in the same
- * group as phyint 'pi' that is a potential failback source, to phyint 'pi'.
- * Return values:
- * IPMP_SUCCESS: Failback successful from each of the other
- * phyints in the group.
- * IPMP_EFBPARTIAL: Failback successful from some of the other
- * phyints in the group.
- * IPMP_FAILURE: Failback syscall failed with some error.
- *
- * Note that failback is attempted regardless of the setting of the
- * failback_enabled flag.
- */
-int
-do_failback(struct phyint *pi)
-{
- struct phyint *from;
- boolean_t done;
- boolean_t partial;
- boolean_t attempted_failback = _B_FALSE;
-
- if (debug & D_FAILOVER)
- logdebug("do_failback(%s)\n", pi->pi_name);
-
- /* If this phyint is not part of a named group, return. */
- if (pi->pi_group == phyint_anongroup) {
- pi->pi_full = 1;
- return (IPMP_SUCCESS);
- }
-
- /*
- * Attempt failback from every phyint in the group to 'pi'.
- * The reason for doing this, instead of only from the
- * phyint to which we did the failover is given below.
- *
- * After 'pi' failed, if any app. tries to join on a multicast
- * address (IPv6), on the failed phyint, IP picks any arbitrary
- * non-failed phyint in the group, instead of the failed phyint,
- * in.mpathd is not aware of this. Thus failing back only from the
- * interface to which 'pi' failed over, will failback the ipif's
- * but not the ilm's. So we need to failback from all members of
- * the phyint group
- */
- done = _B_TRUE;
- partial = _B_FALSE;
- for (from = pi->pi_group->pg_phyint; from != NULL;
- from = from->pi_pgnext) {
- /* Exclude ourself as a failback src */
- if (from == pi)
- continue;
-
- /*
- * If the 'from' phyint has IPv4 plumbed, the 'to'
- * phyint must also have IPv4 plumbed. Similar check
- * for IPv6. IP makes the same check. Otherwise the
- * failback will fail.
- */
- if ((from->pi_v4 != NULL && pi->pi_v4 == NULL) ||
- (from->pi_v6 != NULL && pi->pi_v6 == NULL)) {
- partial = _B_TRUE;
- continue;
- }
-
- pi->pi_empty = 0; /* Per state diagram */
- attempted_failback = _B_TRUE;
- if (failback(from, pi) != 0) {
- done = _B_FALSE;
- break;
- }
- }
-
- /*
- * We are done. No more phyint from which we can src the failback
- */
- if (done) {
- if (!partial)
- pi->pi_full = 1; /* Per state diagram */
- /*
- * Don't print out a message unless there is a
- * transition from FAILED to RUNNING. For eg.
- * we don't want to print out this message if a
- * phyint is leaving the group, or at startup
- */
- if (attempted_failback && (pi->pi_flags &
- (IFF_FAILED | IFF_OFFLINE))) {
- logerr("Successfully failed back to NIC %s\n",
- pi->pi_name);
- }
- return (partial ? IPMP_EFBPARTIAL : IPMP_SUCCESS);
- }
-
- return (IPMP_FAILURE);
-}
-
-/*
- * This function is similar to do_failback() above, but respects the
- * failback_enabled flag for phyints in named groups.
- */
-int
-try_failback(struct phyint *pi)
-{
- if (debug & D_FAILOVER)
- logdebug("try_failback(%s)\n", pi->pi_name);
-
- if (pi->pi_group != phyint_anongroup && !failback_enabled)
- return (IPMP_EFBDISABLED);
-
- return (do_failback(pi));
-}
-
-/*
- * Failback everything from phyint 'from' that has the same ifindex
- * as phyint to's ifindex.
- */
-static int
-failback(struct phyint *from, struct phyint *to)
-{
- struct lifreq lifr;
- int ret;
-
- if (debug & D_FAILOVER)
- logdebug("failback(%s %s)\n", from->pi_name, to->pi_name);
-
- lifr.lifr_addr.ss_family = AF_UNSPEC;
- (void) strncpy(lifr.lifr_name, from->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_movetoindex = to->pi_ifindex;
-
- ret = ioctl(ifsock_v4, SIOCLIFFAILBACK, (caddr_t)&lifr);
- if (ret < 0) {
- global_errno = errno;
- logperror("failback: ioctl (failback)");
- }
-
- /*
- * Set full_scan_required to true. This will make us read
- * the state from the kernel in initifs() and update our tables,
- * to reflect the current state after the failback. If the
- * failback has failed it will then reissue the failback.
- */
- full_scan_required = _B_TRUE;
-
- return (ret);
-}
-
-/*
- * Select a target phyint for failing over from 'pi'.
- * In the normal case i.e. failover_type is FAILOVER_NORMAL, the preferred
- * target phyint is chosen as follows,
- * 1. Pick any inactive standby interface.
- * 2. If no inactive standby is available, select any phyint in the
- * same group that has the least number of logints, (excluding
- * IFF_NOFAILOVER and !IFF_UP logints)
- * If we are failing over from a standby, failover_type is
- * FAILOVER_TO_NONSTANDBY, and we won't pick a standby for the destination.
- * If a phyint is leaving the group, then failover_type is FAILOVER_TO_ANY,
- * and we won't return NULL, as long as there is at least 1 other phyint
- * in the group.
- */
-static struct phyint *
-get_failover_dst(struct phyint *pi, int failover_type)
-{
- struct phyint *maybe = NULL;
- struct phyint *pi2;
- struct phyint *last_choice = NULL;
-
- if (pi->pi_group == phyint_anongroup)
- return (NULL);
-
- /*
- * Loop thru the phyints in the group, and pick the preferred
- * phyint for the target.
- */
- for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
- /* Exclude ourself and offlined interfaces */
- if (pi2 == pi || pi2->pi_state == PI_OFFLINE)
- continue;
-
- /*
- * The chosen target phyint must have IPv4 instance
- * plumbed, if the src phyint has IPv4 plumbed. Similarly
- * for IPv6.
- */
- if ((pi2->pi_v4 == NULL && pi->pi_v4 != NULL) ||
- (pi2->pi_v6 == NULL && pi->pi_v6 != NULL))
- continue;
-
- /* The chosen target must be PI_RUNNING. */
- if (pi2->pi_state != PI_RUNNING) {
- last_choice = pi2;
- continue;
- }
-
- if ((pi2->pi_flags & (IFF_STANDBY | IFF_INACTIVE)) &&
- (failover_type != FAILOVER_TO_NONSTANDBY)) {
- return (pi2);
- } else {
- if (maybe == NULL)
- maybe = pi2;
- else if (logint_upcount(pi2) < logint_upcount(maybe))
- maybe = pi2;
- }
- }
- if (maybe == NULL && failover_type == FAILOVER_TO_ANY)
- return (last_choice);
- else
- return (maybe);
-}
-
-/*
* Used to set/clear phyint flags, by making a SIOCSLIFFLAGS call.
*/
boolean_t
-change_lif_flags(struct phyint *pi, uint64_t flags, boolean_t setfl)
+change_pif_flags(struct phyint *pi, uint64_t set, uint64_t clear)
{
int ifsock;
struct lifreq lifr;
uint64_t old_flags;
- if (debug & D_FAILOVER) {
- logdebug("change_lif_flags(%s): flags %llx setfl %d\n",
- pi->pi_name, flags, (int)setfl);
+ if (debug & D_FAILREP) {
+ logdebug("change_pif_flags(%s): set %llx clear %llx\n",
+ pi->pi_name, set, clear);
}
- if (pi->pi_v4 != NULL) {
+ if (pi->pi_v4 != NULL)
ifsock = ifsock_v4;
- } else {
+ else
ifsock = ifsock_v6;
- }
/*
* Get the current flags from the kernel, and set/clear the
* desired phyint flags. Since we set only phyint flags, we can
* do it on either IPv4 or IPv6 instance.
*/
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
+
if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO)
- logperror("change_lif_flags: ioctl (get flags)");
+ logperror("change_pif_flags: ioctl (get flags)");
return (_B_FALSE);
}
old_flags = lifr.lifr_flags;
- if (setfl)
- lifr.lifr_flags |= flags;
- else
- lifr.lifr_flags &= ~flags;
+ lifr.lifr_flags |= set;
+ lifr.lifr_flags &= ~clear;
if (old_flags == lifr.lifr_flags) {
/* No change in the flags. No need to send ioctl */
@@ -2846,7 +2577,7 @@ change_lif_flags(struct phyint *pi, uint64_t flags, boolean_t setfl)
if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO)
- logperror("change_lif_flags: ioctl (set flags)");
+ logperror("change_pif_flags: ioctl (set flags)");
return (_B_FALSE);
}
@@ -2854,15 +2585,13 @@ change_lif_flags(struct phyint *pi, uint64_t flags, boolean_t setfl)
* Keep pi_flags in synch. with actual flags. Assumes flags are
* phyint flags.
*/
- if (setfl)
- pi->pi_flags |= flags;
- else
- pi->pi_flags &= ~flags;
+ pi->pi_flags |= set;
+ pi->pi_flags &= ~clear;
- if (pi->pi_v4)
+ if (pi->pi_v4 != NULL)
pi->pi_v4->pii_flags = pi->pi_flags;
- if (pi->pi_v6)
+ if (pi->pi_v6 != NULL)
pi->pi_v6->pii_flags = pi->pi_flags;
return (_B_TRUE);
@@ -2928,18 +2657,31 @@ reset_snxt_basetimes(void)
* and it is up, it is not possible to detect the interface failure.
* SIOCTMYADDR also doesn't consider local zone address as own address.
* So, we choose to use SIOCGLIFCONF to collect the local addresses, and they
- * are stored in laddr_list.
+ * are stored in `localaddrs'.
*/
-
boolean_t
own_address(struct in6_addr addr)
{
- struct local_addr *taddr = laddr_list;
+ addrlist_t *addrp;
+ struct sockaddr_storage ss;
+ int af = IN6_IS_ADDR_V4MAPPED(&addr) ? AF_INET : AF_INET6;
- for (; taddr != NULL; taddr = taddr->next) {
- if (IN6_ARE_ADDR_EQUAL(&addr, &taddr->addr)) {
+ addr2storage(af, &addr, &ss);
+ for (addrp = localaddrs; addrp != NULL; addrp = addrp->al_next) {
+ if (sockaddrcmp(&ss, &addrp->al_addr))
return (_B_TRUE);
- }
}
return (_B_FALSE);
}
+
+static int
+ns2ms(int64_t ns)
+{
+	/*
+	 * Divisor built from the same two constants the callers' time base
+	 * uses (nanoseconds per millisecond, going by the constant names).
+	 */
+	const int64_t ns_per_ms = NANOSEC / MILLISEC;
+
+	/* The quotient is narrowed to int, exactly as the return type demands. */
+	return ((int)(ns / ns_per_ms));
+}
+
+/*
+ * Convert the timeval pointed to by `tvp' into a single 64-bit count:
+ * tv_sec scaled by NANOSEC plus tv_usec scaled by 1000.
+ */
+static int64_t
+tv2ns(struct timeval *tvp)
+{
+	/*
+	 * Widen each field to 64 bits before scaling.  tv_sec is a time_t,
+	 * which may be only 32 bits wide; if NANOSEC is likewise a plain int
+	 * constant, the product would be evaluated in 32 bits and overflow
+	 * before being converted to the int64_t return type.
+	 */
+	return ((int64_t)tvp->tv_sec * NANOSEC +
+	    (int64_t)tvp->tv_usec * 1000);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c
index b56648cf12..def08d39ce 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mpd_defs.h"
#include "mpd_tables.h"
@@ -47,11 +45,7 @@ static void phyint_inst_print(struct phyint_instance *pii);
static void phyint_insert(struct phyint *pi, struct phyint_group *pg);
static void phyint_delete(struct phyint *pi);
-
-static void phyint_group_insert(struct phyint_group *pg);
-static void phyint_group_delete(struct phyint_group *pg);
-static struct phyint_group *phyint_group_lookup(const char *pg_name);
-static struct phyint_group *phyint_group_create(const char *pg_name);
+static boolean_t phyint_is_usable(struct phyint *pi);
static void logint_print(struct logint *li);
static void logint_insert(struct phyint_instance *pii, struct logint *li);
@@ -68,16 +62,13 @@ static void reset_pii_probes(struct phyint_instance *pii, struct target *tg);
static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii);
static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii);
-static void ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask);
-static boolean_t prefix_equal(struct in6_addr p1, struct in6_addr p2,
- int prefix_len);
-
static int phyint_state_event(struct phyint_group *pg, struct phyint *pi);
static int phyint_group_state_event(struct phyint_group *pg);
static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t);
static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi,
ipmp_if_op_t op);
+static int logint_upcount(struct phyint *pi);
static uint64_t gensig(void);
/* Initialize any per-file global state. Returns 0 on success, -1 on failure */
@@ -110,6 +101,183 @@ phyint_lookup(const char *name)
return (pi);
}
+/*
+ * Lookup a phyint in the group that has the same hardware address as `pi', or
+ * NULL if there's none. If `online_only' is set, then only online phyints
+ * are considered when matching. Otherwise, phyints that had been offlined
+ * due to a duplicate hardware address will also be considered.
+ */
+static struct phyint *
+phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only)
+{
+ struct phyint *pi2;
+
+ if (pi->pi_group == phyint_anongroup)
+ return (NULL);
+
+ for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (pi2 == pi)
+ continue;
+
+ /*
+ * NOTE: even when online_only is B_FALSE, we ignore phyints
+ * that are administratively offline (rather than offline
+ * because they're dups); when they're brought back online,
+ * they'll be flagged as dups if need be.
+ */
+ if (pi2->pi_state == PI_OFFLINE &&
+ (online_only || !pi2->pi_hwaddrdup))
+ continue;
+
+ if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen &&
+ bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0)
+ return (pi2);
+ }
+ return (NULL);
+}
+
+/*
+ * Respond to DLPI notifications. Currently, this only processes physical
+ * address changes for the phyint passed via `arg' by onlining or offlining
+ * phyints in the group.
+ */
+/* ARGSUSED */
+static void
+phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg)
+{
+ struct phyint *pi = arg;
+ struct phyint *oduppi = NULL, *duppi = NULL;
+
+ assert((dnip->dni_note & pi->pi_notes) != 0);
+
+ if (dnip->dni_note != DL_NOTE_PHYS_ADDR)
+ return;
+
+ assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX);
+
+ /*
+ * If our hardware address hasn't changed, there's nothing to do.
+ */
+ if (pi->pi_hwaddrlen == dnip->dni_physaddrlen &&
+ bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0)
+ return;
+
+ oduppi = phyint_lookup_hwaddr(pi, _B_FALSE);
+ pi->pi_hwaddrlen = dnip->dni_physaddrlen;
+ (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen);
+ duppi = phyint_lookup_hwaddr(pi, _B_FALSE);
+
+ if (oduppi != NULL || pi->pi_hwaddrdup) {
+ /*
+ * Our old hardware address was a duplicate. If we'd been
+ * offlined because of it, and our new hardware address is not
+ * a duplicate, then bring us online. Otherwise, `oduppi'
+ * must've been the one brought offline; bring it online.
+ */
+ if (pi->pi_hwaddrdup) {
+ if (duppi == NULL)
+ (void) phyint_undo_offline(pi);
+ } else {
+ assert(oduppi->pi_hwaddrdup);
+ (void) phyint_undo_offline(oduppi);
+ }
+ }
+
+ if (duppi != NULL && !pi->pi_hwaddrdup) {
+ /*
+ * Our new hardware address is a duplicate and we're not
+ * yet flagged as a duplicate; bring us offline.
+ */
+ pi->pi_hwaddrdup = _B_TRUE;
+ (void) phyint_offline(pi, 0);
+ }
+}
+
+/*
+ * Initialize information about the underlying link for `pi', and set us
+ * up to be notified about future changes. Returns _B_TRUE on success.
+ */
+boolean_t
+phyint_link_init(struct phyint *pi)
+{
+ int retval;
+ uint_t notes;
+ const char *errmsg;
+ dlpi_notifyid_t id;
+
+ pi->pi_notes = 0;
+ retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0);
+ if (retval != DLPI_SUCCESS) {
+ pi->pi_dh = NULL;
+ errmsg = "cannot open";
+ goto failed;
+ }
+
+ pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX;
+ retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr,
+ &pi->pi_hwaddrlen);
+ if (retval != DLPI_SUCCESS) {
+ errmsg = "cannot get hardware address";
+ goto failed;
+ }
+
+ retval = dlpi_bind(pi->pi_dh, DLPI_ANY_SAP, NULL);
+ if (retval != DLPI_SUCCESS) {
+ errmsg = "cannot bind to DLPI_ANY_SAP";
+ goto failed;
+ }
+
+ /*
+ * Check if the link supports DLPI link state notifications. For
+ * historical reasons, the actual changes are tracked through routing
+ * sockets, so we immediately disable the notification upon success.
+ */
+ notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN;
+ retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id);
+ if (retval == DLPI_SUCCESS) {
+ (void) dlpi_disabnotify(pi->pi_dh, id, NULL);
+ pi->pi_notes |= notes;
+ }
+
+ /*
+ * Enable notification of hardware address changes to keep pi_hwaddr
+ * up-to-date and track if we need to offline/undo-offline phyints.
+ */
+ notes = DL_NOTE_PHYS_ADDR;
+ retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id);
+ if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0)
+ pi->pi_notes |= notes;
+
+ return (_B_TRUE);
+failed:
+ logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval));
+ if (pi->pi_dh != NULL) {
+ dlpi_close(pi->pi_dh);
+ pi->pi_dh = NULL;
+ }
+ return (_B_FALSE);
+}
+
+/*
+ * Close use of link on `pi'.
+ */
+void
+phyint_link_close(struct phyint *pi)
+{
+ if (pi->pi_notes & DL_NOTE_PHYS_ADDR) {
+ (void) poll_remove(dlpi_fd(pi->pi_dh));
+ pi->pi_notes &= ~DL_NOTE_PHYS_ADDR;
+ }
+
+ /*
+ * NOTE: we don't clear pi_notes here so that iflinkstate() can still
+ * properly report the link state even when offline (which is possible
+ * since we use IFF_RUNNING to track link state).
+ */
+ dlpi_close(pi->pi_dh);
+ pi->pi_dh = NULL;
+}
+
/* Return the phyint instance with the given name and the given family */
struct phyint_instance *
phyint_inst_lookup(int af, char *name)
@@ -128,7 +296,7 @@ phyint_inst_lookup(int af, char *name)
return (PHYINT_INSTANCE(pi, af));
}
-static struct phyint_group *
+struct phyint_group *
phyint_group_lookup(const char *pg_name)
{
struct phyint_group *pg;
@@ -173,6 +341,9 @@ phyint_insert(struct phyint *pi, struct phyint_group *pg)
pi->pi_pgnext->pi_pgprev = pi;
pg->pg_phyint = pi;
+ /* Refresh the group state now that this phyint has been added */
+ phyint_group_refresh_state(pg);
+
pg->pg_sig++;
(void) phyint_group_member_event(pg, pi, IPMP_IF_ADD);
}
@@ -214,24 +385,24 @@ phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex,
}
/*
- * Record the phyint values. Also insert the phyint into the
- * phyint group by calling phyint_insert().
+ * Record the phyint values.
*/
(void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name));
pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME;
pi->pi_ifindex = ifindex;
- pi->pi_icmpid =
- htons(((getpid() & 0xFF) << 8) | (pi->pi_ifindex & 0xFF));
+ pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF));
+
/*
- * We optimistically start in the PI_RUNNING state. Later (in
- * process_link_state_changes()), we will readjust this to match the
+ * If the interface is offline, we set the state to PI_OFFLINE.
+ * Otherwise, we optimistically start in the PI_RUNNING state. Later
+ * (in process_link_state_changes()), we will adjust this to match the
* current state of the link. Further, if test addresses are
* subsequently assigned, we will transition to PI_NOTARGETS and then
- * either PI_RUNNING or PI_FAILED, depending on the result of the test
- * probes.
+ * to either PI_RUNNING or PI_FAILED depending on the probe results.
*/
- pi->pi_state = PI_RUNNING;
+ pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING;
pi->pi_flags = PHYINT_FLAGS(flags);
+
/*
* Initialise the link state. The link state is initialised to
* up, so that if the link is down when IPMP starts monitoring
@@ -241,19 +412,17 @@ phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex,
*/
INIT_LINK_STATE(pi);
+ if (!phyint_link_init(pi)) {
+ free(pi);
+ return (NULL);
+ }
+
/*
* Insert the phyint in the list of all phyints, and the
* list of phyint group members
*/
phyint_insert(pi, pg);
- /*
- * If we are joining a failed group, mark the interface as
- * failed.
- */
- if (GROUP_FAILED(pg))
- (void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
-
return (pi);
}
@@ -313,15 +482,14 @@ phyint_chstate(struct phyint *pi, enum pi_state state)
return;
pi->pi_state = state;
- pi->pi_group->pg_sig++;
- (void) phyint_state_event(pi->pi_group, pi);
+ phyint_changed(pi);
}
/*
- * Note that the type of phyint `pi' has changed.
+ * Note that `pi' has changed state.
*/
void
-phyint_newtype(struct phyint *pi)
+phyint_changed(struct phyint *pi)
{
pi->pi_group->pg_sig++;
(void) phyint_state_event(pi->pi_group, pi);
@@ -331,7 +499,7 @@ phyint_newtype(struct phyint *pi)
* Insert the phyint group in the linked list of all phyint groups
* at the head of the list
*/
-static void
+void
phyint_group_insert(struct phyint_group *pg)
{
pg->pg_next = phyint_groups;
@@ -347,7 +515,7 @@ phyint_group_insert(struct phyint_group *pg)
/*
* Create a new phyint group called 'name'.
*/
-static struct phyint_group *
+struct phyint_group *
phyint_group_create(const char *name)
{
struct phyint_group *pg;
@@ -363,9 +531,16 @@ phyint_group_create(const char *name)
(void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name));
pg->pg_sig = gensig();
-
pg->pg_fdt = user_failure_detection_time;
pg->pg_probeint = user_probe_interval;
+ pg->pg_in_use = _B_TRUE;
+
+ /*
+ * Normal groups always start in the PG_FAILED state since they
+ * have no active interfaces. In contrast, anonymous groups are
+ * heterogeneous and thus always PG_OK.
+ */
+ pg->pg_state = (name[0] == '\0' ? PG_OK : PG_FAILED);
return (pg);
}
@@ -378,10 +553,20 @@ phyint_group_chstate(struct phyint_group *pg, enum pg_state state)
{
assert(pg != phyint_anongroup);
+ /*
+ * To simplify things, some callers always set a given state
+ * regardless of the previous state of the group (e.g., setting
+ * PG_DEGRADED when it's already set). We shouldn't bother
+ * generating an event or consuming a signature for these, since
+ * the actual state of the group is unchanged.
+ */
+ if (pg->pg_state == state)
+ return;
+
+ pg->pg_state = state;
+
switch (state) {
case PG_FAILED:
- pg->pg_groupfailed = 1;
-
/*
* We can never know with certainty that a group has
* failed. It is possible that all known targets have
@@ -392,16 +577,15 @@ phyint_group_chstate(struct phyint_group *pg, enum pg_state state)
* hosts, we have to discover it by multicast. So flush
* all the host targets. The next probe will send out a
* multicast echo request. If this is a group failure, we
- * will still not see any response, otherwise we will
- * clear the pg_groupfailed flag after we get
- * NUM_PROBE_REPAIRS consecutive unicast replies on any
- * phyint.
+ * will still not see any response, otherwise the group
+ * will be repaired after we get NUM_PROBE_REPAIRS
+ * consecutive unicast replies on any phyint.
*/
target_flush_hosts(pg);
break;
- case PG_RUNNING:
- pg->pg_groupfailed = 0;
+ case PG_OK:
+ case PG_DEGRADED:
break;
default:
@@ -432,7 +616,6 @@ phyint_inst_init_from_k(int af, char *pi_name)
struct lifreq lifr;
struct phyint *pi;
struct phyint_instance *pii;
- boolean_t pg_created;
boolean_t pi_created;
struct phyint_group *pg;
@@ -441,7 +624,6 @@ retry:
pi = NULL;
pg = NULL;
pi_created = _B_FALSE;
- pg_created = _B_FALSE;
if (debug & D_PHYINT) {
logdebug("phyint_inst_init_from_k(%s %s)\n",
@@ -454,11 +636,11 @@ retry:
ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
/*
- * Get the interface flags. Ignore loopback and multipoint
- * interfaces.
+ * Get the interface flags. Ignore virtual interfaces, IPMP
+ * meta-interfaces, point-to-point interfaces, and interfaces
+ * that can't support multicast.
*/
- (void) strncpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
+ (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO) {
logperror("phyint_inst_init_from_k:"
@@ -467,7 +649,8 @@ retry:
return (NULL);
}
flags = lifr.lifr_flags;
- if (!(flags & IFF_MULTICAST) || (flags & IFF_LOOPBACK))
+ if (!(flags & IFF_MULTICAST) ||
+ (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)))
return (NULL);
/*
@@ -493,8 +676,7 @@ retry:
}
return (NULL);
}
- (void) strncpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));
- pg_name[sizeof (pg_name) - 1] = '\0';
+ (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));
/*
* If the phyint is not part of any group, pg_name is the
@@ -503,12 +685,13 @@ retry:
*/
if (pg_name[0] == '\0' && !track_all_phyints) {
/*
- * If the IFF_FAILED or IFF_OFFLINE flags are set, reset
- * them. These flags shouldn't be set if IPMP isn't
- * tracking the interface.
+ * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are
+ * set, reset them. These flags shouldn't be set if in.mpathd
+ * isn't tracking the interface.
*/
- if ((flags & (IFF_FAILED | IFF_OFFLINE)) != 0) {
- lifr.lifr_flags = flags & ~(IFF_FAILED | IFF_OFFLINE);
+ if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) {
+ lifr.lifr_flags = flags &
+ ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE);
if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO) {
logperror("phyint_inst_init_from_k:"
@@ -520,21 +703,20 @@ retry:
}
/*
- * We need to create a new phyint instance. A phyint instance
- * belongs to a phyint, and the phyint belongs to a phyint group.
- * So we first lookup the 'parents' and if they don't exist then
- * we create them.
+ * We need to create a new phyint instance. We may also need to
+ * create the group if e.g. the SIOCGLIFCONF loop in initifs() found
+ * an underlying interface before it found its IPMP meta-interface.
+ * Note that we keep any created groups, even if
+ * phyint_inst_init_from_k() fails, since a group's existence does not
+ * depend on the ability of in.mpathd to track the group's interfaces.
*/
- pg = phyint_group_lookup(pg_name);
- if (pg == NULL) {
- pg = phyint_group_create(pg_name);
- if (pg == NULL) {
- logerr("phyint_inst_init_from_k:"
- " unable to create group %s\n", pg_name);
+ if ((pg = phyint_group_lookup(pg_name)) == NULL) {
+ if ((pg = phyint_group_create(pg_name)) == NULL) {
+ logerr("phyint_inst_init_from_k: cannot create group "
+ "%s\n", pg_name);
return (NULL);
}
phyint_group_insert(pg);
- pg_created = _B_TRUE;
}
/*
@@ -546,8 +728,6 @@ retry:
if (pi == NULL) {
logerr("phyint_inst_init_from_k:"
" unable to create phyint %s\n", pi_name);
- if (pg_created)
- phyint_group_delete(pg);
return (NULL);
}
pi_created = _B_TRUE;
@@ -564,8 +744,6 @@ retry:
* while we are yet to update our tables. Do it now.
*/
if (pi->pi_ifindex != ifindex) {
- if (pg_created)
- phyint_group_delete(pg);
phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af)));
goto retry;
}
@@ -577,9 +755,6 @@ retry:
* changed, while we are yet to update our tables. Do it now.
*/
if (strcmp(pi->pi_group->pg_name, pg_name) != 0) {
- if (pg_created)
- phyint_group_delete(pg);
- restore_phyint(pi);
phyint_inst_delete(PHYINT_INSTANCE(pi,
AF_OTHER(af)));
goto retry;
@@ -594,16 +769,25 @@ retry:
if (pii == NULL) {
logerr("phyint_inst_init_from_k: unable to create"
"phyint inst %s\n", pi->pi_name);
- if (pi_created) {
- /*
- * Deleting the phyint will delete the phyint group
- * if this is the last phyint in the group.
- */
+ if (pi_created)
phyint_delete(pi);
- }
+
return (NULL);
}
+ if (pi_created) {
+ /*
+ * If this phyint does not have a unique hardware address in its
+ * group, offline it. (The change_pif_flags() implementation
+ * requires that we defer this until after the phyint_instance
+ * is created.)
+ */
+ if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) {
+ pi->pi_hwaddrdup = _B_TRUE;
+ (void) phyint_offline(pi, 0);
+ }
+ }
+
return (pii);
}
@@ -677,16 +861,16 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
{
icmp6_filter_t filter;
int hopcount = 1;
- int int_op;
+ int off = 0;
+ int on = 1;
struct sockaddr_in6 testaddr;
/*
* Open a raw socket with ICMPv6 protocol.
*
- * Use IPV6_DONTFAILOVER_IF to make sure that probes go out
- * on the specified phyint only, and are not subject to load
- * balancing. Bind to the src address chosen will ensure that
- * the responses are received only on the specified phyint.
+ * Use IPV6_BOUND_IF to make sure that probes are sent and received on
+ * the specified phyint only. Bind to the test address to ensure that
+ * the responses are sent to the specified phyint.
*
* Set the hopcount to 1 so that probe packets are not routed.
* Disable multicast loopback. Set the receive filter to
@@ -696,7 +880,7 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
if (pii->pii_probe_sock < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: socket");
return (_B_FALSE);
-}
+ }
bzero(&testaddr, sizeof (testaddr));
testaddr.sin6_family = AF_INET6;
@@ -709,14 +893,17 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- /*
- * IPV6_DONTFAILOVER_IF option takes precedence over setting
- * IP_MULTICAST_IF. So we don't set IPV6_MULTICAST_IF again.
- */
- if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_DONTFAILOVER_IF,
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF,
(char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
- " IPV6_DONTFAILOVER_IF");
+ " IPV6_MULTICAST_IF");
+ return (_B_FALSE);
+ }
+
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF,
+ &pii->pii_ifindex, sizeof (uint_t)) < 0) {
+ logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
+ " IPV6_BOUND_IF");
return (_B_FALSE);
}
@@ -734,9 +921,8 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- int_op = 0; /* used to turn off option */
if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
- (char *)&int_op, sizeof (int_op)) < 0) {
+ (char *)&off, sizeof (off)) < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
" IPV6_MULTICAST_LOOP");
return (_B_FALSE);
@@ -755,15 +941,22 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- /* Enable receipt of ancillary data */
- int_op = 1;
+ /* Enable receipt of hoplimit */
if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
- (char *)&int_op, sizeof (int_op)) < 0) {
+ &on, sizeof (on)) < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
" IPV6_RECVHOPLIMIT");
return (_B_FALSE);
}
+ /* Enable receipt of timestamp */
+ if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP,
+ &on, sizeof (on)) < 0) {
+ logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
+ " SO_TIMESTAMP");
+ return (_B_FALSE);
+ }
+
return (_B_TRUE);
}
@@ -775,20 +968,20 @@ static boolean_t
phyint_inst_v4_sockinit(struct phyint_instance *pii)
{
struct sockaddr_in testaddr;
- char char_op;
+ char char_off = 0;
int ttl = 1;
char char_ttl = 1;
+ int on = 1;
/*
* Open a raw socket with ICMPv4 protocol.
*
- * Use IP_DONTFAILOVER_IF to make sure that probes go out
- * on the specified phyint only, and are not subject to load
- * balancing. Bind to the src address chosen will ensure that
- * the responses are received only on the specified phyint.
+ * Use IP_BOUND_IF to make sure that probes are sent and received on
+ * the specified phyint only. Bind to the test address to ensure that
+ * the responses are sent to the specified phyint.
*
* Set the ttl to 1 so that probe packets are not routed.
- * Disable multicast loopback.
+ * Disable multicast loopback. Enable receipt of timestamp.
*/
pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP);
if (pii->pii_probe_sock < 0) {
@@ -808,14 +1001,17 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- /*
- * IP_DONTFAILOVER_IF option takes precedence over setting
- * IP_MULTICAST_IF. So we don't set IP_MULTICAST_IF again.
- */
- if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_DONTFAILOVER_IF,
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF,
+ &pii->pii_ifindex, sizeof (uint_t)) < 0) {
+ logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
+ " IP_BOUND_IF");
+ return (_B_FALSE);
+ }
+
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF,
(char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) {
logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
- " IP_DONTFAILOVER");
+ " IP_MULTICAST_IF");
return (_B_FALSE);
}
@@ -826,9 +1022,8 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- char_op = 0; /* used to turn off option */
if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP,
- (char *)&char_op, sizeof (char_op)) == -1) {
+ (char *)&char_off, sizeof (char_off)) == -1) {
logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
" IP_MULTICAST_LOOP");
return (_B_FALSE);
@@ -841,6 +1036,13 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
+ if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on,
+ sizeof (on)) < 0) {
+ logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
+ " SO_TIMESTAMP");
+ return (_B_FALSE);
+ }
+
return (_B_TRUE);
}
@@ -848,7 +1050,7 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
* Remove the phyint group from the list of 'all phyint groups'
* and free it.
*/
-static void
+void
phyint_group_delete(struct phyint_group *pg)
{
/*
@@ -881,10 +1083,69 @@ phyint_group_delete(struct phyint_group *pg)
phyint_grouplistsig++;
(void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE);
+ addrlist_free(&pg->pg_addrs);
free(pg);
}
/*
+ * Refresh the state of `pg' based on its current members.
+ */
+void
+phyint_group_refresh_state(struct phyint_group *pg)
+{
+ enum pg_state state;
+ enum pg_state origstate = pg->pg_state;
+ struct phyint *pi, *usablepi;
+ uint_t nif = 0, nusable = 0;
+
+ /*
+ * Anonymous groups never change state.
+ */
+ if (pg == phyint_anongroup)
+ return;
+
+ for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
+ nif++;
+ if (phyint_is_usable(pi)) {
+ nusable++;
+ usablepi = pi;
+ }
+ }
+
+ if (nusable == 0)
+ state = PG_FAILED;
+ else if (nif == nusable)
+ state = PG_OK;
+ else
+ state = PG_DEGRADED;
+
+ phyint_group_chstate(pg, state);
+
+ /*
+ * If we're shutting down, skip logging messages since otherwise our
+ * shutdown housecleaning will make us report that groups are unusable.
+ */
+ if (cleanup_started)
+ return;
+
+ /*
+ * NOTE: We use pg_failmsg_printed rather than origstate since
+ * otherwise at startup we'll log a "now usable" message when the
+ * first usable phyint is added to an empty group.
+ */
+ if (state != PG_FAILED && pg->pg_failmsg_printed) {
+ assert(origstate == PG_FAILED);
+ logerr("At least 1 IP interface (%s) in group %s is now "
+ "usable\n", usablepi->pi_name, pg->pg_name);
+ pg->pg_failmsg_printed = _B_FALSE;
+ } else if (origstate != PG_FAILED && state == PG_FAILED) {
+ logerr("All IP interfaces in group %s are now unusable\n",
+ pg->pg_name);
+ pg->pg_failmsg_printed = _B_TRUE;
+ }
+}
+
+/*
* Extract information from the kernel about the desired phyint.
* Look only for properties of the phyint and not properties of logints.
* Take appropriate action on the changes.
@@ -998,28 +1259,16 @@ phyint_inst_update_from_k(struct phyint_instance *pii)
if (pi->pi_v6 != NULL)
pi->pi_v6->pii_flags = pi->pi_flags;
+ /*
+ * Make sure the IFF_FAILED flag is set if and only if we think
+ * the interface should be failed.
+ */
if (pi->pi_flags & IFF_FAILED) {
- /*
- * If we are in the running and full state, we have
- * completed failbacks successfully and we would have
- * expected IFF_FAILED to have been clear. That it is
- * set means there was a race condition. Some other
- * process turned on the IFF_FAILED flag. Since the
- * flag setting is not atomic, i.e. a get ioctl followed
- * by a set ioctl, and since there is no way to set an
- * individual flag bit, this could have occurred.
- */
- if (pi->pi_state == PI_RUNNING && pi->pi_full)
- (void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
+ if (pi->pi_state == PI_RUNNING)
+ (void) change_pif_flags(pi, 0, IFF_FAILED);
} else {
- /*
- * If we are in the failed state, there was a race.
- * we have completed failover successfully because our
- * state is failed and empty. Some other process turned
- * off the IFF_FAILED flag. Same comment as above
- */
- if (pi->pi_state == PI_FAILED && pi->pi_empty)
- (void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
+ if (pi->pi_state == PI_FAILED)
+ (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE);
}
/* No change in phyint status */
@@ -1028,12 +1277,12 @@ phyint_inst_update_from_k(struct phyint_instance *pii)
/*
* Delete the phyint. Remove it from the list of all phyints, and the
- * list of phyint group members. If the group becomes empty, delete the
- * group also.
+ * list of phyint group members.
*/
static void
phyint_delete(struct phyint *pi)
{
+ struct phyint *pi2;
struct phyint_group *pg = pi->pi_group;
if (debug & D_PHYINT)
@@ -1065,6 +1314,9 @@ phyint_delete(struct phyint *pi)
pi->pi_pgnext = NULL;
pi->pi_pgprev = NULL;
+ /* Refresh the group state now that this phyint has been removed */
+ phyint_group_refresh_state(pg);
+
/* Remove the phyint from the global list of phyints */
if (pi->pi_prev == NULL) {
/* Phyint is the 1st in the list */
@@ -1077,11 +1329,153 @@ phyint_delete(struct phyint *pi)
pi->pi_next = NULL;
pi->pi_prev = NULL;
+ /*
+ * See if another phyint in the group had been offlined because
+ * it was a dup of `pi' -- and if so, online it.
+ */
+ if (!pi->pi_hwaddrdup &&
+ (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) {
+ assert(pi2->pi_hwaddrdup);
+ (void) phyint_undo_offline(pi2);
+ }
+ phyint_link_close(pi);
free(pi);
+}
+
+/*
+ * Offline phyint `pi' if at least `minred' usable interfaces remain in the
+ * group. Returns an IPMP error code.
+ */
+int
+phyint_offline(struct phyint *pi, uint_t minred)
+{
+ unsigned int nusable = 0;
+ struct phyint *pi2;
+ struct phyint_group *pg = pi->pi_group;
+
+ /*
+ * Verify that enough usable interfaces in the group would remain.
+ * As a special case, if the group has failed, allow any non-offline
+ * phyints to be offlined.
+ */
+ if (pg != phyint_anongroup) {
+ for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (pi2 == pi)
+ continue;
+ if (phyint_is_usable(pi2) ||
+ (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE))
+ nusable++;
+ }
+ }
+ if (nusable < minred)
+ return (IPMP_EMINRED);
+
+ if (!change_pif_flags(pi, IFF_OFFLINE, 0))
+ return (IPMP_FAILURE);
+
+ /*
+ * The interface is now offline, so stop probing it. Note that
+ * if_mpadm(1M) will down the test addresses, after receiving a
+ * success reply from us. The routing socket message will then make us
+ * close the socket used for sending probes. But it is more logical
+ * that an offlined interface must not be probed, even if it has test
+ * addresses.
+ *
+ * NOTE: stop_probing() also sets PI_OFFLINE.
+ */
+ stop_probing(pi);
+
+ /*
+ * If we're offlining the phyint because it has a duplicate hardware
+ * address, print a warning -- and leave the link open so that we can
+ * be notified of hardware address changes that make it usable again.
+ * Otherwise, close the link so that we won't prevent a detach.
+ */
+ if (pi->pi_hwaddrdup) {
+ logerr("IP interface %s has a hardware address which is not "
+ "unique in group %s; offlining\n", pi->pi_name,
+ pg->pg_name);
+ } else {
+ phyint_link_close(pi);
+ }
+
+ /*
+ * If this phyint was preventing another phyint with a duplicate
+ * hardware address from being online, bring that one online now.
+ */
+ if (!pi->pi_hwaddrdup &&
+ (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) {
+ assert(pi2->pi_hwaddrdup);
+ (void) phyint_undo_offline(pi2);
+ }
- /* Delete the phyint_group if the last phyint has been deleted */
- if (pg->pg_phyint == NULL)
- phyint_group_delete(pg);
+ /*
+ * If this interface was active, try to activate another INACTIVE
+ * interface in the group.
+ */
+ if (!(pi->pi_flags & IFF_INACTIVE))
+ phyint_activate_another(pi);
+
+ return (IPMP_SUCCESS);
+}
+
+/*
+ * Undo a previous offline of `pi'. Returns an IPMP error code.
+ */
+int
+phyint_undo_offline(struct phyint *pi)
+{
+ if (pi->pi_state != PI_OFFLINE) {
+ errno = EINVAL;
+ return (IPMP_FAILURE);
+ }
+
+ /*
+ * If necessary, reinitialize our link information and verify that its
+ * hardware address is still unique across the group.
+ */
+ if (pi->pi_dh == NULL && !phyint_link_init(pi)) {
+ errno = EIO;
+ return (IPMP_FAILURE);
+ }
+
+ if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) {
+ pi->pi_hwaddrdup = _B_TRUE;
+ return (IPMP_EHWADDRDUP);
+ }
+
+ if (pi->pi_hwaddrdup) {
+ logerr("IP interface %s now has a unique hardware address in "
+ "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name);
+ pi->pi_hwaddrdup = _B_FALSE;
+ }
+
+ if (!change_pif_flags(pi, 0, IFF_OFFLINE))
+ return (IPMP_FAILURE);
+
+ /*
+ * While the interface was offline, it may have failed (e.g. the link
+ * may have gone down). phyint_inst_check_for_failure() will have
+ * already set pi_flags with IFF_FAILED, so we can use that to decide
+ * whether the phyint should transition to running. Note that after
+ * we transition to running, we will start sending probes again (if
+ * test addresses are configured), which may also reveal that the
+ * interface is in fact failed.
+ */
+ if (pi->pi_flags & IFF_FAILED) {
+ phyint_chstate(pi, PI_FAILED);
+ } else {
+ /* calls phyint_chstate() */
+ phyint_transition_to_running(pi);
+ }
+
+ /*
+ * Give the requestor time to configure test addresses before
+ * complaining that they're missing.
+ */
+ pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME;
+
+ return (IPMP_SUCCESS);
}
/*
@@ -1166,11 +1560,10 @@ phyint_inst_print(struct phyint_instance *pii)
}
logdebug("\nPhyint instance: %s %s index %u state %x flags %llx "
- "sock %x in_use %d empty %x full %x\n",
+ "sock %x in_use %d\n",
AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex,
pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock,
- pii->pii_in_use, pii->pii_phyint->pi_empty,
- pii->pii_phyint->pi_full);
+ pii->pii_in_use);
for (li = pii->pii_logint; li != NULL; li = li->li_next)
logint_print(li);
@@ -1211,9 +1604,11 @@ phyint_inst_print(struct phyint_instance *pii)
} else {
logdebug("#%d target NULL ", i);
}
- logdebug("time_sent %u status %d time_ack/lost %u\n",
- pii->pii_probes[i].pr_time_sent,
+ logdebug("time_start %lld status %d "
+ "time_ackproc %lld time_lost %u",
+ pii->pii_probes[i].pr_hrtime_start,
pii->pii_probes[i].pr_status,
+ pii->pii_probes[i].pr_hrtime_ackproc,
pii->pii_probes[i].pr_time_lost);
i = PROBE_INDEX_PREV(i);
} while (i != most_recent);
@@ -1293,7 +1688,6 @@ logint_init_from_k(struct phyint_instance *pii, char *li_name)
struct logint *li;
struct lifreq lifr;
struct in6_addr test_subnet;
- struct in6_addr test_subnet_mask;
struct in6_addr testaddr;
int test_subnet_len;
struct sockaddr_in6 *sin6;
@@ -1373,55 +1767,21 @@ logint_init_from_k(struct phyint_instance *pii, char *li_name)
testaddr = sin6->sin6_addr;
}
- if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
- ptp = _B_TRUE;
- if (ioctl(ifsock, SIOCGLIFDSTADDR, (char *)&lifr) < 0) {
- if (errno != ENXIO) {
- logperror_li(li, "logint_init_from_k:"
- " (get dstaddr)");
- }
- goto error;
- }
- if (pii->pii_af == AF_INET) {
- sin = (struct sockaddr_in *)&lifr.lifr_addr;
- IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &tgaddr);
- } else {
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
- tgaddr = sin6->sin6_addr;
- }
- } else {
- if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) {
- /* Interface may have vanished */
- if (errno != ENXIO) {
- logperror_li(li, "logint_init_from_k:"
- " (get subnet)");
- }
- goto error;
- }
- if (lifr.lifr_subnet.ss_family == AF_INET6) {
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet;
- test_subnet = sin6->sin6_addr;
- test_subnet_len = lifr.lifr_addrlen;
- } else {
- sin = (struct sockaddr_in *)&lifr.lifr_subnet;
- IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet);
- test_subnet_len = lifr.lifr_addrlen +
- (IPV6_ABITS - IP_ABITS);
- }
- (void) ip_index_to_mask_v6(test_subnet_len, &test_subnet_mask);
- }
-
- /*
- * Also record the OINDEX for completeness. This information is
- * not used.
- */
- if (ioctl(ifsock, SIOCGLIFOINDEX, (char *)&lifr) < 0) {
- if (errno != ENXIO) {
- logperror_li(li, "logint_init_from_k:"
- " (get lifoindex)");
- }
+ if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) {
+ /* Interface may have vanished */
+ if (errno != ENXIO)
+ logperror_li(li, "logint_init_from_k: (get subnet)");
goto error;
}
+ if (lifr.lifr_subnet.ss_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet;
+ test_subnet = sin6->sin6_addr;
+ test_subnet_len = lifr.lifr_addrlen;
+ } else {
+ sin = (struct sockaddr_in *)&lifr.lifr_subnet;
+ IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet);
+ test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS);
+ }
/*
* If this is the logint corresponding to the test address used for
@@ -1454,7 +1814,6 @@ logint_init_from_k(struct phyint_instance *pii, char *li_name)
/* Update the logint with the values obtained from the kernel. */
li->li_addr = testaddr;
li->li_in_use = 1;
- li->li_oifindex = lifr.lifr_index;
if (ptp) {
li->li_dstaddr = tgaddr;
li->li_subnet_len = (pii->pii_af == AF_INET) ?
@@ -1530,15 +1889,12 @@ static void
logint_print(struct logint *li)
{
char abuf[INET6_ADDRSTRLEN];
- int af;
-
- af = li->li_phyint_inst->pii_af;
+ int af = li->li_phyint_inst->pii_af;
logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name,
pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len);
- logdebug("\tFlags: %llx in_use %d oifindex %d\n",
- li->li_flags, li->li_in_use, li->li_oifindex);
+ logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use);
}
char *
@@ -1555,6 +1911,33 @@ pr_addr(int af, struct in6_addr addr, char *abuf, int len)
return (abuf);
}
+/*
+ * Fill in the sockaddr_storage pointed to by `ssp' with the IP address
+ * represented by the [`af',`addr'] pair. Needed because in.mpathd internally
+ * stores all addresses as in6_addrs, but we don't want to expose that.
+ */
+void
+addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp)
+{
+ struct sockaddr_in *sinp = (struct sockaddr_in *)ssp;
+ struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp;
+
+ assert(af == AF_INET || af == AF_INET6);
+
+ switch (af) {
+ case AF_INET:
+ (void) memset(sinp, 0, sizeof (*sinp));
+ sinp->sin_family = AF_INET;
+ IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr);
+ break;
+ case AF_INET6:
+ (void) memset(sin6p, 0, sizeof (*sin6p));
+ sin6p->sin6_family = AF_INET6;
+ sin6p->sin6_addr = *addr;
+ break;
+ }
+}
+
/* Lookup target on its address */
struct target *
target_lookup(struct phyint_instance *pii, struct in6_addr addr)
@@ -1686,7 +2069,7 @@ target_select_best(struct phyint_instance *pii)
if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
slow_recovered = tg;
/*
- * Promote the slow_recoverd to unused
+ * Promote the slow_recovered to unused
*/
tg->tg_status = TG_UNUSED;
} else {
@@ -1698,7 +2081,7 @@ target_select_best(struct phyint_instance *pii)
if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
dead_recovered = tg;
/*
- * Promote the dead_recoverd to slow
+ * Promote the dead_recovered to slow
*/
tg->tg_status = TG_SLOW;
tg->tg_latime = now;
@@ -1798,11 +2181,9 @@ target_create(struct phyint_instance *pii, struct in6_addr addr,
/*
* If there are multiple subnets associated with an interface, then
- * add the target to this phyint instance, only if it belongs to the
- * same subnet as the test address. The reason is that interface
- * routes derived from non-test-addresses i.e. non-IFF_NOFAILOVER
- * addresses, will disappear after failover, and the targets will not
- * be reachable from this interface.
+ * add the target to this phyint instance only if it belongs to the
+ * same subnet as the test address. This assures us that we will
+ * be able to reach this target through our routing table.
*/
if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len))
return;
@@ -1906,11 +2287,12 @@ target_add(struct phyint_instance *pii, struct in6_addr addr,
/*
* If the target does not exist, create it; target_create() will set
- * tg_in_use to true. If it exists already, and it is a router
- * target, set tg_in_use to to true, so that init_router_targets()
- * won't delete it
+ * tg_in_use to true. Even if it exists already, if it's a router
+ * target and we'd previously learned of it through multicast, then we
+ * need to recreate it as a router target. Otherwise, just set
+ * tg_in_use to true so that init_router_targets() won't delete it.
*/
- if (tg == NULL)
+ if (tg == NULL || (is_router && !pii->pii_targets_are_routers))
target_create(pii, addr, is_router);
else if (is_router)
tg->tg_in_use = 1;
@@ -2034,16 +2416,17 @@ target_delete(struct target *tg)
* relevant any longer.
*/
assert(pii->pii_targets == NULL);
+ pii->pii_targets_are_routers = _B_FALSE;
clear_pii_probe_stats(pii);
pii_other = phyint_inst_other(pii);
/*
- * If there are no targets on both instances and the interface is
- * online, go back to PI_NOTARGETS state, since we cannot probe this
- * phyint any more. For more details, please see phyint state
- * diagram in mpd_probe.c.
+ * If there are no targets on both instances and the interface would
+ * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state,
+ * since we cannot probe this phyint any more. For more details,
+ * please see phyint state diagram in mpd_probe.c.
*/
- if (!PROBE_CAPABLE(pii_other) &&
+ if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) &&
pii->pii_phyint->pi_state != PI_OFFLINE)
phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
}
@@ -2101,9 +2484,11 @@ reset_pii_probes(struct phyint_instance *pii, struct target *tg)
for (i = 0; i < PROBE_STATS_COUNT; i++) {
if (pii->pii_probes[i].pr_target == tg) {
+ if (pii->pii_probes[i].pr_status == PR_UNACKED) {
+ probe_chstate(&pii->pii_probes[i], pii,
+ PR_LOST);
+ }
pii->pii_probes[i].pr_target = NULL;
- if (pii->pii_probes[i].pr_status == PR_UNACKED)
- pii->pii_probes[i].pr_status = PR_LOST;
}
}
@@ -2132,7 +2517,7 @@ target_print(struct target *tg)
af = tg->tg_phyint_inst->pii_af;
logdebug("Target on %s %s addr %s\n"
- "status %d rtt_sa %d rtt_sd %d crtt %d tg_in_use %d\n",
+ "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n",
AF_STR(af), tg->tg_phyint_inst->pii_name,
pr_addr(af, tg->tg_address, abuf, sizeof (abuf)),
tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd,
@@ -2158,35 +2543,16 @@ phyint_inst_print_all(void)
}
/*
- * Convert length for a mask to the mask.
- */
-static void
-ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask)
-{
- int j;
-
- assert(masklen <= IPV6_ABITS);
- bzero((char *)bitmask, sizeof (*bitmask));
-
- /* Make the 'masklen' leftmost bits one */
- for (j = 0; masklen > 8; masklen -= 8, j++)
- bitmask->s6_addr[j] = 0xff;
-
- bitmask->s6_addr[j] = 0xff << (8 - masklen);
-
-}
-
-/*
* Compare two prefixes that have the same prefix length.
* Fails if the prefix length is unreasonable.
*/
-static boolean_t
-prefix_equal(struct in6_addr p1, struct in6_addr p2, int prefix_len)
+boolean_t
+prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len)
{
uchar_t mask;
int j;
- if (prefix_len < 0 || prefix_len > IPV6_ABITS)
+ if (prefix_len > IPV6_ABITS)
return (_B_FALSE);
for (j = 0; prefix_len > 8; prefix_len -= 8, j++)
@@ -2202,35 +2568,25 @@ prefix_equal(struct in6_addr p1, struct in6_addr p2, int prefix_len)
}
/*
- * Get the number of UP logints (excluding IFF_NOFAILOVERs), on both
- * IPv4 and IPv6 put together. The phyint with the least such number
- * will be used as the failover destination, if no standby interface is
- * available
+ * Get the number of UP logints on phyint `pi'.
*/
-int
+static int
logint_upcount(struct phyint *pi)
{
struct logint *li;
- struct phyint_instance *pii;
int count = 0;
- pii = pi->pi_v4;
- if (pii != NULL) {
- for (li = pii->pii_logint; li != NULL; li = li->li_next) {
- if ((li->li_flags &
- (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
+ if (pi->pi_v4 != NULL) {
+ for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) {
+ if (li->li_flags & IFF_UP)
count++;
- }
}
}
- pii = pi->pi_v6;
- if (pii != NULL) {
- for (li = pii->pii_logint; li != NULL; li = li->li_next) {
- if ((li->li_flags &
- (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
+ if (pi->pi_v6 != NULL) {
+ for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) {
+ if (li->li_flags & IFF_UP)
count++;
- }
}
}
@@ -2250,6 +2606,28 @@ phyint_inst_other(struct phyint_instance *pii)
}
/*
+ * Check whether a phyint is functioning.
+ */
+static boolean_t
+phyint_is_functioning(struct phyint *pi)
+{
+ if (pi->pi_state == PI_RUNNING)
+ return (_B_TRUE);
+ return (pi->pi_state == PI_NOTARGETS && !(pi->pi_flags & IFF_FAILED));
+}
+
+/*
+ * Check whether a phyint is usable.
+ */
+static boolean_t
+phyint_is_usable(struct phyint *pi)
+{
+ if (logint_upcount(pi) == 0)
+ return (_B_FALSE);
+ return (phyint_is_functioning(pi));
+}
+
+/*
* Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'.
* Before sending the event, it prepends the current version of the IPMP
* sysevent API. Returns 0 on success, -1 on failure (in either case,
@@ -2258,16 +2636,18 @@ phyint_inst_other(struct phyint_instance *pii)
static int
post_event(const char *subclass, nvlist_t *nvl)
{
- sysevent_id_t eid;
+ static evchan_t *evchp = NULL;
/*
- * Since sysevents don't work yet in non-global zones, there cannot
- * possibly be any consumers yet, so don't bother trying to generate
- * them. (Otherwise, we'll spew warnings.)
+ * Initialize the event channel if we haven't already done so.
*/
- if (getzoneid() != GLOBAL_ZONEID) {
- nvlist_free(nvl);
- return (0);
+ if (evchp == NULL) {
+ errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT);
+ if (errno != 0) {
+ logerr("cannot create event channel `%s': %s\n",
+ IPMP_EVENT_CHAN, strerror(errno));
+ goto failed;
+ }
}
errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION,
@@ -2278,8 +2658,9 @@ post_event(const char *subclass, nvlist_t *nvl)
goto failed;
}
- if (sysevent_post_event(EC_IPMP, (char *)subclass, SUNW_VENDOR,
- "in.mpathd", nvl, &eid) == -1) {
+ errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun",
+ "in.mpathd", nvl, EVCH_NOSLEEP);
+ if (errno != 0) {
logerr("cannot send `%s' event: %s\n", subclass,
strerror(errno));
goto failed;
@@ -2300,6 +2681,8 @@ ifstate(struct phyint *pi)
{
switch (pi->pi_state) {
case PI_NOTARGETS:
+ if (pi->pi_flags & IFF_FAILED)
+ return (IPMP_IF_FAILED);
return (IPMP_IF_UNKNOWN);
case PI_OFFLINE:
@@ -2330,12 +2713,203 @@ iftype(struct phyint *pi)
}
/*
+ * Return the external IPMP link state associated with phyint `pi'.
+ */
+static ipmp_if_linkstate_t
+iflinkstate(struct phyint *pi)
+{
+ if (!(pi->pi_notes & (DL_NOTE_LINK_UP|DL_NOTE_LINK_DOWN)))
+ return (IPMP_LINK_UNKNOWN);
+
+ return (LINK_DOWN(pi) ? IPMP_LINK_DOWN : IPMP_LINK_UP);
+}
+
+/*
+ * Return the external IPMP probe state associated with phyint `pi'.
+ */
+static ipmp_if_probestate_t
+ifprobestate(struct phyint *pi)
+{
+ if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6))
+ return (IPMP_PROBE_DISABLED);
+
+ if (pi->pi_state == PI_FAILED)
+ return (IPMP_PROBE_FAILED);
+
+ if (!PROBE_CAPABLE(pi->pi_v4) && !PROBE_CAPABLE(pi->pi_v6))
+ return (IPMP_PROBE_UNKNOWN);
+
+ return (IPMP_PROBE_OK);
+}
+
+/*
+ * Return the external IPMP target mode associated with phyint instance `pii'.
+ */
+static ipmp_if_targmode_t
+iftargmode(struct phyint_instance *pii)
+{
+ if (!PROBE_ENABLED(pii))
+ return (IPMP_TARG_DISABLED);
+ else if (pii->pii_targets_are_routers)
+ return (IPMP_TARG_ROUTES);
+ else
+ return (IPMP_TARG_MULTICAST);
+}
+
+/*
+ * Return the external IPMP flags associated with phyint `pi'.
+ */
+static ipmp_if_flags_t
+ifflags(struct phyint *pi)
+{
+ ipmp_if_flags_t flags = 0;
+
+ if (logint_upcount(pi) == 0)
+ flags |= IPMP_IFFLAG_DOWN;
+ if (pi->pi_flags & IFF_INACTIVE)
+ flags |= IPMP_IFFLAG_INACTIVE;
+ if (pi->pi_hwaddrdup)
+ flags |= IPMP_IFFLAG_HWADDRDUP;
+ if (phyint_is_functioning(pi) && flags == 0)
+ flags |= IPMP_IFFLAG_ACTIVE;
+
+ return (flags);
+}
+
+/*
+ * Store the test address used on phyint instance `pii' in `ssp'. If there's
+ * no test address, the unspecified IPv6 address (AF_INET6, in6addr_any) is
+ * stored. Returns `ssp'.
+ */
+static struct sockaddr_storage *
+iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp)
+{
+ if (PROBE_ENABLED(pii))
+ addr2storage(pii->pii_af, &pii->pii_probe_logint->li_addr, ssp);
+ else
+ addr2storage(AF_INET6, &in6addr_any, ssp);
+
+ return (ssp);
+}
+
+/*
* Return the external IPMP group state associated with phyint group `pg'.
*/
static ipmp_group_state_t
groupstate(struct phyint_group *pg)
{
- return (GROUP_FAILED(pg) ? IPMP_GROUP_FAILED : IPMP_GROUP_OK);
+ switch (pg->pg_state) {
+ case PG_FAILED:
+ return (IPMP_GROUP_FAILED);
+ case PG_DEGRADED:
+ return (IPMP_GROUP_DEGRADED);
+ case PG_OK:
+ return (IPMP_GROUP_OK);
+ }
+
+ logerr("groupstate: unknown state %d; aborting\n", pg->pg_state);
+ abort();
+ /* NOTREACHED */
+}
+
+/*
+ * Return the external IPMP probe state associated with probe `ps'.
+ */
+static ipmp_probe_state_t
+probestate(struct probe_stats *ps)
+{
+ switch (ps->pr_status) {
+ case PR_UNUSED:
+ case PR_LOST:
+ return (IPMP_PROBE_LOST);
+ case PR_UNACKED:
+ return (IPMP_PROBE_SENT);
+ case PR_ACKED:
+ return (IPMP_PROBE_ACKED);
+ }
+
+ logerr("probestate: unknown state %d; aborting\n", ps->pr_status);
+ abort();
+ /* NOTREACHED */
+}
+
+/*
+ * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr'
+ * on phyint instance `pii'. Returns 0 on success, -1 on failure.
+ *
+ * The probe's target (pr_target) may be NULL; in that case the event carries
+ * the unspecified address as the target and RTT statistics of zero.
+ */
+int
+probe_state_event(struct probe_stats *pr, struct phyint_instance *pii)
+{
+ nvlist_t *nvl;
+ hrtime_t proc_time = 0, recv_time = 0;
+ struct sockaddr_storage ss;
+ struct target *tg = pr->pr_target;
+
+ errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
+ if (errno != 0) {
+ logperror("cannot create `probe state' event");
+ return (-1);
+ }
+
+ errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id);
+ if (errno != 0)
+ goto failed;
+
+ errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name);
+ if (errno != 0)
+ goto failed;
+
+ errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr));
+ if (errno != 0)
+ goto failed;
+
+ errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME,
+ pr->pr_hrtime_start);
+ if (errno != 0)
+ goto failed;
+
+ errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME,
+ pr->pr_hrtime_sent);
+ if (errno != 0)
+ goto failed;
+
+ /* The ack timestamps are only reported for acked probes; else 0. */
+ if (pr->pr_status == PR_ACKED) {
+ recv_time = pr->pr_hrtime_ackrecv;
+ proc_time = pr->pr_hrtime_ackproc;
+ }
+
+ errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time);
+ if (errno != 0)
+ goto failed;
+
+ errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time);
+ if (errno != 0)
+ goto failed;
+
+ if (tg != NULL)
+ addr2storage(pii->pii_af, &tg->tg_address, &ss);
+ else
+ addr2storage(pii->pii_af, &in6addr_any, &ss);
+
+ errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss,
+ sizeof (ss));
+ if (errno != 0)
+ goto failed;
+
+ /* Guard against a NULL target here too, as for the address above. */
+ errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG,
+ (tg != NULL) ? tg->tg_rtt_sa / 8 : 0);
+ if (errno != 0)
+ goto failed;
+
+ errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV,
+ (tg != NULL) ? tg->tg_rtt_sd / 4 : 0);
+ if (errno != 0)
+ goto failed;
+
+ return (post_event(ESC_IPMP_PROBE_STATE, nvl));
+failed:
+ logperror("cannot create `probe state' event");
+ nvlist_free(nvl);
+ return (-1);
}
/*
@@ -2529,10 +3103,15 @@ gensig(void)
unsigned int
getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp)
{
- struct phyint_group *pg;
struct phyint *pi;
+ struct phyint_group *pg;
char (*ifs)[LIFNAMSIZ];
- unsigned int nif, i;
+ unsigned int i, j;
+ unsigned int nif = 0, naddr = 0;
+ lifgroupinfo_t lifgr;
+ addrlist_t *addrp;
+ struct sockaddr_storage *addrs;
+ int fdt = 0;
pg = phyint_group_lookup(grname);
if (pg == NULL)
@@ -2540,39 +3119,143 @@ getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp)
/*
* Tally up the number of interfaces, allocate an array to hold them,
- * and insert their names into the array.
+ * and insert their names into the array. While we're at it, if any
+ * interface is actually enabled to send probes, save the group fdt.
*/
- for (nif = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext)
+ for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext)
nif++;
ifs = alloca(nif * sizeof (*ifs));
for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) {
assert(i < nif);
(void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ);
+ if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6))
+ fdt = pg->pg_fdt;
}
assert(i == nif);
- *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig,
- groupstate(pg), nif, ifs);
+ /*
+ * If this is the anonymous group, there's no other information to
+ * collect (since there's no IPMP interface).
+ */
+ if (pg == phyint_anongroup) {
+ *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt,
+ groupstate(pg), nif, ifs, "", "", "", "", 0, NULL);
+ return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+ }
+
+ /*
+ * Grab some additional information about the group from the kernel.
+ * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name,
+ * we can use ifsock_v4 even for a V6-only group.)
+ */
+ (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ);
+ if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) {
+ if (errno == ENOENT)
+ return (IPMP_EUNKGROUP);
+
+ logperror("getgroupinfo: SIOCGLIFGROUPINFO");
+ return (IPMP_FAILURE);
+ }
+
+ /*
+ * Tally up the number of data addresses, allocate an array to hold
+ * them, and insert their values into the array.
+ */
+ for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next)
+ naddr++;
+
+ addrs = alloca(naddr * sizeof (*addrs));
+ i = 0;
+ for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) {
+ /*
+ * It's possible to have duplicate addresses (if some are
+ * down). Weed the dups out to avoid confusing consumers.
+ * (If groups start having tons of addresses, we'll need a
+ * better algorithm here.)
+ */
+ for (j = 0; j < i; j++) {
+ if (sockaddrcmp(&addrs[j], &addrp->al_addr))
+ break;
+ }
+ if (j == i) {
+ assert(i < naddr);
+ addrs[i++] = addrp->al_addr;
+ }
+ }
+ naddr = i;
+
+ *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt,
+ groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname,
+ lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs);
return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
}
/*
+ * Store the target information associated with phyint instance `pii' into a
+ * dynamically allocated structure pointed to by `*targinfopp'. Returns an
+ * IPMP error code.
+ */
+unsigned int
+gettarginfo(struct phyint_instance *pii, const char *name,
+ ipmp_targinfo_t **targinfopp)
+{
+ uint_t ntarg = 0;
+ struct target *tg;
+ struct sockaddr_storage ss;
+ struct sockaddr_storage *targs = NULL;
+
+ if (PROBE_CAPABLE(pii)) {
+ targs = alloca(pii->pii_ntargets * sizeof (*targs));
+ tg = pii->pii_target_next;
+ do {
+ if (tg->tg_status == TG_ACTIVE) {
+ assert(ntarg < pii->pii_ntargets);
+ addr2storage(pii->pii_af, &tg->tg_address,
+ &targs[ntarg++]);
+ }
+ if ((tg = tg->tg_next) == NULL)
+ tg = pii->pii_targets;
+ } while (tg != pii->pii_target_next);
+
+ assert(ntarg == pii->pii_ntargets);
+ }
+
+ *targinfopp = ipmp_targinfo_create(name, iftestaddr(pii, &ss),
+ iftargmode(pii), ntarg, targs);
+ return (*targinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+}
+
+/*
* Store the information associated with interface `ifname' into a dynamically
* allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code.
*/
unsigned int
getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp)
{
+ int retval;
struct phyint *pi;
+ ipmp_targinfo_t *targinfo4 = NULL; /* NULL so `out' never frees garbage */
+ ipmp_targinfo_t *targinfo6 = NULL; /* stays NULL if the v4 lookup fails */
pi = phyint_lookup(ifname);
if (pi == NULL)
return (IPMP_EUNKIF);
+ if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 ||
+ (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0)
+ goto out;
+
*ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name,
- ifstate(pi), iftype(pi));
- return (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+ ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi),
+ ifflags(pi), targinfo4, targinfo6);
+ retval = (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+out:
+ /* The target info is released on every path, success or failure. */
+ if (targinfo4 != NULL)
+ ipmp_freetarginfo(targinfo4);
+ if (targinfo6 != NULL)
+ ipmp_freetarginfo(targinfo6);
+ return (retval);
}
/*
@@ -2605,6 +3288,54 @@ getgrouplist(ipmp_grouplist_t **grlistpp)
}
/*
+ * Store the address information for `ssp' (in group `grname') into a
+ * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP
+ * error code. (We'd call this function getaddrinfo(), but it would conflict
+ * with getaddrinfo(3SOCKET)).
+ */
+unsigned int
+getgraddrinfo(const char *grname, struct sockaddr_storage *ssp,
+ ipmp_addrinfo_t **adinfopp)
+{
+ int ifsock;
+ addrlist_t *addrp, *addrmatchp = NULL;
+ ipmp_addr_state_t state;
+ const char *binding = "";
+ struct lifreq lifr;
+ struct phyint_group *pg;
+
+ if ((pg = phyint_group_lookup(grname)) == NULL)
+ return (IPMP_EUNKADDR);
+
+ /*
+ * Walk through the data addresses, and find a match. Note that since
+ * some of the addresses may be down, more than one may match. We
+ * prefer an up address (if one exists).
+ */
+ for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) {
+ if (sockaddrcmp(ssp, &addrp->al_addr)) {
+ addrmatchp = addrp;
+ if (addrmatchp->al_flags & IFF_UP)
+ break;
+ }
+ }
+
+ if (addrmatchp == NULL)
+ return (IPMP_EUNKADDR);
+
+ state = (addrmatchp->al_flags & IFF_UP) ? IPMP_ADDR_UP : IPMP_ADDR_DOWN;
+ if (state == IPMP_ADDR_UP) {
+ ifsock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6;
+ (void) strlcpy(lifr.lifr_name, addrmatchp->al_name, LIFNAMSIZ);
+ if (ioctl(ifsock, SIOCGLIFBINDING, &lifr) >= 0)
+ binding = lifr.lifr_binding;
+ }
+
+ *adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding);
+ return (*adinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+}
+
+/*
* Store a snapshot of the IPMP subsystem into a dynamically allocated
* structure pointed to by `*snapp'. Returns an IPMP error code.
*/
@@ -2613,10 +3344,12 @@ getsnap(ipmp_snap_t **snapp)
{
ipmp_grouplist_t *grlistp;
ipmp_groupinfo_t *grinfop;
+ ipmp_addrinfo_t *adinfop;
+ ipmp_addrlist_t *adlistp;
ipmp_ifinfo_t *ifinfop;
ipmp_snap_t *snap;
struct phyint *pi;
- unsigned int i;
+ unsigned int i, j;
int retval;
snap = ipmp_snap_create();
@@ -2627,26 +3360,37 @@ getsnap(ipmp_snap_t **snapp)
* Add group list.
*/
retval = getgrouplist(&snap->sn_grlistp);
- if (retval != IPMP_SUCCESS) {
- ipmp_snap_free(snap);
- return (retval);
- }
+ if (retval != IPMP_SUCCESS)
+ goto failed;
/*
- * Add information for each group in the list.
+ * Add information for each group in the list, along with all of its
+ * data addresses.
*/
grlistp = snap->sn_grlistp;
for (i = 0; i < grlistp->gl_ngroup; i++) {
retval = getgroupinfo(grlistp->gl_groups[i], &grinfop);
- if (retval != IPMP_SUCCESS) {
- ipmp_snap_free(snap);
- return (retval);
- }
+ if (retval != IPMP_SUCCESS)
+ goto failed;
+
retval = ipmp_snap_addgroupinfo(snap, grinfop);
if (retval != IPMP_SUCCESS) {
ipmp_freegroupinfo(grinfop);
- ipmp_snap_free(snap);
- return (retval);
+ goto failed;
+ }
+
+ adlistp = grinfop->gr_adlistp;
+ for (j = 0; j < adlistp->al_naddr; j++) {
+ retval = getgraddrinfo(grinfop->gr_name,
+ &adlistp->al_addrs[j], &adinfop);
+ if (retval != IPMP_SUCCESS)
+ goto failed;
+
+ retval = ipmp_snap_addaddrinfo(snap, adinfop);
+ if (retval != IPMP_SUCCESS) {
+ ipmp_freeaddrinfo(adinfop);
+ goto failed;
+ }
}
}
@@ -2655,18 +3399,19 @@ getsnap(ipmp_snap_t **snapp)
*/
for (pi = phyints; pi != NULL; pi = pi->pi_next) {
retval = getifinfo(pi->pi_name, &ifinfop);
- if (retval != IPMP_SUCCESS) {
- ipmp_snap_free(snap);
- return (retval);
- }
+ if (retval != IPMP_SUCCESS)
+ goto failed;
+
retval = ipmp_snap_addifinfo(snap, ifinfop);
if (retval != IPMP_SUCCESS) {
ipmp_freeifinfo(ifinfop);
- ipmp_snap_free(snap);
- return (retval);
+ goto failed;
}
}
*snapp = snap;
return (IPMP_SUCCESS);
+failed:
+ ipmp_snap_free(snap);
+ return (retval);
}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h
index e4be3ccb30..39da2c3f1b 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _MPD_TABLES_H
#define _MPD_TABLES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -47,20 +45,11 @@ extern "C" {
* switch AND
* (ii) share the same phyint group name.
* Load spreading and failover occur across members of the same phyint group.
- * phyint group members must be homogenous. i.e. if a phyint belonging to a
+ * phyint group members must be homogeneous. i.e. if a phyint belonging to a
* phyint group has a IPv6 protocol instance, then all members of the phyint
* group, must have IPv6 protocol instances. (struct phyint_group)
*/
-/*
- * Parameter passed to try_failover(), indicating the type of failover
- * that is requested.
- */
-#define FAILOVER_NORMAL 1 /* Failover to another phyint */
- /* that is preferably a standby */
-#define FAILOVER_TO_NONSTANDBY 2 /* Failover to non-standby phyint */
-#define FAILOVER_TO_ANY 3 /* Failover to any available phyint */
-
#define MAXDEFERREDRTT 1 /* Maximum number of deferred rtts */
/*
@@ -79,15 +68,9 @@ extern "C" {
#define PI_IOCTL_ERROR 4 /* Some ioctl error */
#define PI_GROUP_CHANGED 5 /* The phyint has changed group. */
-/*
- * Though IFF_POINTOPOINT is a logint property, for the purpose of
- * failover, we treat it as a phyint property. Note that we cannot failover
- * individual logints.
- */
#define PHYINT_FLAGS(flags) \
- (((flags) & (IFF_STANDBY | IFF_INACTIVE | IFF_FAILED | IFF_OFFLINE | \
- IFF_POINTOPOINT | IFF_RUNNING)) | (handle_link_notifications ? \
- 0 : IFF_RUNNING))
+ (((flags) & (IFF_STANDBY | IFF_INACTIVE | IFF_FAILED | IFF_OFFLINE | \
+ IFF_RUNNING)) | (handle_link_notifications ? 0 : IFF_RUNNING))
/* A Phyint can have up to 2 instances, the IPv4 and the IPv6 instance */
#define PHYINT_INSTANCE(pi, af) \
@@ -152,29 +135,32 @@ extern "C" {
* Phyint group states; see below for the phyint group definition.
*/
enum pg_state {
- PG_RUNNING = 1, /* at least one interface in group is working */
- PG_FAILED = 2 /* group has failed completely */
+ PG_OK = 1, /* all interfaces in the group are working */
+ PG_DEGRADED, /* some interfaces in the group are unusable */
+ PG_FAILED /* all interfaces in the group are unusable */
};
/*
* Convenience macro to check if the whole group has failed.
*/
-#define GROUP_FAILED(pg) ((pg)->pg_groupfailed)
+#define GROUP_FAILED(pg) ((pg)->pg_state == PG_FAILED)
/*
* A doubly linked list of all phyint groups in the system.
* A phyint group is identified by its group name.
*/
struct phyint_group {
- char pg_name[LIFNAMSIZ + 1]; /* Phyint group name */
+ char pg_name[LIFGRNAMSIZ]; /* Phyint group name */
struct phyint *pg_phyint; /* List of phyints in this group */
struct phyint_group *pg_next; /* Next phyint group */
struct phyint_group *pg_prev; /* Prev phyint group */
- uint64_t pg_sig; /* Current signature of this group */
- int pg_probeint; /* Interval between probes */
- int pg_fdt; /* Time needed to detect failure */
- uint_t
- pg_groupfailed : 1; /* The whole group has failed */
+ uint64_t pg_sig; /* Current signature of this group */
+ int pg_probeint; /* Interval between probes */
+ int pg_fdt; /* Time needed to detect failure */
+ enum pg_state pg_state; /* Current group state */
+ boolean_t pg_in_use; /* To detect removed groups */
+ struct addrlist *pg_addrs; /* Data addresses in this group */
+ boolean_t pg_failmsg_printed; /* Group failure msg printed */
};
/*
@@ -207,6 +193,11 @@ struct phyint {
uint16_t pi_icmpid; /* icmp id in icmp echo request */
uint64_t pi_taddrthresh; /* time (in secs) to delay logging */
/* about missing test addresses */
+ dlpi_handle_t pi_dh; /* DLPI handle to underlying link */
+ uint_t pi_notes; /* enabled DLPI notifications */
+ uchar_t pi_hwaddr[DLPI_PHYSADDR_MAX]; /* phyint's hw address */
+ size_t pi_hwaddrlen; /* phyint's hw address length */
+
/*
* The pi_whenup array is a circular buffer of the most recent
* times (in milliseconds since some arbitrary point of time in
@@ -217,14 +208,12 @@ struct phyint {
unsigned int pi_whendx;
uint_t
- pi_empty : 1, /* failover done, empty */
- pi_full : 1, /* failback done, full */
- /* More details in probe.c */
pi_taddrmsg_printed : 1, /* testaddr msg printed */
pi_duptaddrmsg_printed : 1, /* dup testaddr msg printed */
pi_cfgmsg_printed : 1, /* bad config msg printed */
pi_lfmsg_printed : 1, /* link-flapping msg printed */
- pi_link_state : 1; /* interface link state */
+ pi_link_state : 1, /* interface link state */
+ pi_hwaddrdup : 1; /* disabled due to dup hw address */
};
/*
@@ -260,19 +249,19 @@ struct phyint_instance {
uint64_t pii_flags; /* Phyint flags from kernel */
struct probe_stats {
- struct target *pr_target; /* Probe Target */
- uint_t pr_time_sent; /* Time probe was sent */
+ uint_t pr_id; /* Full ID of probe */
+ struct target *pr_target; /* Probe Target */
+ uint_t pr_time_lost; /* Time probe declared lost */
+ struct timeval pr_tv_sent; /* Wall time probe was sent */
+ hrtime_t pr_hrtime_start; /* hrtime probe op started */
+ hrtime_t pr_hrtime_sent; /* hrtime probe was sent */
+ hrtime_t pr_hrtime_ackrecv; /* hrtime probe ack received */
+ hrtime_t pr_hrtime_ackproc; /* hrtime probe ack processed */
uint_t pr_status; /* probe status as below */
#define PR_UNUSED 0 /* Probe slot unused */
#define PR_UNACKED 1 /* Probe is unacknowledged */
#define PR_ACKED 2 /* Probe has been acknowledged */
#define PR_LOST 3 /* Probe is declared lost */
- union {
- uint_t tl; /* time probe is declared lost */
- uint_t ta; /* time probe is acked */
- } prt;
-#define pr_time_lost prt.tl
-#define pr_time_acked prt.ta
} pii_probes[PROBE_STATS_COUNT];
uint_t
@@ -319,7 +308,6 @@ struct logint {
struct in6_addr li_subnet; /* prefix / subnet */
uint_t li_subnet_len; /* prefix / subnet length */
uint64_t li_flags; /* IFF_* flags */
- uint_t li_oifindex; /* original ifindex (SIOCGLIFOINDEX) */
uint_t
li_in_use : 1, /* flag to detect deleted logints */
li_dupaddr : 1; /* test address is not unique */
@@ -345,12 +333,12 @@ struct target {
#define TG_DEAD 4 /* Target is not responding */
hrtime_t tg_latime; /* Target's last active time */
- int tg_rtt_sa; /* Scaled round trip time(RTT) avg. */
- int tg_rtt_sd; /* Scaled RTT deviation */
- int tg_crtt; /* Conservative RTT = A + 4D */
+ int64_t tg_rtt_sa; /* Scaled RTT average (in ns) */
+ int64_t tg_rtt_sd; /* Scaled RTT deviation (in ns) */
+ int tg_crtt; /* Conservative RTT = A + 4D (in ms) */
uint32_t
tg_in_use : 1; /* In use flag */
- int tg_deferred[MAXDEFERREDRTT + 1];
+ int64_t tg_deferred[MAXDEFERREDRTT + 1];
/* Deferred rtt data points */
int tg_num_deferred;
/* Number of deferred rtt data points */
@@ -393,19 +381,20 @@ struct probe_success_count
struct probes_missed
{
uint_t pm_nprobes; /* Cumulative number of missed probes */
- uint_t pm_ntimes; /* Total number of occassions */
+ uint_t pm_ntimes; /* Total number of occasions */
};
-struct local_addr
-{
- struct in6_addr addr;
- struct local_addr *next;
-};
+typedef struct addrlist {
+ struct addrlist *al_next; /* next address */
+ char al_name[LIFNAMSIZ]; /* address lif name */
+ uint64_t al_flags; /* address flags */
+ struct sockaddr_storage al_addr; /* address */
+} addrlist_t;
/*
* Globals
*/
-extern struct local_addr *laddr_list;
+extern addrlist_t *localaddrs;
/* List of all local addresses, including local zones */
extern struct phyint *phyints; /* List of all phyints */
extern struct phyint_group *phyint_groups; /* List of all phyint groups */
@@ -428,10 +417,19 @@ extern void phyint_inst_delete(struct phyint_instance *pii);
extern uint_t phyint_inst_timer(struct phyint_instance *pii);
extern boolean_t phyint_inst_sockinit(struct phyint_instance *pii);
-extern void phyint_newtype(struct phyint *pi);
+extern void phyint_changed(struct phyint *pi);
extern void phyint_chstate(struct phyint *pi, enum pi_state state);
extern void phyint_group_chstate(struct phyint_group *pg, enum pg_state state);
+extern struct phyint_group *phyint_group_create(const char *pg_name);
+extern struct phyint_group *phyint_group_lookup(const char *pg_name);
+extern void phyint_group_insert(struct phyint_group *pg);
+extern void phyint_group_delete(struct phyint_group *pg);
+extern void phyint_group_refresh_state(struct phyint_group *pg);
extern void phyint_check_for_repair(struct phyint *pi);
+extern void phyint_transition_to_running(struct phyint *pi);
+extern void phyint_activate_another(struct phyint *pi);
+extern int phyint_offline(struct phyint *pi, unsigned int);
+extern int phyint_undo_offline(struct phyint *pi);
extern void logint_init_from_k(struct phyint_instance *pii, char *li_name);
extern void logint_delete(struct logint *li);
@@ -448,34 +446,40 @@ extern void target_add(struct phyint_instance *pii, struct in6_addr addr,
extern void in_data(struct phyint_instance *pii);
extern void in6_data(struct phyint_instance *pii);
-extern int try_failover(struct phyint *pi, int failover_type);
-extern int try_failback(struct phyint *pi);
-extern int do_failback(struct phyint *pi);
-extern boolean_t change_lif_flags(struct phyint *pi, uint64_t flags,
- boolean_t setfl);
-
extern void logperror_pii(struct phyint_instance *pii, const char *str);
extern void logperror_li(struct logint *li, const char *str);
extern char *pr_addr(int af, struct in6_addr addr, char *abuf, int len);
+extern void addr2storage(int af, const struct in6_addr *addr,
+ struct sockaddr_storage *ssp);
extern void phyint_inst_print_all(void);
+extern boolean_t prefix_equal(struct in6_addr, struct in6_addr, uint_t);
-extern int logint_upcount(struct phyint *pi);
-extern void restore_phyint(struct phyint *pi);
extern void reset_crtt_all(struct phyint *pi);
extern int failure_state(struct phyint_instance *pii);
extern void process_link_state_changes(void);
extern void clear_pii_probe_stats(struct phyint_instance *pii);
extern void start_timer(struct phyint_instance *pii);
+extern void stop_probing(struct phyint *pi);
extern boolean_t own_address(struct in6_addr addr);
+extern boolean_t change_pif_flags(struct phyint *pi, uint64_t set,
+ uint64_t clear);
extern void close_probe_socket(struct phyint_instance *pii, boolean_t flag);
+extern int probe_state_event(struct probe_stats *, struct phyint_instance *);
+extern void probe_chstate(struct probe_stats *, struct phyint_instance *, int);
+extern unsigned int getgraddrinfo(const char *, struct sockaddr_storage *,
+ ipmp_addrinfo_t **);
extern unsigned int getifinfo(const char *, ipmp_ifinfo_t **);
extern unsigned int getgroupinfo(const char *, ipmp_groupinfo_t **);
extern unsigned int getgrouplist(ipmp_grouplist_t **);
extern unsigned int getsnap(ipmp_snap_t **);
+extern boolean_t addrlist_add(addrlist_t **, const char *, uint64_t,
+ struct sockaddr_storage *);
+extern void addrlist_free(addrlist_t **);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
index 27716cabce..703ddcfaad 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
@@ -17,14 +17,11 @@
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ *
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "tables.h"
#include <fcntl.h>
@@ -122,7 +119,7 @@ sendpacket(struct sockaddr_in6 *sin6, int sock, int size, int flags)
char abuf[INET6_ADDRSTRLEN];
cc = sendto(sock, (char *)packet, size, flags,
- (struct sockaddr *)sin6, sizeof (*sin6));
+ (struct sockaddr *)sin6, sizeof (*sin6));
if (cc < 0 || cc != size) {
if (cc < 0) {
logperror("sendpacket: sendto");
@@ -135,6 +132,32 @@ sendpacket(struct sockaddr_in6 *sin6, int sock, int size, int flags)
}
}
+/*
+ * Place an ND_OPT_SOURCE_LINKADDR option for `pi' at `optp', zero-padded to a
+ * multiple of 8 bytes.  Return its size in bytes, or 0 if none was placed.
+ */
+static uint_t
+add_opt_lla(struct phyint *pi, struct nd_opt_lla *optp)
+{
+ uint_t optlen;
+ uint_t hwaddrlen;
+ struct lifreq lifr;
+
+ /* No (non-empty) link-layer address means no option to add */
+ if (phyint_get_lla(pi, &lifr) == -1 || lifr.lifr_nd.lnr_hdw_len == 0)
+ return (0);
+
+ hwaddrlen = lifr.lifr_nd.lnr_hdw_len;
+ /* roundup to multiple of 8 and make padding zero */
+ optlen = ((sizeof (struct nd_opt_hdr) + hwaddrlen + 7) / 8) * 8;
+ bzero(optp, optlen);
+ optp->nd_opt_lla_type = ND_OPT_SOURCE_LINKADDR;
+ optp->nd_opt_lla_len = optlen / 8;	/* length is in 8-byte units */
+ bcopy(lifr.lifr_nd.lnr_hdw_addr, optp->nd_opt_lla_hdw_addr, hwaddrlen);
+
+ return (optlen);
+}
+
/* Send a Router Solicitation */
static void
solicit(struct sockaddr_in6 *sin6, struct phyint *pi)
@@ -151,24 +174,8 @@ solicit(struct sockaddr_in6 *sin6, struct phyint *pi)
packetlen += sizeof (*rs);
pptr += sizeof (*rs);
- /* Attach any options */
- if (pi->pi_hdw_addr_len != 0) {
- struct nd_opt_lla *lo = (struct nd_opt_lla *)pptr;
- int optlen;
-
- /* roundup to multiple of 8 and make padding zero */
- optlen = ((sizeof (struct nd_opt_hdr) +
- pi->pi_hdw_addr_len + 7) / 8) * 8;
- bzero(pptr, optlen);
-
- lo->nd_opt_lla_type = ND_OPT_SOURCE_LINKADDR;
- lo->nd_opt_lla_len = optlen / 8;
- bcopy((char *)pi->pi_hdw_addr,
- (char *)lo->nd_opt_lla_hdw_addr,
- pi->pi_hdw_addr_len);
- packetlen += optlen;
- pptr += optlen;
- }
+ /* add options */
+ packetlen += add_opt_lla(pi, (struct nd_opt_lla *)pptr);
if (debug & D_PKTOUT) {
print_route_sol("Sending solicitation to ", pi, rs, packetlen,
@@ -224,24 +231,9 @@ advertise(struct sockaddr_in6 *sin6, struct phyint *pi, boolean_t no_prefixes)
return;
}
- /* Attach any options */
- if (pi->pi_hdw_addr_len != 0) {
- struct nd_opt_lla *lo = (struct nd_opt_lla *)pptr;
- int optlen;
-
- /* roundup to multiple of 8 and make padding zero */
- optlen = ((sizeof (struct nd_opt_hdr) +
- pi->pi_hdw_addr_len + 7) / 8) * 8;
- bzero(pptr, optlen);
-
- lo->nd_opt_lla_type = ND_OPT_SOURCE_LINKADDR;
- lo->nd_opt_lla_len = optlen / 8;
- bcopy((char *)pi->pi_hdw_addr,
- (char *)lo->nd_opt_lla_hdw_addr,
- pi->pi_hdw_addr_len);
- packetlen += optlen;
- pptr += optlen;
- }
+ /* add options */
+ packetlen += add_opt_lla(pi, (struct nd_opt_lla *)pptr);
+ pptr = (char *)packet + packetlen;
if (pi->pi_AdvLinkMTU != 0) {
struct nd_opt_mtu *mo = (struct nd_opt_mtu *)pptr;
@@ -1671,10 +1663,10 @@ process_rtsock(int rtsock)
return;
}
- if (ifm->ifm_flags != pi->pi_flags) {
+ if (ifm->ifm_flags != (uint_t)pi->pi_flags) {
if (debug & D_IFSCAN) {
logmsg(LOG_DEBUG, "process_rtsock: clr for "
- "%s old flags 0x%x new flags 0x%x\n",
+ "%s old flags 0x%llx new flags 0x%x\n",
pi->pi_name, pi->pi_flags, ifm->ifm_flags);
}
}
@@ -1825,141 +1817,67 @@ process_mibsock(int mibsock)
}
/*
- * Check whether the address formed by pr->pr_prefix and pi_token
- * exists in the kernel. Cannot call SIOCTMYADDR/ONLINK as it
- * does not check for down addresses. This function should not
- * be called for onlink prefixes.
- */
-static boolean_t
-is_address_present(struct phyint *pi, struct prefix *pr, uint64_t flags)
-{
- int s;
- in6_addr_t addr, *token;
- int i;
- int ret;
- struct sockaddr_in6 sin6;
-
- s = socket(AF_INET6, SOCK_DGRAM, 0);
- if (s < 0) {
- logperror("is_address_present: socket");
- /*
- * By returning B_TRUE, we make the caller delete
- * the prefix from the internal table. In the worst
- * case the next RA will create the prefix.
- */
- return (_B_TRUE);
- }
- if (flags & IFF_TEMPORARY)
- token = &pi->pi_tmp_token;
- else
- token = &pi->pi_token;
- for (i = 0; i < 16; i++) {
- /*
- * prefix_create ensures that pr_prefix has all-zero
- * bits after prefixlen.
- */
- addr.s6_addr[i] = pr->pr_prefix.s6_addr[i] | token->s6_addr[i];
- }
- (void) memset(&sin6, 0, sizeof (struct sockaddr_in6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_addr = addr;
- ret = bind(s, (struct sockaddr *)&sin6, sizeof (struct sockaddr_in6));
- (void) close(s);
- if (ret < 0 && errno == EADDRNOTAVAIL)
- return (_B_FALSE);
- else
- return (_B_TRUE);
-}
-
-/*
* Look if the phyint or one of its prefixes have been removed from
* the kernel and take appropriate action.
- * Uses {pi,pr}_in_use.
+ * Uses pr_in_use and pi{,_kernel}_state.
*/
static void
check_if_removed(struct phyint *pi)
{
- struct prefix *pr;
- struct prefix *next_pr;
+ struct prefix *pr, *next_pr;
/*
- * Detect phyints that have been removed from the kernel.
- * Since we can't recreate it here (would require ifconfig plumb
- * logic) we just terminate use of that phyint.
- */
- if (!(pi->pi_kernel_state & PI_PRESENT) &&
- (pi->pi_state & PI_PRESENT)) {
- logmsg(LOG_ERR, "Interface %s has been removed from kernel. "
- "in.ndpd will no longer use it\n", pi->pi_name);
- /*
- * Clear state so that should the phyint reappear
- * we will start with initial advertisements or
- * solicitations.
- */
- phyint_cleanup(pi);
- }
- /*
* Detect prefixes which are removed.
- *
- * We remove the prefix in all of the following cases :
- *
- * 1) Static prefixes are not the ones we create. So,
- * just remove it from our tables.
- *
- * 2) On-link prefixes potentially move to a different
- * phyint during failover. As it does not have
- * an address, we can't use the logic in is_address_present
- * to detect whether it is present in the kernel or not.
- * Thus when it is manually removed we don't recreate it.
- *
- * 3) If there is a token mis-match and this prefix is not
- * in the kernel, it means we don't need this prefix on
- * this interface anymore. It must have been moved to a
- * different interface by in.mpathd. This normally
- * happens after a failover followed by a failback (or
- * another failover) and we re-read the network
- * configuration. For the failover from A to B, we would
- * have created state on B about A's address, which will
- * not be in use after the subsequent failback. So, we
- * remove that prefix here.
- *
- * 4) If the physical interface is not present, then remove
- * the prefix. In the cases where we are advertising
- * prefixes, the state is kept in advertisement prefix and
- * hence we can delete the prefix.
- *
- * 5) Similar to case (3), when we failover from A to B, the
- * prefix in A will not be in use as it has been moved to B.
- * We will delete it from our tables and recreate it when
- * it fails back. is_address_present makes sure that the
- * address is still valid in kernel.
- *
- * If none of the above is true, we recreate the prefix as it
- * has been manually removed. We do it only when the interface
- * is not FAILED or INACTIVE or OFFLINE.
+ * Static prefixes are just removed from our tables.
+ * Non-static prefixes are recreated i.e. in.ndpd takes precedence
+ * over manually removing prefixes via ifconfig.
*/
for (pr = pi->pi_prefix_list; pr != NULL; pr = next_pr) {
next_pr = pr->pr_next;
if (!pr->pr_in_use) {
- /* Clear PR_AUTO and PR_ONLINK */
+ /* Clear everything except PR_STATIC */
pr->pr_kernel_state &= PR_STATIC;
- if ((pr->pr_state & PR_STATIC) ||
- !(pr->pr_state & PR_AUTO) ||
- !(prefix_token_match(pi, pr, pr->pr_flags)) ||
- (!(pi->pi_kernel_state & PI_PRESENT)) ||
- (is_address_present(pi, pr, pr->pr_flags))) {
+ pr->pr_name[0] = '\0';
+ if (pr->pr_state & PR_STATIC) {
prefix_delete(pr);
- } else if (!(pi->pi_flags &
- (IFF_FAILED|IFF_INACTIVE|IFF_OFFLINE)) &&
- pr->pr_state != pr->pr_kernel_state) {
- pr->pr_name[0] = '\0';
+ } else if (!(pi->pi_kernel_state & PI_PRESENT)) {
+ /*
+ * Ensure that there are no future attempts to
+ * run prefix_update_k since the phyint is gone.
+ */
+ pr->pr_state = pr->pr_kernel_state;
+ } else if (pr->pr_state != pr->pr_kernel_state) {
logmsg(LOG_INFO, "Prefix manually removed "
- "on %s - recreating it!\n",
- pi->pi_name);
+ "on %s; recreating\n", pi->pi_name);
prefix_update_k(pr);
}
}
}
+
+ /*
+ * Detect phyints that have been removed from the kernel, and tear
+ * down any prefixes we created that are associated with that phyint.
+ * (NOTE: IPMP depends on in.ndpd tearing down these prefixes so an
+ * administrator can easily place an IP interface with ADDRCONF'd
+ * addresses into an IPMP group.)
+ */
+ if (!(pi->pi_kernel_state & PI_PRESENT) &&
+ (pi->pi_state & PI_PRESENT)) {
+ logmsg(LOG_ERR, "Interface %s has been removed from kernel. "
+ "in.ndpd will no longer use it\n", pi->pi_name);
+
+ for (pr = pi->pi_prefix_list; pr != NULL; pr = next_pr) {
+ next_pr = pr->pr_next;
+ if (pr->pr_state & PR_AUTO)
+ prefix_delete(pr);
+ }
+
+ /*
+ * Clear state so that should the phyint reappear we will
+ * start with initial advertisements or solicitations.
+ */
+ phyint_cleanup(pi);
+ }
}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
index 5d64a9303d..0a9e1e6a13 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -383,29 +383,12 @@ incoming_ra(struct phyint *pi, struct nd_router_advert *ra, int len,
if (no_loopback && loopback)
return;
- /*
- * If the interface is FAILED or INACTIVE or OFFLINE, don't
- * create any addresses on them. in.mpathd assumes that no new
- * addresses will appear on these. This implies that we
- * won't create any new prefixes advertised by the router
- * on FAILED/INACTIVE/OFFLINE interfaces. When the state changes,
- * the next RA will create the prefix on this interface.
- */
- if (pi->pi_flags & (IFF_FAILED|IFF_INACTIVE|IFF_OFFLINE))
- return;
+ bzero(&lifr, sizeof (lifr));
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
- if (ioctl(pi->pi_sock, SIOCGLIFLNKINFO, (char *)&lifr) < 0) {
- if (errno == ENXIO)
- return;
- logperror_pi(pi, "incoming_ra: SIOCGLIFLNKINFO");
- return;
- }
if (ra->nd_ra_curhoplimit != CURHOP_UNSPECIFIED &&
ra->nd_ra_curhoplimit != pi->pi_CurHopLimit) {
pi->pi_CurHopLimit = ra->nd_ra_curhoplimit;
-
lifr.lifr_ifinfo.lir_maxhops = pi->pi_CurHopLimit;
set_needed = _B_TRUE;
}
@@ -460,7 +443,7 @@ incoming_ra(struct phyint *pi, struct nd_router_advert *ra, int len,
logmsg(LOG_DEBUG,
"incoming_ra: trigger dhcp %s on %s\n",
(ra->nd_ra_flags_reserved & ~pi->pi_ra_flags &
- ND_RA_FLAG_MANAGED) ? "MANAGED" : "OTHER",
+ ND_RA_FLAG_MANAGED) ? "MANAGED" : "OTHER",
pi->pi_name);
}
pi->pi_ra_flags |= ra->nd_ra_flags_reserved;
@@ -999,11 +982,9 @@ incoming_prefix_addrconf_process(struct phyint *pi, struct prefix *pr,
* Delete this prefix structure as kernel
* does not allow duplicated addresses
*/
-
logmsg(LOG_ERR, "incoming_prefix_addrconf_process: "
- "Duplicate prefix %s received on interface %s\n",
- inet_ntop(AF_INET6,
- (void *)&po->nd_opt_pi_prefix, abuf,
+ "Duplicate prefix %s received on interface %s\n",
+ inet_ntop(AF_INET6, &po->nd_opt_pi_prefix, abuf,
sizeof (abuf)), pi->pi_name);
logmsg(LOG_ERR, "incoming_prefix_addrconf_process: "
"Prefix already exists in interface %s\n",
@@ -1129,12 +1110,8 @@ incoming_mtu_opt(struct phyint *pi, uchar_t *opt,
}
pi->pi_LinkMTU = mtu;
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
- if (ioctl(pi->pi_sock, SIOCGLIFLNKINFO, (char *)&lifr) < 0) {
- logperror_pi(pi, "incoming_mtu_opt: SIOCGLIFLNKINFO");
- return;
- }
+ bzero(&lifr, sizeof (lifr));
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
lifr.lifr_ifinfo.lir_maxmtu = pi->pi_LinkMTU;
if (ioctl(pi->pi_sock, SIOCSLIFLNKINFO, (char *)&lifr) < 0) {
logperror_pi(pi, "incoming_mtu_opt: SIOCSLIFLNKINFO");
@@ -1155,33 +1132,33 @@ incoming_lla_opt(struct phyint *pi, uchar_t *opt,
struct sockaddr_in6 *sin6;
int max_content_len;
- if (pi->pi_hdw_addr_len == 0)
+ /*
+ * Get our link-layer address length. We may not have one, in which
+ * case we can just bail.
+ */
+ if (phyint_get_lla(pi, &lifr) != 0)
return;
/*
* Can't remove padding since it is link type specific.
- * However, we check against the length of our link-layer
- * address.
- * Note: assumes that all links have a fixed lengh address.
+ * However, we check against the length of our link-layer address.
+ * Note: assumes that all links have a fixed length address.
*/
max_content_len = lo->nd_opt_lla_len * 8 - sizeof (struct nd_opt_hdr);
- if (max_content_len < pi->pi_hdw_addr_len ||
+ if (max_content_len < lifr.lifr_nd.lnr_hdw_len ||
(max_content_len >= 8 &&
- max_content_len - 7 > pi->pi_hdw_addr_len)) {
+ max_content_len - 7 > lifr.lifr_nd.lnr_hdw_len)) {
char abuf[INET6_ADDRSTRLEN];
(void) inet_ntop(AF_INET6, (void *)&from->sin6_addr,
abuf, sizeof (abuf));
logmsg(LOG_INFO, "lla option from %s on %s too long with bad "
- "physaddr length (%d vs. %d bytes)\n",
- abuf, pi->pi_name,
- max_content_len, pi->pi_hdw_addr_len);
+ "physaddr length (%d vs. %d bytes)\n", abuf, pi->pi_name,
+ max_content_len, lifr.lifr_nd.lnr_hdw_len);
return;
}
- lifr.lifr_nd.lnr_hdw_len = pi->pi_hdw_addr_len;
- bcopy((char *)lo->nd_opt_lla_hdw_addr,
- (char *)lifr.lifr_nd.lnr_hdw_addr,
+ bcopy(lo->nd_opt_lla_hdw_addr, lifr.lifr_nd.lnr_hdw_addr,
lifr.lifr_nd.lnr_hdw_len);
sin6 = (struct sockaddr_in6 *)&lifr.lifr_nd.lnr_addr;
@@ -1196,8 +1173,7 @@ incoming_lla_opt(struct phyint *pi, uchar_t *opt,
lifr.lifr_nd.lnr_state_same_lla = ND_UNCHANGED;
lifr.lifr_nd.lnr_state_diff_lla = ND_STALE;
lifr.lifr_nd.lnr_flags = isrouter;
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
if (ioctl(pi->pi_sock, SIOCLIFSETND, (char *)&lifr) < 0) {
logperror_pi(pi, "incoming_lla_opt: SIOCLIFSETND");
return;
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
index c8fc6381b7..09e6137965 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "tables.h"
@@ -171,6 +169,7 @@ phyint_init_from_k(struct phyint *pi)
struct ipv6_mreq v6mcastr;
struct lifreq lifr;
int fd;
+ int save_errno;
boolean_t newsock;
uint_t ttl;
struct sockaddr_in6 *sin6;
@@ -297,30 +296,6 @@ start_over:
pi->pi_dst_token = in6addr_any;
}
- /* Get link-layer address */
- if (!(pi->pi_flags & IFF_MULTICAST) ||
- (pi->pi_flags & IFF_POINTOPOINT)) {
- pi->pi_hdw_addr_len = 0;
- } else {
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_nd.lnr_addr;
- bzero(sin6, sizeof (struct sockaddr_in6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = pi->pi_ifaddr;
-
- if (ioctl(fd, SIOCLIFGETND, (char *)&lifr) < 0) {
- logperror_pi(pi, "phyint_init_from_k: SIOCLIFGETND");
- goto error;
- }
-
- pi->pi_hdw_addr_len = lifr.lifr_nd.lnr_hdw_len;
-
- if (lifr.lifr_nd.lnr_hdw_len != 0) {
- bcopy((char *)lifr.lifr_nd.lnr_hdw_addr,
- (char *)pi->pi_hdw_addr,
- lifr.lifr_nd.lnr_hdw_len);
- }
- }
-
if (newsock) {
icmp6_filter_t filter;
int on = 1;
@@ -360,8 +335,21 @@ start_over:
v6mcastr.ipv6mr_interface = pi->pi_index;
if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_GROUP,
(char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- logperror_pi(pi, "phyint_init_from_k: "
- "setsockopt IPV6_JOIN_GROUP");
+ /*
+ * One benign reason IPV6_JOIN_GROUP could fail is
+ * when `pi' has been placed into an IPMP group and we
+ * haven't yet processed the routing socket message
+ * informing us of its disappearance. As such, if
+ * it's now in a group, don't print an error.
+ */
+ save_errno = errno;
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) == -1 ||
+ lifr.lifr_groupname[0] == '\0') {
+ errno = save_errno;
+ logperror_pi(pi, "phyint_init_from_k: "
+ "setsockopt IPV6_JOIN_GROUP");
+ }
goto error;
}
pi->pi_state |= PI_JOINED_ALLNODES;
@@ -403,8 +391,17 @@ start_over:
v6mcastr.ipv6mr_interface = pi->pi_index;
if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_GROUP,
(char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- logperror_pi(pi, "phyint_init_from_k: setsockopt "
- "IPV6_JOIN_GROUP");
+ /*
+ * See IPV6_JOIN_GROUP comment above.
+ */
+ save_errno = errno;
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) == -1 ||
+ lifr.lifr_groupname[0] == '\0') {
+ errno = save_errno;
+ logperror_pi(pi, "phyint_init_from_k: "
+ "setsockopt IPV6_JOIN_GROUP");
+ }
goto error;
}
pi->pi_state |= PI_JOINED_ALLROUTERS;
@@ -569,22 +566,16 @@ phyint_print(struct phyint *pi)
struct adv_prefix *adv_pr;
struct router *dr;
char abuf[INET6_ADDRSTRLEN];
- char llabuf[BUFSIZ];
logmsg(LOG_DEBUG, "Phyint %s index %d state %x, kernel %x, "
"num routers %d\n",
pi->pi_name, pi->pi_index, pi->pi_state, pi->pi_kernel_state,
pi->pi_num_k_routers);
- logmsg(LOG_DEBUG, "\taddress: %s flags %x\n",
+ logmsg(LOG_DEBUG, "\taddress: %s flags %llx\n",
inet_ntop(AF_INET6, (void *)&pi->pi_ifaddr,
abuf, sizeof (abuf)), pi->pi_flags);
- logmsg(LOG_DEBUG, "\tsock %d mtu %d hdw_addr len %d <%s>\n",
- pi->pi_sock, pi->pi_mtu, pi->pi_hdw_addr_len,
- ((pi->pi_hdw_addr_len != 0) ?
- fmt_lla(llabuf, sizeof (llabuf), pi->pi_hdw_addr,
- pi->pi_hdw_addr_len) : "none"));
- logmsg(LOG_DEBUG, "\ttoken: len %d %s\n",
- pi->pi_token_length,
+ logmsg(LOG_DEBUG, "\tsock %d mtu %d\n", pi->pi_sock, pi->pi_mtu);
+ logmsg(LOG_DEBUG, "\ttoken: len %d %s\n", pi->pi_token_length,
inet_ntop(AF_INET6, (void *)&pi->pi_token,
abuf, sizeof (abuf)));
if (pi->pi_TmpAddrsEnabled) {
@@ -632,6 +623,43 @@ phyint_print(struct phyint *pi)
logmsg(LOG_DEBUG, "\n");
}
+
+/*
+ * Store the LLA for the phyint `pi' `lifrp'. Returns 0 on success, or
+ * -1 on failure.
+ *
+ * Note that we do not cache the hardware address since there's no reliable
+ * mechanism to determine when it's become stale.
+ */
+int
+phyint_get_lla(struct phyint *pi, struct lifreq *lifrp)
+{
+ struct sockaddr_in6 *sin6;
+
+ /* If this phyint doesn't have a link-layer address, bail */
+ if (!(pi->pi_flags & IFF_MULTICAST) ||
+ (pi->pi_flags & IFF_POINTOPOINT)) {
+ return (-1);
+ }
+
+ (void) strlcpy(lifrp->lifr_name, pi->pi_name, LIFNAMSIZ);
+ sin6 = (struct sockaddr_in6 *)&(lifrp->lifr_nd.lnr_addr);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_addr = pi->pi_ifaddr;
+ if (ioctl(pi->pi_sock, SIOCLIFGETND, lifrp) < 0) {
+ /*
+ * For IPMP interfaces, don't report ESRCH errors since that
+ * merely indicates that there are no active interfaces in the
+ * IPMP group (and thus there's no working hardware address),
+ * and the packet will thus never make it out anyway.
+ */
+ if (!(pi->pi_flags & IFF_IPMP) || errno != ESRCH)
+ logperror_pi(pi, "phyint_get_lla: SIOCLIFGETND");
+ return (-1);
+ }
+ return (0);
+}
+
/*
* Randomize pi->pi_ReachableTime.
* Done periodically when there are no RAs and at a maximum frequency when
@@ -642,20 +670,14 @@ phyint_print(struct phyint *pi)
void
phyint_reach_random(struct phyint *pi, boolean_t set_needed)
{
+ struct lifreq lifr;
+
pi->pi_ReachableTime = GET_RANDOM(
(int)(ND_MIN_RANDOM_FACTOR * pi->pi_BaseReachableTime),
(int)(ND_MAX_RANDOM_FACTOR * pi->pi_BaseReachableTime));
if (set_needed) {
- struct lifreq lifr;
-
- (void) strncpy(lifr.lifr_name, pi->pi_name,
- sizeof (lifr.lifr_name));
- pi->pi_name[sizeof (pi->pi_name) - 1] = '\0';
- if (ioctl(pi->pi_sock, SIOCGLIFLNKINFO, (char *)&lifr) < 0) {
- logperror_pi(pi,
- "phyint_reach_random: SIOCGLIFLNKINFO");
- return;
- }
+ bzero(&lifr, sizeof (lifr));
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, LIFNAMSIZ);
lifr.lifr_ifinfo.lir_reachtime = pi->pi_ReachableTime;
if (ioctl(pi->pi_sock, SIOCSLIFLNKINFO, (char *)&lifr) < 0) {
logperror_pi(pi,
@@ -1386,12 +1408,12 @@ prefix_modify_flags(struct prefix *pr, uint64_t onflags, uint64_t offflags)
(void) strncpy(lifr.lifr_name, pr->pr_name, sizeof (lifr.lifr_name));
lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
if (ioctl(pi->pi_sock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
- logperror_pr(pr, "prefix_modify_flags: SIOCGLIFFLAGS");
- logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx "
- "on 0x%llx off 0x%llx\n",
- pr->pr_physical->pi_name,
- pr->pr_name,
- pr->pr_flags, onflags, offflags);
+ if (errno != ENXIO) {
+ logperror_pr(pr, "prefix_modify_flags: SIOCGLIFFLAGS");
+ logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx"
+ " on 0x%llx off 0x%llx\n", pr->pr_physical->pi_name,
+ pr->pr_name, pr->pr_flags, onflags, offflags);
+ }
return (-1);
}
old_flags = lifr.lifr_flags;
@@ -1399,12 +1421,13 @@ prefix_modify_flags(struct prefix *pr, uint64_t onflags, uint64_t offflags)
lifr.lifr_flags &= ~offflags;
pr->pr_flags = lifr.lifr_flags;
if (ioctl(pi->pi_sock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
- logperror_pr(pr, "prefix_modify_flags: SIOCSLIFFLAGS");
- logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx "
- "new 0x%llx on 0x%llx off 0x%llx\n",
- pr->pr_physical->pi_name,
- pr->pr_name,
- old_flags, lifr.lifr_flags, onflags, offflags);
+ if (errno != ENXIO) {
+ logperror_pr(pr, "prefix_modify_flags: SIOCSLIFFLAGS");
+ logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx"
+ " new 0x%llx on 0x%llx off 0x%llx\n",
+ pr->pr_physical->pi_name, pr->pr_name,
+ old_flags, lifr.lifr_flags, onflags, offflags);
+ }
return (-1);
}
return (0);
@@ -1540,7 +1563,8 @@ prefix_update_k(struct prefix *pr)
/* Remove logical interface based on pr_name */
lifr.lifr_addr.ss_family = AF_UNSPEC;
- if (ioctl(pi->pi_sock, SIOCLIFREMOVEIF, (char *)&lifr) < 0) {
+ if (ioctl(pi->pi_sock, SIOCLIFREMOVEIF, (char *)&lifr) < 0 &&
+ errno != ENXIO) {
logperror_pr(pr, "prefix_update_k: SIOCLIFREMOVEIF");
}
pr->pr_kernel_state = 0;
@@ -1865,36 +1889,6 @@ prefix_print(struct prefix *pr)
}
/*
- * Does the address formed by pr->pr_prefix and pi->pi_token match
- * pr->pr_address. It does not match if a failover has happened
- * earlier (done by in.mpathd) from a different pi. Should not
- * be called for onlink prefixes.
- */
-boolean_t
-prefix_token_match(struct phyint *pi, struct prefix *pr, uint64_t flags)
-{
- int i;
- in6_addr_t addr, *token;
-
- if (flags & IFF_TEMPORARY)
- token = &pi->pi_tmp_token;
- else
- token = &pi->pi_token;
- for (i = 0; i < 16; i++) {
- /*
- * prefix_create ensures that pr_prefix has all-zero
- * bits after prefixlen.
- */
- addr.s6_addr[i] = pr->pr_prefix.s6_addr[i] | token->s6_addr[i];
- }
- if (IN6_ARE_ADDR_EQUAL(&pr->pr_address, &addr)) {
- return (_B_TRUE);
- } else {
- return (_B_FALSE);
- }
-}
-
-/*
* Lookup advertisement prefix structure that matches the prefix and
* prefix length.
* Assumes that the bits after prefixlen might not be zero.
@@ -2305,8 +2299,7 @@ phyint_print_all(void)
}
void
-phyint_cleanup(pi)
- struct phyint *pi;
+phyint_cleanup(struct phyint *pi)
{
pi->pi_state = 0;
pi->pi_kernel_state = 0;
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
index 409600a402..dfc5414d5d 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _NDPD_TABLES_H
#define _NDPD_TABLES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -58,9 +56,7 @@ struct phyint {
char pi_name[LIFNAMSIZ]; /* Used to identify it */
int pi_sock; /* For sending and receiving */
struct in6_addr pi_ifaddr; /* Local address */
- uint_t pi_flags; /* IFF_* flags */
- uint_t pi_hdw_addr_len;
- uchar_t pi_hdw_addr[ND_MAX_HDW_LEN];
+ uint64_t pi_flags; /* IFF_* flags */
uint_t pi_mtu; /* From SIOCGLIFMTU */
struct in6_addr pi_token;
uint_t pi_token_length;
@@ -256,6 +252,7 @@ extern int phyint_init_from_k(struct phyint *pi);
extern void phyint_delete(struct phyint *pi);
extern uint_t phyint_timer(struct phyint *pi, uint_t elapsed);
extern void phyint_print_all(void);
+extern int phyint_get_lla(struct phyint *pi, struct lifreq *lifrp);
extern void phyint_reach_random(struct phyint *pi,
boolean_t set_needed);
extern void phyint_cleanup(struct phyint *pi);
@@ -280,8 +277,6 @@ extern void prefix_update_k(struct prefix *pr);
extern uint_t prefix_timer(struct prefix *pr, uint_t elapsed);
extern uint_t adv_prefix_timer(struct adv_prefix *adv_pr,
uint_t elapsed);
-extern boolean_t prefix_token_match(struct phyint *pi,
- struct prefix *pr, uint64_t flags);
extern struct prefix *prefix_lookup_addr(struct phyint *pi,
struct in6_addr prefix);
diff --git a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c
index 15db1b7539..b76341e303 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c
@@ -1,3 +1,7 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
/* -*- Mode: C; tab-width: 4 -*-
*
* Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved.
@@ -130,8 +134,6 @@ First checkin
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mDNSUNP.h"
#include "mDNSDebug.h"
@@ -398,13 +400,11 @@ select_src_ifi_info_solaris(int sockfd, int numifs,
continue;
/*
* Avoid address if any of the following flags are set:
- * IFF_NOFAILOVER: IPMP test address for use only by in.mpathd
* IFF_NOXMIT: no packets transmitted over interface
* IFF_NOLOCAL: no address
* IFF_PRIVATE: is not advertised
*/
- if (ifflags & (IFF_NOFAILOVER | IFF_NOXMIT
- | IFF_NOLOCAL | IFF_PRIVATE))
+ if (ifflags & (IFF_NOXMIT | IFF_NOLOCAL | IFF_PRIVATE))
continue;
if (*best_lifr != NULL) {
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/Makefile
index d91d113347..e29c1765ec 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -65,12 +65,13 @@ K5TELNETOBJS= in.telnetd.o
SRCS= $(PROGSRCS) $(OTHERSRC)
SUBDIRS= bootconfchk htable ifconfig in.ftpd in.rdisc in.routed \
- in.talkd inetadm inetconv ipqosconf kssl/kssladm kssl/ksslcfg \
- ping routeadm snoop sppptun traceroute wificonfig ipsecutils
+ in.talkd inetadm inetconv ipmpstat ipqosconf ipsecutils \
+ kssl/kssladm kssl/ksslcfg ping routeadm snoop sppptun \
+ traceroute wificonfig
MSGSUBDIRS= bootconfchk htable ifconfig in.ftpd in.routed in.talkd inetadm \
- inetconv ipqosconf kssl/ksslcfg routeadm sppptun snoop \
- wificonfig ipsecutils
+ inetconv ipmpstat ipqosconf ipsecutils kssl/ksslcfg routeadm \
+ sppptun snoop wificonfig
# As programs get lint-clean, add them here and to the 'lint' target.
# Eventually this hack should go away, and all in PROG should be
@@ -83,7 +84,8 @@ LINTCLEAN= 6to4relay arp in.rlogind in.rshd in.telnetd in.tftpd \
# with SUBDIRS. Also (sigh) deal with the commented-out build lines
# for the lint rule.
LINTSUBDIRS= bootconfchk in.rdisc in.routed in.talkd inetadm inetconv \
- ipqosconf ping routeadm sppptun traceroute wificonfig ipsecutils
+ ipmpstat ipqosconf ipsecutils ping routeadm sppptun traceroute \
+ wificonfig
# And as programs are verified not to attempt to write into constants,
# -xstrconst should be used to ensure they stay that way.
CONSTCLEAN=
@@ -144,6 +146,8 @@ LDLIBS += $(K5LIBS)
$(TSNETPROG) := LDLIBS += -ltsnet
in.rarpd := LDLIBS += -linetutil -ldlpi
+if_mpadm := LDLIBS += -linetutil -lipmp
+if_mpadm.po := XGETFLAGS += -a
route := CPPFLAGS += -DNDEBUG
ndd := LDLIBS += -ldladm
gettable in.comsat := LDFLAGS += $(MAPFILE.NGB:%=-M%)
@@ -245,7 +249,7 @@ lint: $(LINTSUBDIRS)
-I$(SRC)/lib/gss_mechs/mech_krb5/include \
-I$(SRC)/lib/pam_modules/krb5 \
in.telnetd.c $(LDLIBS) -lbsm -lpam -lsocket -lnsl
- $(LINT.c) if_mpadm.c $(LDLIBS) -lsocket -lnsl -lipmp
+ $(LINT.c) if_mpadm.c $(LDLIBS) -lsocket -lnsl -lipmp -linetutil
$(LINT.c) ipaddrsel.c $(LDLIBS) -lsocket -lnsl
$(LINT.c) route.c $(LDLIBS) -lsocket -lnsl -ltsnet
$(LINT.c) syncinit.c $(LDLIBS) -ldlpi
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c b/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c
index d4874135fd..7c5d73c796 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,660 +19,250 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <unistd.h>
-#include <stdlib.h>
+#include <errno.h>
+#include <ipmp_admin.h>
+#include <libinetutil.h>
+#include <locale.h>
+#include <net/if.h>
+#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
#include <sys/sockio.h>
-#include <net/if.h>
-#include <errno.h>
-#include <strings.h>
-#include <ipmp_mpathd.h>
-#include <libintl.h>
+#include <sys/types.h>
-static int if_down(int ifsock, struct lifreq *lifr);
-static int if_up(int ifsock, struct lifreq *lifr);
-static void send_cmd(int cmd, char *ifname);
-static int connect_to_mpathd(sa_family_t family);
-static void do_offline(char *ifname);
-static void undo_offline(char *ifname);
-static boolean_t offline_set(char *ifname);
+typedef void offline_func_t(const char *, ipmp_handle_t);
-#define IF_SEPARATOR ':'
-#define MAX_RETRIES 3
+static const char *progname;
+static int sioc4fd, sioc6fd;
+static offline_func_t do_offline, undo_offline;
+static boolean_t set_lifflags(const char *, uint64_t);
+static boolean_t is_offline(const char *);
+static void warn(const char *, ...);
+static void die(const char *, ...);
static void
usage()
{
- (void) fprintf(stderr, "Usage : if_mpadm [-d | -r] <interface_name>\n");
+ (void) fprintf(stderr, "Usage: %s [-d | -r] <interface>\n", progname);
+ exit(1);
}
-static void
-print_mpathd_error_msg(uint32_t error)
+static const char *
+mpadm_errmsg(uint32_t error)
{
switch (error) {
- case MPATHD_MIN_RED_ERROR:
- (void) fprintf(stderr, gettext(
- "Offline failed as there is no other functional "
- "interface available in the multipathing group "
- "for failing over the network access.\n"));
- break;
-
- case MPATHD_FAILBACK_PARTIAL:
- (void) fprintf(stderr, gettext(
- "Offline cannot be undone because multipathing "
- "configuration is not consistent across all the "
- "interfaces in the group.\n"));
- break;
-
+ case IPMP_EUNKIF:
+ return ("not a physical interface or not in an IPMP group");
+ case IPMP_EMINRED:
+ return ("no other functioning interfaces are in its IPMP "
+ "group");
default:
- /*
- * We shouldn't get here. All errors should have a
- * meaningful error message, as shown in the above
- * cases. If we get here, someone has made a mistake.
- */
- (void) fprintf(stderr, gettext(
- "Operation returned an unrecognized error: %u\n"),
- error);
- break;
+ return (ipmp_errmsg(error));
}
}
int
main(int argc, char **argv)
{
- char *ifname;
- int cmd = 0;
+ int retval;
+ ipmp_handle_t handle;
+ offline_func_t *ofuncp = NULL;
+ const char *ifname;
int c;
-#if !defined(TEXT_DOMAIN)
-#define TEXT_DOMAIN "SYS_TEST"
-#endif
+ if ((progname = strrchr(argv[0], '/')) != NULL)
+ progname++;
+ else
+ progname = argv[0];
+
+ (void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
while ((c = getopt(argc, argv, "d:r:")) != EOF) {
switch (c) {
case 'd':
ifname = optarg;
- cmd = MI_OFFLINE;
- if (offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Interface "
- "already offlined\n"));
- exit(1);
- }
+ ofuncp = do_offline;
break;
case 'r':
ifname = optarg;
- cmd = MI_UNDO_OFFLINE;
- if (!offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Interface not "
- "offlined\n"));
- exit(1);
- }
+ ofuncp = undo_offline;
break;
default :
usage();
- exit(1);
}
}
- if (cmd == 0) {
+ if (ofuncp == NULL)
usage();
- exit(1);
- }
/*
- * Send the command to in.mpathd which is generic to
- * both the commands. send_cmd returns only if there
- * is no error.
+ * Create the global V4 and V6 socket ioctl descriptors.
*/
- send_cmd(cmd, ifname);
- if (cmd == MI_OFFLINE) {
- do_offline(ifname);
- } else {
- undo_offline(ifname);
- }
+ sioc4fd = socket(AF_INET, SOCK_DGRAM, 0);
+ sioc6fd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (sioc4fd == -1 || sioc6fd == -1)
+ die("cannot create sockets");
- return (0);
-}
+ if ((retval = ipmp_open(&handle)) != IPMP_SUCCESS)
+ die("cannot create ipmp handle: %s\n", ipmp_errmsg(retval));
-/*
- * Is IFF_OFFLINE set ?
- * Returns B_FALSE on failure and B_TRUE on success.
- */
-boolean_t
-offline_set(char *ifname)
-{
- struct lifreq lifr;
- int s4;
- int s6;
- int ret;
-
- s4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (s4 < 0) {
- perror("socket");
- exit(1);
- }
- s6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (s6 < 0) {
- perror("socket");
- exit(1);
- }
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- ret = ioctl(s4, SIOCGLIFFLAGS, (caddr_t)&lifr);
- if (ret < 0) {
- if (errno != ENXIO) {
- perror("ioctl: SIOCGLIFFLAGS");
- exit(1);
- }
- ret = ioctl(s6, SIOCGLIFFLAGS, (caddr_t)&lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFFLAGS");
- exit(1);
- }
- }
- (void) close(s4);
- (void) close(s6);
- if (lifr.lifr_flags & IFF_OFFLINE)
- return (B_TRUE);
- else
- return (B_FALSE);
+ (*ofuncp)(ifname, handle);
+
+ ipmp_close(handle);
+ (void) close(sioc4fd);
+ (void) close(sioc6fd);
+
+ return (EXIT_SUCCESS);
}
/*
- * Sends the command to in.mpathd. If not successful, prints
- * an error message and exits.
+ * Checks whether IFF_OFFLINE is set on `ifname'.
*/
-void
-send_cmd(int cmd, char *ifname)
+boolean_t
+is_offline(const char *ifname)
{
- struct mi_offline mio;
- struct mi_undo_offline miu;
- struct mi_result me;
- int ret;
- int cmd_len;
- int i;
- int s;
-
- for (i = 0; i < MAX_RETRIES; i++) {
- s = connect_to_mpathd(AF_INET);
- if (s == -1) {
- s = connect_to_mpathd(AF_INET6);
- if (s == -1) {
- (void) fprintf(stderr, gettext("Cannot "
- "establish communication with "
- "in.mpathd.\n"));
- exit(1);
- }
- }
- switch (cmd) {
- case MI_OFFLINE :
- cmd_len = sizeof (struct mi_offline);
- bzero(&mio, cmd_len);
- mio.mio_command = cmd;
- (void) strncpy(mio.mio_ifname, ifname, LIFNAMSIZ);
- mio.mio_min_redundancy = 1;
- ret = write(s, &mio, cmd_len);
- if (ret != cmd_len) {
- /* errno is set only when ret is -1 */
- if (ret == -1)
- perror("write");
- (void) fprintf(stderr, gettext("Failed to "
- "successfully send command to "
- "in.mpathd.\n"));
- exit(1);
- }
- break;
- case MI_UNDO_OFFLINE:
- cmd_len = sizeof (struct mi_undo_offline);
- bzero(&miu, cmd_len);
- miu.miu_command = cmd;
- (void) strncpy(miu.miu_ifname, ifname, LIFNAMSIZ);
- ret = write(s, &miu, cmd_len);
- if (ret != cmd_len) {
- /* errno is set only when ret is -1 */
- if (ret == -1)
- perror("write");
- (void) fprintf(stderr, gettext("Failed to "
- "successfully send command to "
- "in.mpathd.\n"));
- exit(1);
- }
- break;
- default :
- (void) fprintf(stderr, "Unknown command \n");
- exit(1);
- }
+ struct lifreq lifr = { 0 };
- /* Read the result from mpathd */
- ret = read(s, &me, sizeof (me));
- if (ret != sizeof (me)) {
- /* errno is set only when ret is -1 */
- if (ret == -1)
- perror("read");
- (void) fprintf(stderr, gettext("Failed to successfully "
- "read result from in.mpathd.\n"));
- exit(1);
+ (void) strlcpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
+ if (ioctl(sioc4fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ if (errno != ENXIO ||
+ ioctl(sioc6fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ die("cannot get interface flags on %s", ifname);
}
- if (me.me_mpathd_error == 0) {
- if (i != 0) {
- /*
- * We retried at least once. Tell the user
- * that things succeeded now.
- */
- (void) fprintf(stderr,
- gettext("Retry Successful.\n"));
- }
- return; /* Successful */
- }
-
- if (me.me_mpathd_error == MPATHD_SYS_ERROR) {
- if (me.me_sys_error == EAGAIN) {
- (void) close(s);
- (void) sleep(1);
- (void) fprintf(stderr,
- gettext("Retrying ...\n"));
- continue; /* Retry */
- }
- errno = me.me_sys_error;
- perror("if_mpadm");
- } else {
- print_mpathd_error_msg(me.me_mpathd_error);
- }
- exit(1);
}
- /*
- * We come here only if we retry the operation multiple
- * times and did not succeed. Let the user try it again
- * later.
- */
- (void) fprintf(stderr,
- gettext("Device busy. Retry the operation later.\n"));
- exit(1);
+
+ return ((lifr.lifr_flags & IFF_OFFLINE) != 0);
}
static void
-do_offline(char *ifname)
+do_offline(const char *ifname, ipmp_handle_t handle)
{
- struct lifreq lifr;
- struct lifreq *lifcr;
- struct lifnum lifn;
- struct lifconf lifc;
- char *buf;
- int numifs;
- int n;
- char pi_name[LIFNAMSIZ + 1];
- char *cp;
- int ifsock_v4;
- int ifsock_v6;
- int af;
- int ret;
+ ifaddrlistx_t *ifaddrp, *ifaddrs;
+ int retval;
+
+ if (is_offline(ifname))
+ die("interface %s is already offline\n", ifname);
+
+ if ((retval = ipmp_offline(handle, ifname, 1)) != IPMP_SUCCESS)
+ die("cannot offline %s: %s\n", ifname, mpadm_errmsg(retval));
/*
- * Verify whether IFF_OFFLINE is not set as a sanity check.
- */
- if (!offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Operation failed : in.mpathd "
- "has not set IFF_OFFLINE on %s\n"), ifname);
- exit(1);
- }
- /*
- * Get both the sockets as we may need to bring both
- * IPv4 and IPv6 interfaces down.
- */
- ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (ifsock_v4 < 0) {
- perror("socket");
- exit(1);
- }
- ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (ifsock_v6 < 0) {
- perror("socket");
- exit(1);
- }
- /*
- * Get all the logicals for "ifname" and mark them down.
- * There is no easy way of doing this. We get all the
- * interfaces in the system using SICGLIFCONF and mark the
- * ones matching the name down.
+ * Get all the up addresses for `ifname' and bring them down.
*/
- lifn.lifn_family = AF_UNSPEC;
- lifn.lifn_flags = 0;
- if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
- perror("ioctl : SIOCGLIFNUM");
- exit(1);
- }
- numifs = lifn.lifn_count;
-
- buf = calloc(numifs, sizeof (struct lifreq));
- if (buf == NULL) {
- perror("calloc");
- exit(1);
- }
+ if (ifaddrlistx(ifname, IFF_UP, 0, &ifaddrs) == -1)
+ die("cannot get addresses on %s", ifname);
- lifc.lifc_family = AF_UNSPEC;
- lifc.lifc_flags = 0;
- lifc.lifc_len = numifs * sizeof (struct lifreq);
- lifc.lifc_buf = buf;
+ for (ifaddrp = ifaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!(ifaddrp->ia_flags & IFF_OFFLINE))
+ warn("IFF_OFFLINE vanished on %s\n", ifaddrp->ia_name);
- if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
- perror("ioctl : SIOCGLIFCONF");
- exit(1);
+ if (!set_lifflags(ifaddrp->ia_name,
+ ifaddrp->ia_flags & ~IFF_UP))
+ warn("cannot bring down address on %s\n",
+ ifaddrp->ia_name);
}
- lifcr = (struct lifreq *)lifc.lifc_req;
- for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifcr++) {
- af = lifcr->lifr_addr.ss_family;
- (void) strncpy(pi_name, lifcr->lifr_name,
- sizeof (pi_name));
- pi_name[sizeof (pi_name) - 1] = '\0';
- if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
- *cp = '\0';
- if (strcmp(pi_name, ifname) == 0) {
- /* It matches the interface name that was offlined */
- (void) strncpy(lifr.lifr_name, lifcr->lifr_name,
- sizeof (lifr.lifr_name));
- if (af == AF_INET)
- ret = if_down(ifsock_v4, &lifr);
- else
- ret = if_down(ifsock_v6, &lifr);
- if (ret != 0) {
- (void) fprintf(stderr, gettext("Bringing down "
- "the interfaces failed.\n"));
- exit(1);
- }
- }
- }
+ ifaddrlistx_free(ifaddrs);
}
static void
-undo_offline(char *ifname)
+undo_offline(const char *ifname, ipmp_handle_t handle)
{
- struct lifreq lifr;
- struct lifreq *lifcr;
- struct lifnum lifn;
- struct lifconf lifc;
- char *buf;
- int numifs;
- int n;
- char pi_name[LIFNAMSIZ + 1];
- char *cp;
- int ifsock_v4;
- int ifsock_v6;
- int af;
- int ret;
+ ifaddrlistx_t *ifaddrp, *ifaddrs;
+ int retval;
+
+ if (!is_offline(ifname))
+ die("interface %s is not offline\n", ifname);
/*
- * Verify whether IFF_OFFLINE is set as a sanity check.
- */
- if (offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Operation failed : in.mpathd "
- "has not cleared IFF_OFFLINE on %s\n"), ifname);
- exit(1);
- }
- /*
- * Get both the sockets as we may need to bring both
- * IPv4 and IPv6 interfaces UP.
- */
- ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (ifsock_v4 < 0) {
- perror("socket");
- exit(1);
- }
- ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (ifsock_v6 < 0) {
- perror("socket");
- exit(1);
- }
- /*
- * Get all the logicals for "ifname" and mark them up.
- * There is no easy way of doing this. We get all the
- * interfaces in the system using SICGLIFCONF and mark the
- * ones matching the name up.
+ * Get all the down addresses for `ifname' and bring them up.
*/
- lifn.lifn_family = AF_UNSPEC;
- lifn.lifn_flags = 0;
- if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
- perror("ioctl : SIOCGLIFNUM");
- exit(1);
- }
- numifs = lifn.lifn_count;
-
- buf = calloc(numifs, sizeof (struct lifreq));
- if (buf == NULL) {
- perror("calloc");
- exit(1);
- }
+ if (ifaddrlistx(ifname, 0, IFF_UP, &ifaddrs) == -1)
+ die("cannot get addresses for %s", ifname);
- lifc.lifc_family = AF_UNSPEC;
- lifc.lifc_flags = 0;
- lifc.lifc_len = numifs * sizeof (struct lifreq);
- lifc.lifc_buf = buf;
+ for (ifaddrp = ifaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!(ifaddrp->ia_flags & IFF_OFFLINE))
+ warn("IFF_OFFLINE vanished on %s\n", ifaddrp->ia_name);
- if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
- perror("ioctl : SIOCGLIFCONF");
- exit(1);
+ if (!set_lifflags(ifaddrp->ia_name, ifaddrp->ia_flags | IFF_UP))
+ warn("cannot bring up address on %s\n",
+ ifaddrp->ia_name);
}
- lifcr = (struct lifreq *)lifc.lifc_req;
- for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifcr++) {
- af = lifcr->lifr_addr.ss_family;
- (void) strncpy(pi_name, lifcr->lifr_name,
- sizeof (pi_name));
- pi_name[sizeof (pi_name) - 1] = '\0';
- if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
- *cp = '\0';
-
- if (strcmp(pi_name, ifname) == 0) {
- /* It matches the interface name that was offlined */
- (void) strncpy(lifr.lifr_name, lifcr->lifr_name,
- sizeof (lifr.lifr_name));
- if (af == AF_INET)
- ret = if_up(ifsock_v4, &lifr);
- else
- ret = if_up(ifsock_v6, &lifr);
- if (ret != 0) {
- (void) fprintf(stderr, gettext("Bringing up "
- "the interfaces failed.\n"));
- exit(1);
- }
- }
- }
-}
+ ifaddrlistx_free(ifaddrs);
-/*
- * Returns -1 on failure. Returns the socket file descriptor on
- * success.
- */
-static int
-connect_to_mpathd(sa_family_t family)
-{
- int s;
- struct sockaddr_storage ss;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
- int addrlen;
- int ret;
- int on;
-
- s = socket(family, SOCK_STREAM, 0);
- if (s < 0) {
- perror("socket");
- return (-1);
- }
- bzero((char *)&ss, sizeof (ss));
- ss.ss_family = family;
/*
- * Need to bind to a privileged port. For non-root, this
- * will fail. in.mpathd verifies that only commands coming
- * from privileged ports succeed so that the ordinary user
- * can't issue offline commands.
+ * Undo the offline.
*/
- on = 1;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- perror("setsockopt : TCP_ANONPRIVBIND");
- exit(1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = 0;
- sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addrlen = sizeof (struct sockaddr_in);
- break;
- case AF_INET6:
- sin6->sin6_port = 0;
- sin6->sin6_addr = loopback_addr;
- addrlen = sizeof (struct sockaddr_in6);
- break;
- }
- ret = bind(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- perror("bind");
- return (-1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = htons(MPATHD_PORT);
- break;
- case AF_INET6:
- sin6->sin6_port = htons(MPATHD_PORT);
- break;
+ if ((retval = ipmp_undo_offline(handle, ifname)) != IPMP_SUCCESS) {
+ die("cannot undo-offline %s: %s\n", ifname,
+ mpadm_errmsg(retval));
}
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- perror("connect");
- return (-1);
- }
- on = 0;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- perror("setsockopt : TCP_ANONPRIVBIND");
- return (-1);
- }
- return (s);
+
+ /*
+ * As a sanity check, verify that IFF_OFFLINE is no longer set.
+ */
+ if (is_offline(ifname))
+ warn("in.mpathd has not cleared IFF_OFFLINE on %s\n", ifname);
}
/*
- * Bring down the interface specified by the name lifr->lifr_name.
- *
- * Returns -1 on failure. Returns 0 on success.
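+ * Set the flags on `lifname' to exactly `flags', using the IPv4 or IPv6
+ * ioctl socket according to whether IFF_IPV4 is set in `flags'. Returns
+ * B_TRUE on success.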
*/
-static int
-if_down(int ifsock, struct lifreq *lifr)
+static boolean_t
+set_lifflags(const char *lifname, uint64_t flags)
{
- int ret;
+ struct lifreq lifr = { 0 };
+ int fd = (flags & IFF_IPV4) ? sioc4fd : sioc6fd;
- ret = ioctl(ifsock, SIOCGLIFFLAGS, (caddr_t)lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFFLAGS");
- return (-1);
- }
+ (void) strlcpy(lifr.lifr_name, lifname, LIFNAMSIZ);
+ lifr.lifr_flags = flags;
- /* IFF_OFFLINE was set to start with. Is it still there ? */
- if (!(lifr->lifr_flags & (IFF_OFFLINE))) {
- (void) fprintf(stderr, gettext("IFF_OFFLINE disappeared on "
- "%s\n"), lifr->lifr_name);
- return (-1);
- }
- lifr->lifr_flags &= ~IFF_UP;
- ret = ioctl(ifsock, SIOCSLIFFLAGS, (caddr_t)lifr);
- if (ret < 0) {
- perror("ioctl: SIOCSLIFFLAGS");
- return (-1);
- }
- return (0);
+ return (ioctl(fd, SIOCSLIFFLAGS, &lifr) >= 0);
}
-/*
- * Bring up the interface specified by the name lifr->lifr_name.
- *
- * Returns -1 on failure. Returns 0 on success.
- */
-static int
-if_up(int ifsock, struct lifreq *lifr)
+/* PRINTFLIKE1 */
+static void
+die(const char *format, ...)
{
- int ret;
- boolean_t zeroaddr = B_FALSE;
- struct sockaddr_in *addr;
-
- ret = ioctl(ifsock, SIOCGLIFADDR, lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFADDR");
- return (-1);
- }
+ va_list alist;
+ char *errstr = strerror(errno);
- addr = (struct sockaddr_in *)&lifr->lifr_addr;
- switch (addr->sin_family) {
- case AF_INET:
- zeroaddr = (addr->sin_addr.s_addr == INADDR_ANY);
- break;
+ format = gettext(format);
+ (void) fprintf(stderr, gettext("%s: fatal: "), progname);
- case AF_INET6:
- zeroaddr = IN6_IS_ADDR_UNSPECIFIED(
- &((struct sockaddr_in6 *)addr)->sin6_addr);
- break;
+ va_start(alist, format);
+ (void) vfprintf(stderr, format, alist);
+ va_end(alist);
- default:
- break;
- }
+ if (strchr(format, '\n') == NULL)
+ (void) fprintf(stderr, ": %s\n", errstr);
- ret = ioctl(ifsock, SIOCGLIFFLAGS, lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFFLAGS");
- return (-1);
- }
- /*
- * Don't affect the state of addresses that failed back.
- *
- * XXX Link local addresses that are not marked IFF_NOFAILOVER
- * will not be brought up. Link local addresses never failover.
- * When the interface was offlined, we brought the link local
- * address down. We will not bring it up now if IFF_NOFAILOVER
- * is not marked. We check for IFF_NOFAILOVER below so that
- * we want to maintain the state of all other addresses as it
- * was before offline. Normally link local addresses are marked
- * IFF_NOFAILOVER and hence this is not an issue. These can
- * be fixed in future with RCM and it is beyond the scope
- * of if_mpadm to maintain state and do this correctly.
- */
- if (!(lifr->lifr_flags & IFF_NOFAILOVER))
- return (0);
+ exit(EXIT_FAILURE);
+}
- /*
- * When a data address associated with the physical interface itself
- * is failed over (e.g., qfe0, rather than qfe0:1), the kernel must
- * fill the ipif data structure for qfe0 with a placeholder entry (the
- * "replacement ipif"). Replacement ipif's cannot be brought IFF_UP
- * (nor would it make any sense to do so), so we must be careful to
- * skip them; thankfully they can be easily identified since they
- * all have a zeroed address.
- */
- if (zeroaddr)
- return (0);
-
- /* IFF_OFFLINE was not set to start with. Is it there ? */
- if (lifr->lifr_flags & IFF_OFFLINE) {
- (void) fprintf(stderr,
- gettext("IFF_OFFLINE set wrongly on %s\n"),
- lifr->lifr_name);
- return (-1);
- }
- lifr->lifr_flags |= IFF_UP;
- ret = ioctl(ifsock, SIOCSLIFFLAGS, lifr);
- if (ret < 0) {
- perror("ioctl: SIOCSLIFFLAGS");
- return (-1);
- }
- return (0);
+/*
+ * Print a warning on stderr, prefixed with the program name. When the
+ * translated format has no newline, the strerror() text for the errno
+ * value seen on entry is appended, followed by a newline.
+ */
+/* PRINTFLIKE1 */
+static void
+warn(const char *format, ...)
+{
+	/* Look up the error text first, before any later call alters errno. */
+	const char *syserr = strerror(errno);
+	const char *msgfmt = gettext(format);
+	va_list ap;
+
+	(void) fprintf(stderr, gettext("%s: warning: "), progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, msgfmt, ap);
+	va_end(ap);
+
+	/* A format that already ends its own line gets no error suffix. */
+	if (strchr(msgfmt, '\n') != NULL)
+		return;
+
+	(void) fprintf(stderr, ": %s\n", syserr);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
index 69e91758ea..e99f2945a7 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
@@ -19,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#
PROG = ifconfig
ROOTFS_PROG = $(PROG)
@@ -38,7 +37,7 @@ COMMONSRCS= $(CMDINETCOMMONDIR)/$(COMMONOBJS:%.o=%.c)
SRCS= $(LOCALSRCS) $(COMMONSRCS)
CPPFLAGS += -I$(CMDINETCOMMONDIR) -I$(SRC)/common/net/dhcp
-LDLIBS += -ldhcpagent -linetcfg -ldlpi -ldladm
+LDLIBS += -ldhcpagent -ldlpi -linetutil -linetcfg -lipmp -ldladm
LINTFLAGS += -m
ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%)
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h
index c993baeb02..4aa1aa0ed7 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -11,13 +11,12 @@
#ifndef _DEFS_H
#define _DEFS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
#include <errno.h>
+#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
@@ -54,7 +53,10 @@ extern "C" {
#include <assert.h>
#include <ipmp_mpathd.h>
+#include <ipmp_admin.h>
#include <inetcfg.h>
+#include <libinetutil.h>
+#include <alloca.h>
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
index f49fca249c..d5517a4700 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -23,6 +23,7 @@
#define TUN_NAME "tun"
#define ATUN_NAME "atun"
#define TUN6TO4_NAME "6to4tun"
+#define IPMPSTUB (void *)-1
typedef struct if_flags {
uint64_t iff_value;
@@ -67,7 +68,20 @@ static if_flags_t if_flags_tbl[] = {
{ IFF_TEMPORARY, "TEMPORARY" },
{ IFF_FIXEDMTU, "FIXEDMTU" },
{ IFF_VIRTUAL, "VIRTUAL" },
- { IFF_DUPLICATE, "DUPLICATE" }
+ { IFF_DUPLICATE, "DUPLICATE" },
+ { IFF_IPMP, "IPMP"}
+};
+
+typedef struct {
+ const char *ia_app;
+ uint64_t ia_flag;
+ uint_t ia_tries;
+} if_appflags_t;
+
+static const if_appflags_t if_appflags_tbl[] = {
+ { "dhcpagent(1M)", IFF_DHCPRUNNING, 1 },
+ { "in.ndpd(1M)", IFF_ADDRCONF, 3 },
+ { NULL, 0, 0 }
};
static struct lifreq lifr;
@@ -75,7 +89,6 @@ static struct lifreq lifr;
static char name[LIFNAMSIZ];
/* foreach interface saved name */
static char origname[LIFNAMSIZ];
-static char savedname[LIFNAMSIZ]; /* For addif */
static int setaddr;
/*
@@ -89,20 +102,7 @@ static int setaddr;
#define NO_ESP_AALG 256
#define NO_ESP_EALG 256
-/*
- * iface_t
- * used by setifether to create a list of interfaces to mark
- * down-up when changing the ethernet address of an interface
- */
-typedef struct iface {
- struct lifreq lifr;
- struct iface *next; /* pointer to the next list element */
-} iface_t;
-
-static iface_t *logifs = NULL; /* list of logical interfaces */
-static iface_t *phyif = NULL; /* physical interface */
-
-int s;
+int s, s4, s6;
int af = AF_INET; /* default address family */
int debug = 0;
int all = 0; /* setifdhcp() needs to know this */
@@ -113,6 +113,7 @@ int v4compat = 0; /* Compatible printing format */
* Function prototypes for command functions.
*/
static int addif(char *arg, int64_t param);
+static int inetipmp(char *arg, int64_t param);
static int inetplumb(char *arg, int64_t param);
static int inetunplumb(char *arg, int64_t param);
static int removeif(char *arg, int64_t param);
@@ -141,7 +142,7 @@ static int modinsert(char *arg, int64_t param);
static int modremove(char *arg, int64_t param);
static int setifgroupname(char *arg, int64_t param);
static int configinfo(char *arg, int64_t param);
-static void print_config_flags(uint64_t flags);
+static void print_config_flags(int af, uint64_t flags);
static void print_flags(uint64_t flags);
static void print_ifether(char *ifname);
static int set_tun_encap_limit(char *arg, int64_t param);
@@ -150,6 +151,7 @@ static int set_tun_hop_limit(char *arg, int64_t param);
static int setzone(char *arg, int64_t param);
static int setallzones(char *arg, int64_t param);
static int setifsrc(char *arg, int64_t param);
+static int lifnum(const char *ifname);
/*
* Address family specific function prototypes.
@@ -179,19 +181,22 @@ static int settaddr(char *, int (*)(icfg_handle_t,
static void status(void);
static void ifstatus(const char *);
static void usage(void);
-static int strioctl(int s, int cmd, char *buf, int buflen);
+static int strioctl(int s, int cmd, void *buf, int buflen);
static int setifdhcp(const char *caller, const char *ifname,
int argc, char *argv[]);
static int ip_domux2fd(int *, int *, int *, int *, int *);
static int ip_plink(int, int, int, int, int);
static int modop(char *arg, char op);
-static void selectifs(int argc, char *argv[], int af,
- struct lifreq *lifrp);
-static int updownifs(iface_t *ifs, int up);
static int find_all_global_interfaces(struct lifconf *lifcp, char **buf,
int64_t lifc_flags);
static int find_all_zone_interfaces(struct lifconf *lifcp, char **buf,
int64_t lifc_flags);
+static int create_ipmp(const char *grname, int af, const char *ifname,
+ boolean_t implicit);
+static int create_ipmp_peer(int af, const char *ifname);
+static void start_ipmp_daemon(void);
+static boolean_t ifaddr_up(ifaddrlistx_t *ifaddrp);
+static boolean_t ifaddr_down(ifaddrlistx_t *ifaddrp);
#define max(a, b) ((a) < (b) ? (b) : (a))
@@ -251,6 +256,7 @@ struct cmd {
{ "index", NEXTARG, setifindex, 0, AF_ANY },
{ "broadcast", NEXTARG, setifbroadaddr, 0, AF_INET },
{ "auto-revarp", 0, setifrevarp, 1, AF_INET },
+ { "ipmp", 0, inetipmp, 1, AF_ANY },
{ "plumb", 0, inetplumb, 1, AF_ANY },
{ "unplumb", 0, inetunplumb, 0, AF_ANY },
{ "subnet", NEXTARG, setifsubnet, 0, AF_ANY },
@@ -297,22 +303,30 @@ struct cmd {
typedef struct if_config_cmd {
uint64_t iff_flag;
+ int iff_af;
char *iff_name;
} if_config_cmd_t;
+/*
+ * NOTE: print_config_flags() processes this table in order, so we put "up"
+ * last so that we can be sure "-failover" will take effect first. Otherwise,
+ * IPMP test addresses will erroneously migrate to the IPMP interface.
+ */
static if_config_cmd_t if_config_cmd_tbl[] = {
- { IFF_UP, "up" },
- { IFF_NOTRAILERS, "-trailers" },
- { IFF_PRIVATE, "private" },
- { IFF_NOXMIT, "-xmit" },
- { IFF_ANYCAST, "anycast" },
- { IFF_NOLOCAL, "-local" },
- { IFF_DEPRECATED, "deprecated" },
- { IFF_NOFAILOVER, "-failover" },
- { IFF_STANDBY, "standby" },
- { IFF_FAILED, "failed" },
- { IFF_PREFERRED, "preferred" },
- { 0, 0 },
+ { IFF_NOTRAILERS, AF_UNSPEC, "-trailers" },
+ { IFF_PRIVATE, AF_UNSPEC, "private" },
+ { IFF_NOXMIT, AF_UNSPEC, "-xmit" },
+ { IFF_ANYCAST, AF_INET6, "anycast" },
+ { IFF_NOLOCAL, AF_UNSPEC, "-local" },
+ { IFF_DEPRECATED, AF_UNSPEC, "deprecated" },
+ { IFF_NOFAILOVER, AF_UNSPEC, "-failover" },
+ { IFF_STANDBY, AF_UNSPEC, "standby" },
+ { IFF_FAILED, AF_UNSPEC, "failed" },
+ { IFF_PREFERRED, AF_UNSPEC, "preferred" },
+ { IFF_NONUD, AF_INET6, "-nud" },
+ { IFF_NOARP, AF_INET, "-arp" },
+ { IFF_UP, AF_UNSPEC, "up" },
+ { 0, 0, NULL },
};
typedef struct ni {
@@ -345,10 +359,11 @@ struct afswtch *afp; /* the address family being set or asked about */
int
main(int argc, char *argv[])
{
- /* Include IFF_NOXMIT, IFF_TEMPORARY and all zone interfaces */
- int64_t lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
+ int64_t lifc_flags;
char *default_ip_str;
+ lifc_flags = LIFC_NOXMIT|LIFC_TEMPORARY|LIFC_ALLZONES|LIFC_UNDER_IPMP;
+
if (argc < 2) {
usage();
exit(1);
@@ -388,9 +403,10 @@ main(int argc, char *argv[])
}
s = socket(SOCKET_AF(af), SOCK_DGRAM, 0);
- if (s < 0) {
+ s4 = socket(AF_INET, SOCK_DGRAM, 0);
+ s6 = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (s == -1 || s4 == -1 || s6 == -1)
Perror0_exit("socket");
- }
/*
* Special interface names is any combination of these flags.
@@ -1441,39 +1457,38 @@ setifdstaddr(char *addr, int64_t param)
static int
setifflags(char *val, int64_t value)
{
- int phyintlen, origphyintlen;
+ struct lifreq lifrl; /* local lifreq struct */
+ boolean_t bringup = _B_FALSE;
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0)
Perror0_exit("setifflags: SIOCGLIFFLAGS");
- if (value == IFF_NOFAILOVER) {
- /*
- * Fail if '-failover' is set after a prior addif created the
- * alias on a different interface. This can happen when the
- * interface is part of an IPMP group.
- */
- phyintlen = strcspn(name, ":");
- origphyintlen = strcspn(origname, ":");
- if (phyintlen != origphyintlen ||
- strncmp(name, origname, phyintlen) != 0) {
- (void) fprintf(stderr, "ifconfig: can't set -failover "
- "on failed/standby/offlined interface %s\n",
- origname);
- exit(1);
- }
- }
-
if (value < 0) {
value = -value;
+
+ if ((value & IFF_NOFAILOVER) && (lifr.lifr_flags & IFF_UP)) {
+ /*
+ * The kernel does not allow administratively up test
+ * addresses to be converted to data addresses. Bring
+ * the address down first, then bring it up after it's
+ * been converted to a data address.
+ */
+ lifr.lifr_flags &= ~IFF_UP;
+ (void) ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr);
+ bringup = _B_TRUE;
+ }
+
lifr.lifr_flags &= ~value;
- if ((value & IFF_UP) && (lifr.lifr_flags & IFF_DUPLICATE)) {
+ if ((value & (IFF_UP | IFF_NOFAILOVER)) &&
+ (lifr.lifr_flags & IFF_DUPLICATE)) {
/*
* If the user is trying to mark an interface with a
- * duplicate address as "down," then fetch the address
- * and set it. This will cause IP to clear the
- * IFF_DUPLICATE flag and stop the automatic recovery
- * timer.
+ * duplicate address as "down," or convert a duplicate
+ * test address to a data address, then fetch the
+ * address and set it. This will cause IP to clear
+ * the IFF_DUPLICATE flag and stop the automatic
+ * recovery timer.
*/
value = lifr.lifr_flags;
if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) >= 0)
@@ -1483,10 +1498,48 @@ setifflags(char *val, int64_t value)
} else {
lifr.lifr_flags |= value;
}
+
+ /*
+ * If we're about to bring up an underlying physical IPv6 interface in
+ * an IPMP group, ensure the IPv6 IPMP interface is also up. This is
+ * for backward compatibility with legacy configurations in which
+ * there are no explicit hostname files for IPMP interfaces. (For
+ * IPv4, this is automatically handled by the kernel when migrating
+ * the underlying interface's data address to the IPMP interface.)
+ */
+ (void) strlcpy(lifrl.lifr_name, name, LIFNAMSIZ);
+
+ if (lifnum(lifr.lifr_name) == 0 &&
+ (lifr.lifr_flags & (IFF_UP|IFF_IPV6)) == (IFF_UP|IFF_IPV6) &&
+ ioctl(s, SIOCGLIFGROUPNAME, &lifrl) == 0 &&
+ lifrl.lifr_groupname[0] != '\0') {
+ lifgroupinfo_t lifgr;
+
+ (void) strlcpy(lifgr.gi_grname, lifrl.lifr_groupname,
+ LIFGRNAMSIZ);
+ if (ioctl(s, SIOCGLIFGROUPINFO, &lifgr) == -1)
+ Perror0_exit("setifflags: SIOCGLIFGROUPINFO");
+
+ (void) strlcpy(lifrl.lifr_name, lifgr.gi_grifname, LIFNAMSIZ);
+ if (ioctl(s, SIOCGLIFFLAGS, &lifrl) == -1)
+ Perror0_exit("setifflags: SIOCGLIFFLAGS");
+ if (!(lifrl.lifr_flags & IFF_UP)) {
+ lifrl.lifr_flags |= IFF_UP;
+ if (ioctl(s, SIOCSLIFFLAGS, &lifrl) == -1)
+ Perror0_exit("setifflags: SIOCSLIFFLAGS");
+ }
+ }
+
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
+ if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0)
Perror0_exit("setifflags: SIOCSLIFFLAGS");
+
+ if (bringup) {
+ lifr.lifr_flags |= IFF_UP;
+ if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0)
+ Perror0_exit("setifflags: SIOCSLIFFLAGS IFF_UP");
}
+
return (0);
}
@@ -1524,12 +1577,21 @@ setifindex(char *val, int64_t param)
}
/* ARGSUSED */
+static void
+notifycb(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg)
+{
+}
+
+/* ARGSUSED */
static int
setifether(char *addr, int64_t param)
{
- uchar_t *ea;
- iface_t *current;
- int maclen;
+ uchar_t *hwaddr;
+ int hwaddrlen;
+ int retval;
+ ifaddrlistx_t *ifaddrp, *ifaddrs = NULL;
+ dlpi_handle_t dh;
+ dlpi_notifyid_t id;
if (addr == NULL) {
ifstatus(name);
@@ -1537,9 +1599,6 @@ setifether(char *addr, int64_t param)
return (0);
}
- phyif = NULL;
- logifs = NULL;
-
/*
* if the IP interface in the arguments is a logical
* interface, exit with an error now.
@@ -1550,79 +1609,68 @@ setifether(char *addr, int64_t param)
exit(1);
}
- ea = _link_aton(addr, &maclen);
- if (ea == NULL) {
- if (maclen == -1)
+ if ((hwaddr = _link_aton(addr, &hwaddrlen)) == NULL) {
+ if (hwaddrlen == -1)
(void) fprintf(stderr,
- "ifconfig: %s: bad address\n", addr);
+ "ifconfig: %s: bad address\n", addr);
else
(void) fprintf(stderr, "ifconfig: malloc() failed\n");
exit(1);
}
- (void) strncpy(savedname, name, sizeof (savedname));
+ if ((retval = dlpi_open(name, &dh, 0)) != DLPI_SUCCESS)
+ Perrdlpi_exit("cannot dlpi_open() link", name, retval);
- /*
- * Call selectifs only for the IP interfaces that are ipv4.
- * offflags == IFF_IPV6 because you should not change the
- * Ethernet address of an ipv6 interface
- */
- foreachinterface(selectifs, 0, (char **)NULL, 0, 0, IFF_IPV6, 0);
+ if ((retval = dlpi_bind(dh, DLPI_ANY_SAP, NULL)) != DLPI_SUCCESS)
+ Perrdlpi_exit("cannot dlpi_bind() link", name, retval);
- /* If physical interface not found, exit now */
- if (phyif == NULL) {
- (void) fprintf(stderr,
- "ifconfig: interface %s not found\n", savedname);
- exit(1);
- }
-
- /* Restore */
- (void) strncpy(name, savedname, sizeof (name));
- (void) strncpy(origname, savedname, sizeof (origname));
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
-
- /*
- * close and reopen the socket
- * we don't know which type of socket we have now
- */
- (void) close(s);
- s = socket(SOCKET_AF(AF_UNSPEC), SOCK_DGRAM, 0);
- if (s < 0) {
- Perror0_exit("socket");
- }
-
- /*
- * mark down the logical interfaces first,
- * and then the physical interface
- */
- if (updownifs(logifs, 0) < 0 || updownifs(phyif, 0) < 0) {
- Perror0_exit("mark down interface failed");
+ retval = dlpi_enabnotify(dh, DL_NOTE_PHYS_ADDR, notifycb, NULL, &id);
+ if (retval == DLPI_SUCCESS) {
+ (void) dlpi_disabnotify(dh, id, NULL);
+ } else {
+ /*
+ * This link does not support DL_NOTE_PHYS_ADDR: bring down
+ * all of the addresses to flush the old hardware address
+ * information out of IP.
+ *
+ * NOTE: Skipping this when DL_NOTE_PHYS_ADDR is supported is
+ * more than an optimization: in.mpathd will set IFF_OFFLINE
+ * if it's notified and the new address is a duplicate of
+ * another in the group -- but the flags manipulation in
+ * ifaddr_{down,up}() cannot be atomic and thus might clobber
+ * IFF_OFFLINE, confusing in.mpathd.
+ */
+ if (ifaddrlistx(name, IFF_UP, 0, &ifaddrs) == -1)
+ Perror2_exit(name, "cannot get address list");
+
+ ifaddrp = ifaddrs;
+ for (; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_down(ifaddrp)) {
+ Perror2_exit(ifaddrp->ia_name,
+ "cannot bring down");
+ }
+ }
}
/*
- * Change the physical address
+ * Change the hardware address.
*/
- if (dlpi_set_address(savedname, ea, maclen) == -1) {
+ retval = dlpi_set_physaddr(dh, DL_CURR_PHYS_ADDR, hwaddr, hwaddrlen);
+ if (retval != DLPI_SUCCESS) {
(void) fprintf(stderr,
- "ifconfig: failed setting mac address on %s\n",
- savedname);
+ "ifconfig: failed setting mac address on %s\n", name);
}
+ dlpi_close(dh);
/*
- * if any interfaces were marked down before changing the
- * ethernet address, put them up again.
- * First the physical interface, then the logical ones.
+ * If any addresses were brought down before changing the hardware
+ * address, bring them up again.
*/
- if (updownifs(phyif, 1) < 0 || updownifs(logifs, 1) < 0) {
- Perror0_exit("mark down interface failed");
- }
-
- /* Free the memory allocated by selectifs */
- free(phyif);
- for (current = logifs; current != NULL; current = logifs) {
- logifs = logifs->next;
- free(current);
+ for (ifaddrp = ifaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_up(ifaddrp))
+ Perror2_exit(ifaddrp->ia_name, "cannot bring up");
}
+ ifaddrlistx_free(ifaddrs);
return (0);
}
@@ -1655,8 +1703,8 @@ print_ifether(char *ifname)
}
(void) close(fd);
- /* Virtual interfaces don't have MAC addresses */
- if (lifr.lifr_flags & IFF_VIRTUAL)
+ /* VNI and IPMP interfaces don't have MAC addresses */
+ if (lifr.lifr_flags & (IFF_VIRTUAL|IFF_IPMP))
return;
/*
@@ -1685,104 +1733,6 @@ print_ifether(char *ifname)
}
/*
- * static void selectifs(int argc, char *argv[], int af, struct lifreq *rp)
- *
- * Called inside setifether() to create a list of interfaces to
- * mark down/up when changing the Ethernet address.
- * If the current interface is the physical interface passed
- * as an argument to ifconfig, update phyif.
- * If the current interface is a logical interface associated
- * to the physical interface, add it to the logifs list.
- */
-/* ARGSUSED */
-static void
-selectifs(int argc, char *argv[], int af, struct lifreq *rp)
-{
- char *colonp;
- int length;
- iface_t *current;
-
- /*
- * savedname= name of the IP interface to which you want to
- * change ethernet address
- * name= name of the current IP interface
- */
- colonp = strchr(name, ':');
- if (colonp == NULL)
- length = max(strlen(savedname), strlen(name));
- else
- length = max(strlen(savedname), colonp - name);
- if (strncmp(savedname, name, length) == 0) {
- (void) strcpy(lifr.lifr_name, name);
- if (ioctl(s, SIOCGLIFFLAGS, &lifr) < 0) {
- Perror0("selectifs: SIOCGLIFFLAGS");
- return;
- }
-
- if ((current = malloc(sizeof (iface_t))) == NULL) {
- Perror0_exit("selectifs: malloc failed\n");
- }
-
- if (colonp == NULL) {
- /* this is the physical interface */
- phyif = current;
- bcopy(&lifr, &phyif->lifr, sizeof (struct lifreq));
- phyif->next = NULL;
- } else {
- /* this is a logical interface */
- bcopy(&lifr, &current->lifr, sizeof (struct lifreq));
- current->next = logifs;
- logifs = current;
- }
- }
-}
-
-/*
- * static int updownifs(iface_t *ifs, int up)
- *
- * It takes in input a list of IP interfaces (ifs)
- * and a flag (up).
- * It marks each interface in the list down (up = 0)
- * or up (up > 0). This is done ONLY if the IP
- * interface was originally up.
- *
- * Return values:
- * 0 = everything OK
- * -1 = problem
- */
-static int
-updownifs(iface_t *ifs, int up)
-{
- iface_t *current;
- int ret = 0;
- int save_errno;
- char savename[LIFNAMSIZ];
- uint64_t orig_flags;
-
- for (current = ifs; current != NULL; current = current->next) {
- if (current->lifr.lifr_flags & IFF_UP) {
- orig_flags = current->lifr.lifr_flags;
- if (!up)
- current->lifr.lifr_flags &= ~IFF_UP;
- if (ioctl(s, SIOCSLIFFLAGS, &current->lifr) < 0) {
- save_errno = errno;
- (void) strcpy(savename,
- current->lifr.lifr_name);
- ret = -1;
- }
- if (!up) /* restore the original flags */
- current->lifr.lifr_flags = orig_flags;
- }
- }
-
- if (ret == -1) {
- (void) strcpy(lifr.lifr_name, savename);
- errno = save_errno;
- }
- return (ret);
-}
-
-/*
* static int find_all_global_interfaces(struct lifconf *lifcp, char **buf,
* int64_t lifc_flags)
*
@@ -2109,130 +2059,217 @@ setiftoken(char *addr, int64_t param)
return (0);
}
-/*
- * Return value: 0 on success, -1 on failure.
- */
-static int
-connect_to_mpathd(int family)
-{
- int s;
- struct sockaddr_storage ss;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
- int addrlen;
- int ret;
- int on;
-
- s = socket(family, SOCK_STREAM, 0);
- if (s < 0) {
- Perror0_exit("connect_to_mpathd: socket");
- }
- (void) bzero((char *)&ss, sizeof (ss));
- ss.ss_family = family;
- /*
- * Need to bind to a privileged port. For non-root, this
- * will fail. in.mpathd verifies that only commands coming
- * from privileged ports succeed so that ordinary users
- * can't connect and start talking to in.mpathd
- */
- on = 1;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- Perror0_exit("connect_to_mpathd: setsockopt");
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = 0;
- sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addrlen = sizeof (struct sockaddr_in);
- break;
- case AF_INET6:
- sin6->sin6_port = 0;
- sin6->sin6_addr = loopback_addr;
- addrlen = sizeof (struct sockaddr_in6);
- break;
- }
- ret = bind(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- (void) close(s);
- return (-1);
- }
-
- switch (family) {
- case AF_INET:
- sin->sin_port = htons(MPATHD_PORT);
- break;
- case AF_INET6:
- sin6->sin6_port = htons(MPATHD_PORT);
- break;
- }
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- (void) close(s);
- return (ret);
-}
-
/* ARGSUSED */
static int
-setifgroupname(char *grpname, int64_t param)
+setifgroupname(char *grname, int64_t param)
{
+ lifgroupinfo_t lifgr;
+ struct lifreq lifrl;
+ ifaddrlistx_t *ifaddrp, *nextifaddrp;
+ ifaddrlistx_t *ifaddrs = NULL, *downaddrs = NULL;
+ int af;
+
if (debug) {
(void) printf("Setting groupname %s on interface %s\n",
- grpname, name);
- }
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
- (void) strncpy(lifr.lifr_groupname, grpname,
- sizeof (lifr.lifr_groupname));
- if (ioctl(s, SIOCSLIFGROUPNAME, (caddr_t)&lifr) < 0) {
- Perror0_exit("setifgroupname: SIOCSLIFGROUPNAME");
+ grname, name);
}
- /*
- * If the SUNW_NO_MPATHD environment variable is set then don't
- * bother starting up in.mpathd. See PSARC/2002/249 for the
- * depressing details on this bit of stupidity.
- */
- if (getenv("SUNW_NO_MPATHD") != NULL) {
- return (0);
+ (void) strlcpy(lifrl.lifr_name, name, LIFNAMSIZ);
+ (void) strlcpy(lifrl.lifr_groupname, grname, LIFGRNAMSIZ);
+
+ while (ioctl(s, SIOCSLIFGROUPNAME, &lifrl) == -1) {
+ switch (errno) {
+ case ENOENT:
+ /*
+ * The group doesn't yet exist; create it and repeat.
+ */
+ af = afp->af_af;
+ if (create_ipmp(grname, af, NULL, _B_TRUE) == -1) {
+ if (errno == EEXIST)
+ continue;
+
+ Perror2(grname, "cannot create IPMP group");
+ goto fail;
+ }
+ continue;
+
+ case EALREADY:
+ /*
+ * The interface is already in another group; must
+ * remove existing membership first.
+ */
+ lifrl.lifr_groupname[0] = '\0';
+ if (ioctl(s, SIOCSLIFGROUPNAME, &lifrl) == -1) {
+ Perror2(name, "cannot remove existing "
+ "IPMP group membership");
+ goto fail;
+ }
+ (void) strlcpy(lifrl.lifr_groupname, grname,
+ LIFGRNAMSIZ);
+ continue;
+
+ case EAFNOSUPPORT:
+ /*
+ * The group exists, but it's not configured with the
+ * address families the interface needs. Since only
+ * two address families are currently supported, just
+ * configure the "other" address family. Note that we
+ * may race with group deletion or creation by another
+ * process (ENOENT or EEXIST); in such cases we repeat
+ * our original SIOCSLIFGROUPNAME.
+ */
+ (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ);
+ if (ioctl(s, SIOCGLIFGROUPINFO, &lifgr) == -1) {
+ if (errno == ENOENT)
+ continue;
+
+ Perror2(grname, "SIOCGLIFGROUPINFO");
+ goto fail;
+ }
+
+ af = lifgr.gi_v4 ? AF_INET6 : AF_INET;
+ if (create_ipmp(grname, af, lifgr.gi_grifname,
+ _B_TRUE) == -1) {
+ if (errno == EEXIST)
+ continue;
+
+ Perror2(grname, "cannot configure IPMP group");
+ goto fail;
+ }
+ continue;
+
+ case EADDRINUSE:
+ /*
+ * Some addresses are in-use (or under control of DAD).
+ * Bring them down and retry the group join operation.
+ * We will bring them back up after the interface has
+ * been placed in the group.
+ */
+ if (ifaddrlistx(lifrl.lifr_name, IFF_UP|IFF_DUPLICATE,
+ 0, &ifaddrs) == -1) {
+ Perror2(name, "cannot get address list");
+ goto fail;
+ }
+
+ ifaddrp = ifaddrs;
+ for (; ifaddrp != NULL; ifaddrp = nextifaddrp) {
+ if (!ifaddr_down(ifaddrp)) {
+ ifaddrs = ifaddrp;
+ goto fail;
+ }
+ nextifaddrp = ifaddrp->ia_next;
+ ifaddrp->ia_next = downaddrs;
+ downaddrs = ifaddrp;
+ }
+ ifaddrs = NULL;
+ continue;
+
+ case EADDRNOTAVAIL: {
+ /*
+ * Some data addresses are under application control.
+ * For some of these (e.g., ADDRCONF), the application
+ * should remove the address, in which case we retry a
+ * few times (since the application's action is not
+ * atomic with respect to us) before bailing out and
+ * informing the user.
+ */
+ int ntries, nappaddr = 0;
+ const if_appflags_t *iap = if_appflags_tbl;
+
+ for (; iap->ia_app != NULL; iap++) {
+ ntries = 0;
+again:
+ if (ifaddrlistx(lifrl.lifr_name, iap->ia_flag,
+ IFF_NOFAILOVER, &ifaddrs) == -1) {
+ (void) fprintf(stderr, "ifconfig: %s: "
+ "cannot get data addresses managed "
+ "by %s\n", lifrl.lifr_name,
+ iap->ia_app);
+ goto fail;
+ }
+
+ if (ifaddrs == NULL)
+ continue;
+
+ ifaddrlistx_free(ifaddrs);
+ ifaddrs = NULL;
+
+ if (++ntries < iap->ia_tries) {
+ (void) poll(NULL, 0, 100);
+ goto again;
+ }
+
+ (void) fprintf(stderr, "ifconfig: cannot join "
+ "IPMP group: %s has data addresses managed "
+ "by %s\n", lifrl.lifr_name, iap->ia_app);
+ nappaddr++;
+ }
+ if (nappaddr > 0)
+ goto fail;
+ continue;
+ }
+ default:
+ Perror2(name, "SIOCSLIFGROUPNAME");
+ goto fail;
+ }
}
/*
- * Try to connect to in.mpathd using IPv4. If we succeed,
- * we conclude that in.mpathd is running, and quit.
+ * If there were addresses that we had to bring down, it's time to
+ * bring them up again. As part of bringing them up, the kernel will
+ * automatically move them to the new IPMP interface.
*/
- if (connect_to_mpathd(AF_INET) == 0) {
- /* connect succeeded, mpathd is already running */
- return (0);
+ for (ifaddrp = downaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_up(ifaddrp) && errno != ENXIO) {
+ (void) fprintf(stderr, "ifconfig: cannot bring back up "
+ "%s: %s\n", ifaddrp->ia_name, strerror(errno));
+ }
}
+ ifaddrlistx_free(downaddrs);
+ return (0);
+fail:
/*
- * Try to connect to in.mpathd using IPv6. If we succeed,
- * we conclude that in.mpathd is running, and quit.
+ * Attempt to bring back up any interfaces that we downed.
*/
- if (connect_to_mpathd(AF_INET6) == 0) {
- /* connect succeeded, mpathd is already running */
- return (0);
+ for (ifaddrp = downaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_up(ifaddrp) && errno != ENXIO) {
+ (void) fprintf(stderr, "ifconfig: cannot bring back up "
+ "%s: %s\n", ifaddrp->ia_name, strerror(errno));
+ }
}
+ ifaddrlistx_free(downaddrs);
+ ifaddrlistx_free(ifaddrs);
/*
- * in.mpathd may not be running. Start it now. If it is already
- * running, in.mpathd will take care of handling multiple incarnations
- * of itself. ifconfig only tries to optimize performance by not
- * starting another incarnation of in.mpathd.
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
*/
- switch (fork()) {
+ exit(1);
+ /* NOTREACHED */
+}
- case -1:
- Perror0_exit("setifgroupname: fork");
- /* NOTREACHED */
- case 0:
- (void) execl(MPATHD_PATH, MPATHD_PATH, NULL);
- _exit(1);
- /* NOTREACHED */
- default:
- return (0);
+static boolean_t
+modcheck(const char *ifname)
+{
+ (void) strlcpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
+
+ if (ioctl(s, SIOCGLIFFLAGS, &lifr) < 0) {
+ Perror0("SIOCGLIFFLAGS");
+ return (_B_FALSE);
}
-}
+ if (lifr.lifr_flags & IFF_IPMP) {
+ (void) fprintf(stderr, "ifconfig: %s: module operations not"
+ " supported on IPMP interfaces\n", ifname);
+ return (_B_FALSE);
+ }
+ if (lifr.lifr_flags & IFF_VIRTUAL) {
+ (void) fprintf(stderr, "ifconfig: %s: module operations not"
+ " supported on virtual IP interfaces\n", ifname);
+ return (_B_FALSE);
+ }
+ return (_B_TRUE);
+}
/*
* To list all the modules above a given network interface.
@@ -2250,7 +2287,13 @@ modlist(char *null, int64_t param)
struct str_list strlist;
int orig_arpid;
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ /*
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
+ */
+ if (!modcheck(name))
+ exit(1);
+
if (ip_domux2fd(&muxfd, &muxid_fd, &ipfd_lowstr, &arpfd_lowstr,
&orig_arpid) < 0) {
return (-1);
@@ -2354,8 +2397,8 @@ open_arp_on_udp(char *udp_dev_name)
* Return:
* -1 if operation fails, 0 otherwise.
*
- * Please see the big block comment above plumb_one_device()
- * for the logic of the PLINK/PUNLINK
+ * Please see the big block comment above ifplumb() for the logic of the
+ * PLINK/PUNLINK
*/
static int
ip_domux2fd(int *muxfd, int *muxid_fd, int *ipfd_lowstr, int *arpfd_lowstr,
@@ -2467,8 +2510,8 @@ ip_domux2fd(int *muxfd, int *muxid_fd, int *ipfd_lowstr, int *arpfd_lowstr,
* Return:
* -1 if operation fails, 0 otherwise.
*
- * Please see the big block comment above plumb_one_device()
- * for the logic of the PLINK/PUNLINK
+ * Please see the big block comment above ifplumb() for the logic of the
+ * PLINK/PUNLINK
*/
static int
ip_plink(int muxfd, int muxid_fd, int ipfd_lowstr, int arpfd_lowstr,
@@ -2530,7 +2573,12 @@ modop(char *arg, char op)
char *arg_str;
int orig_arpid;
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ /*
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
+ */
+ if (!modcheck(name))
+ exit(1);
/* Need to save the original string for -a option. */
if ((arg_str = malloc(strlen(arg) + 1)) == NULL) {
@@ -3067,13 +3115,14 @@ status(void)
static int
configinfo(char *null, int64_t param)
{
+ char *cp;
struct afswtch *p = afp;
uint64_t flags;
- char phydevname[LIFNAMSIZ];
+ char lifname[LIFNAMSIZ];
char if_usesrc_name[LIFNAMSIZ];
- char *cp;
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+
if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
Perror0_exit("status: SIOCGLIFFLAGS");
}
@@ -3084,13 +3133,13 @@ configinfo(char *null, int64_t param)
name, flags, p != NULL ? p->af_af : -1);
}
- /* remove LIF component */
- (void) strncpy(phydevname, name, sizeof (phydevname));
- cp = strchr(phydevname, ':');
- if (cp) {
- *cp = 0;
- }
- phydevname[sizeof (phydevname) - 1] = '\0';
+ /*
+ * Build the interface name to print (we can't directly use `name'
+ * because one cannot "plumb" ":0" interfaces).
+ */
+ (void) strlcpy(lifname, name, LIFNAMSIZ);
+ if ((cp = strchr(lifname, ':')) != NULL && atoi(cp + 1) == 0)
+ *cp = '\0';
/*
* if the interface is IPv4
@@ -3105,7 +3154,7 @@ configinfo(char *null, int64_t param)
if (v4compat)
flags &= ~IFF_IPV4;
- (void) printf("%s inet plumb", phydevname);
+ (void) printf("%s inet plumb", lifname);
} else if (flags & IFF_IPV6) {
/*
* else if the interface is IPv6
@@ -3117,7 +3166,7 @@ configinfo(char *null, int64_t param)
if (v4compat)
return (-1);
- (void) printf("%s inet6 plumb", phydevname);
+ (void) printf("%s inet6 plumb", lifname);
}
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
@@ -3131,8 +3180,8 @@ configinfo(char *null, int64_t param)
ioctl(s, SIOCGLIFMTU, (caddr_t)&lifr) >= 0)
(void) printf(" mtu %d", lifr.lifr_metric);
- /* don't print index when in compatibility mode */
- if (!v4compat) {
+ /* Index only applies to the zeroth interface */
+ if (lifnum(name) == 0) {
if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) >= 0)
(void) printf(" index %d", lifr.lifr_index);
}
@@ -3162,7 +3211,6 @@ configinfo(char *null, int64_t param)
}
(void) printf("\n");
-
return (0);
}
@@ -3398,15 +3446,11 @@ in_status(int force, uint64_t flags)
inet_ntoa(sin->sin_addr));
}
}
- /* If there is a groupname, print it for lun 0 alone */
+ /* If there is a groupname, print it for only the physical interface */
if (strchr(name, ':') == NULL) {
- (void) memset(lifr.lifr_groupname, 0,
- sizeof (lifr.lifr_groupname));
- if (ioctl(s, SIOCGLIFGROUPNAME, (caddr_t)&lifr) >= 0) {
- if (strlen(lifr.lifr_groupname) > 0) {
- (void) printf("\n\tgroupname %s",
- lifr.lifr_groupname);
- }
+ if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) >= 0 &&
+ lifr.lifr_groupname[0] != '\0') {
+ (void) printf("\n\tgroupname %s", lifr.lifr_groupname);
}
}
(void) putchar('\n');
@@ -3550,11 +3594,7 @@ in_configinfo(int force, uint64_t flags)
Perror0_exit("in_configinfo: SIOCGLIFADDR");
}
sin = (struct sockaddr_in *)&lifr.lifr_addr;
- if (strchr(name, ':') != NULL) {
- (void) printf(" addif %s ", inet_ntoa(sin->sin_addr));
- } else {
- (void) printf(" set %s ", inet_ntoa(sin->sin_addr));
- }
+ (void) printf(" set %s ", inet_ntoa(sin->sin_addr));
laddr = sin;
}
@@ -3614,8 +3654,8 @@ in_configinfo(int force, uint64_t flags)
}
}
- /* If there is a groupname, print it for only the physical interface */
- if (strchr(name, ':') == NULL) {
+ /* If there is a groupname, print it for only the zeroth interface */
+ if (lifnum(name) == 0) {
if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) >= 0 &&
lifr.lifr_groupname[0] != '\0') {
(void) printf(" group %s ", lifr.lifr_groupname);
@@ -3623,12 +3663,7 @@ in_configinfo(int force, uint64_t flags)
}
/* Print flags to configure */
- print_config_flags(flags);
-
- /* IFF_NOARP applies to AF_INET only */
- if (flags & IFF_NOARP) {
- (void) printf("-arp ");
- }
+ print_config_flags(AF_INET, flags);
}
static void
@@ -3657,17 +3692,9 @@ in6_configinfo(int force, uint64_t flags)
Perror0_exit("in6_configinfo: SIOCGLIFADDR");
}
sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
- if (strchr(name, ':') != NULL) {
- (void) printf(" addif %s/%d ",
- inet_ntop(AF_INET6, (void *)&sin6->sin6_addr,
- abuf, sizeof (abuf)),
- lifr.lifr_addrlen);
- } else {
- (void) printf(" set %s/%d ",
- inet_ntop(AF_INET6, (void *)&sin6->sin6_addr,
- abuf, sizeof (abuf)),
- lifr.lifr_addrlen);
- }
+ (void) printf(" set %s/%d ",
+ inet_ntop(AF_INET6, &sin6->sin6_addr, abuf, sizeof (abuf)),
+ lifr.lifr_addrlen);
laddr6 = sin6;
}
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
@@ -3720,8 +3747,8 @@ in6_configinfo(int force, uint64_t flags)
lifr.lifr_addrlen);
}
- /* If there is a groupname, print it for only the physical interface */
- if (strchr(name, ':') == NULL) {
+ /* If there is a groupname, print it for only the zeroth interface */
+ if (lifnum(name) == 0) {
if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) >= 0 &&
lifr.lifr_groupname[0] != '\0') {
(void) printf(" group %s ", lifr.lifr_groupname);
@@ -3729,12 +3756,7 @@ in6_configinfo(int force, uint64_t flags)
}
/* Print flags to configure */
- print_config_flags(flags);
-
- /* IFF_NONUD applies to AF_INET6 only */
- if (flags & IFF_NONUD) {
- (void) printf("-nud ");
- }
+ print_config_flags(AF_INET6, flags);
}
/*
@@ -3768,31 +3790,41 @@ in6_configinfo(int force, uint64_t flags)
* compatibility for other utilities like atmifconfig etc. In this case
* the utility must use SIOCSLIFMUXID.
*/
-static void
-plumb_one_device(int af)
+static int
+ifplumb(const char *linkname, const char *ifname, boolean_t genppa, int af)
{
int arp_muxid = -1, ip_muxid;
int mux_fd, ip_fd, arp_fd;
int retval;
- uint_t ppa;
char *udp_dev_name;
- char provider[DLPI_LINKNAME_MAX];
+ uint64_t flags;
+ uint_t dlpi_flags;
dlpi_handle_t dh_arp, dh_ip;
/*
- * We use DLPI_NOATTACH because the ip module will do the attach
- * itself for DLPI style-2 devices.
+ * Always dlpi_open() with DLPI_NOATTACH because the IP and ARP module
+ * will do the attach themselves for DLPI style-2 links.
*/
- retval = dlpi_open(name, &dh_ip, DLPI_NOATTACH);
- if (retval != DLPI_SUCCESS)
- Perrdlpi_exit("cannot open link", name, retval);
+ dlpi_flags = DLPI_NOATTACH;
- if ((retval = dlpi_parselink(name, provider, &ppa)) != DLPI_SUCCESS)
- Perrdlpi_exit("dlpi_parselink", name, retval);
+ /*
+ * If `linkname' is the special token IPMPSTUB, then this is a request
+ * to create an IPMP interface atop /dev/ipmpstub0. (We can't simply
+ * pass "ipmpstub0" as `linkname' since an admin *could* have a normal
+ * vanity-named link named "ipmpstub0" that they'd like to plumb.)
+ */
+ if (linkname == IPMPSTUB) {
+ linkname = "ipmpstub0";
+ dlpi_flags |= DLPI_DEVONLY;
+ }
+
+ retval = dlpi_open(linkname, &dh_ip, dlpi_flags);
+ if (retval != DLPI_SUCCESS)
+ Perrdlpi_exit("cannot open link", linkname, retval);
if (debug) {
- (void) printf("ifconfig: plumb_one_device: provider %s,"
- " ppa %u\n", provider, ppa);
+ (void) printf("ifconfig: ifplumb: link %s, ifname %s, "
+ "genppa %u\n", linkname, ifname, genppa);
}
ip_fd = dlpi_fd(dh_ip);
@@ -3812,29 +3844,106 @@ plumb_one_device(int af)
Perror2_exit("I_PUSH", ARP_MOD_NAME);
/*
- * Set IFF_IPV4/IFF_IPV6 flags.
- * At this point in time the kernel also allows an
- * override of the CANTCHANGE flags.
+ * Prepare to set IFF_IPV4/IFF_IPV6 flags as part of SIOCSLIFNAME.
+ * (At this point in time the kernel also allows an override of the
+ * IFF_CANTCHANGE flags.)
*/
lifr.lifr_name[0] = '\0';
if (ioctl(ip_fd, SIOCGLIFFLAGS, (char *)&lifr) == -1)
- Perror0_exit("plumb_one_device: SIOCGLIFFLAGS");
+ Perror0_exit("ifplumb: SIOCGLIFFLAGS");
- /* Set the name string and the IFF_IPV* flag */
if (af == AF_INET6) {
- lifr.lifr_flags |= IFF_IPV6;
- lifr.lifr_flags &= ~(IFF_BROADCAST | IFF_IPV4);
+ flags = lifr.lifr_flags | IFF_IPV6;
+ flags &= ~(IFF_BROADCAST | IFF_IPV4);
} else {
- lifr.lifr_flags |= IFF_IPV4;
- lifr.lifr_flags &= ~IFF_IPV6;
+ flags = lifr.lifr_flags | IFF_IPV4;
+ flags &= ~IFF_IPV6;
}
- /* record the device and module names as interface name */
- lifr.lifr_ppa = ppa;
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ /*
+ * Set the interface name. If we've been asked to generate the PPA,
+ * then find the lowest available PPA (only currently used for IPMP
+ * interfaces). Otherwise, use the interface name as-is.
+ */
+ if (genppa) {
+ uint_t ppa;
+
+ /*
+ * We'd like to just set lifr_ppa to UINT_MAX and have the
+ * kernel pick a PPA. Unfortunately, that would mishandle
+ * two cases:
+ *
+ * 1. If the PPA is available but the groupname is taken
+ * (e.g., the "ipmp2" IP interface name is available
+ * but the "ipmp2" groupname is taken) then the
+ * auto-assignment by the kernel will fail.
+ *
+ * 2. If we're creating (e.g.) an IPv6-only IPMP
+ * interface, and there's already an IPv4-only IPMP
+ * interface, the kernel will allow us to accidentally
+ * reuse the IPv4 IPMP interface name (since
+ * SIOCSLIFNAME uniqueness is per-interface-type).
+ * This will cause administrative confusion.
+ *
+ * Thus, we instead take a brute-force approach of checking
+ * whether the IPv4 or IPv6 name is already in-use before
+ * attempting the SIOCSLIFNAME. As per (1) above, the
+ * SIOCSLIFNAME may still fail, in which case we just proceed
+ * to the next one. If this approach becomes too slow, we
+ * can add a new SIOC* to handle this case in the kernel.
+ */
+ for (ppa = 0; ppa < UINT_MAX; ppa++) {
+ (void) snprintf(lifr.lifr_name, LIFNAMSIZ, "%s%u",
+ ifname, ppa);
+
+ if (ioctl(s4, SIOCGLIFFLAGS, &lifr) != -1 ||
+ errno != ENXIO)
+ continue;
+
+ if (ioctl(s6, SIOCGLIFFLAGS, &lifr) != -1 ||
+ errno != ENXIO)
+ continue;
+
+ lifr.lifr_ppa = ppa;
+ lifr.lifr_flags = flags;
+ retval = ioctl(ip_fd, SIOCSLIFNAME, &lifr);
+ if (retval != -1 || errno != EEXIST)
+ break;
+ }
+ } else {
+ ifspec_t ifsp;
+
+ /*
+ * The interface name could have come from the command-line;
+ * check it.
+ */
+ if (!ifparse_ifspec(ifname, &ifsp) || ifsp.ifsp_lunvalid)
+ Perror2_exit("invalid IP interface name", ifname);
+
+ /*
+ * Before we call SIOCSLIFNAME, ensure that the IPMP group
+ * interface for this address family exists. Otherwise, the
+ * kernel will kick the interface out of the group when we do
+ * the SIOCSLIFNAME.
+ *
+ * Example: suppose bge0 is plumbed for IPv4 and in group "a".
+ * If we're now plumbing bge0 for IPv6, but the IPMP group
+ * interface for "a" is not plumbed for IPv6, the SIOCSLIFNAME
+ * will kick bge0 out of group "a", which is undesired.
+ */
+ if (create_ipmp_peer(af, ifname) == -1) {
+ (void) fprintf(stderr, "ifconfig: warning: cannot "
+ "create %s IPMP group; %s will be removed from "
+ "group\n", af == AF_INET ? "IPv4" : "IPv6", ifname);
+ }
- /* set the interface name */
- if (ioctl(ip_fd, SIOCSLIFNAME, (char *)&lifr) == -1) {
+ lifr.lifr_ppa = ifsp.ifsp_ppa;
+ lifr.lifr_flags = flags;
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
+ retval = ioctl(ip_fd, SIOCSLIFNAME, &lifr);
+ }
+
+ if (retval == -1) {
if (errno != EEXIST)
Perror0_exit("SIOCSLIFNAME for ip");
/*
@@ -3847,15 +3956,15 @@ plumb_one_device(int af)
* called for EEXIST.
*/
Perror0("SIOCSLIFNAME for ip");
- return;
+ return (-1);
}
/* Get the full set of existing flags for this stream */
if (ioctl(ip_fd, SIOCGLIFFLAGS, (char *)&lifr) == -1)
- Perror0_exit("plumb_one_device: SIOCFLIFFLAGS");
+ Perror0_exit("ifplumb: SIOCGLIFFLAGS");
if (debug) {
- (void) printf("ifconfig: plumb_one_device: %s got flags:\n",
+ (void) printf("ifconfig: ifplumb: %s got flags:\n",
lifr.lifr_name);
print_flags(lifr.lifr_flags);
(void) putchar('\n');
@@ -3890,7 +3999,7 @@ plumb_one_device(int af)
if ((ip_muxid = ioctl(mux_fd, I_PLINK, ip_fd)) == -1)
Perror0_exit("I_PLINK for ip");
(void) close(mux_fd);
- return;
+ return (lifr.lifr_ppa);
}
/*
@@ -3901,15 +4010,11 @@ plumb_one_device(int af)
* only on the interface stream, not on the ARP stream.
*/
if (debug)
- (void) printf("ifconfig: plumb_one_device: ifname: %s\n", name);
+ (void) printf("ifconfig: ifplumb: interface %s\n", ifname);
- /*
- * We use DLPI_NOATTACH because the arp module will do the attach
- * itself for DLPI style-2 devices.
- */
- retval = dlpi_open(name, &dh_arp, DLPI_NOATTACH);
+ retval = dlpi_open(linkname, &dh_arp, dlpi_flags);
if (retval != DLPI_SUCCESS)
- Perrdlpi_exit("cannot open link", name, retval);
+ Perrdlpi_exit("cannot open link", linkname, retval);
arp_fd = dlpi_fd(dh_arp);
if (ioctl(arp_fd, I_PUSH, ARP_MOD_NAME) == -1)
@@ -3919,16 +4024,13 @@ plumb_one_device(int af)
* Tell ARP the name and unit number for this interface.
* Note that arp has no support for transparent ioctls.
*/
- if (strioctl(arp_fd, SIOCSLIFNAME, (char *)&lifr,
- sizeof (lifr)) == -1) {
+ if (strioctl(arp_fd, SIOCSLIFNAME, &lifr, sizeof (lifr)) == -1) {
if (errno != EEXIST)
Perror0_exit("SIOCSLIFNAME for arp");
Perror0("SIOCSLIFNAME for arp");
- dlpi_close(dh_arp);
- dlpi_close(dh_ip);
- (void) close(mux_fd);
- return;
+ goto out;
}
+
/*
* PLINK the IP and ARP streams so that ifconfig can exit
* without tearing down the stream.
@@ -3942,12 +4044,13 @@ plumb_one_device(int af)
if (debug)
(void) printf("arp muxid = %d\n", arp_muxid);
+out:
dlpi_close(dh_ip);
dlpi_close(dh_arp);
(void) close(mux_fd);
+ return (lifr.lifr_ppa);
}
-
/*
* If this is a physical interface then remove it.
* If it is a logical interface name use SIOCLIFREMOVEIF to
@@ -3965,6 +4068,7 @@ inetunplumb(char *arg, int64_t param)
uint64_t flags;
boolean_t changed_arp_muxid = _B_FALSE;
int save_errno;
+ boolean_t v6 = (afp->af_af == AF_INET6);
strptr = strchr(name, ':');
if (strptr != NULL || strcmp(name, LOOPBACK_IF) == 0) {
@@ -3986,7 +4090,7 @@ inetunplumb(char *arg, int64_t param)
* We used /dev/udp or udp6 to set up the mux. So we have to use
* the same now for PUNLINK also.
*/
- if (afp->af_af == AF_INET6)
+ if (v6)
udp_dev_name = UDP6_DEV_NAME;
else
udp_dev_name = UDP_DEV_NAME;
@@ -4002,6 +4106,50 @@ inetunplumb(char *arg, int64_t param)
Perror0_exit("unplumb: SIOCGLIFFLAGS");
}
flags = lifr.lifr_flags;
+
+ if (flags & IFF_IPMP) {
+ lifgroupinfo_t lifgr;
+ ifaddrlistx_t *ifaddrs, *ifaddrp;
+
+ /*
+ * The kernel will fail the I_PUNLINK if the group still has
+ * members, but check now to provide a better error message.
+ */
+ if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) == -1)
+ Perror0_exit("unplumb: SIOCGLIFGROUPNAME");
+
+ (void) strlcpy(lifgr.gi_grname, lifr.lifr_groupname,
+ LIFGRNAMSIZ);
+ if (ioctl(s, SIOCGLIFGROUPINFO, &lifgr) == -1)
+ Perror0_exit("unplumb: SIOCGLIFGROUPINFO");
+
+ if ((v6 && lifgr.gi_nv6 != 0) || (!v6 && lifgr.gi_nv4 != 0)) {
+ (void) fprintf(stderr, "ifconfig: %s: cannot unplumb:"
+ " IPMP group is not empty\n", name);
+ exit(1);
+ }
+
+ /*
+ * The kernel will fail the I_PUNLINK if the IPMP interface
+ * has administratively up addresses; bring 'em down.
+ */
+ if (ifaddrlistx(name, IFF_UP|IFF_DUPLICATE, 0, &ifaddrs) == -1)
+ Perror2_exit(name, "cannot get address list");
+
+ ifaddrp = ifaddrs;
+ for (; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (((ifaddrp->ia_flags & IFF_IPV6) && !v6) ||
+ (!(ifaddrp->ia_flags & IFF_IPV6) && v6))
+ continue;
+
+ if (!ifaddr_down(ifaddrp)) {
+ Perror2_exit(ifaddrp->ia_name,
+ "cannot bring down");
+ }
+ }
+ ifaddrlistx_free(ifaddrs);
+ }
+
if (ioctl(muxid_fd, SIOCGLIFMUXID, (caddr_t)&lifr) < 0) {
Perror0_exit("unplumb: SIOCGLIFMUXID");
}
@@ -4098,12 +4246,6 @@ inetplumb(char *arg, int64_t param)
Perror2_exit("plumb: SIOCLIFADDIF", name);
}
}
- /*
- * IP can create the new logical interface on a different
- * physical interface in the same IPMP group. Take the new
- * interface into account for further operations.
- */
- (void) strncpy(name, lifr.lifr_name, sizeof (name));
return (0);
}
@@ -4131,10 +4273,229 @@ inetplumb(char *arg, int64_t param)
if (debug)
(void) printf("inetplumb: %s af %d\n", name, afp->af_af);
- plumb_one_device(afp->af_af);
+ (void) ifplumb(name, name, _B_FALSE, afp->af_af);
+ return (0);
+}
+
+/*
+ * Handle the `ipmp' keyword (e.g. "ifconfig ipmp0 ipmp").  If the global
+ * interface `name' contains a ':' it is treated exactly like `plumb' and
+ * handed to inetplumb(); otherwise an IPMP interface called `name' is
+ * created via create_ipmp(), with `name' passed as both the group name and
+ * the interface name, for the current address family (afp->af_af).
+ *
+ * Returns 0 on success.  Does not return on failure: if the chosen helper
+ * returns -1, the process exits with status 1 (see below for why).
+ */
+/* ARGSUSED */
+static int
+inetipmp(char *arg, int64_t param)
+{
+ int retval;
+
+ /*
+ * Treat e.g. "ifconfig ipmp0:2 ipmp" as "ifconfig ipmp0:2 plumb".
+ * Otherwise, try to create the requested IPMP interface.
+ */
+ if (strchr(name, ':') != NULL)
+ retval = inetplumb(arg, param);
+ else
+ retval = create_ipmp(name, afp->af_af, name, _B_FALSE);
+
+ /*
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
+ */
+ if (retval == -1)
+ exit(1);
return (0);
}
+/*
+ * Create an IPMP group `grname' with address family `af'. If `ifname' is
+ * non-NULL, it specifies the interface name to use. Otherwise, use the name
+ * ipmpN, where N corresponds to the lowest available integer. If `implicit'
+ * is set, then the group is being created as a side-effect of placing an
+ * underlying interface in a group. Also start in.mpathd if necessary.
+ *
+ * Returns 0 on success and -1 on failure; on failure a diagnostic has
+ * already been printed with Perror2() or Perror0().  As a side effect the
+ * global `lifr' is overwritten: lifr_name is set to the name of the new IPMP
+ * interface, and the flags/groupname members are used as ioctl scratch space.
+ *
+ * NOTE(review): if SIOCSLIFGROUPNAME fails, this function returns -1 without
+ * undoing the ifplumb() performed above -- confirm that is intended.
+ */
+static int
+create_ipmp(const char *grname, int af, const char *ifname, boolean_t implicit)
+{
+ int ppa;
+ static int ipmp_daemon_started; /* nonzero once start_ipmp_daemon() ran */
+
+ if (debug) {
+ (void) printf("create_ipmp: ifname %s grname %s af %d\n",
+ ifname != NULL ? ifname : "NULL", grname, af);
+ }
+
+ /*
+ * Plumb the IPMP stub device, either under the caller-supplied name
+ * or under an "ipmp"-prefixed name; ifplumb() returns -1 on failure,
+ * and otherwise the value used below as the N in "ipmpN".
+ */
+ if (ifname != NULL)
+ ppa = ifplumb(IPMPSTUB, ifname, _B_FALSE, af);
+ else
+ ppa = ifplumb(IPMPSTUB, "ipmp", _B_TRUE, af);
+
+ if (ppa == -1) {
+ Perror2(grname, "cannot create IPMP interface");
+ return (-1);
+ }
+
+ /* Record the new interface's name in `lifr' for the ioctls below. */
+ if (ifname != NULL)
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
+ else
+ (void) snprintf(lifr.lifr_name, LIFNAMSIZ, "ipmp%d", ppa);
+
+ /*
+ * To preserve backward-compatibility, always bring up the link-local
+ * address for implicitly-created IPv6 IPMP interfaces.  This is
+ * best-effort: failures of either flags ioctl are ignored.
+ */
+ if (implicit && af == AF_INET6) {
+ if (ioctl(s6, SIOCGLIFFLAGS, &lifr) == 0) {
+ lifr.lifr_flags |= IFF_UP;
+ (void) ioctl(s6, SIOCSLIFFLAGS, &lifr);
+ }
+ }
+
+ /*
+ * If the caller requested a different group name, issue a
+ * SIOCSLIFGROUPNAME on the new IPMP interface.
+ */
+ if (strcmp(lifr.lifr_name, grname) != 0) {
+ (void) strlcpy(lifr.lifr_groupname, grname, LIFGRNAMSIZ);
+ if (ioctl(s, SIOCSLIFGROUPNAME, &lifr) == -1) {
+ Perror0("SIOCSLIFGROUPNAME");
+ return (-1);
+ }
+ }
+
+ /*
+ * If we haven't done so yet, ensure in.mpathd is started.  The static
+ * counter limits this to the first successful call in this process.
+ */
+ if (ipmp_daemon_started++ == 0)
+ start_ipmp_daemon();
+
+ return (0);
+}
+
+/*
+ * Check if `ifname' is plumbed and in an IPMP group on its "other" address
+ * family. If so, create a matching IPMP group for address family `af'.
+ *
+ * `af' must be AF_INET or AF_INET6 (asserted).  Returns 0 when there is
+ * nothing to do -- including when either group ioctl on the other family's
+ * socket fails -- and otherwise returns whatever create_ipmp() returns
+ * (0 on success, -1 on failure).  Overwrites the global `lifr'.
+ */
+static int
+create_ipmp_peer(int af, const char *ifname)
+{
+ int fd;
+ lifgroupinfo_t lifgr;
+
+ assert(af == AF_INET || af == AF_INET6);
+
+ /*
+ * Get the socket for the "other" address family.
+ */
+ fd = (af == AF_INET) ? s6 : s4;
+
+ /* Look up the group name of `ifname' on the other family... */
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) != 0)
+ return (0);
+
+ /* ...and then the details of that group. */
+ (void) strlcpy(lifgr.gi_grname, lifr.lifr_groupname, LIFGRNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPINFO, &lifgr) != 0)
+ return (0);
+
+ /*
+ * If `ifname' *is* the IPMP group interface, or if the relevant
+ * address family is already configured, then there's nothing to do.
+ */
+ if (strcmp(lifgr.gi_grifname, ifname) == 0 ||
+ (af == AF_INET && lifgr.gi_v4) || (af == AF_INET6 && lifgr.gi_v6))
+ return (0);
+
+ /* Reuse the existing group and interface names for the new family. */
+ return (create_ipmp(lifgr.gi_grname, af, lifgr.gi_grifname, _B_TRUE));
+}
+
+/*
+ * Start in.mpathd if it's not already running.
+ *
+ * The daemon is first pinged through a temporary libipmp handle.  If the
+ * handle cannot be created, a message is printed and nothing is started.  If
+ * the ping succeeds, nothing needs to be done.  For any other ping result
+ * (IPMP_ENOMPATHD silently, anything else after a warning) a child is forked
+ * to exec MPATHD_PATH.  The parent does not wait for the child.  A fork()
+ * failure is fatal (Perror0_exit()).
+ */
+static void
+start_ipmp_daemon(void)
+{
+ int retval;
+ ipmp_handle_t ipmp_handle;
+
+ /*
+ * Ping in.mpathd to see if it's running already.
+ */
+ if ((retval = ipmp_open(&ipmp_handle)) != IPMP_SUCCESS) {
+ (void) fprintf(stderr, "ifconfig: cannot create IPMP handle: "
+ "%s\n", ipmp_errmsg(retval));
+ return;
+ }
+
+ /* The handle is only needed for the ping itself. */
+ retval = ipmp_ping_daemon(ipmp_handle);
+ ipmp_close(ipmp_handle);
+
+ switch (retval) {
+ case IPMP_ENOMPATHD:
+ break;
+ case IPMP_SUCCESS:
+ return;
+ default:
+ (void) fprintf(stderr, "ifconfig: cannot ping in.mpathd: %s\n",
+ ipmp_errmsg(retval));
+ break; /* warn, but still try to start the daemon below */
+ }
+
+ /*
+ * Start in.mpathd. Note that in.mpathd will handle multiple
+ * incarnations (ipmp_ping_daemon() is just an optimization) so we
+ * don't need to worry about racing with another ifconfig process.
+ */
+ switch (fork()) {
+ case -1:
+ Perror0_exit("start_ipmp_daemon: fork");
+ /* NOTREACHED */
+ case 0:
+ /* Child: only reaches _exit() if the exec itself failed. */
+ (void) execl(MPATHD_PATH, MPATHD_PATH, NULL);
+ _exit(1);
+ /* NOTREACHED */
+ default:
+ break;
+ }
+}
+
+/*
+ * Bring the address named by `ifaddrp' up or down. Doesn't trust any mutable
+ * values in ia_flags since they may be stale.
+ *
+ * `up' selects the direction: _B_TRUE sets IFF_UP, _B_FALSE clears it.  The
+ * current flags are re-read with SIOCGLIFFLAGS using a local lifreq; only the
+ * IFF_IPV4 bit of ia_flags is consulted, to choose between the s4 and s6
+ * sockets.  Returns _B_TRUE if every ioctl issued succeeded and _B_FALSE
+ * otherwise.
+ */
+static boolean_t
+ifaddr_op(ifaddrlistx_t *ifaddrp, boolean_t up)
+{
+ struct lifreq lifrl; /* Local lifreq struct */
+ int fd = (ifaddrp->ia_flags & IFF_IPV4) ? s4 : s6;
+
+ (void) memset(&lifrl, 0, sizeof (lifrl));
+ (void) strlcpy(lifrl.lifr_name, ifaddrp->ia_name, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFFLAGS, &lifrl) == -1)
+ return (_B_FALSE);
+
+ if (up) {
+ lifrl.lifr_flags |= IFF_UP;
+ } else {
+ /*
+ * If we've been asked to bring down an IFF_DUPLICATE address,
+ * then get the address and set it. This will cause IP to
+ * clear IFF_DUPLICATE and stop the automatic recovery timer.
+ * Note that SIOCSLIFFLAGS is not issued at all in this case.
+ */
+ if (lifrl.lifr_flags & IFF_DUPLICATE) {
+ return (ioctl(fd, SIOCGLIFADDR, &lifrl) != -1 &&
+ ioctl(fd, SIOCSLIFADDR, &lifrl) != -1);
+ }
+ lifrl.lifr_flags &= ~IFF_UP;
+ }
+ return (ioctl(fd, SIOCSLIFFLAGS, &lifrl) == 0);
+}
+
+/*
+ * Bring the address named by `ifaddrp' up.  Thin wrapper around ifaddr_op();
+ * returns its result.
+ */
+static boolean_t
+ifaddr_up(ifaddrlistx_t *ifaddrp)
+{
+ return (ifaddr_op(ifaddrp, _B_TRUE));
+}
+
+/*
+ * Bring the address named by `ifaddrp' down.  Thin wrapper around
+ * ifaddr_op(); returns its result.
+ */
+static boolean_t
+ifaddr_down(ifaddrlistx_t *ifaddrp)
+{
+ return (ifaddr_op(ifaddrp, _B_FALSE));
+}
+
void
Perror0(const char *cmd)
{
@@ -4404,14 +4765,14 @@ print_flags(uint64_t flags)
}
static void
-print_config_flags(uint64_t flags)
+print_config_flags(int af, uint64_t flags)
{
- int cnt, i;
+ if_config_cmd_t *cmdp;
- cnt = sizeof (if_config_cmd_tbl) / sizeof (if_config_cmd_t);
- for (i = 0; i < cnt; i++) {
- if (flags & if_config_cmd_tbl[i].iff_flag) {
- (void) printf("%s ", if_config_cmd_tbl[i].iff_name);
+ for (cmdp = if_config_cmd_tbl; cmdp->iff_flag != 0; cmdp++) {
+ if ((flags & cmdp->iff_flag) &&
+ (cmdp->iff_af == AF_UNSPEC || cmdp->iff_af == af)) {
+ (void) printf("%s ", cmdp->iff_name);
}
}
}
@@ -4454,7 +4815,18 @@ in_getmask(struct sockaddr_in *saddr, boolean_t addr_set)
}
static int
-strioctl(int s, int cmd, char *buf, int buflen)
+lifnum(const char *ifname)
+{
+ const char *cp;
+
+ if ((cp = strchr(ifname, ':')) == NULL)
+ return (0);
+ else
+ return (atoi(cp + 1));
+}
+
+static int
+strioctl(int s, int cmd, void *buf, int buflen)
{
struct strioctl ioc;
@@ -4681,6 +5053,7 @@ usage(void)
"\t[ modlist ]\n"
"\t[ modinsert <module_name@position> ]\n"
"\t[ modremove <module_name@position> ]\n"
+ "\t[ ipmp ]\n"
"\t[ group <groupname>] | [ group \"\"]\n"
"\t[ deprecated | -deprecated ]\n"
"\t[ standby | -standby ]\n"
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
index 0ac600001f..f11f4d0a94 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -11,8 +11,6 @@
#ifndef _IFCONFIG_H
#define _IFCONFIG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,7 +37,6 @@ extern void Perrdlpi_exit(const char *, const char *, int);
extern int doifrevarp(const char *, struct sockaddr_in *);
-extern int dlpi_set_address(const char *, uchar_t *, uint_t);
extern void dlpi_print_address(const char *);
#ifdef __cplusplus
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c
index 725c8b24c3..aba4794942 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "ifconfig.h"
#include <sys/types.h>
@@ -88,6 +86,7 @@ doifrevarp(const char *linkname, struct sockaddr_in *laddr)
/* don't try to revarp if we know it won't work */
if ((lifr.lifr_flags & IFF_LOOPBACK) ||
(lifr.lifr_flags & IFF_NOARP) ||
+ (lifr.lifr_flags & IFF_IPMP) ||
(lifr.lifr_flags & IFF_POINTOPOINT)) {
(void) close(s);
return (0);
@@ -326,28 +325,6 @@ rarp_recv(dlpi_handle_t dh, struct arphdr *ans, size_t msglen,
return (DLPI_ETIMEDOUT);
}
-int
-dlpi_set_address(const char *linkname, uchar_t *physaddr, uint_t physaddrlen)
-{
- int retval;
- dlpi_handle_t dh;
-
- if ((retval = dlpi_open(linkname, &dh, 0)) != DLPI_SUCCESS) {
- Perrdlpi("dlpi_open failed", linkname, retval);
- return (-1);
- }
-
- if ((retval = dlpi_set_physaddr(dh, DL_CURR_PHYS_ADDR, physaddr,
- physaddrlen)) != DLPI_SUCCESS) {
- Perrdlpi("dlpi_set_physaddr failed", linkname, retval);
- dlpi_close(dh);
- return (-1);
- }
-
- dlpi_close(dh);
- return (0);
-}
-
void
dlpi_print_address(const char *linkname)
{
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h
index 900b5841ed..5cca3ecb2e 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983, 1988, 1993
@@ -414,16 +414,9 @@ struct interface {
(IS_REMOTE|IS_PASSIVE))
/*
- * Is an IP interface up? Because of the way IPMP uses deprecated
- * interfaces, we need to check more than the IFF_UP and IFF_RUNNING
- * interface flags here. Basically, we do not want to use IFF_DEPRECATED
- * interfaces unless they are also IFF_STANDBY and not IFF_INACTIVE.
+ * Is an IP interface up?
*/
-#define IFF_GOOD (IFF_UP|IFF_RUNNING)
-#define IS_IFF_UP(f) \
- ((((f) & (IFF_GOOD|IFF_DEPRECATED)) == IFF_GOOD) || \
- (((f) & (IFF_GOOD|IFF_INACTIVE|IFF_STANDBY)) == \
- (IFF_GOOD|IFF_STANDBY)))
+#define IS_IFF_UP(f) (((f) & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
/*
* This defines interfaces that we should not use for advertising or
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c
index 79ae02e703..a3a26ac2cb 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983, 1988, 1993
@@ -36,8 +36,6 @@
* $FreeBSD: src/sbin/routed/trace.c,v 1.6 2000/08/11 08:24:38 sheldonh Exp $
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "pathnames.h"
#include <signal.h>
@@ -566,6 +564,7 @@ static struct bits if_bits[] = {
{ IFF_TEMPORARY, 0, "TEMPORARY" },
{ IFF_FIXEDMTU, 0, "FIXEDMTU" },
{ IFF_VIRTUAL, 0, "VIRTUAL"},
+ { IFF_IPMP, 0, "IPMP"},
{ 0, 0, NULL}
};
@@ -898,8 +897,8 @@ trace_upslot(struct rt_entry *rt,
print_rts(rts, 0, 0,
rts->rts_gate != new->rts_gate,
rts->rts_tag != new->rts_tag,
- rts != rt->rt_spares || AGE_RT(rt->rt_state,
- rts->rts_origin, rt->rt_ifp));
+ rts != rt->rt_spares ||
+ AGE_RT(rt->rt_state, rts->rts_origin, rt->rt_ifp));
(void) fprintf(ftrace, "\n %19s%-16s ", "",
(new->rts_gate != rts->rts_gate ?
@@ -1173,10 +1172,9 @@ trace_rip(const char *dir1, const char *dir2,
if (NA->a_type == RIP_AUTH_PW &&
n == msg->rip_nets) {
(void) fprintf(ftrace, "\tPassword"
- " Authentication:"
- " \"%s\"\n",
+ " Authentication: \"%s\"\n",
qstring(NA->au.au_pw,
- RIP_AUTH_PW_LEN));
+ RIP_AUTH_PW_LEN));
continue;
}
@@ -1186,13 +1184,12 @@ trace_rip(const char *dir1, const char *dir2,
"\tMD5 Auth"
" pkt_len=%d KeyID=%u"
" auth_len=%d"
- " seqno=%#lx"
- " rsvd=%#x,%#x\n",
+ " seqno=%#x"
+ " rsvd=%#hx,%#hx\n",
ntohs(NA->au.a_md5.md5_pkt_len),
NA->au.a_md5.md5_keyid,
NA->au.a_md5.md5_auth_len,
- (unsigned long)ntohl(NA->au.a_md5.
- md5_seqno),
+ ntohl(NA->au.a_md5.md5_seqno),
ntohs(NA->au.a_md5.rsvd[0]),
ntohs(NA->au.a_md5.rsvd[1]));
continue;
@@ -1217,14 +1214,12 @@ trace_rip(const char *dir1, const char *dir2,
inet_ntoa(tmp_mask));
} else if (msg->rip_vers == RIPv1) {
(void) fprintf(ftrace, "\t%-18s ",
- addrname(n->n_dst,
- ntohl(n->n_mask),
- n->n_mask == 0 ? 2 : 1));
+ addrname(n->n_dst, ntohl(n->n_mask),
+ n->n_mask == 0 ? 2 : 1));
} else {
(void) fprintf(ftrace, "\t%-18s ",
- addrname(n->n_dst,
- ntohl(n->n_mask),
- n->n_mask == 0 ? 2 : 0));
+ addrname(n->n_dst, ntohl(n->n_mask),
+ n->n_mask == 0 ? 2 : 0));
}
(void) fprintf(ftrace, "metric=%-2lu ",
(unsigned long)ntohl(n->n_metric));
@@ -1242,8 +1237,8 @@ trace_rip(const char *dir1, const char *dir2,
break;
case RIPCMD_TRACEON:
- (void) fprintf(ftrace, "\tfile=\"%.*s\"\n", size-4,
- msg->rip_tracefile);
+ (void) fprintf(ftrace, "\tfile=\"%.*s\"\n", size - 4,
+ msg->rip_tracefile);
break;
case RIPCMD_TRACEOFF:
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile
new file mode 100644
index 0000000000..a256cf5f49
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG = ipmpstat
+ROOTFS_PROG = $(PROG)
+ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%)
+
+include $(SRC)/cmd/Makefile.cmd
+
+C99MODE = $(C99_ENABLE)
+LDLIBS += -lipmp -lsocket -lsysevent -lnvpair
+XGETFLAGS += -a -x $(PROG).xcl
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTSBINPROG) $(ROOTUSRSBINLINKS)
+
+clean:
+
+lint: lint_PROG
+
+$(ROOTUSRSBINLINKS):
+ -$(RM) $@; $(SYMLINK) ../../sbin/$(@F) $@
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c
new file mode 100644
index 0000000000..4620c34a24
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c
@@ -0,0 +1,1498 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <alloca.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <ipmp_admin.h>
+#include <ipmp_query.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <libsysevent.h>
+#include <locale.h>
+#include <netdb.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/ipmp.h>
+#include <sys/sysmacros.h>
+#include <sys/termios.h>
+#include <sys/types.h>
+
+/*
+ * ipmpstat -- display IPMP subsystem status.
+ *
+ * This utility makes extensive use of libipmp and IPMP sysevents to gather
+ * and pretty-print the status of the IPMP subsystem. All output formats
+ * except for -p (probe) use libipmp to create a point-in-time snapshot of the
+ * IPMP subsystem (unless the test-special -L flag is used), and then output
+ * the contents of that snapshot in a user-specified manner. Because the
+ * output format and requested fields aren't known until run-time, three sets
+ * of function pointers and two core data structures are used. Specifically:
+ *
+ * * The ipmpstat_walker_t function pointers (walk_*) iterate through
+ * all instances of a given IPMP object (group, interface, or address).
+ * At most one ipmpstat_walker_t is used per ipmpstat invocation.
+ * Since target information is included with the interface information,
+ * both -i and -t use the interface walker (walk_if()).
+ *
+ * * The ipmpstat_sfunc_t function pointers (sfunc_*) obtain a given
+ * value for a given IPMP object. Each ipmpstat_sfunc_t is passed a
+ * buffer to write its result into, the buffer's size, and an
+ * ipmpstat_sfunc_arg_t state structure. The state structure consists
+ * of a pointer to the IPMP object to obtain information from
+ * (sa_data), and an open libipmp handle (sa_ih) which can be used to
+ * do additional libipmp queries, if necessary (e.g., because the
+ * object does not have all of the needed information).
+ *
+ * * The ipmpstat_field_t structure provides the list of supported fields
+ * for a given output format, along with output formatting information
+ * (e.g., field width), and a pointer to an ipmpstat_sfunc_t function
+ * that can obtain the value for a given IPMP object. For a given
+ * ipmpstat output format, there's a corresponding array of
+ * ipmpstat_field_t structures. Thus, one ipmpstat_field_t array is
+ * used per ipmpstat invocation.
+ *
+ * * The ipmpstat_ofmt_t provides an ordered list of the requested
+ * ipmpstat_field_t's (e.g., via -o) for a given ipmpstat invocation.
+ * It is built at runtime from the command-line arguments. This
+ * structure (and a given IPMP object) is used by ofmt_output() to
+ * output a single line of information about that IPMP object.
+ *
+ * * The ipmpstat_cbfunc_t function pointers (*_cbfunc) are called back
+ * by the walkers. They are used both internally to implement nested
+ * walks, and by the ipmpstat output logic to provide the glue between
+ * the IPMP object walkers and the ofmt_output() logic. Usually, a
+ * single line is output for each IPMP object, and thus ofmt_output()
+ * can be directly invoked (see info_output_cbfunc()). However, if
+ * multiple lines need to be output, then a more complex cbfunc is
+ * needed (see targinfo_output_cbfunc()). At most one cbfunc is used
+ * per ipmpstat invocation.
+ */
+
+/*
+ * Data type used by the sfunc callbacks to obtain the requested information
+ * from the agreed-upon object.
+ */
+typedef struct ipmpstat_sfunc_arg {
+ ipmp_handle_t sa_ih; /* open libipmp handle for further queries */
+ void *sa_data; /* IPMP object to obtain information from */
+} ipmpstat_sfunc_arg_t;
+
+typedef void ipmpstat_sfunc_t(ipmpstat_sfunc_arg_t *, char *, uint_t);
+
+/*
+ * Data type that describes how to output a field; used by ofmt_output*().
+ */
+typedef struct ipmpstat_field {
+ const char *f_name; /* field name */
+ uint_t f_width; /* output width */
+ ipmpstat_sfunc_t *f_sfunc; /* value->string function */
+} ipmpstat_field_t;
+
+/*
+ * Data type that specifies the output field order; used by ofmt_output*()
+ */
+typedef struct ipmpstat_ofmt {
+ const ipmpstat_field_t *o_field; /* current field info */
+ struct ipmpstat_ofmt *o_next; /* next field */
+} ipmpstat_ofmt_t;
+
+/*
+ * Function pointers used to iterate through IPMP objects.
+ */
+typedef void ipmpstat_cbfunc_t(ipmp_handle_t, void *, void *);
+typedef void ipmpstat_walker_t(ipmp_handle_t, ipmpstat_cbfunc_t *, void *);
+
+/*
+ * Data type used to implement nested walks.
+ */
+typedef struct ipmpstat_walkdata {
+ ipmpstat_cbfunc_t *iw_func; /* caller-specified callback */
+ void *iw_funcarg; /* caller-specified arg */
+} ipmpstat_walkdata_t;
+
+/*
+ * Data type used by enum2str() to map an enumerated value to a string.
+ */
+typedef struct ipmpstat_enum {
+ const char *e_name; /* string */
+ int e_val; /* value */
+} ipmpstat_enum_t;
+
+/*
+ * Data type used to pass state between probe_output() and probe_event().
+ */
+typedef struct ipmpstat_probe_state {
+ ipmp_handle_t ps_ih; /* open IPMP handle */
+ ipmpstat_ofmt_t *ps_ofmt; /* requested ofmt string */
+} ipmpstat_probe_state_t;
+
+/*
+ * Options that modify the output mode; more than one may be lit.
+ */
+typedef enum {
+ IPMPSTAT_OPT_NUMERIC = 0x1,
+ IPMPSTAT_OPT_PARSABLE = 0x2
+} ipmpstat_opt_t;
+
+/*
+ * Indices for the FLAGS field of the `-i' output format.
+ */
+enum {
+ IPMPSTAT_IFLAG_INDEX, IPMPSTAT_SFLAG_INDEX, IPMPSTAT_M4FLAG_INDEX,
+ IPMPSTAT_BFLAG_INDEX, IPMPSTAT_M6FLAG_INDEX, IPMPSTAT_DFLAG_INDEX,
+ IPMPSTAT_HFLAG_INDEX, IPMPSTAT_NUM_FLAGS
+};
+
+#define IPMPSTAT_NCOL 80
+#define NS2FLOATMS(ns) ((float)(ns) / (NANOSEC / MILLISEC))
+#define MS2FLOATSEC(ms) ((float)(ms) / 1000)
+
+static const char *progname;
+static hrtime_t probe_output_start;
+static struct winsize winsize;
+static ipmpstat_opt_t opt;
+static ipmpstat_enum_t addr_state[], group_state[], if_state[], if_link[];
+static ipmpstat_enum_t if_probe[], targ_mode[];
+static ipmpstat_field_t addr_fields[], group_fields[], if_fields[];
+static ipmpstat_field_t probe_fields[], targ_fields[];
+static ipmpstat_cbfunc_t walk_addr_cbfunc, walk_if_cbfunc;
+static ipmpstat_cbfunc_t info_output_cbfunc, targinfo_output_cbfunc;
+static ipmpstat_walker_t walk_addr, walk_if, walk_group;
+
+static int probe_event(sysevent_t *, void *);
+static void probe_output(ipmp_handle_t, ipmpstat_ofmt_t *);
+static ipmpstat_field_t *field_find(ipmpstat_field_t *, const char *);
+static ipmpstat_ofmt_t *ofmt_create(const char *, ipmpstat_field_t []);
+static void ofmt_output(const ipmpstat_ofmt_t *, ipmp_handle_t, void *);
+static void ofmt_destroy(ipmpstat_ofmt_t *);
+static void enum2str(const ipmpstat_enum_t *, int, char *, uint_t);
+static void sockaddr2str(const struct sockaddr_storage *, char *, uint_t);
+static void sighandler(int);
+static void usage(void);
+static void die(const char *, ...);
+static void die_ipmperr(int, const char *, ...);
+static void warn(const char *, ...);
+static void warn_ipmperr(int, const char *, ...);
+
+/*
+ * Parse the command line, open a libipmp handle, build the output format
+ * list and produce the requested report.
+ *
+ * Exactly one output format option (-a, -g, -i, -p or -t) must be given and
+ * no operands are accepted; otherwise usage() is called.  -n and -P set
+ * IPMPSTAT_OPT_NUMERIC and IPMPSTAT_OPT_PARSABLE respectively, -o supplies
+ * the output field list, and -L switches from a snapshot to live state.
+ * Parsable mode requires an explicit -o list other than "all".
+ *
+ * Returns EXIT_SUCCESS; fatal errors are reported through die() or
+ * die_ipmperr().
+ */
+int
+main(int argc, char **argv)
+{
+ int c;
+ int err;
+ const char *ofields = NULL;
+ ipmp_handle_t ih;
+ ipmp_qcontext_t qcontext = IPMP_QCONTEXT_SNAP;
+ ipmpstat_ofmt_t *ofmt;
+ ipmpstat_field_t *fields = NULL;
+ ipmpstat_cbfunc_t *cbfunc;
+ ipmpstat_walker_t *walker;
+
+ /* Use the basename of argv[0] as the program name. */
+ if ((progname = strrchr(argv[0], '/')) == NULL)
+ progname = argv[0];
+ else
+ progname++;
+
+ (void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
+
+ while ((c = getopt(argc, argv, "nLPo:agipt")) != EOF) {
+ /* `fields' is non-NULL once an output format has been chosen. */
+ if (fields != NULL && strchr("agipt", c) != NULL)
+ die("only one output format may be specified\n");
+
+ switch (c) {
+ case 'n':
+ opt |= IPMPSTAT_OPT_NUMERIC;
+ break;
+ case 'L':
+ /* Undocumented option: for testing use ONLY */
+ qcontext = IPMP_QCONTEXT_LIVE;
+ break;
+ case 'P':
+ opt |= IPMPSTAT_OPT_PARSABLE;
+ break;
+ case 'o':
+ ofields = optarg;
+ break;
+ case 'a':
+ walker = walk_addr;
+ cbfunc = info_output_cbfunc;
+ fields = addr_fields;
+ break;
+ case 'g':
+ walker = walk_group;
+ cbfunc = info_output_cbfunc;
+ fields = group_fields;
+ break;
+ case 'i':
+ walker = walk_if;
+ cbfunc = info_output_cbfunc;
+ fields = if_fields;
+ break;
+ case 'p':
+ /* No walker/cbfunc: probe_output() is used instead. */
+ fields = probe_fields;
+ break;
+ case 't':
+ walker = walk_if;
+ cbfunc = targinfo_output_cbfunc;
+ fields = targ_fields;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ if (argc > optind || fields == NULL)
+ usage();
+
+ if (opt & IPMPSTAT_OPT_PARSABLE) {
+ if (ofields == NULL) {
+ die("output field list (-o) required in parsable "
+ "output mode\n");
+ } else if (strcasecmp(ofields, "all") == 0) {
+ die("\"all\" not allowed in parsable output mode\n");
+ }
+ }
+
+ /*
+ * Obtain the window size and monitor changes to the size. This data
+ * is used to redisplay the output headers when necessary.
+ */
+ (void) sigset(SIGWINCH, sighandler);
+ sighandler(SIGWINCH);
+
+ if ((err = ipmp_open(&ih)) != IPMP_SUCCESS)
+ die_ipmperr(err, "cannot create IPMP handle");
+
+ if (ipmp_ping_daemon(ih) != IPMP_SUCCESS)
+ die("cannot contact in.mpathd(1M) -- is IPMP in use?\n");
+
+ /*
+ * Create the ofmt linked list that will eventually be passed
+ * to ofmt_output() to output the fields.
+ */
+ ofmt = ofmt_create(ofields, fields);
+
+ /*
+ * If we've been asked to display probes, then call the probe output
+ * function. Otherwise, snapshot IPMP state (or use live state) and
+ * invoke the specified walker with the specified callback function.
+ */
+ if (fields == probe_fields) {
+ probe_output(ih, ofmt);
+ } else {
+ if ((err = ipmp_setqcontext(ih, qcontext)) != IPMP_SUCCESS) {
+ if (qcontext == IPMP_QCONTEXT_SNAP)
+ die_ipmperr(err, "cannot snapshot IPMP state");
+ else
+ die_ipmperr(err, "cannot use live IPMP state");
+ }
+ (*walker)(ih, cbfunc, ofmt);
+ }
+
+ ofmt_destroy(ofmt);
+ ipmp_close(ih);
+
+ return (EXIT_SUCCESS);
+}
+
+/*
+ * Walks all IPMP groups on the system and invokes `cbfunc' on each, passing
+ * it `ih', the ipmp_groupinfo_t pointer, and `arg'.
+ *
+ * Failure to obtain the group list is reported through die_ipmperr().  A
+ * group whose information cannot be obtained is reported with a warning and
+ * skipped.  Each ipmp_groupinfo_t is freed as soon as `cbfunc' returns, so
+ * the callback must not keep the pointer.
+ */
+static void
+walk_group(ipmp_handle_t ih, ipmpstat_cbfunc_t *cbfunc, void *arg)
+{
+ int err;
+ uint_t i;
+ ipmp_groupinfo_t *grinfop;
+ ipmp_grouplist_t *grlistp;
+
+ if ((err = ipmp_getgrouplist(ih, &grlistp)) != IPMP_SUCCESS)
+ die_ipmperr(err, "cannot get IPMP group list");
+
+ for (i = 0; i < grlistp->gl_ngroup; i++) {
+ err = ipmp_getgroupinfo(ih, grlistp->gl_groups[i], &grinfop);
+ if (err != IPMP_SUCCESS) {
+ warn_ipmperr(err, "cannot get info for group `%s'",
+ grlistp->gl_groups[i]);
+ continue;
+ }
+ (*cbfunc)(ih, grinfop, arg);
+ ipmp_freegroupinfo(grinfop);
+ }
+
+ ipmp_freegrouplist(grlistp);
+}
+
+/*
+ * Walks all IPMP interfaces on the system and invokes `cbfunc' on each,
+ * passing it `ih', the ipmp_ifinfo_t pointer, and `arg'.
+ *
+ * Implemented as a nested walk: walk_group() visits every group and
+ * walk_if_cbfunc() visits the interfaces of each one.  The caller's callback
+ * and argument travel to the inner walk in an ipmpstat_walkdata_t.
+ */
+static void
+walk_if(ipmp_handle_t ih, ipmpstat_cbfunc_t *cbfunc, void *arg)
+{
+ ipmpstat_walkdata_t iw = { cbfunc, arg };
+
+ walk_group(ih, walk_if_cbfunc, &iw);
+}
+
+/*
+ * Walks all IPMP data addresses on the system and invokes `cbfunc' on each,
+ * passing it `ih', the ipmp_addrinfo_t pointer, and `arg'.
+ *
+ * Implemented as a nested walk: walk_group() visits every group and
+ * walk_addr_cbfunc() visits the addresses of each one.  The caller's callback
+ * and argument travel to the inner walk in an ipmpstat_walkdata_t.
+ */
+static void
+walk_addr(ipmp_handle_t ih, ipmpstat_cbfunc_t *cbfunc, void *arg)
+{
+ ipmpstat_walkdata_t iw = { cbfunc, arg };
+
+ walk_group(ih, walk_addr_cbfunc, &iw);
+}
+
+/*
+ * Nested walker callback function for walk_if().
+ *
+ * `infop' is the ipmp_groupinfo_t of the group being visited and `arg' is
+ * the ipmpstat_walkdata_t built by walk_if().  For every interface in the
+ * group's interface list, fetches its ipmp_ifinfo_t, passes it to the
+ * caller's callback, and frees it.  An interface whose information cannot be
+ * obtained is reported with a warning and skipped.
+ */
+static void
+walk_if_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+ int err;
+ uint_t i;
+ ipmp_groupinfo_t *grinfop = infop;
+ ipmp_ifinfo_t *ifinfop;
+ ipmp_iflist_t *iflistp = grinfop->gr_iflistp;
+ ipmpstat_walkdata_t *iwp = arg;
+
+ for (i = 0; i < iflistp->il_nif; i++) {
+ err = ipmp_getifinfo(ih, iflistp->il_ifs[i], &ifinfop);
+ if (err != IPMP_SUCCESS) {
+ warn_ipmperr(err, "cannot get info for interface `%s'",
+ iflistp->il_ifs[i]);
+ continue;
+ }
+ (*iwp->iw_func)(ih, ifinfop, iwp->iw_funcarg);
+ ipmp_freeifinfo(ifinfop);
+ }
+}
+
+/*
+ * Nested walker callback function for walk_addr().
+ *
+ * `infop' is the ipmp_groupinfo_t of the group being visited and `arg' is
+ * the ipmpstat_walkdata_t built by walk_addr().  For every address in the
+ * group's address list, fetches its ipmp_addrinfo_t, passes it to the
+ * caller's callback, and frees it.  An address whose information cannot be
+ * obtained is converted to a string for the warning message and skipped.
+ */
+static void
+walk_addr_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+ int err;
+ uint_t i;
+ ipmp_groupinfo_t *grinfop = infop;
+ ipmp_addrinfo_t *adinfop;
+ ipmp_addrlist_t *adlistp = grinfop->gr_adlistp;
+ ipmpstat_walkdata_t *iwp = arg;
+ char addr[INET6_ADDRSTRLEN];
+ struct sockaddr_storage *addrp;
+
+ for (i = 0; i < adlistp->al_naddr; i++) {
+ addrp = &adlistp->al_addrs[i];
+ err = ipmp_getaddrinfo(ih, grinfop->gr_name, addrp, &adinfop);
+ if (err != IPMP_SUCCESS) {
+ sockaddr2str(addrp, addr, sizeof (addr));
+ warn_ipmperr(err, "cannot get info for `%s'", addr);
+ continue;
+ }
+ (*iwp->iw_func)(ih, adinfop, iwp->iw_funcarg);
+ ipmp_freeaddrinfo(adinfop);
+ }
+}
+
+/*
+ * Warn that `nvname' could not be retrieved and store "?" in `buf' (of size
+ * `bufsize') as a placeholder value.  Presumably `nvname' names an nvpair
+ * in a sysevent payload -- confirm against the callers.
+ */
+static void
+sfunc_nvwarn(const char *nvname, char *buf, uint_t bufsize)
+{
+ warn("cannot retrieve %s\n", nvname);
+ (void) strlcpy(buf, "?", bufsize);
+}
+
+/*
+ * sfunc for an address object: writes the string form of the address
+ * (ad_addr of the ipmp_addrinfo_t in arg->sa_data) into `buf'.
+ */
+static void
+sfunc_addr_address(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+ ipmp_addrinfo_t *adinfop = arg->sa_data;
+
+ sockaddr2str(&adinfop->ad_addr, buf, bufsize);
+}
+
+/*
+ * sfunc for an address object: looks up the address's group (ad_group) and
+ * writes that group's gr_ifname -- not the group name itself -- into `buf'.
+ * If the group information cannot be obtained, a warning is issued and "?"
+ * is written instead.
+ */
+static void
+sfunc_addr_group(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+ int err;
+ ipmp_addrinfo_t *adinfop = arg->sa_data;
+ ipmp_groupinfo_t *grinfop;
+
+ err = ipmp_getgroupinfo(arg->sa_ih, adinfop->ad_group, &grinfop);
+ if (err != IPMP_SUCCESS) {
+ warn_ipmperr(err, "cannot get info for group `%s'",
+ adinfop->ad_group);
+ (void) strlcpy(buf, "?", bufsize);
+ return;
+ }
+ (void) strlcpy(buf, grinfop->gr_ifname, bufsize);
+ ipmp_freegroupinfo(grinfop);
+}
+
+/*
+ * Stores the name of the address's state, as mapped by the addr_state
+ * table, into `buf'.
+ */
+static void
+sfunc_addr_state(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_addrinfo_t *addrinfo = arg->sa_data;
+
+	enum2str(addr_state, addrinfo->ad_state, buf, bufsize);
+}
+
+/*
+ * Stores the address's binding (ad_binding) into `buf'.
+ */
+static void
+sfunc_addr_inbound(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_addrinfo_t *addrinfo = arg->sa_data;
+
+	(void) strlcpy(buf, addrinfo->ad_binding, bufsize);
+}
+
+/*
+ * Appends to `buf' the space-separated names of the active interfaces in
+ * the group that owns the address in `arg->sa_data'.  `buf' is left
+ * untouched if the address is down or has no binding, and is set to "?"
+ * if the group info cannot be retrieved.
+ */
+static void
+sfunc_addr_outbound(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_addrinfo_t *addrinfo = arg->sa_data;
+	ipmp_groupinfo_t *groupinfo;
+	ipmp_iflist_t *ifs;
+	ipmp_ifinfo_t *ifp;
+	uint_t idx, nactive = 0;
+	int rc;
+
+	/*
+	 * A down address sends nothing, and if there's no inbound
+	 * interface for this address, there can't be any outbound traffic.
+	 */
+	if (addrinfo->ad_state == IPMP_ADDR_DOWN ||
+	    addrinfo->ad_binding[0] == '\0')
+		return;
+
+	/*
+	 * The address can use any active interface in the group, so
+	 * obtain all of those.
+	 */
+	rc = ipmp_getgroupinfo(arg->sa_ih, addrinfo->ad_group, &groupinfo);
+	if (rc != IPMP_SUCCESS) {
+		warn_ipmperr(rc, "cannot get info for group `%s'",
+		    addrinfo->ad_group);
+		(void) strlcpy(buf, "?", bufsize);
+		return;
+	}
+
+	ifs = groupinfo->gr_iflistp;
+	for (idx = 0; idx < ifs->il_nif; idx++) {
+		rc = ipmp_getifinfo(arg->sa_ih, ifs->il_ifs[idx], &ifp);
+		if (rc != IPMP_SUCCESS) {
+			warn_ipmperr(rc, "cannot get info for interface `%s'",
+			    ifs->il_ifs[idx]);
+			continue;
+		}
+
+		if ((ifp->if_flags & IPMP_IFFLAG_ACTIVE) != 0) {
+			/* Separate this name from the previous one. */
+			if (nactive != 0)
+				(void) strlcat(buf, " ", bufsize);
+			(void) strlcat(buf, ifp->if_name, bufsize);
+			nactive++;
+		}
+		ipmp_freeifinfo(ifp);
+	}
+	ipmp_freegroupinfo(groupinfo);
+}
+
+/*
+ * Stores the group's name (gr_name) into `buf'.
+ */
+static void
+sfunc_group_name(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_groupinfo_t *groupinfo = arg->sa_data;
+
+	(void) strlcpy(buf, groupinfo->gr_name, bufsize);
+}
+
+/*
+ * Stores the group's IPMP interface name (gr_ifname) into `buf'.
+ */
+static void
+sfunc_group_ifname(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_groupinfo_t *groupinfo = arg->sa_data;
+
+	(void) strlcpy(buf, groupinfo->gr_ifname, bufsize);
+}
+
+/*
+ * Stores the name of the group's state, as mapped by the group_state
+ * table, into `buf'.
+ */
+static void
+sfunc_group_state(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_groupinfo_t *groupinfo = arg->sa_data;
+
+	enum2str(group_state, groupinfo->gr_state, buf, bufsize);
+}
+
+/*
+ * Stores the group's gr_fdt value, converted with MS2FLOATSEC() and
+ * formatted as seconds, into `buf'.  `buf' is left untouched when
+ * gr_fdt is zero.
+ */
+static void
+sfunc_group_fdt(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_groupinfo_t *groupinfo = arg->sa_data;
+
+	if (groupinfo->gr_fdt != 0) {
+		(void) snprintf(buf, bufsize, "%.2fs",
+		    MS2FLOATSEC(groupinfo->gr_fdt));
+	}
+}
+
+/*
+ * Stores the names of the group's interfaces into `buf', in three
+ * space-separated clusters: interfaces flagged active are listed bare,
+ * those flagged inactive are wrapped in "(...)", and all others are
+ * wrapped in "[...]".  Empty clusters are omitted.
+ */
+static void
+sfunc_group_interfaces(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_groupinfo_t *groupinfo = arg->sa_data;
+	ipmp_iflist_t *ifs = groupinfo->gr_iflistp;
+	ipmp_ifinfo_t *ifp;
+	char *active, *inactive, *unusable, *list;
+	uint_t nactive = 0, ninactive = 0, nunusable = 0;
+	uint_t *countp;
+	uint_t idx;
+	int rc;
+
+	/* One scratch list per cluster, each as large as the output. */
+	active = alloca(bufsize);
+	inactive = alloca(bufsize);
+	unusable = alloca(bufsize);
+	active[0] = inactive[0] = unusable[0] = '\0';
+
+	for (idx = 0; idx < ifs->il_nif; idx++) {
+		rc = ipmp_getifinfo(arg->sa_ih, ifs->il_ifs[idx], &ifp);
+		if (rc != IPMP_SUCCESS) {
+			warn_ipmperr(rc, "cannot get info for interface `%s'",
+			    ifs->il_ifs[idx]);
+			continue;
+		}
+
+		/* Pick the cluster this interface belongs to. */
+		if (ifp->if_flags & IPMP_IFFLAG_ACTIVE) {
+			list = active;
+			countp = &nactive;
+		} else if (ifp->if_flags & IPMP_IFFLAG_INACTIVE) {
+			list = inactive;
+			countp = &ninactive;
+		} else {
+			list = unusable;
+			countp = &nunusable;
+		}
+
+		if ((*countp)++ != 0)
+			(void) strlcat(list, " ", bufsize);
+		(void) strlcat(list, ifp->if_name, bufsize);
+
+		ipmp_freeifinfo(ifp);
+	}
+
+	(void) strlcpy(buf, active, bufsize);
+
+	if (ninactive != 0) {
+		if (nactive != 0)
+			(void) strlcat(buf, " ", bufsize);
+
+		(void) strlcat(buf, "(", bufsize);
+		(void) strlcat(buf, inactive, bufsize);
+		(void) strlcat(buf, ")", bufsize);
+	}
+
+	if (nunusable != 0) {
+		if (nactive + ninactive != 0)
+			(void) strlcat(buf, " ", bufsize);
+
+		(void) strlcat(buf, "[", bufsize);
+		(void) strlcat(buf, unusable, bufsize);
+		(void) strlcat(buf, "]", bufsize);
+	}
+}
+
+/*
+ * Stores the interface's name (if_name) into `buf'.
+ */
+static void
+sfunc_if_name(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_ifinfo_t *ifp = arg->sa_data;
+
+	(void) strlcpy(buf, ifp->if_name, bufsize);
+}
+
+/*
+ * Stores "yes" into `buf' if the interface has IPMP_IFFLAG_ACTIVE set,
+ * and "no" otherwise.
+ */
+static void
+sfunc_if_active(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_ifinfo_t *ifp = arg->sa_data;
+	const char *answer;
+
+	answer = (ifp->if_flags & IPMP_IFFLAG_ACTIVE) ? "yes" : "no";
+	(void) strlcpy(buf, answer, bufsize);
+}
+
+/*
+ * Stores the IPMP interface name (gr_ifname) of the group that the
+ * interface in `arg->sa_data' belongs to into `buf', or "?" if the group
+ * info cannot be retrieved.
+ */
+static void
+sfunc_if_group(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t *ifp = arg->sa_data;
+	ipmp_groupinfo_t *groupinfo;
+	int rc;
+
+	rc = ipmp_getgroupinfo(arg->sa_ih, ifp->if_group, &groupinfo);
+	if (rc == IPMP_SUCCESS) {
+		(void) strlcpy(buf, groupinfo->gr_ifname, bufsize);
+		ipmp_freegroupinfo(groupinfo);
+	} else {
+		warn_ipmperr(rc, "cannot get info for group `%s'",
+		    ifp->if_group);
+		(void) strlcpy(buf, "?", bufsize);
+	}
+}
+
+/*
+ * Stores a fixed-width flags string of IPMPSTAT_NUM_FLAGS characters into
+ * `buf'.  Each position holds '-' unless its flag applies: 's' (standby),
+ * 'i' (inactive), 'd' (down), 'h' (duplicate hardware address), and --
+ * when the interface's name matches the corresponding name in its group
+ * info -- 'm' (gr_m4ifname), 'M' (gr_m6ifname) and 'b' (gr_bcifname).
+ * If the group info cannot be retrieved, the last three are left as '-'.
+ */
+static void
+sfunc_if_flags(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t *ifp = arg->sa_data;
+	ipmp_groupinfo_t *groupinfo;
+	const char *ifname = ifp->if_name;
+	int rc;
+
+	assert(bufsize > IPMPSTAT_NUM_FLAGS);
+
+	/* Start with every flag position cleared. */
+	(void) memset(buf, '-', IPMPSTAT_NUM_FLAGS);
+	buf[IPMPSTAT_NUM_FLAGS] = '\0';
+
+	/* Flags that come straight from the interface info. */
+	if (ifp->if_type == IPMP_IF_STANDBY)
+		buf[IPMPSTAT_SFLAG_INDEX] = 's';
+	if (ifp->if_flags & IPMP_IFFLAG_INACTIVE)
+		buf[IPMPSTAT_IFLAG_INDEX] = 'i';
+	if (ifp->if_flags & IPMP_IFFLAG_DOWN)
+		buf[IPMPSTAT_DFLAG_INDEX] = 'd';
+	if (ifp->if_flags & IPMP_IFFLAG_HWADDRDUP)
+		buf[IPMPSTAT_HFLAG_INDEX] = 'h';
+
+	/* Flags that require consulting the interface's group. */
+	rc = ipmp_getgroupinfo(arg->sa_ih, ifp->if_group, &groupinfo);
+	if (rc != IPMP_SUCCESS) {
+		warn_ipmperr(rc, "cannot get broadcast/multicast info for "
+		    "group `%s'", ifp->if_group);
+		return;
+	}
+
+	if (strcmp(groupinfo->gr_m4ifname, ifname) == 0)
+		buf[IPMPSTAT_M4FLAG_INDEX] = 'm';
+	if (strcmp(groupinfo->gr_m6ifname, ifname) == 0)
+		buf[IPMPSTAT_M6FLAG_INDEX] = 'M';
+	if (strcmp(groupinfo->gr_bcifname, ifname) == 0)
+		buf[IPMPSTAT_BFLAG_INDEX] = 'b';
+
+	ipmp_freegroupinfo(groupinfo);
+}
+
+/*
+ * Stores the name of the interface's link state, as mapped by the if_link
+ * table, into `buf'.
+ */
+static void
+sfunc_if_link(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_ifinfo_t *ifp = arg->sa_data;
+
+	enum2str(if_link, ifp->if_linkstate, buf, bufsize);
+}
+
+/*
+ * Stores the name of the interface's probe state, as mapped by the
+ * if_probe table, into `buf'.
+ */
+static void
+sfunc_if_probe(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_ifinfo_t *ifp = arg->sa_data;
+
+	enum2str(if_probe, ifp->if_probestate, buf, bufsize);
+}
+
+/*
+ * Stores the name of the interface's state, as mapped by the if_state
+ * table, into `buf'.
+ */
+static void
+sfunc_if_state(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_ifinfo_t *ifp = arg->sa_data;
+
+	enum2str(if_state, ifp->if_state, buf, bufsize);
+}
+
+/*
+ * Stores the IPMP_PROBE_ID attribute of the probe event nvlist in
+ * `arg->sa_data' into `buf' as an unsigned decimal.  If the attribute is
+ * missing, a warning is issued and "?" is stored instead.
+ */
+static void
+sfunc_probe_id(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	uint32_t id;
+
+	if (nvlist_lookup_uint32(attrs, IPMP_PROBE_ID, &id) == 0)
+		(void) snprintf(buf, bufsize, "%u", id);
+	else
+		sfunc_nvwarn("IPMP_PROBE_ID", buf, bufsize);
+}
+
+/*
+ * Stores the IPMP_IF_NAME attribute of the probe event nvlist in
+ * `arg->sa_data' into `buf'.  If the attribute is missing, a warning is
+ * issued and "?" is stored instead.
+ */
+static void
+sfunc_probe_ifname(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	char *name;
+
+	if (nvlist_lookup_string(attrs, IPMP_IF_NAME, &name) == 0)
+		(void) strlcpy(buf, name, bufsize);
+	else
+		sfunc_nvwarn("IPMP_IF_NAME", buf, bufsize);
+}
+
+/*
+ * Stores into `buf' the probe's start time (IPMP_PROBE_START_TIME),
+ * expressed in seconds relative to `probe_output_start'.  If the
+ * attribute is missing, a warning is issued and "?" is stored instead.
+ */
+static void
+sfunc_probe_time(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	hrtime_t start;
+	nvlist_t *nvl = arg->sa_data;
+
+	if (nvlist_lookup_hrtime(nvl, IPMP_PROBE_START_TIME, &start) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_START_TIME", buf, bufsize);
+		return;
+	}
+
+	/*
+	 * Convert via double rather than float: the difference is a
+	 * nanosecond count, and a float's 24-bit mantissa cannot hold it
+	 * precisely enough to keep the two displayed decimals accurate
+	 * once probe output has been running for a long time.
+	 */
+	(void) snprintf(buf, bufsize, "%.2fs",
+	    (double)(start - probe_output_start) / NANOSEC);
+}
+
+/*
+ * Stores into `buf' the string form of the probe target, which is carried
+ * in the event nvlist as the IPMP_PROBE_TARGET byte array and interpreted
+ * as a sockaddr_storage.  If the attribute is missing, a warning is
+ * issued and "?" is stored instead.  The element count returned by the
+ * lookup is not examined.
+ */
+static void
+sfunc_probe_target(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	struct sockaddr_storage *ssp;
+	uint_t nbytes;
+	int rc;
+
+	rc = nvlist_lookup_byte_array(attrs, IPMP_PROBE_TARGET,
+	    (uchar_t **)&ssp, &nbytes);
+	if (rc == 0)
+		sockaddr2str(ssp, buf, bufsize);
+	else
+		sfunc_nvwarn("IPMP_PROBE_TARGET", buf, bufsize);
+}
+
+/*
+ * For a probe in state IPMP_PROBE_ACKED, stores into `buf' the interval
+ * between IPMP_PROBE_START_TIME and IPMP_PROBE_ACKPROC_TIME, formatted in
+ * milliseconds.  `buf' is left untouched for probes in any other state.
+ * If a needed attribute is missing, a warning naming it is issued and "?"
+ * is stored instead.
+ */
+static void
+sfunc_probe_rtt(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	const char *missing = NULL;
+	hrtime_t begin, end;
+	uint32_t state;
+
+	/* Attributes are looked up in order; stop at the first failure. */
+	if (nvlist_lookup_uint32(attrs, IPMP_PROBE_STATE, &state) != 0)
+		missing = "IPMP_PROBE_STATE";
+	else if (state != IPMP_PROBE_ACKED)
+		return;
+	else if (nvlist_lookup_hrtime(attrs, IPMP_PROBE_START_TIME,
+	    &begin) != 0)
+		missing = "IPMP_PROBE_START_TIME";
+	else if (nvlist_lookup_hrtime(attrs, IPMP_PROBE_ACKPROC_TIME,
+	    &end) != 0)
+		missing = "IPMP_PROBE_ACKPROC_TIME";
+
+	if (missing != NULL) {
+		sfunc_nvwarn(missing, buf, bufsize);
+		return;
+	}
+
+	(void) snprintf(buf, bufsize, "%.2fms", NS2FLOATMS(end - begin));
+}
+
+/*
+ * For a probe in state IPMP_PROBE_ACKED, stores into `buf' the interval
+ * between IPMP_PROBE_SENT_TIME and IPMP_PROBE_ACKRECV_TIME, formatted in
+ * milliseconds.  `buf' is left untouched for probes in any other state.
+ * If a needed attribute is missing, a warning naming it is issued and "?"
+ * is stored instead.
+ */
+static void
+sfunc_probe_netrtt(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	const char *missing = NULL;
+	hrtime_t begin, end;
+	uint32_t state;
+
+	/* Attributes are looked up in order; stop at the first failure. */
+	if (nvlist_lookup_uint32(attrs, IPMP_PROBE_STATE, &state) != 0)
+		missing = "IPMP_PROBE_STATE";
+	else if (state != IPMP_PROBE_ACKED)
+		return;
+	else if (nvlist_lookup_hrtime(attrs, IPMP_PROBE_SENT_TIME,
+	    &begin) != 0)
+		missing = "IPMP_PROBE_SENT_TIME";
+	else if (nvlist_lookup_hrtime(attrs, IPMP_PROBE_ACKRECV_TIME,
+	    &end) != 0)
+		missing = "IPMP_PROBE_ACKRECV_TIME";
+
+	if (missing != NULL) {
+		sfunc_nvwarn(missing, buf, bufsize);
+		return;
+	}
+
+	(void) snprintf(buf, bufsize, "%.2fms", NS2FLOATMS(end - begin));
+}
+
+/*
+ * Stores the IPMP_PROBE_TARGET_RTTAVG attribute, formatted in
+ * milliseconds, into `buf'.  `buf' is left untouched when the value is
+ * zero.  If the attribute is missing, a warning is issued and "?" is
+ * stored instead.
+ */
+static void
+sfunc_probe_rttavg(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	int64_t avg;
+
+	if (nvlist_lookup_int64(attrs, IPMP_PROBE_TARGET_RTTAVG, &avg) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_TARGET_RTTAVG", buf, bufsize);
+		return;
+	}
+
+	if (avg == 0)
+		return;
+
+	(void) snprintf(buf, bufsize, "%.2fms", NS2FLOATMS(avg));
+}
+
+/*
+ * Stores the IPMP_PROBE_TARGET_RTTDEV attribute, formatted in
+ * milliseconds, into `buf'.  `buf' is left untouched when the value is
+ * zero.  If the attribute is missing, a warning is issued and "?" is
+ * stored instead.
+ */
+static void
+sfunc_probe_rttdev(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	nvlist_t *attrs = arg->sa_data;
+	int64_t dev;
+
+	if (nvlist_lookup_int64(attrs, IPMP_PROBE_TARGET_RTTDEV, &dev) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_TARGET_RTTDEV", buf, bufsize);
+		return;
+	}
+
+	if (dev == 0)
+		return;
+
+	(void) snprintf(buf, bufsize, "%.2fms", NS2FLOATMS(dev));
+}
+
+/*
+ * Interface walker callback: increments the uint_t counter pointed to by
+ * `arg' if the interface's probe state is anything other than
+ * IPMP_PROBE_DISABLED.
+ */
+/* ARGSUSED */
+static void
+probe_enabled_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	const ipmp_ifinfo_t *ifp = infop;
+	uint_t *countp = arg;
+
+	if (ifp->if_probestate == IPMP_PROBE_DISABLED)
+		return;
+
+	++*countp;
+}
+
+/*
+ * Outputs probe activity using the fields named by `ofmt'.  Exits if no
+ * interface has probing enabled; otherwise binds to the IPMP event
+ * channel, subscribes probe_event() to EC_IPMP events, and then blocks
+ * forever while the callback produces the output.
+ */
+static void
+probe_output(ipmp_handle_t ih, ipmpstat_ofmt_t *ofmt)
+{
+	ipmpstat_probe_state_t probe_state = { ih, ofmt };
+	char subid[MAX_SUBID_LEN];
+	evchan_t *chan;
+	uint_t nprobing = 0;
+
+	/*
+	 * Check if any interfaces are enabled for probe-based failure
+	 * detection.  If not, immediately fail.
+	 */
+	walk_if(ih, probe_enabled_cbfunc, &nprobing);
+	if (nprobing == 0)
+		die("probe-based failure detection is disabled\n");
+
+	probe_output_start = gethrtime();
+
+	/*
+	 * Unfortunately, until 4791900 is fixed, only privileged processes
+	 * can bind and thus receive sysevents.  The result is stored in
+	 * errno so that die() can report it.
+	 */
+	errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &chan, EVCH_CREAT);
+	if (errno == EPERM)
+		die("insufficient privileges for -p\n");
+	if (errno != 0)
+		die("sysevent_evc_bind to channel %s failed", IPMP_EVENT_CHAN);
+
+	/*
+	 * The subscriber must be unique in order for sysevent_evc_subscribe()
+	 * to succeed, so combine our name and pid.
+	 */
+	(void) snprintf(subid, sizeof (subid), "%d-%s", getpid(), progname);
+
+	errno = sysevent_evc_subscribe(chan, subid, EC_IPMP, probe_event,
+	    &probe_state, 0);
+	if (errno != 0)
+		die("sysevent_evc_subscribe for class %s failed", EC_IPMP);
+
+	/* All further work happens in probe_event(). */
+	for (;;)
+		(void) pause();
+}
+
+/*
+ * sysevent callback registered by probe_output(); `arg' is the
+ * ipmpstat_probe_state_t holding the IPMP handle and output format.
+ * Events whose subclass is not ESC_IPMP_PROBE_STATE are ignored.  For
+ * the rest, the attribute list is validated (version and probe state
+ * must be present, and the version must be IPMP_EVENT_CUR_VERSION) and a
+ * row is output for probes that were either acked or lost.  Always
+ * returns 0.
+ */
+static int
+probe_event(sysevent_t *ev, void *arg)
+{
+	nvlist_t *nvl;
+	uint32_t state;
+	uint32_t version;
+	ipmpstat_probe_state_t *psp = arg;
+
+	if (strcmp(sysevent_get_subclass_name(ev), ESC_IPMP_PROBE_STATE) != 0)
+		return (0);
+
+	if (sysevent_get_attr_list(ev, &nvl) != 0) {
+		warn("sysevent_get_attr_list failed; dropping event");
+		return (0);
+	}
+
+	if (nvlist_lookup_uint32(nvl, IPMP_EVENT_VERSION, &version) != 0) {
+		warn("dropped event with no IPMP_EVENT_VERSION\n");
+		goto out;
+	}
+
+	if (version != IPMP_EVENT_CUR_VERSION) {
+		/* `version' is unsigned, so it must be printed with %u. */
+		warn("dropped event with unsupported IPMP_EVENT_VERSION %u\n",
+		    version);
+		goto out;
+	}
+
+	if (nvlist_lookup_uint32(nvl, IPMP_PROBE_STATE, &state) != 0) {
+		warn("dropped event with no IPMP_PROBE_STATE\n");
+		goto out;
+	}
+
+	/* Only completed probes (acked or lost) produce a row. */
+	if (state == IPMP_PROBE_ACKED || state == IPMP_PROBE_LOST)
+		ofmt_output(psp->ps_ofmt, psp->ps_ih, nvl);
+out:
+	nvlist_free(nvl);
+	return (0);
+}
+
+/*
+ * Stores the name (it_name) from the ipmp_targinfo_t in `arg->sa_data'
+ * into `buf'.
+ */
+static void
+sfunc_targ_ifname(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_targinfo_t *targinfo = arg->sa_data;
+
+	(void) strlcpy(buf, targinfo->it_name, bufsize);
+}
+
+/*
+ * Stores the name of the target mode, as mapped by the targ_mode table,
+ * into `buf'.
+ */
+static void
+sfunc_targ_mode(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_targinfo_t *targinfo = arg->sa_data;
+
+	enum2str(targ_mode, targinfo->it_targmode, buf, bufsize);
+}
+
+/*
+ * Stores the string form of the test address (it_testaddr) into `buf'.
+ * `buf' is left untouched when the target mode is IPMP_TARG_DISABLED.
+ */
+static void
+sfunc_targ_testaddr(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	const ipmp_targinfo_t *targinfo = arg->sa_data;
+
+	if (targinfo->it_targmode == IPMP_TARG_DISABLED)
+		return;
+
+	sockaddr2str(&targinfo->it_testaddr, buf, bufsize);
+}
+
+/*
+ * Appends to `buf' the string form of each address in the target list
+ * (it_targlistp), separated by single spaces.
+ */
+static void
+sfunc_targ_targets(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_targinfo_t *targinfo = arg->sa_data;
+	ipmp_addrlist_t *targets = targinfo->it_targlistp;
+	char *name = alloca(bufsize);
+	uint_t idx;
+
+	for (idx = 0; idx < targets->al_naddr; idx++) {
+		/* Separate this target from the one before it. */
+		if (idx != 0)
+			(void) strlcat(buf, " ", bufsize);
+
+		sockaddr2str(&targets->al_addrs[idx], name, bufsize);
+		(void) strlcat(buf, name, bufsize);
+	}
+}
+
+/*
+ * Walker callback that outputs one row for `infop', using the output
+ * format list passed via `arg'.
+ */
+static void
+info_output_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	const ipmpstat_ofmt_t *ofmt = arg;
+
+	ofmt_output(ofmt, ih, infop);
+}
+
+/*
+ * Interface walker callback that outputs the interface's target info
+ * rows, using the output format list passed via `arg'.
+ */
+static void
+targinfo_output_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	ipmp_ifinfo_t *ifp = infop;
+	ipmp_targinfo_t *targ4 = &ifp->if_targinfo4;
+	ipmp_targinfo_t *targ6 = &ifp->if_targinfo6;
+	boolean_t v4on = (targ4->it_targmode != IPMP_TARG_DISABLED);
+	boolean_t v6on = (targ6->it_targmode != IPMP_TARG_DISABLED);
+
+	/*
+	 * Usually, either IPv4 or IPv6 probing will be enabled, but the admin
+	 * may enable both.  If only one is enabled, omit the other one so as
+	 * to not encourage the admin to enable both.  If neither is enabled,
+	 * we still print one just so the admin can see a MODE of "disabled".
+	 */
+	if (v4on || !v6on)
+		ofmt_output(arg, ih, targ4);
+	if (v6on)
+		ofmt_output(arg, ih, targ6);
+}
+
+/*
+ * Allocates a zeroed list node for `fieldp' and appends it to the list
+ * tracked by `*headp' and `*tailp'.  Exits via die() if the allocation
+ * fails.
+ */
+static void
+ofmt_append(ipmpstat_ofmt_t **headp, ipmpstat_ofmt_t **tailp,
+    ipmpstat_field_t *fieldp)
+{
+	ipmpstat_ofmt_t *node = calloc(1, sizeof (*node));
+
+	if (node == NULL)
+		die("cannot allocate output format list");
+
+	node->o_field = fieldp;
+	if (*headp == NULL)
+		*headp = node;
+	else
+		(*tailp)->o_next = node;
+	*tailp = node;
+}
+
+/*
+ * Builds and returns an ipmpstat_ofmt_t field list from `ofields', the
+ * comma-separated list of field names given by the user (or NULL if none
+ * was given).  Names are matched against `fields', the table of known
+ * fields and their attributes.  Exits via die() on allocation failure, if
+ * no named field is valid, or -- in parsable mode -- on any unknown name.
+ */
+static ipmpstat_ofmt_t *
+ofmt_create(const char *ofields, ipmpstat_field_t fields[])
+{
+	ipmpstat_ofmt_t *head = NULL, *tail = NULL;
+	ipmpstat_field_t *fieldp;
+	char *names, *lasts;
+	const char *name;
+	uint_t cols = 0;
+
+	/*
+	 * If "-o" was omitted or "-o all" was specified, build a list of
+	 * field names.  If "-o" was omitted, stop building the list when
+	 * we run out of columns.
+	 */
+	if (ofields == NULL || strcasecmp(ofields, "all") == 0) {
+		for (fieldp = fields; fieldp->f_name != NULL; fieldp++) {
+			cols += fieldp->f_width;
+			if (ofields == NULL && cols > IPMPSTAT_NCOL)
+				break;
+
+			ofmt_append(&head, &tail, fieldp);
+		}
+		return (head);
+	}
+
+	/* strtok_r() modifies its input, so tokenize a private copy. */
+	if ((names = strdup(ofields)) == NULL)
+		die("cannot allocate output format list");
+
+	for (name = strtok_r(names, ",", &lasts); name != NULL;
+	    name = strtok_r(NULL, ",", &lasts)) {
+		fieldp = field_find(fields, name);
+		if (fieldp != NULL) {
+			ofmt_append(&head, &tail, fieldp);
+			continue;
+		}
+
+		/*
+		 * Since machine parsers are unlikely to be able to
+		 * gracefully handle missing fields, die if we're in
+		 * parsable mode.  Otherwise, just print a warning.
+		 */
+		if (opt & IPMPSTAT_OPT_PARSABLE)
+			die("unknown output field `%s'\n", name);
+
+		warn("ignoring unknown output field `%s'\n", name);
+	}
+
+	free(names);
+	if (head == NULL)
+		die("no valid output fields specified\n");
+
+	return (head);
+}
+
+/*
+ * Frees every node of the `ofmt' field list.
+ */
+static void
+ofmt_destroy(ipmpstat_ofmt_t *ofmt)
+{
+	while (ofmt != NULL) {
+		/* Grab the successor before the node is freed. */
+		ipmpstat_ofmt_t *next = ofmt->o_next;
+
+		free(ofmt);
+		ofmt = next;
+	}
+}
+
+/*
+ * Prints a header line naming each field in `ofmt'.  Every name but the
+ * last is left-justified and padded to its field's width; the last is
+ * printed without padding.
+ */
+static void
+ofmt_output_header(const ipmpstat_ofmt_t *ofmt)
+{
+	while (ofmt != NULL) {
+		const ipmpstat_field_t *fieldp = ofmt->o_field;
+
+		if (ofmt->o_next != NULL)
+			(void) printf("%-*s", fieldp->f_width, fieldp->f_name);
+		else
+			(void) printf("%s", fieldp->f_name);
+
+		ofmt = ofmt->o_next;
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Outputs one row of values for the fields named by `ofmt'. The values to
+ * output are obtained through the `ofmt' function pointers, which are
+ * indirectly passed the `ih' and `arg' structures for state; see the block
+ * comment at the start of this file for details.
+ *
+ * In parsable mode, values are separated by ':' and no header is printed.
+ * Otherwise, a header is printed once per screenful of rows, empty values
+ * are shown as "--", and columns are padded to their field widths.
+ */
+static void
+ofmt_output(const ipmpstat_ofmt_t *ofmt, ipmp_handle_t ih, void *arg)
+{
+ int i;
+ char buf[1024];
+ boolean_t escsep;
+ static int nrow;
+ const char *value;
+ uint_t width, valwidth;
+ uint_t compress, overflow = 0;
+ const ipmpstat_field_t *fieldp;
+ ipmpstat_sfunc_arg_t sfunc_arg;
+
+ /*
+ * For each screenful of data, display the header.
+ * The second increment counts the header line itself as a row.
+ */
+ if ((nrow++ % winsize.ws_row) == 0 && !(opt & IPMPSTAT_OPT_PARSABLE)) {
+ ofmt_output_header(ofmt);
+ nrow++;
+ }
+
+ /*
+ * Check if we'll be displaying multiple fields per line, and thus
+ * need to escape the field separator.
+ */
+ escsep = (ofmt != NULL && ofmt->o_next != NULL);
+
+ for (; ofmt != NULL; ofmt = ofmt->o_next) {
+ fieldp = ofmt->o_field;
+
+ sfunc_arg.sa_ih = ih;
+ sfunc_arg.sa_data = arg;
+
+ /*
+ * Start from an empty string: some field functions append to
+ * `buf' or leave it untouched when they have nothing to report.
+ */
+ buf[0] = '\0';
+ (*fieldp->f_sfunc)(&sfunc_arg, buf, sizeof (buf));
+
+ if (opt & IPMPSTAT_OPT_PARSABLE) {
+ /*
+ * Emit the value, backslash-escaping any ':' or '\' when
+ * escaping is needed, followed by the ':' separator unless
+ * this is the last field.
+ */
+ for (i = 0; buf[i] != '\0'; i++) {
+ if (escsep && (buf[i] == ':' || buf[i] == '\\'))
+ (void) putchar('\\');
+ (void) putchar(buf[i]);
+ }
+ if (ofmt->o_next != NULL)
+ (void) putchar(':');
+ } else {
+ /* Show a placeholder for fields that have no value. */
+ value = (buf[0] == '\0') ? "--" : buf;
+
+ /*
+ * To avoid needless line-wraps, for the last field,
+ * don't include any trailing whitespace.
+ */
+ if (ofmt->o_next == NULL) {
+ (void) printf("%s", value);
+ continue;
+ }
+
+ /*
+ * For other fields, grow the width as necessary to
+ * ensure the value completely fits. However, if
+ * there's unused whitespace in subsequent fields,
+ * then "compress" that whitespace to attempt to get
+ * the columns to line up again.
+ */
+ width = fieldp->f_width;
+ valwidth = strlen(value);
+
+ /*
+ * The value (plus any overflow carried in from earlier
+ * fields) fills or exceeds this column: print it followed
+ * by a single space and carry the excess forward.
+ */
+ if (valwidth + overflow >= width) {
+ overflow += valwidth - width + 1;
+ (void) printf("%s ", value);
+ continue;
+ }
+
+ /*
+ * Give back as much carried-over overflow as this column's
+ * spare whitespace allows.
+ */
+ if (overflow > 0) {
+ compress = MIN(overflow, width - valwidth);
+ overflow -= compress;
+ width -= compress;
+ }
+ (void) printf("%-*s", width, value);
+ }
+ }
+ (void) printf("\n");
+
+ /*
+ * In case stdout has been redirected to e.g. a pipe, flush stdout so
+ * that commands can act on our output immediately.
+ */
+ (void) fflush(stdout);
+}
+
+/*
+ * Looks up `fieldname' (ignoring case) in the `fields' array, which is
+ * terminated by an entry with a NULL name.  Returns a pointer to the
+ * matching field, or NULL if there is none.
+ */
+static ipmpstat_field_t *
+field_find(ipmpstat_field_t *fields, const char *fieldname)
+{
+	ipmpstat_field_t *cur = fields;
+
+	while (cur->f_name != NULL) {
+		if (strcasecmp(cur->f_name, fieldname) == 0)
+			return (cur);
+		cur++;
+	}
+	return (NULL);
+}
+
+/*
+ * Maps `enumval' to its name using the `enums' table (terminated by an
+ * entry with a NULL name) and stores at most `bufsize' bytes of that name
+ * into `buf'.  Values not found in the table are stored as "<value>".
+ */
+static void
+enum2str(const ipmpstat_enum_t *enums, int enumval, char *buf, uint_t bufsize)
+{
+	const ipmpstat_enum_t *cur = enums;
+
+	/* Advance to the matching entry, or to the terminator. */
+	while (cur->e_name != NULL && cur->e_val != enumval)
+		cur++;
+
+	if (cur->e_name != NULL)
+		(void) strlcpy(buf, cur->e_name, bufsize);
+	else
+		(void) snprintf(buf, bufsize, "<%d>", enumval);
+}
+
+/*
+ * Stores the stringified value of the sockaddr_storage pointed to by `ssp'
+ * into at most `bufsize' bytes of `buf'.  Numeric form is used if
+ * IPMPSTAT_OPT_NUMERIC is set in `opt'.  If the address family is neither
+ * AF_INET nor AF_INET6, or the address cannot be converted, "?" is stored
+ * instead.
+ */
+static void
+sockaddr2str(const struct sockaddr_storage *ssp, char *buf, uint_t bufsize)
+{
+	int flags = NI_NOFQDN;
+	socklen_t socklen;
+	struct sockaddr *sp = (struct sockaddr *)ssp;
+
+	/*
+	 * Sadly, getnameinfo() does not allow the socklen to be oversized for
+	 * a given family -- so we must determine the exact size to pass to it.
+	 */
+	switch (ssp->ss_family) {
+	case AF_INET:
+		socklen = sizeof (struct sockaddr_in);
+		break;
+	case AF_INET6:
+		socklen = sizeof (struct sockaddr_in6);
+		break;
+	default:
+		(void) strlcpy(buf, "?", bufsize);
+		return;
+	}
+
+	if (opt & IPMPSTAT_OPT_NUMERIC)
+		flags |= NI_NUMERICHOST;
+
+	/*
+	 * Callers may pass an uninitialized buffer and print it afterwards,
+	 * so never leave `buf' in an unspecified state: if the conversion
+	 * fails, fall back to the same "?" used for unknown families.
+	 */
+	if (getnameinfo(sp, socklen, buf, bufsize, NULL, 0, flags) != 0)
+		(void) strlcpy(buf, "?", bufsize);
+}
+
+/*
+ * SIGWINCH handler: refreshes `winsize' from file descriptor 1.  If the
+ * size cannot be obtained, or either dimension is reported as zero, an
+ * 80-column by 24-row window is assumed.
+ */
+static void
+sighandler(int sig)
+{
+	boolean_t known;
+
+	assert(sig == SIGWINCH);
+
+	known = (ioctl(1, TIOCGWINSZ, &winsize) != -1 &&
+	    winsize.ws_col != 0 && winsize.ws_row != 0);
+
+	if (!known) {
+		winsize.ws_col = 80;
+		winsize.ws_row = 24;
+	}
+}
+
+/*
+ * Prints the command's usage message to stderr and exits with
+ * EXIT_FAILURE.
+ */
+static void
+usage(void)
+{
+	const char *synopsis;
+
+	synopsis = gettext("[-n] [-o <field> [-P]] -a|-g|-i|-p|-t");
+	(void) fprintf(stderr, gettext("usage: %s %s\n"), progname, synopsis);
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Prints a localized warning, prefixed with the program name, to stderr.
+ * If the (localized) format contains no newline, ": " and the error
+ * string for the value errno had on entry are appended.
+ */
+/* PRINTFLIKE1 */
+static void
+warn(const char *format, ...)
+{
+	int saved_errno = errno;	/* capture before anything clobbers it */
+	const char *msg = gettext(format);
+	va_list ap;
+
+	(void) fprintf(stderr, gettext("%s: warning: "), progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, msg, ap);
+	va_end(ap);
+
+	if (strchr(msg, '\n') != NULL)
+		return;
+
+	(void) fprintf(stderr, ": %s\n", strerror(saved_errno));
+}
+
+/*
+ * Prints a localized warning, prefixed with the program name, to stderr,
+ * followed by ": " and the message for the IPMP error code `ipmperr'.
+ */
+/* PRINTFLIKE2 */
+static void
+warn_ipmperr(int ipmperr, const char *format, ...)
+{
+	const char *msg = gettext(format);
+	va_list ap;
+
+	(void) fprintf(stderr, gettext("%s: warning: "), progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, msg, ap);
+	va_end(ap);
+
+	(void) fprintf(stderr, ": %s\n", ipmp_errmsg(ipmperr));
+}
+
+/*
+ * Prints a localized error message, prefixed with the program name, to
+ * stderr and exits with EXIT_FAILURE.  If the (localized) format contains
+ * no newline, ": " and the error string for the value errno had on entry
+ * are appended.
+ */
+/* PRINTFLIKE1 */
+static void
+die(const char *format, ...)
+{
+	int saved_errno = errno;	/* capture before anything clobbers it */
+	const char *msg = gettext(format);
+	va_list ap;
+
+	(void) fprintf(stderr, "%s: ", progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, msg, ap);
+	va_end(ap);
+
+	if (strchr(msg, '\n') == NULL)
+		(void) fprintf(stderr, ": %s\n", strerror(saved_errno));
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Prints a localized error message, prefixed with the program name, to
+ * stderr, followed by ": " and the message for the IPMP error code
+ * `ipmperr', and then exits with EXIT_FAILURE.
+ */
+/* PRINTFLIKE2 */
+static void
+die_ipmperr(int ipmperr, const char *format, ...)
+{
+	const char *msg = gettext(format);
+	va_list ap;
+
+	(void) fprintf(stderr, "%s: ", progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, msg, ap);
+	va_end(ap);
+
+	(void) fprintf(stderr, ": %s\n", ipmp_errmsg(ipmperr));
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Output field tables, one per output mode. Each entry gives the field's
+ * name (matched case-insensitively against "-o" arguments and printed as
+ * the column header), its column width, and the function that produces
+ * its value. Each table is terminated by an entry with a NULL name.
+ */
+
+/* Fields describing an address. */
+static ipmpstat_field_t addr_fields[] = {
+ { "ADDRESS", 26, sfunc_addr_address },
+ { "STATE", 7, sfunc_addr_state },
+ { "GROUP", 12, sfunc_addr_group },
+ { "INBOUND", 12, sfunc_addr_inbound },
+ { "OUTBOUND", 23, sfunc_addr_outbound },
+ { NULL, 0, NULL }
+};
+
+/* Fields describing a group. */
+static ipmpstat_field_t group_fields[] = {
+ { "GROUP", 12, sfunc_group_ifname },
+ { "GROUPNAME", 12, sfunc_group_name },
+ { "STATE", 10, sfunc_group_state },
+ { "FDT", 10, sfunc_group_fdt },
+ { "INTERFACES", 30, sfunc_group_interfaces },
+ { NULL, 0, NULL }
+};
+
+/* Fields describing an interface. */
+static ipmpstat_field_t if_fields[] = {
+ { "INTERFACE", 12, sfunc_if_name },
+ { "ACTIVE", 8, sfunc_if_active },
+ { "GROUP", 12, sfunc_if_group },
+ { "FLAGS", 10, sfunc_if_flags },
+ { "LINK", 10, sfunc_if_link },
+ { "PROBE", 10, sfunc_if_probe },
+ { "STATE", 10, sfunc_if_state },
+ { NULL, 0, NULL }
+};
+
+/* Fields describing a probe event. */
+static ipmpstat_field_t probe_fields[] = {
+ { "TIME", 10, sfunc_probe_time },
+ { "INTERFACE", 12, sfunc_probe_ifname },
+ { "PROBE", 7, sfunc_probe_id },
+ { "NETRTT", 10, sfunc_probe_netrtt },
+ { "RTT", 10, sfunc_probe_rtt },
+ { "RTTAVG", 10, sfunc_probe_rttavg },
+ { "TARGET", 20, sfunc_probe_target },
+ { "RTTDEV", 10, sfunc_probe_rttdev },
+ { NULL, 0, NULL }
+};
+
+/* Fields describing an interface's probe targets. */
+static ipmpstat_field_t targ_fields[] = {
+ { "INTERFACE", 12, sfunc_targ_ifname },
+ { "MODE", 10, sfunc_targ_mode },
+ { "TESTADDR", 20, sfunc_targ_testaddr },
+ { "TARGETS", 38, sfunc_targ_targets },
+ { NULL, 0, NULL }
+};
+
+/*
+ * Tables used by enum2str() to map enumeration values to the names shown
+ * in the output. Each table is terminated by an entry with a NULL name.
+ */
+
+/* Names for address states. */
+static ipmpstat_enum_t addr_state[] = {
+ { "up", IPMP_ADDR_UP },
+ { "down", IPMP_ADDR_DOWN },
+ { NULL, 0 }
+};
+
+/* Names for group states. */
+static ipmpstat_enum_t group_state[] = {
+ { "ok", IPMP_GROUP_OK },
+ { "failed", IPMP_GROUP_FAILED },
+ { "degraded", IPMP_GROUP_DEGRADED },
+ { NULL, 0 }
+};
+
+/* Names for interface link states. */
+static ipmpstat_enum_t if_link[] = {
+ { "up", IPMP_LINK_UP },
+ { "down", IPMP_LINK_DOWN },
+ { "unknown", IPMP_LINK_UNKNOWN },
+ { NULL, 0 }
+};
+
+/* Names for interface probe states. */
+static ipmpstat_enum_t if_probe[] = {
+ { "ok", IPMP_PROBE_OK },
+ { "failed", IPMP_PROBE_FAILED },
+ { "unknown", IPMP_PROBE_UNKNOWN },
+ { "disabled", IPMP_PROBE_DISABLED },
+ { NULL, 0 }
+};
+
+/* Names for interface states. */
+static ipmpstat_enum_t if_state[] = {
+ { "ok", IPMP_IF_OK },
+ { "failed", IPMP_IF_FAILED },
+ { "unknown", IPMP_IF_UNKNOWN },
+ { "offline", IPMP_IF_OFFLINE },
+ { NULL, 0 }
+};
+
+/* Names for probe target modes. */
+static ipmpstat_enum_t targ_mode[] = {
+ { "disabled", IPMP_TARG_DISABLED },
+ { "routes", IPMP_TARG_ROUTES },
+ { "multicast", IPMP_TARG_MULTICAST },
+ { NULL, 0 }
+};
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl
new file mode 100644
index 0000000000..e2398aaf64
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl
@@ -0,0 +1,106 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+msgid " "
+msgid "%-*s"
+msgid "%.2fms"
+msgid "%.2fs"
+msgid "%d-%s"
+msgid "%s"
+msgid "%s "
+msgid "%s: "
+msgid "%u"
+msgid "("
+msgid ")"
+msgid ","
+msgid "--"
+msgid ": %s\n"
+msgid "?"
+msgid "["
+msgid "]"
+msgid "<%d>"
+msgid "\n"
+msgid "ACTIVE"
+msgid "ADDRESS"
+msgid "EC_ipmp"
+msgid "ESC_ipmp_probe_state"
+msgid "FDT"
+msgid "FLAGS"
+msgid "GROUP"
+msgid "GROUPNAME"
+msgid "INBOUND"
+msgid "INTERFACE"
+msgid "INTERFACES"
+msgid "IPMP_IF_NAME"
+msgid "IPMP_PROBE_ACKPROC_TIME"
+msgid "IPMP_PROBE_ACKRECV_TIME"
+msgid "IPMP_PROBE_ID"
+msgid "IPMP_PROBE_SENT_TIME"
+msgid "IPMP_PROBE_START_TIME"
+msgid "IPMP_PROBE_STATE"
+msgid "IPMP_PROBE_TARGET"
+msgid "IPMP_PROBE_TARGET_RTTAVG"
+msgid "IPMP_PROBE_TARGET_RTTDEV"
+msgid "LINK"
+msgid "MODE"
+msgid "NETRTT"
+msgid "OUTBOUND"
+msgid "PROBE"
+msgid "RTT"
+msgid "RTTAVG"
+msgid "RTTDEV"
+msgid "STATE"
+msgid "TARGET"
+msgid "TARGETS"
+msgid "TESTADDR"
+msgid "TIME"
+msgid "agipt"
+msgid "all"
+msgid "bufsize > IPMPSTAT_NUM_FLAGS"
+msgid "com.sun:ipmp:events"
+msgid "degraded"
+msgid "disabled"
+msgid "down"
+msgid "failed"
+msgid "ipmp_event_version"
+msgid "ipmp_if_name"
+msgid "ipmp_probe_ackproc_time"
+msgid "ipmp_probe_ackrecv_time"
+msgid "ipmp_probe_id"
+msgid "ipmp_probe_sent_time"
+msgid "ipmp_probe_start_time"
+msgid "ipmp_probe_state"
+msgid "ipmp_probe_target"
+msgid "ipmp_probe_target_rttavg"
+msgid "ipmp_probe_target_rttdev"
+msgid "ipmpstat.c"
+msgid "multicast"
+msgid "nLPo:agipt"
+msgid "no"
+msgid "offline"
+msgid "ok"
+msgid "routes"
+msgid "sig == SIGWINCH"
+msgid "unknown"
+msgid "up"
+msgid "yes"
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types b/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types
index bb15199492..e42bc626d8 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types
@@ -1,13 +1,12 @@
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -23,15 +22,12 @@
# CDDL HEADER END
#
-#pragma ident "%Z%%M% %I% %E% SMI"
-
fmt_version 1.0
mod_version 1.0
#PERM_CLASS default
filter name string
-filter if_groupname string
filter user user
filter projid int32
filter if_name ifname
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c b/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c
index 17891ffc78..2a4ff60d57 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c
@@ -18,7 +18,7 @@
*
* CDDL HEADER END
*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,8 +37,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <strings.h>
#include <errno.h>
@@ -243,7 +241,7 @@ main(int argc, char *argv[])
ushort_t udp_src_port6; /* used to identify replies */
uint_t flowinfo = 0;
uint_t class = 0;
- char tmp_buf[INET6_ADDRSTRLEN];
+ char abuf[INET6_ADDRSTRLEN];
int c;
int i;
boolean_t has_sys_ip_config;
@@ -671,24 +669,18 @@ main(int argc, char *argv[])
Printf("PING %s: %d data bytes\n", targethost, datalen);
} else {
if (ai_dst->ai_family == AF_INET) {
- Printf("PING %s (%s): %d data bytes\n",
- targethost,
- inet_ntop(AF_INET,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in *)
- ai_dst->ai_addr)->sin_addr,
- tmp_buf, sizeof (tmp_buf)),
- datalen);
+ (void) inet_ntop(AF_INET,
+ &((struct sockaddr_in *)(void *)
+ ai_dst->ai_addr)->sin_addr,
+ abuf, sizeof (abuf));
} else {
- Printf("PING %s (%s): %d data bytes\n",
- targethost,
- inet_ntop(AF_INET6,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in6 *)
- ai_dst->ai_addr)->sin6_addr,
- tmp_buf, sizeof (tmp_buf)),
- datalen);
+ (void) inet_ntop(AF_INET6,
+ &((struct sockaddr_in6 *)(void *)
+ ai_dst->ai_addr)->sin6_addr,
+ abuf, sizeof (abuf));
}
+ Printf("PING %s (%s): %d data bytes\n",
+ targethost, abuf, datalen);
}
}
@@ -1074,12 +1066,12 @@ select_all_src_addrs(union any_in_addr **src_addr_list, struct addrinfo *ai,
int num_dst = 1;
int i;
- if (probe_all)
- for (aip = ai; aip->ai_next != NULL;
- aip = aip->ai_next, num_dst++);
+ if (probe_all) {
+ for (aip = ai; aip->ai_next != NULL; aip = aip->ai_next)
+ num_dst++;
+ }
- list = (union any_in_addr *)
- calloc((size_t)num_dst, sizeof (union any_in_addr));
+ list = calloc((size_t)num_dst, sizeof (union any_in_addr));
if (list == NULL) {
Fprintf(stderr, "%s: calloc: %s\n", progname, strerror(errno));
exit(EXIT_FAILURE);
@@ -1472,7 +1464,7 @@ setup_socket(int family, int *send_sockp, int *recv_sockp, int *if_index,
int i;
/* pull out the interface list */
- num_ifs = ifaddrlist(&al, family, errbuf);
+ num_ifs = ifaddrlist(&al, family, LIFC_UNDER_IPMP, errbuf);
if (num_ifs == -1) {
Fprintf(stderr, "%s: %s\n", progname, errbuf);
exit(EXIT_FAILURE);
@@ -1699,8 +1691,8 @@ send_scheduled_probe()
} else {
Printf("no answer from %s(%s)\n", targethost,
inet_ntop(current_targetaddr->family,
- &current_targetaddr->dst_addr,
- tmp_buf, sizeof (tmp_buf)));
+ &current_targetaddr->dst_addr,
+ tmp_buf, sizeof (tmp_buf)));
}
}
/*
@@ -1736,9 +1728,8 @@ send_scheduled_probe()
* Each time we move to a new targetaddr, which has
* a different target IP address, we update this field.
*/
- current_targetaddr->starting_seq_num =
- use_udp ? dest_port :
- (ntransmitted % (MAX_ICMP_SEQ + 1));
+ current_targetaddr->starting_seq_num = use_udp ?
+ dest_port : (ntransmitted % (MAX_ICMP_SEQ + 1));
}
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
index f062247997..e5b23fa126 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -139,7 +139,7 @@ check_device(dlpi_handle_t *dhp, char **devicep)
if (ioctl(s, SIOCGIFFLAGS, (char *)ifr) < 0)
pr_err("ioctl SIOCGIFFLAGS");
if ((ifr->ifr_flags &
- (IFF_VIRTUAL|IFF_LOOPBACK|IFF_UP|
+ (IFF_VIRTUAL|IFF_IPMP|IFF_UP|
IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
break;
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c b/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c
index adc6a932b0..cae75df60d 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -27,8 +27,6 @@
* @(#)$Header: traceroute.c,v 1.49 97/06/13 02:30:23 leres Exp $ (LBL)
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/file.h>
#include <sys/ioctl.h>
@@ -707,7 +705,7 @@ get_hostinfo(char *host, int family, struct addrinfo **aipp)
struct addrinfo hints, *ai;
struct in6_addr addr6;
struct in_addr addr;
- char temp_buf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
+ char abuf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
int rc;
/*
@@ -720,11 +718,10 @@ get_hostinfo(char *host, int family, struct addrinfo **aipp)
IN6_V4MAPPED_TO_INADDR(&addr6, &addr);
/* convert it back to a string */
- (void) inet_ntop(AF_INET, (void *)&addr, temp_buf,
- sizeof (temp_buf));
+ (void) inet_ntop(AF_INET, &addr, abuf, sizeof (abuf));
/* now the host is an IPv4 address */
- (void) strcpy(host, temp_buf);
+ (void) strcpy(host, abuf);
/*
* If it's a mapped address, we convert it into IPv4
@@ -826,15 +823,19 @@ set_src_addr(struct pr_set *pr, struct ifaddrlist **alp)
struct sockaddr_in6 *sin6_from = (struct sockaddr_in6 *)pr->from;
struct addrinfo *aip;
char errbuf[ERRBUFSIZE];
- char temp_buf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
+ char abuf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
int num_ifs; /* all the interfaces */
int num_src_ifs; /* exclude loopback and down */
int i;
+ uint_t ifaddrflags = 0;
source = source_input;
+ if (device != NULL)
+ ifaddrflags |= LIFC_UNDER_IPMP;
+
/* get the interface address list */
- num_ifs = ifaddrlist(&al, pr->family, errbuf);
+ num_ifs = ifaddrlist(&al, pr->family, ifaddrflags, errbuf);
if (num_ifs < 0) {
Fprintf(stderr, "%s: ifaddrlist: %s\n", prog, errbuf);
exit(EXIT_FAILURE);
@@ -881,26 +882,20 @@ set_src_addr(struct pr_set *pr, struct ifaddrlist **alp)
if (pr->family == AF_INET)
ap = (union any_in_addr *)
/* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in *)
- aip->ai_addr)->sin_addr;
+ &((struct sockaddr_in *)aip->ai_addr)->sin_addr;
else
ap = (union any_in_addr *)
/* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in6 *)
- aip->ai_addr)->sin6_addr;
+ &((struct sockaddr_in6 *)aip->ai_addr)->sin6_addr;
/*
* LBNL bug fixed: used to accept any src address
*/
tmp2_al = find_ifaddr(al, num_ifs, ap, pr->family);
-
if (tmp2_al == NULL) {
- Fprintf(stderr,
- "%s: %s is not a local %s address\n",
- prog, inet_ntop(pr->family, ap,
- temp_buf, sizeof (temp_buf)),
- pr->name);
-
+ (void) inet_ntop(pr->family, ap, abuf, sizeof (abuf));
+ Fprintf(stderr, "%s: %s is not a local %s address\n",
+ prog, abuf, pr->name);
free(al);
freeaddrinfo(aip);
return (0);
@@ -928,13 +923,11 @@ set_src_addr(struct pr_set *pr, struct ifaddrlist **alp)
set_sin(pr->from, ap, pr->family);
if (aip->ai_next != NULL) {
- Fprintf(stderr,
- "%s: Warning: %s has multiple "
- "addresses; using %s\n",
- prog, source,
- inet_ntop(pr->family,
- (const void *)pr->from_sin_addr,
- temp_buf, sizeof (temp_buf)));
+ (void) inet_ntop(pr->family, pr->from_sin_addr,
+ abuf, sizeof (abuf));
+ Fprintf(stderr, "%s: Warning: %s has multiple "
+ "addresses; using %s\n", prog, source,
+ abuf);
}
} else { /* -i and -s used */
/*
@@ -1484,7 +1477,7 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
uchar_t code; /* icmp code */
int reply;
int seq = 0;
- char temp_buf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
+ char abuf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
int longjmp_return; /* return value from longjump */
struct ip *ip = (struct ip *)packet;
boolean_t got_there = _B_FALSE; /* we hit the destination */
@@ -1535,13 +1528,11 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
if (dev_name == NULL)
dev_name = "?";
+ (void) inet_ntop(pr->family, pr->from_sin_addr, abuf,
+ sizeof (abuf));
Fprintf(stderr,
"%s: Warning: Multiple interfaces found;"
- " using %s @ %s\n",
- prog, inet_ntop(pr->family,
- (const void *)pr->from_sin_addr,
- temp_buf, sizeof (temp_buf)),
- dev_name);
+ " using %s @ %s\n", prog, abuf, dev_name);
}
}
@@ -1558,8 +1549,7 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
Fprintf(stderr, "%s to %s", prog, hostname);
} else {
Fprintf(stderr, "%s to %s (%s)", prog, hostname,
- inet_ntop(pr->family, (const void *)ip_addr, temp_buf,
- sizeof (temp_buf)));
+ inet_ntop(pr->family, ip_addr, abuf, sizeof (abuf)));
}
if (source)
@@ -1700,9 +1690,8 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
}
if (pr->family == AF_INET6) {
- intp =
- (int *)find_ancillary_data(&in_msg,
- IPPROTO_IPV6, IPV6_HOPLIMIT);
+ intp = find_ancillary_data(&in_msg,
+ IPPROTO_IPV6, IPV6_HOPLIMIT);
if (intp == NULL) {
Fprintf(stderr,
"%s: can't find "
@@ -2188,10 +2177,11 @@ static void
usage(void)
{
Fprintf(stderr, "Usage: %s [-adFIlnSvx] [-A address_family] "
-"[-c traffic_class] \n"
-"\t[-f first_hop] [-g gateway [-g gateway ...]| -r] [-i iface]\n"
-"\t[-L flow_label] [-m max_hop] [-P pause_sec] [-p port] [-Q max_timeout]\n"
-"\t[-q nqueries] [-s src_addr] [-t tos] [-w wait_time] host [packetlen]\n",
- prog);
+ "[-c traffic_class]\n"
+ "\t[-f first_hop] [-g gateway [-g gateway ...]| -r] [-i iface]\n"
+ "\t[-L flow_label] [-m max_hop] [-P pause_sec] [-p port] "
+ "[-Q max_timeout]\n"
+ "\t[-q nqueries] [-s src_addr] [-t tos] [-w wait_time] host "
+ "[packetlen]\n", prog);
exit(EXIT_FAILURE);
}
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index c72be6be37..44756c3e98 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -104,7 +104,7 @@ static devfsadm_create_t misc_cbt[] = {
"(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|"
"(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|"
"(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|"
- "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)",
+ "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)",
TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
},
{ "pseudo", "ddi_pseudo",
diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c b/usr/src/cmd/mdb/common/modules/ip/ip.c
index f2dadd5261..f064b58d83 100644
--- a/usr/src/cmd/mdb/common/modules/ip/ip.c
+++ b/usr/src/cmd/mdb/common/modules/ip/ip.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stropts.h>
#include <sys/stream.h>
@@ -524,8 +522,7 @@ ire_format(uintptr_t addr, const void *ire_arg, void *ire_cb_arg)
static const mdb_bitmask_t mmasks[] = {
{ "CONDEMNED", IRE_MARK_CONDEMNED, IRE_MARK_CONDEMNED },
- { "NORECV", IRE_MARK_NORECV, IRE_MARK_NORECV },
- { "HIDDEN", IRE_MARK_HIDDEN, IRE_MARK_HIDDEN },
+ { "TESTHIDDEN", IRE_MARK_TESTHIDDEN, IRE_MARK_TESTHIDDEN },
{ "NOADD", IRE_MARK_NOADD, IRE_MARK_NOADD },
{ "TEMPORARY", IRE_MARK_TEMPORARY, IRE_MARK_TEMPORARY },
{ "USESRC", IRE_MARK_USESRC_CHECK, IRE_MARK_USESRC_CHECK },
diff --git a/usr/src/cmd/rcm_daemon/Makefile.com b/usr/src/cmd/rcm_daemon/Makefile.com
index 365371c45c..dbe3c1f1d1 100644
--- a/usr/src/cmd/rcm_daemon/Makefile.com
+++ b/usr/src/cmd/rcm_daemon/Makefile.com
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -124,7 +124,7 @@ SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_aggr_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
-SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil -ldladm
+SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil -ldladm -lipmp
SUNW_ip_anon_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil
LDLIBS += -lgen -lelf -lrcm -lnvpair -ldevinfo -lnsl -lsocket
diff --git a/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c
index be9a31f952..6e1fe1bf39 100644
--- a/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c
+++ b/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* RCM module to prevent plumbed IP addresses from being removed.
*/
@@ -177,7 +175,7 @@ ip_anon_register(rcm_handle_t *hdl)
if (_cladm(CL_INITIALIZE, CL_GET_BOOTFLAG, &bootflags) != 0) {
rcm_log_message(RCM_ERROR,
- gettext("unable to check cluster status\n"));
+ gettext("unable to check cluster status\n"));
(void) mutex_unlock(&ip_list_lock);
return (RCM_FAILURE);
}
@@ -199,7 +197,7 @@ ip_anon_register(rcm_handle_t *hdl)
else {
if ((exclude_addrs.cladm_netaddrs_array =
malloc(sizeof (cladm_netaddr_entry_t) *
- (num_exclude_addrs))) == NULL) {
+ (num_exclude_addrs))) == NULL) {
rcm_log_message(RCM_ERROR,
gettext("out of memory\n"));
(void) mutex_unlock(&ip_list_lock);
@@ -274,7 +272,7 @@ ip_anon_register(rcm_handle_t *hdl)
rcm_log_message(RCM_DEBUG,
"ip_anon: obtaining list of IPv4 addresses.\n");
- num_ifs = ifaddrlist(&al, AF_INET, errbuf);
+ num_ifs = ifaddrlist(&al, AF_INET, LIFC_UNDER_IPMP, errbuf);
if (num_ifs == -1) {
rcm_log_message(RCM_ERROR,
gettext("cannot get IPv4 address list errno=%d (%s)\n"),
@@ -286,7 +284,7 @@ ip_anon_register(rcm_handle_t *hdl)
rcm_log_message(RCM_DEBUG,
"ip_anon: obtaining list of IPv6 addresses.\n");
- num_ifs6 = ifaddrlist(&al6, AF_INET6, errbuf);
+ num_ifs6 = ifaddrlist(&al6, AF_INET6, LIFC_UNDER_IPMP, errbuf);
if (num_ifs6 == -1) {
rcm_log_message(RCM_ERROR,
gettext("cannot get IPv6 address list errno=%d (%s)\n"),
@@ -392,7 +390,7 @@ ip_anon_register(rcm_handle_t *hdl)
* currently know about it.
*/
if (!(tentry->flags & IP_FLAG_CL) &&
- !(tentry->flags & IP_FLAG_REG)) {
+ !(tentry->flags & IP_FLAG_REG)) {
tentry->flags |= IP_FLAG_REG;
rcm_log_message(RCM_DEBUG,
"ip_anon: registering interest in %s\n",
diff --git a/usr/src/cmd/rcm_daemon/common/ip_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_rcm.c
index f62b3dfc19..24be0cafeb 100644
--- a/usr/src/cmd/rcm_daemon/common/ip_rcm.c
+++ b/usr/src/cmd/rcm_daemon/common/ip_rcm.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,23 +38,22 @@
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <netinet/in.h>
-#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <stropts.h>
#include <strings.h>
-#include <libdevinfo.h>
-#include <sys/systeminfo.h>
-#include <netdb.h>
+#include <sys/sysmacros.h>
#include <inet/ip.h>
#include <libinetutil.h>
#include <libdllink.h>
+#include <libgen.h>
+#include <ipmp_admin.h>
-#include <ipmp_mpathd.h>
#include "rcm_module.h"
/*
@@ -75,42 +74,19 @@
#define RCM_LINK_RESOURCE_MAX (13 + LINKID_STR_WIDTH)
#define RCM_STR_SUNW_IP "SUNW_ip/" /* IP address export prefix */
-#define RCM_SIZE_SUNW_IP 9 /* strlen("SUNW_ip/") + 1 */
-/* ifconfig(1M) */
-#define USR_SBIN_IFCONFIG "/usr/sbin/ifconfig" /* ifconfig command */
-#define CFGFILE_FMT_IPV4 "/etc/hostname." /* IPV4 config file */
-#define CFGFILE_FMT_IPV6 "/etc/hostname6." /* IPV6 config file */
+#define SBIN_IFCONFIG "/sbin/ifconfig" /* ifconfig command */
+#define SBIN_IFPARSE "/sbin/ifparse" /* ifparse command */
+#define DHCPFILE_FMT "/etc/dhcp.%s" /* DHCP config file */
+#define CFGFILE_FMT_IPV4 "/etc/hostname.%s" /* IPV4 config file */
+#define CFGFILE_FMT_IPV6 "/etc/hostname6.%s" /* IPV6 config file */
#define CFG_CMDS_STD " netmask + broadcast + up" /* Normal config string */
-#define CONFIG_AF_INET 0x1 /* Post-configure IPv4 */
-#define CONFIG_AF_INET6 0x2 /* Post-configure IPv6 */
-#define MAXLINE 1024 /* Max. line length */
-#define MAXARGS 512 /* Max. args in ifconfig cmd */
-
-/* Physical interface flags mask */
-#define RCM_PIF_FLAGS (IFF_OFFLINE | IFF_INACTIVE | IFF_FAILED | \
- IFF_STANDBY)
+#define CFG_DHCP_CMD "dhcp wait 0" /* command to start DHCP */
/* Some useful macros */
-#ifndef MAX
-#define MAX(a, b) (((a) > (b))?(a):(b))
-#endif /* MAX */
-
-#ifndef ISSPACE
#define ISSPACE(c) ((c) == ' ' || (c) == '\t')
-#endif
-
-#ifndef ISEOL
#define ISEOL(c) ((c) == '\n' || (c) == '\r' || (c) == '\0')
-#endif
-
-#ifndef STREQ
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
-#endif
-
-#ifndef ADDSPACE
-#define ADDSPACE(a) ((void) strcat((a), " "))
-#endif
/* Interface Cache state flags */
#define CACHE_IF_STALE 0x1 /* stale cached data */
@@ -125,48 +101,20 @@
/* RCM IPMP Module specific property definitions */
#define RCM_IPMP_MIN_REDUNDANCY 1 /* default min. redundancy */
-/* in.mpathd(1M) specifics */
-#define MPATHD_MAX_RETRIES 5 /* Max. offline retries */
-
/* Stream module operations */
#define MOD_INSERT 0 /* Insert a mid-stream module */
#define MOD_REMOVE 1 /* Remove a mid-stream module */
#define MOD_CHECK 2 /* Check mid-stream module safety */
/*
- * in.mpathd(1M) message passing formats
- */
-typedef struct mpathd_cmd {
- uint32_t cmd_command; /* message command */
- char cmd_ifname[LIFNAMSIZ]; /* this interface name */
- char cmd_movetoif[LIFNAMSIZ]; /* move to interface */
- uint32_t cmd_min_red; /* min. redundancy */
-/* Message passing values for MI_SETOINDEX */
-#define from_lifname cmd_ifname /* current logical interface */
-#define to_pifname cmd_movetoif /* new physical interface */
-#define addr_family cmd_min_red /* address family */
-} mpathd_cmd_t;
-
-/* This is needed since mpathd checks message size for offline */
-typedef struct mpathd_unoffline {
- uint32_t cmd_command; /* offline / undo offline */
- char cmd_ifname[LIFNAMSIZ]; /* this interface name */
-} mpathd_unoffline_t;
-
-typedef struct mpathd_response {
- uint32_t resp_sys_errno; /* system errno */
- uint32_t resp_mpathd_err; /* mpathd error information */
-} mpathd_response_t;
-
-/*
* IP module data types
*/
/* Physical interface representation */
typedef struct ip_pif {
- char pi_ifname[LIFNAMSIZ+1]; /* interface name */
- char pi_grpname[LIFNAMSIZ+1]; /* IPMP group name */
- struct ip_lif *pi_lifs; /* ptr to logical interfaces */
+ char pi_ifname[LIFNAMSIZ]; /* interface name */
+ char pi_grname[LIFGRNAMSIZ]; /* IPMP group name */
+ struct ip_lif *pi_lifs; /* ptr to logical interfaces */
} ip_pif_t;
/* Logical interface representation */
@@ -239,7 +187,7 @@ static void free_node(ip_cache_t *);
static void cache_insert(ip_cache_t *);
static char *ip_usage(ip_cache_t *);
static int update_pif(rcm_handle_t *, int, int, struct lifreq *);
-static int ip_ipmp_offline(ip_cache_t *, ip_cache_t *);
+static int ip_ipmp_offline(ip_cache_t *);
static int ip_ipmp_undo_offline(ip_cache_t *);
static int if_cfginfo(ip_cache_t *, uint_t);
static int if_unplumb(ip_cache_t *);
@@ -247,9 +195,6 @@ static int if_replumb(ip_cache_t *);
static void ip_log_err(ip_cache_t *, char **, char *);
static char *get_link_resource(const char *);
static void clr_cfg_state(ip_pif_t *);
-static uint64_t if_get_flags(ip_pif_t *);
-static int mpathd_send_cmd(mpathd_cmd_t *);
-static int connect_to_mpathd(int);
static int modop(char *, char *, int, char);
static int get_modlist(char *, ip_lif_t *);
static int ip_domux2fd(int *, int *, int *, struct lifreq *);
@@ -262,15 +207,13 @@ static char **ip_get_addrlist(ip_cache_t *);
static void ip_free_addrlist(char **);
static void ip_consumer_notify(rcm_handle_t *, datalink_id_t, char **,
uint_t, rcm_info_t **);
+static boolean_t ip_addrstr(ip_lif_t *, char *, size_t);
static int if_configure(datalink_id_t);
-static int isgrouped(char *);
-static int if_ipmp_config(char *, int, int);
-static int if_mpathd_configure(char *, char *, int, int);
-static char *get_mpathd_dest(char *, int);
-static int if_getcount(int);
-static void tokenize(char *, char **, char *, int *);
-
+static boolean_t isgrouped(const char *);
+static int if_config_inst(const char *, FILE *, int, boolean_t);
+static uint_t ntok(const char *cp);
+static boolean_t ifconfig(const char *, const char *, const char *, boolean_t);
/* Module-Private data */
static struct rcm_mod_ops ip_ops =
@@ -429,9 +372,9 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
{
ip_cache_t *node;
ip_pif_t *pif;
- int detachable = 0;
- int nofailover = 0;
- int ipmp = 0;
+ boolean_t detachable = B_FALSE;
+ boolean_t ipmp;
+ int retval;
rcm_log_message(RCM_TRACE1, "IP: offline(%s)\n", rsrc);
@@ -455,25 +398,17 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
pif = node->ip_pif;
/* Establish default detachability criteria */
- if (flags & RCM_FORCE) {
- detachable++;
- }
+ if (flags & RCM_FORCE)
+ detachable = B_TRUE;
- /* Check if the interface is an IPMP grouped interface */
- if (strcmp(pif->pi_grpname, "")) {
- ipmp++;
- }
-
- if (if_get_flags(pif) & IFF_NOFAILOVER) {
- nofailover++;
- }
+ /* Check if the interface is under IPMP */
+ ipmp = (pif->pi_grname[0] != '\0');
/*
- * Even if the interface is not in an IPMP group, it's possible that
- * it's still okay to offline it as long as there are higher-level
- * failover mechanisms for the addresses it owns (e.g., clustering).
- * In this case, ip_offlinelist() will return RCM_SUCCESS, and we
- * charge on.
+ * Even if the interface is not under IPMP, it's possible that it's
+ * still okay to offline it as long as there are higher-level failover
+ * mechanisms for the addresses it owns (e.g., clustering). In this
+ * case, ip_offlinelist() will return RCM_SUCCESS, and we charge on.
*/
if (!ipmp && !detachable) {
/* Inform consumers of IP addresses being offlined */
@@ -489,17 +424,6 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
}
}
- /*
- * Cannot remove an IPMP interface if IFF_NOFAILOVER is set.
- */
- if (ipmp && nofailover) {
- /* Interface is part of an IPMP group, and cannot failover */
- ip_log_err(node, errorp, "Failover disabled");
- errno = EBUSY;
- (void) mutex_unlock(&cache_lock);
- return (RCM_FAILURE);
- }
-
/* Check if it's a query */
if (flags & RCM_QUERY) {
rcm_log_message(RCM_TRACE1, "IP: offline query success(%s)\n",
@@ -534,38 +458,32 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
}
/*
- * This an IPMP interface that can be failed over.
- * Request in.mpathd(1M) to failover the physical interface.
+ * This is an IPMP interface that can be offlined.
+ * Request in.mpathd(1M) to offline the physical interface.
*/
+ if ((retval = ip_ipmp_offline(node)) != IPMP_SUCCESS)
+ ip_log_err(node, errorp, "in.mpathd offline failed");
- /* Failover to "any", let mpathd determine best failover candidate */
- if (ip_ipmp_offline(node, NULL) < 0) {
- ip_log_err(node, errorp, "in.mpathd failover failed");
+ if (retval == IPMP_EMINRED && !detachable) {
/*
- * Odds are that in.mpathd(1M) could not offline the device
- * because it was the last interface in the group. However,
- * it's possible that it's still okay to offline it as long as
- * there are higher-level failover mechanisms for the
- * addresses it owns (e.g., clustering). In this case,
- * ip_offlinelist() will return RCM_SUCCESS, and we charge on.
- *
- * TODO: change ip_ipmp_offline() to return the actual failure
- * from in.mpathd so that we can verify that it did indeed
- * fail with IPMP_EMINRED.
+ * in.mpathd(1M) could not offline the device because it was
+ * the last interface in the group. However, it's possible
+ * that it's still okay to offline it as long as there are
+ * higher-level failover mechanisms for the addresses it owns
+ * (e.g., clustering). In this case, ip_offlinelist() will
+ * return RCM_SUCCESS, and we charge on.
*/
- if (!detachable) {
- /* Inform consumers of IP addresses being offlined */
- if (ip_offlinelist(hd, node, errorp, flags,
- depend_info) == RCM_SUCCESS) {
- rcm_log_message(RCM_DEBUG,
- "IP: consumers agree on detach");
- } else {
- ip_log_err(node, errorp,
- "Device consumers prohibit offline");
- (void) mutex_unlock(&cache_lock);
- errno = EBUSY;
- return (RCM_FAILURE);
- }
+ /* Inform consumers of IP addresses being offlined */
+ if (ip_offlinelist(hd, node, errorp, flags,
+ depend_info) == RCM_SUCCESS) {
+ rcm_log_message(RCM_DEBUG,
+ "IP: consumers agree on detach");
+ } else {
+ ip_log_err(node, errorp,
+ "Device consumers prohibit offline");
+ (void) mutex_unlock(&cache_lock);
+ errno = EBUSY;
+ return (RCM_FAILURE);
}
}
@@ -574,8 +492,8 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
_("IP: Unplumb failed (%s)\n"),
pif->pi_ifname);
- /* Request mpathd to undo the offline */
- if (ip_ipmp_undo_offline(node) < 0) {
+ /* Request in.mpathd to undo the offline */
+ if (ip_ipmp_undo_offline(node) != IPMP_SUCCESS) {
ip_log_err(node, errorp, "Undo offline failed");
(void) mutex_unlock(&cache_lock);
return (RCM_FAILURE);
@@ -862,18 +780,16 @@ static char *
ip_usage(ip_cache_t *node)
{
ip_lif_t *lif;
- int numifs;
- char *buf;
- char *linkidstr;
+ uint_t numup;
+ char *sep, *buf, *linkidstr;
datalink_id_t linkid;
- const char *fmt;
- char *sep;
+ const char *msg;
char link[MAXLINKNAMELEN];
char addrstr[INET6_ADDRSTRLEN];
char errmsg[DLADM_STRSIZE];
dladm_status_t status;
- int offline = 0;
- size_t bufsz;
+ boolean_t offline, ipmp;
+ size_t bufsz = 0;
rcm_log_message(RCM_TRACE2, "IP: usage(%s)\n", node->ip_resource);
@@ -904,76 +820,53 @@ ip_usage(ip_cache_t *node)
/* TRANSLATION_NOTE: separator used between IP addresses */
sep = _(", ");
- numifs = 0;
- for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) {
- if (lif->li_ifflags & IFF_UP) {
- numifs++;
- }
- }
+ numup = 0;
+ for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next)
+ if (lif->li_ifflags & IFF_UP)
+ numup++;
- if (node->ip_cachestate & CACHE_IF_OFFLINED) {
- offline++;
- }
+ ipmp = (node->ip_pif->pi_grname[0] != '\0');
+ offline = ((node->ip_cachestate & CACHE_IF_OFFLINED) != 0);
- if (!offline && numifs) {
- fmt = _("%1$s hosts IP addresses: ");
- } else if (offline) {
- fmt = _("%1$s offlined");
+ if (offline) {
+ msg = _("offlined");
+ } else if (numup == 0) {
+ msg = _("plumbed but down");
} else {
- fmt = _("%1$s plumbed but down");
+ if (ipmp) {
+ msg = _("providing connectivity for IPMP group ");
+ bufsz += LIFGRNAMSIZ;
+ } else {
+ msg = _("hosts IP addresses: ");
+ bufsz += (numup * (INET6_ADDRSTRLEN + strlen(sep)));
+ }
}
- /* space for addresses and separators, plus message */
- bufsz = ((numifs * (INET6_ADDRSTRLEN + strlen(sep))) +
- strlen(fmt) + strlen(link) + 1);
+ bufsz += strlen(link) + strlen(msg) + 1;
if ((buf = malloc(bufsz)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: usage(%s) malloc failure(%s)\n"),
node->ip_resource, strerror(errno));
return (NULL);
}
- bzero(buf, bufsz);
- (void) sprintf(buf, fmt, link);
-
- if (offline || (numifs == 0)) { /* Nothing else to do */
- rcm_log_message(RCM_TRACE2, "IP: usage (%s) info = %s\n",
- node->ip_resource, buf);
-
- return (buf);
- }
-
- for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) {
+ (void) snprintf(buf, bufsz, "%s: %s", link, msg);
- void *addr;
- int af;
-
- if (!(lif->li_ifflags & IFF_UP)) {
- /* ignore interfaces not up */
- continue;
- }
- af = lif->li_addr.family;
- if (af == AF_INET6) {
- addr = &lif->li_addr.ip6.sin6_addr;
- } else if (af == AF_INET) {
- addr = &lif->li_addr.ip4.sin_addr;
+ if (!offline && numup > 0) {
+ if (ipmp) {
+ (void) strlcat(buf, node->ip_pif->pi_grname, bufsz);
} else {
- rcm_log_message(RCM_DEBUG,
- "IP: unknown addr family %d, assuming AF_INET\n",
- af);
- af = AF_INET;
- addr = &lif->li_addr.ip4.sin_addr;
- }
- if (inet_ntop(af, addr, addrstr, INET6_ADDRSTRLEN) == NULL) {
- rcm_log_message(RCM_ERROR,
- _("IP: inet_ntop: %s\n"), strerror(errno));
- continue;
- }
- rcm_log_message(RCM_DEBUG, "IP addr := %s\n", addrstr);
+ lif = node->ip_pif->pi_lifs;
+ for (; lif != NULL; lif = lif->li_next) {
+ if (!(lif->li_ifflags & IFF_UP))
+ continue;
+
+ if (!ip_addrstr(lif, addrstr, sizeof (addrstr)))
+ continue;
- (void) strcat(buf, addrstr);
- numifs--;
- if (numifs > 0) {
- (void) strcat(buf, ", ");
+ (void) strlcat(buf, addrstr, bufsz);
+ if (--numup > 0)
+ (void) strlcat(buf, sep, bufsz);
+ }
}
}
@@ -983,6 +876,32 @@ ip_usage(ip_cache_t *node)
return (buf);
}
+static boolean_t
+ip_addrstr(ip_lif_t *lif, char *addrstr, size_t addrsize)
+{
+ int af = lif->li_addr.family;
+ void *addr;
+
+ if (af == AF_INET6) {
+ addr = &lif->li_addr.ip6.sin6_addr;
+ } else if (af == AF_INET) {
+ addr = &lif->li_addr.ip4.sin_addr;
+ } else {
+ rcm_log_message(RCM_DEBUG,
+ "IP: unknown addr family %d, assuming AF_INET\n", af);
+ af = AF_INET;
+ addr = &lif->li_addr.ip4.sin_addr;
+ }
+ if (inet_ntop(af, addr, addrstr, addrsize) == NULL) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: inet_ntop: %s\n"), strerror(errno));
+ return (B_FALSE);
+ }
+
+ rcm_log_message(RCM_DEBUG, "IP addr := %s\n", addrstr);
+ return (B_TRUE);
+}
+
/*
* Cache management routines, all cache management functions should be
* be called with cache_lock held.
@@ -1121,11 +1040,13 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
ifnumber = ifspec.ifsp_lun;
/* Get the interface flags */
- (void) strcpy(lifreq.lifr_name, lifr->lifr_name);
+ (void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
if (ioctl(sock, SIOCGLIFFLAGS, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFFLAGS(%s): %s\n"),
- pif.pi_ifname, strerror(errno));
+ if (errno != ENXIO) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: SIOCGLIFFLAGS(%s): %s\n"),
+ lifreq.lifr_name, strerror(errno));
+ }
return (-1);
}
(void) memcpy(&ifflags, &lifreq.lifr_flags, sizeof (ifflags));
@@ -1135,12 +1056,13 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
* - IFF_VIRTUAL: e.g., loopback and vni
* - IFF_POINTOPOINT: e.g., sppp and ip.tun
* - !IFF_MULTICAST: e.g., ip.6to4tun
+ * - IFF_IPMP: IPMP meta-interfaces
*
* Note: The !IFF_MULTICAST check can be removed once iptun is
* implemented as a datalink.
*/
if (!(ifflags & IFF_MULTICAST) ||
- (ifflags & (IFF_POINTOPOINT | IFF_VIRTUAL))) {
+ (ifflags & (IFF_POINTOPOINT | IFF_VIRTUAL | IFF_IPMP))) {
rcm_log_message(RCM_TRACE3, "IP: if ignored (%s)\n",
pif.pi_ifname);
return (0);
@@ -1148,23 +1070,26 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
/* Get the interface group name for this interface */
if (ioctl(sock, SIOCGLIFGROUPNAME, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFGROUPNAME(%s): %s\n"),
- lifreq.lifr_name, strerror(errno));
+ if (errno != ENXIO) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: SIOCGLIFGROUPNAME(%s): %s\n"),
+ lifreq.lifr_name, strerror(errno));
+ }
return (-1);
}
/* copy the group name */
- (void) memcpy(&pif.pi_grpname, &lifreq.lifr_groupname,
- sizeof (pif.pi_grpname));
- pif.pi_grpname[sizeof (pif.pi_grpname) - 1] = '\0';
+ (void) strlcpy(pif.pi_grname, lifreq.lifr_groupname,
+ sizeof (pif.pi_grname));
/* Get the interface address for this interface */
if (ioctl(sock, SIOCGLIFADDR, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFADDR(%s): %s\n"),
- lifreq.lifr_name, strerror(errno));
- return (-1);
+ if (errno != ENXIO) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: SIOCGLIFADDR(%s): %s\n"),
+ lifreq.lifr_name, strerror(errno));
+ return (-1);
+ }
}
(void) memcpy(&ifaddr, &lifreq.lifr_addr, sizeof (ifaddr));
@@ -1241,9 +1166,9 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
sizeof (pif.pi_ifname));
}
- /* save pif properties */
- (void) memcpy(&probepif->pi_grpname, &pif.pi_grpname,
- sizeof (pif.pi_grpname));
+ /* save the group name */
+ (void) strlcpy(probepif->pi_grname, pif.pi_grname,
+ sizeof (pif.pi_grname));
/* add lif, if this is a lif and it is not in cache */
if (!lif_listed) {
@@ -1304,7 +1229,7 @@ update_ipifs(rcm_handle_t *hd, int af)
}
lifn.lifn_family = af;
- lifn.lifn_flags = 0;
+ lifn.lifn_flags = LIFC_UNDER_IPMP;
if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) {
rcm_log_message(RCM_ERROR,
_("IP: SIOCLGIFNUM failed: %s\n"),
@@ -1321,7 +1246,7 @@ update_ipifs(rcm_handle_t *hd, int af)
}
lifc.lifc_family = af;
- lifc.lifc_flags = 0;
+ lifc.lifc_flags = LIFC_UNDER_IPMP;
lifc.lifc_len = sizeof (struct lifreq) * lifn.lifn_count;
lifc.lifc_buf = buf;
@@ -1480,39 +1405,33 @@ static void
ip_log_err(ip_cache_t *node, char **errorp, char *errmsg)
{
char *ifname = NULL;
- int len;
+ int size;
const char *errfmt;
- char *error;
+ char *error = NULL;
if ((node != NULL) && (node->ip_pif != NULL) &&
(node->ip_pif->pi_ifname != NULL)) {
ifname = node->ip_pif->pi_ifname;
}
- if (errorp != NULL)
- *errorp = NULL;
-
if (ifname == NULL) {
rcm_log_message(RCM_ERROR, _("IP: %s\n"), errmsg);
errfmt = _("IP: %s");
- len = strlen(errfmt) + strlen(errmsg) + 1;
- if (error = (char *)calloc(1, len)) {
- (void) sprintf(error, errfmt, errmsg);
- }
+ size = strlen(errfmt) + strlen(errmsg) + 1;
+ if (errorp != NULL && (error = malloc(size)) != NULL)
+ (void) snprintf(error, size, errfmt, errmsg);
} else {
rcm_log_message(RCM_ERROR, _("IP: %s(%s)\n"), errmsg, ifname);
errfmt = _("IP: %s(%s)");
- len = strlen(errfmt) + strlen(errmsg) + strlen(ifname) + 1;
- if (error = (char *)calloc(1, len)) {
- (void) sprintf(error, errfmt, errmsg, ifname);
- }
+ size = strlen(errfmt) + strlen(errmsg) + strlen(ifname) + 1;
+ if (errorp != NULL && (error = malloc(size)) != NULL)
+ (void) snprintf(error, size, errfmt, errmsg, ifname);
}
if (errorp != NULL)
*errorp = error;
}
-
/*
* if_cfginfo() - Save off the config info for all interfaces
*/
@@ -1538,7 +1457,7 @@ if_cfginfo(ip_cache_t *node, uint_t force)
rcm_log_message(RCM_ERROR,
_("IP: get modlist error (%s) %s\n"),
pif->pi_ifname, strerror(errno));
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
@@ -1551,7 +1470,7 @@ if_cfginfo(ip_cache_t *node, uint_t force)
rcm_log_message(RCM_ERROR,
_("IP: module %s@%d\n"),
lif->li_modules[i], i);
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
}
@@ -1595,11 +1514,11 @@ if_cfginfo(ip_cache_t *node, uint_t force)
/* Save reconfiguration information */
if (lif->li_ifflags & IFF_IPV4) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s:%d configinfo\n", USR_SBIN_IFCONFIG,
+ "%s %s:%d configinfo\n", SBIN_IFCONFIG,
pif->pi_ifname, lif->li_ifnum);
} else if (lif->li_ifflags & IFF_IPV6) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s:%d inet6 configinfo\n", USR_SBIN_IFCONFIG,
+ "%s %s:%d inet6 configinfo\n", SBIN_IFCONFIG,
pif->pi_ifname, lif->li_ifnum);
}
rcm_log_message(RCM_TRACE2, "IP: %s\n", syscmd);
@@ -1609,7 +1528,7 @@ if_cfginfo(ip_cache_t *node, uint_t force)
rcm_log_message(RCM_ERROR,
_("IP: ifconfig configinfo error (%s:%d) %s\n"),
pif->pi_ifname, lif->li_ifnum, strerror(errno));
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
bzero(buf, MAX_RECONFIG_SIZE);
@@ -1619,20 +1538,18 @@ if_cfginfo(ip_cache_t *node, uint_t force)
_("IP: ifconfig configinfo error (%s:%d) %s\n"),
pif->pi_ifname, lif->li_ifnum, strerror(errno));
(void) pclose(fp);
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
(void) pclose(fp);
- lif->li_reconfig = malloc(strlen(buf)+1);
- if (lif->li_reconfig == NULL) {
+ if ((lif->li_reconfig = strdup(buf)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: malloc error (%s) %s\n"),
pif->pi_ifname, strerror(errno));
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
- (void) strcpy(lif->li_reconfig, buf);
rcm_log_message(RCM_DEBUG,
"IP: if_cfginfo: reconfig string(%s:%d) = %s\n",
pif->pi_ifname, lif->li_ifnum, lif->li_reconfig);
@@ -1654,57 +1571,37 @@ static int
if_unplumb(ip_cache_t *node)
{
ip_lif_t *lif;
- ip_pif_t *pif;
- int ipv4 = 0, ipv6 = 0;
- char syscmd[MAX_RECONFIG_SIZE + LIFNAMSIZ];
+ ip_pif_t *pif = node->ip_pif;
+ boolean_t ipv4 = B_FALSE;
+ boolean_t ipv6 = B_FALSE;
rcm_log_message(RCM_TRACE2, "IP: if_unplumb(%s)\n", node->ip_resource);
- pif = node->ip_pif;
- lif = pif->pi_lifs;
-
- while (lif != NULL) {
+ for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
if (lif->li_ifflags & IFF_IPV4) {
- ipv4++;
+ ipv4 = B_TRUE;
} else if (lif->li_ifflags & IFF_IPV6) {
- ipv6++;
+ ipv6 = B_TRUE;
} else {
/* Unlikely case */
rcm_log_message(RCM_DEBUG,
"IP: Unplumb ignored (%s:%d)\n",
pif->pi_ifname, lif->li_ifnum);
- lif = lif->li_next;
- continue;
}
- lif = lif->li_next;
}
- /* Unplumb the physical interface */
- if (ipv4) {
- rcm_log_message(RCM_TRACE2,
- "IP: if_unplumb: ifconfig %s unplumb\n", pif->pi_ifname);
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s unplumb\n",
- USR_SBIN_IFCONFIG, pif->pi_ifname);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot unplumb (%s) %s\n"),
- pif->pi_ifname, strerror(errno));
- return (-1);
- }
+ if (ipv4 && !ifconfig(pif->pi_ifname, "inet", "unplumb", B_FALSE)) {
+ rcm_log_message(RCM_ERROR, _("IP: Cannot unplumb (%s) %s\n"),
+ pif->pi_ifname, strerror(errno));
+ return (-1);
}
- if (ipv6) {
- rcm_log_message(RCM_TRACE2,
- "IP: if_unplumb: ifconfig %s inet6 unplumb\n",
- pif->pi_ifname);
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s inet6 unplumb\n", USR_SBIN_IFCONFIG, pif->pi_ifname);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot unplumb (%s) %s\n"),
- pif->pi_ifname, strerror(errno));
- return (-1);
- }
+
+ if (ipv6 && !ifconfig(pif->pi_ifname, "inet6", "unplumb", B_FALSE)) {
+ rcm_log_message(RCM_ERROR, _("IP: Cannot unplumb (%s) %s\n"),
+ pif->pi_ifname, strerror(errno));
+ return (-1);
}
+
rcm_log_message(RCM_TRACE2, "IP: if_unplumb(%s) success\n",
node->ip_resource);
@@ -1723,8 +1620,11 @@ if_replumb(ip_cache_t *node)
ip_lif_t *lif;
ip_pif_t *pif;
int i;
- char syscmd[LIFNAMSIZ+MAXPATHLEN]; /* must be big enough */
- int max_ipv4 = 0, max_ipv6 = 0;
+ boolean_t success, ipmp;
+ const char *fstr;
+ char lifname[LIFNAMSIZ];
+ char buf[MAX_RECONFIG_SIZE];
+ int max_lifnum = 0;
rcm_log_message(RCM_TRACE2, "IP: if_replumb(%s)\n", node->ip_resource);
@@ -1738,100 +1638,103 @@ if_replumb(ip_cache_t *node)
*/
pif = node->ip_pif;
- lif = pif->pi_lifs;
+ ipmp = (node->ip_pif->pi_grname[0] != '\0');
/*
* Make a first pass to plumb in physical interfaces and get a count
* of the max logical interfaces
*/
- while (lif != NULL) {
+ for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
+ max_lifnum = MAX(lif->li_ifnum, max_lifnum);
if (lif->li_ifflags & IFF_IPV4) {
- if (lif->li_ifnum > max_ipv4) {
- max_ipv4 = lif->li_ifnum;
- }
+ fstr = "inet";
} else if (lif->li_ifflags & IFF_IPV6) {
- if (lif->li_ifnum > max_ipv6) {
- max_ipv6 = lif->li_ifnum;
- }
+ fstr = "inet6";
} else {
/* Unlikely case */
rcm_log_message(RCM_DEBUG,
"IP: Re-plumb ignored (%s:%d)\n",
pif->pi_ifname, lif->li_ifnum);
- lif = lif->li_next;
continue;
}
- if (lif->li_ifnum == 0) { /* physical interface instance */
- if ((lif->li_ifflags & IFF_NOFAILOVER) ||
- (strcmp(pif->pi_grpname, "") == 0)) {
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s\n", USR_SBIN_IFCONFIG,
- lif->li_reconfig);
- } else if (lif->li_ifflags & IFF_IPV4) {
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s inet plumb group %s\n",
- USR_SBIN_IFCONFIG,
- pif->pi_ifname, pif->pi_grpname);
- } else if (lif->li_ifflags & IFF_IPV6) {
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s inet6 plumb group %s\n",
- USR_SBIN_IFCONFIG,
- pif->pi_ifname, pif->pi_grpname);
- }
+ /* ignore logical interface instances */
+ if (lif->li_ifnum != 0)
+ continue;
+
+ if ((lif->li_ifflags & IFF_NOFAILOVER) || !ipmp) {
+ success = ifconfig("", "", lif->li_reconfig, B_FALSE);
+ } else {
+ (void) snprintf(buf, sizeof (buf), "plumb group %s",
+ pif->pi_grname);
+ success = ifconfig(pif->pi_ifname, fstr, buf, B_FALSE);
+ }
+
+ if (!success) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot plumb (%s) %s\n"), pif->pi_ifname,
+ strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * Restart DHCP if necessary.
+ */
+ if ((lif->li_ifflags & IFF_DHCPRUNNING) &&
+ !ifconfig(pif->pi_ifname, fstr, CFG_DHCP_CMD, B_FALSE)) {
+ rcm_log_message(RCM_ERROR, _("IP: Cannot start DHCP "
+ "(%s) %s\n"), pif->pi_ifname, strerror(errno));
+ return (-1);
+ }
+ rcm_log_message(RCM_TRACE2,
+ "IP: if_replumb: Modcnt = %d\n", lif->li_modcnt);
+ /* modinsert modules in order, ignore driver(last) */
+ for (i = 0; i < (lif->li_modcnt - 1); i++) {
rcm_log_message(RCM_TRACE2,
- "IP: if_replumb: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
+ "IP: modinsert: Pos = %d Mod = %s\n",
+ i, lif->li_modules[i]);
+ if (modop(pif->pi_ifname, lif->li_modules[i], i,
+ MOD_INSERT) == -1) {
rcm_log_message(RCM_ERROR,
- _("IP: Cannot plumb (%s) %s\n"),
- pif->pi_ifname, strerror(errno));
+ _("IP: modinsert error(%s)\n"),
+ pif->pi_ifname);
return (-1);
}
-
- rcm_log_message(RCM_TRACE2,
- "IP: if_replumb: Modcnt = %d\n", lif->li_modcnt);
- /* modinsert modules in order, ignore driver(last) */
- for (i = 0; i < (lif->li_modcnt - 1); i++) {
- rcm_log_message(RCM_TRACE2,
- "IP: modinsert: Pos = %d Mod = %s\n",
- i, lif->li_modules[i]);
- if (modop(pif->pi_ifname, lif->li_modules[i], i,
- MOD_INSERT) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: modinsert error(%s)\n"),
- pif->pi_ifname);
- return (-1);
- }
- }
}
-
- lif = lif->li_next;
}
/* Now, add all the logical interfaces in the correct order */
- for (i = 1; i <= MAX(max_ipv6, max_ipv4); i++) {
+ for (i = 1; i <= max_lifnum; i++) {
+ (void) snprintf(lifname, LIFNAMSIZ, "%s:%d", pif->pi_ifname, i);
+
/* reset lif through every iteration */
- lif = pif->pi_lifs;
- while (lif != NULL) {
- if (((lif->li_ifflags & IFF_NOFAILOVER) ||
- (strcmp(pif->pi_grpname, "") == 0)) &&
- (lif->li_ifnum == i)) {
- /* Plumb in the logical interface */
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s\n", USR_SBIN_IFCONFIG,
- lif->li_reconfig);
- rcm_log_message(RCM_TRACE2,
- "IP: if_replumb: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot addif (%s:%d) "
- "%s\n"),
- pif->pi_ifname, i, strerror(errno));
- return (-1);
- }
+ for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
+ /*
+ * Process entries in order. If the interface is
+ * using IPMP, only process test addresses.
+ */
+ if (lif->li_ifnum != i ||
+ (ipmp && !(lif->li_ifflags & IFF_NOFAILOVER)))
+ continue;
+
+ if (!ifconfig("", "", lif->li_reconfig, B_FALSE)) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot addif (%s) %s\n"), lifname,
+ strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * Restart DHCP if necessary.
+ */
+ if ((lif->li_ifflags & IFF_DHCPRUNNING) &&
+ !ifconfig(lifname, fstr, CFG_DHCP_CMD, B_FALSE)) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot start DHCP (%s) %s\n"),
+ lifname, strerror(errno));
+ return (-1);
}
- lif = lif->li_next;
}
}
@@ -1865,71 +1768,64 @@ clr_cfg_state(ip_pif_t *pif)
}
/*
- * ip_ipmp_offline() - Failover from if_from to if_to using a
- * minimum redudancy of min_red. This uses IPMPs
- * "offline" mechanism to achieve the failover.
+ * Attempt to offline ip_cache_t `node'; returns an IPMP error code.
*/
static int
-ip_ipmp_offline(ip_cache_t *if_from, ip_cache_t *if_to)
+ip_ipmp_offline(ip_cache_t *node)
{
- mpathd_cmd_t mpdcmd;
-
- if ((if_from == NULL) || (if_from->ip_pif == NULL) ||
- (if_from->ip_pif->pi_ifname == NULL)) {
- return (-1);
- }
+ int retval;
+ ipmp_handle_t handle;
rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline\n");
- mpdcmd.cmd_command = MI_OFFLINE;
- (void) strcpy(mpdcmd.cmd_ifname, if_from->ip_pif->pi_ifname);
-
- if ((if_to != NULL) && (if_to->ip_pif != NULL) &&
- (if_to->ip_pif->pi_ifname != NULL)) {
- rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline (%s)->(%s)\n",
- if_from->ip_pif->pi_ifname, if_to->ip_pif->pi_ifname);
- (void) strncpy(mpdcmd.cmd_movetoif, if_to->ip_pif->pi_ifname,
- sizeof (mpdcmd.cmd_movetoif));
- mpdcmd.cmd_movetoif[sizeof (mpdcmd.cmd_movetoif) - 1] = '\0';
- } else {
- rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline (%s)->(any)\n",
- if_from->ip_pif->pi_ifname);
- (void) strcpy(mpdcmd.cmd_movetoif, ""); /* signifies any */
+ if ((retval = ipmp_open(&handle)) != IPMP_SUCCESS) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: cannot create ipmp handle: %s\n"),
+ ipmp_errmsg(retval));
+ return (retval);
}
- mpdcmd.cmd_min_red = if_from->ip_ifred;
- if (mpathd_send_cmd(&mpdcmd) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd offline error: %s\n"),
- strerror(errno));
- return (-1);
+ retval = ipmp_offline(handle, node->ip_pif->pi_ifname, node->ip_ifred);
+ if (retval != IPMP_SUCCESS) {
+ rcm_log_message(RCM_ERROR, _("IP: ipmp_offline error: %s\n"),
+ ipmp_errmsg(retval));
+ } else {
+ rcm_log_message(RCM_TRACE1, "IP: ipmp_offline success\n");
}
- rcm_log_message(RCM_TRACE1, "IP: ipmp offline success\n");
- return (0);
+ ipmp_close(handle);
+ return (retval);
}
/*
- * ip_ipmp_undo_offline() - Undo prior offline of the interface.
- * This uses IPMPs "undo offline" feature.
+ * Attempt to undo the offline ip_cache_t `node'; returns an IPMP error code.
*/
static int
ip_ipmp_undo_offline(ip_cache_t *node)
{
- mpathd_cmd_t mpdcmd;
+ int retval;
+ ipmp_handle_t handle;
- mpdcmd.cmd_command = MI_UNDO_OFFLINE;
- (void) strcpy(mpdcmd.cmd_ifname, node->ip_pif->pi_ifname);
+ rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_undo_offline\n");
- if (mpathd_send_cmd(&mpdcmd) < 0) {
+ if ((retval = ipmp_open(&handle)) != IPMP_SUCCESS) {
rcm_log_message(RCM_ERROR,
- _("IP: mpathd error: %s\n"),
- strerror(errno));
- return (-1);
+ _("IP: cannot create ipmp handle: %s\n"),
+ ipmp_errmsg(retval));
+ return (retval);
}
- rcm_log_message(RCM_TRACE1, "IP: ipmp undo offline success\n");
- return (0);
+ retval = ipmp_undo_offline(handle, node->ip_pif->pi_ifname);
+ if (retval != IPMP_SUCCESS) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: ipmp_undo_offline error: %s\n"),
+ ipmp_errmsg(retval));
+ } else {
+ rcm_log_message(RCM_TRACE1, "IP: ipmp_undo_offline success\n");
+ }
+
+ ipmp_close(handle);
+ return (retval);
}
/*
@@ -1946,10 +1842,9 @@ get_link_resource(const char *link)
char *resource;
dladm_status_t status;
- if ((status = dladm_name2info(dld_handle, link, &linkid, &flags, NULL,
- NULL)) != DLADM_STATUS_OK) {
+ status = dladm_name2info(dld_handle, link, &linkid, &flags, NULL, NULL);
+ if (status != DLADM_STATUS_OK)
goto fail;
- }
if (!(flags & DLADM_OPT_ACTIVE)) {
status = DLADM_STATUS_FAILED;
@@ -1976,243 +1871,6 @@ fail:
}
/*
- * if_get_flags() - Return the cached physical interface flags
- * Call with cache_lock held
- */
-static uint64_t
-if_get_flags(ip_pif_t *pif)
-{
- ip_lif_t *lif;
-
- for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
- if (lif->li_ifnum == 0) {
- return (lif->li_ifflags & RCM_PIF_FLAGS);
- }
- }
- return (0);
-}
-
-/*
- * mpathd_send_cmd() - Sends the command to in.mpathd.
- */
-static int
-mpathd_send_cmd(mpathd_cmd_t *mpd)
-{
- mpathd_unoffline_t mpc;
- struct mpathd_response mpr;
- int i;
- int s;
-
- rcm_log_message(RCM_TRACE1, "IP: mpathd_send_cmd \n");
-
- for (i = 0; i < MPATHD_MAX_RETRIES; i++) {
- s = connect_to_mpathd(AF_INET);
- if (s == -1) {
- s = connect_to_mpathd(AF_INET6);
- if (s == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot talk to mpathd\n"));
- return (-1);
- }
- }
- switch (mpd->cmd_command) {
- case MI_OFFLINE :
- rcm_log_message(RCM_TRACE1, "IP: MI_OFFLINE: "
- "(%s)->(%s) redundancy = %d\n", mpd->cmd_ifname,
- mpd->cmd_movetoif, mpd->cmd_min_red);
-
- if (write(s, mpd, sizeof (mpathd_cmd_t)) !=
- sizeof (mpathd_cmd_t)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd write: %s\n"),
- strerror(errno));
- (void) close(s);
- return (-1);
- }
- break;
-
- case MI_SETOINDEX :
- rcm_log_message(RCM_TRACE1, "IP: MI_SETOINDEX: "
- "(%s)->(%s) family = %d\n", mpd->from_lifname,
- mpd->to_pifname, mpd->addr_family);
-
- if (write(s, mpd, sizeof (mpathd_cmd_t)) !=
- sizeof (mpathd_cmd_t)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd write: %s\n"),
- strerror(errno));
- (void) close(s);
- return (-1);
- }
- break;
-
- case MI_UNDO_OFFLINE:
- /* mpathd checks for exact size of the message */
- mpc.cmd_command = mpd->cmd_command;
- (void) strcpy(mpc.cmd_ifname, mpd->cmd_ifname);
-
- rcm_log_message(RCM_TRACE1, "IP: MI_UNDO_OFFLINE: "
- "(%s)\n", mpd->cmd_ifname);
-
- if (write(s, &mpc, sizeof (mpathd_unoffline_t)) !=
- sizeof (mpathd_unoffline_t)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd write: %s\n"),
- strerror(errno));
- (void) close(s);
- return (-1);
- }
- break;
- default :
- rcm_log_message(RCM_ERROR,
- _("IP: unsupported mpathd command\n"));
- (void) close(s);
- return (-1);
- }
-
- bzero(&mpr, sizeof (struct mpathd_response));
- /* Read the result from mpathd */
- if (read(s, &mpr, sizeof (struct mpathd_response)) !=
- sizeof (struct mpathd_response)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd read : %s\n"), strerror(errno));
- (void) close(s);
- return (-1);
- }
-
- (void) close(s);
- if (mpr.resp_mpathd_err == 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: mpathd_send_cmd success\n");
- return (0); /* Successful */
- }
-
- if (mpr.resp_mpathd_err == MPATHD_SYS_ERROR) {
- if (mpr.resp_sys_errno == EAGAIN) {
- (void) sleep(1);
- rcm_log_message(RCM_DEBUG,
- "IP: mpathd retrying\n");
- continue; /* Retry */
- }
- errno = mpr.resp_sys_errno;
- rcm_log_message(RCM_WARNING,
- _("IP: mpathd_send_cmd error: %s\n"),
- strerror(errno));
- } else if (mpr.resp_mpathd_err == MPATHD_MIN_RED_ERROR) {
- errno = EIO;
- rcm_log_message(RCM_ERROR, _("IP: in.mpathd(1M): "
- "Minimum redundancy not met\n"));
- } else {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd_send_cmd error\n"));
- }
- /* retry */
- }
-
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd_send_cmd failed %d retries\n"), MPATHD_MAX_RETRIES);
- return (-1);
-}
-
-/*
- * Returns -1 on failure. Returns the socket file descriptor on
- * success.
- */
-static int
-connect_to_mpathd(int family)
-{
- int s;
- struct sockaddr_storage ss;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
- int addrlen;
- int ret;
- int on;
-
- rcm_log_message(RCM_TRACE1, "IP: connect_to_mpathd\n");
-
- s = socket(family, SOCK_STREAM, 0);
- if (s < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd socket: %s\n"), strerror(errno));
- return (-1);
- }
- bzero((char *)&ss, sizeof (ss));
- ss.ss_family = family;
- /*
- * Need to bind to a privelged port. For non-root, this
- * will fail. in.mpathd verifies that only commands coming
- * from priveleged ports succeed so that the ordinary user
- * can't issue offline commands.
- */
- on = 1;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd setsockopt: TCP_ANONPRIVBIND: %s\n"),
- strerror(errno));
- return (-1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = 0;
- sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addrlen = sizeof (struct sockaddr_in);
- break;
- case AF_INET6:
- sin6->sin6_port = 0;
- sin6->sin6_addr = loopback_addr;
- addrlen = sizeof (struct sockaddr_in6);
- break;
- }
- ret = bind(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd bind: %s\n"), strerror(errno));
- return (-1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = htons(MPATHD_PORT);
- break;
- case AF_INET6:
- sin6->sin6_port = htons(MPATHD_PORT);
- break;
- }
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- if (errno == ECONNREFUSED) {
- /* in.mpathd is not running, start it */
- if (rcm_exec_cmd(MPATHD_PATH) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd exec: %s\n"),
- strerror(errno));
- return (-1);
- }
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- }
- if (ret != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd connect: %s\n"), strerror(errno));
- return (-1);
- }
- }
- on = 0;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd setsockopt TCP_ANONPRIVBIND: %s\n"),
- strerror(errno));
- return (-1);
- }
-
- rcm_log_message(RCM_TRACE1, "IP: connect_to_mpathd success\n");
-
- return (s);
-}
-
-/*
* modop() - Remove/insert a module
*/
static int
@@ -2239,12 +1897,10 @@ modop(char *name, char *arg, int pos, char op)
if (op == MOD_REMOVE) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s modremove %s@%d\n", USR_SBIN_IFCONFIG, name, arg,
- pos);
+ "%s %s modremove %s@%d\n", SBIN_IFCONFIG, name, arg, pos);
} else if (op == MOD_INSERT) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s modinsert %s@%d\n", USR_SBIN_IFCONFIG, name, arg,
- pos);
+ "%s %s modinsert %s@%d\n", SBIN_IFCONFIG, name, arg, pos);
} else {
rcm_log_message(RCM_ERROR,
_("IP: modop(%s): unknown operation\n"), name);
@@ -2277,11 +1933,11 @@ get_modlist(char *name, ip_lif_t *lif)
int i;
int num_mods;
struct lifreq lifr;
- struct str_list strlist;
+ struct str_list strlist = { 0 };
rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s)\n", name);
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ (void) strlcpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
lifr.lifr_flags = lif->li_ifflags;
if (ip_domux2fd(&mux_fd, &muxid_fd, &fd, &lifr) < 0) {
rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd(%s)\n"), name);
@@ -2292,39 +1948,34 @@ get_modlist(char *name, ip_lif_t *lif)
rcm_log_message(RCM_ERROR,
_("IP: get_modlist(%s): I_LIST(%s) \n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ goto fail;
}
strlist.sl_nmods = num_mods;
strlist.sl_modlist = malloc(sizeof (struct str_mlist) * num_mods);
-
if (strlist.sl_modlist == NULL) {
rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): %s\n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ goto fail;
}
if (ioctl(fd, I_LIST, (caddr_t)&strlist) < 0) {
rcm_log_message(RCM_ERROR,
_("IP: get_modlist(%s): I_LIST error: %s\n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ goto fail;
}
for (i = 0; i < strlist.sl_nmods; i++) {
- lif->li_modules[i] =
- malloc(strlen(strlist.sl_modlist[i].l_name)+1);
+ lif->li_modules[i] = strdup(strlist.sl_modlist[i].l_name);
if (lif->li_modules[i] == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: get_modlist(%s): %s\n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ while (i > 0)
+ free(lif->li_modules[--i]);
+ goto fail;
}
- (void) strcpy(lif->li_modules[i], strlist.sl_modlist[i].l_name);
}
lif->li_modcnt = strlist.sl_nmods;
@@ -2332,6 +1983,10 @@ get_modlist(char *name, ip_lif_t *lif)
rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s) success\n", name);
return (ip_plink(mux_fd, muxid_fd, fd, &lifr));
+fail:
+ free(strlist.sl_modlist);
+ (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
+ return (-1);
}
/*
@@ -2436,6 +2091,7 @@ ip_plink(int mux_fd, int muxid_fd, int fd, struct lifreq *lifr)
*
* Notify online to IP address consumers.
*/
+/*ARGSUSED*/
static int
ip_onlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
rcm_info_t **depend_info)
@@ -2464,6 +2120,7 @@ ip_onlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
*
* Offline IP address consumers.
*/
+/*ARGSUSED*/
static int
ip_offlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
rcm_info_t **depend_info)
@@ -2494,9 +2151,9 @@ ip_offlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
}
/*
- * ip_get_addrlist() - Compile list of IP addresses hosted on this NIC (node)
- * This routine malloc() required memeory for the list
- * Returns list on success, NULL if failed
+ * ip_get_addrlist() - Get the list of IP addresses on this interface (node);
+ * This routine malloc()s required memory for the list.
+ * Returns the list on success, NULL on failure.
* Call with cache_lock held.
*/
static char **
@@ -2504,11 +2161,9 @@ ip_get_addrlist(ip_cache_t *node)
{
ip_lif_t *lif;
char **addrlist = NULL;
- int numifs;
+ int i, numifs;
+ size_t addrlistsize;
char addrstr[INET6_ADDRSTRLEN];
- void *addr;
- int af;
- int i;
rcm_log_message(RCM_TRACE2, "IP: ip_get_addrlist(%s)\n",
node->ip_resource);
@@ -2532,35 +2187,21 @@ ip_get_addrlist(ip_cache_t *node)
for (lif = node->ip_pif->pi_lifs, i = 0; lif != NULL;
lif = lif->li_next, i++) {
- af = lif->li_addr.family;
- if (af == AF_INET6) {
- addr = &lif->li_addr.ip6.sin6_addr;
- } else if (af == AF_INET) {
- addr = &lif->li_addr.ip4.sin_addr;
- } else {
- rcm_log_message(RCM_DEBUG,
- "IP: unknown addr family %d, assuming AF_INET\n",
- af);
- af = AF_INET;
- addr = &lif->li_addr.ip4.sin_addr;
- }
- if (inet_ntop(af, addr, addrstr, INET6_ADDRSTRLEN) == NULL) {
- rcm_log_message(RCM_ERROR,
- _("IP: inet_ntop: %s\n"), strerror(errno));
+ if (!ip_addrstr(lif, addrstr, sizeof (addrstr))) {
ip_free_addrlist(addrlist);
return (NULL);
}
- if ((addrlist[i] = malloc(strlen(addrstr) + RCM_SIZE_SUNW_IP))
- == NULL) {
+ addrlistsize = strlen(addrstr) + sizeof (RCM_STR_SUNW_IP);
+ if ((addrlist[i] = malloc(addrlistsize)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: ip_get_addrlist(%s) malloc failure(%s)\n"),
node->ip_resource, strerror(errno));
ip_free_addrlist(addrlist);
return (NULL);
}
- (void) strcpy(addrlist[i], RCM_STR_SUNW_IP); /* SUNW_ip/ */
- (void) strcat(addrlist[i], addrstr); /* SUNW_ip/<address> */
+ (void) snprintf(addrlist[i], addrlistsize, "%s%s",
+ RCM_STR_SUNW_IP, addrstr);
rcm_log_message(RCM_DEBUG, "Anon Address: %s\n", addrlist[i]);
}
@@ -2611,16 +2252,13 @@ ip_consumer_notify(rcm_handle_t *hd, datalink_id_t linkid, char **errorp,
return;
}
/*
- * Inform anonymous consumers about IP addresses being
- * onlined
+ * Inform anonymous consumers about IP addresses being onlined.
*/
(void) ip_onlinelist(hd, node, errorp, flags, depend_info);
(void) mutex_unlock(&cache_lock);
rcm_log_message(RCM_TRACE2, "IP: ip_consumer_notify success\n");
- return;
-
}
/*
@@ -2632,20 +2270,18 @@ if_configure(datalink_id_t linkid)
char ifinst[MAXLINKNAMELEN];
char cfgfile[MAXPATHLEN];
char cached_name[RCM_LINK_RESOURCE_MAX];
- struct stat statbuf;
+ FILE *hostfp, *host6fp;
ip_cache_t *node;
- int af = 0;
- int ipmp = 0;
+ boolean_t ipmp = B_FALSE;
assert(linkid != DATALINK_INVALID_LINKID);
-
rcm_log_message(RCM_TRACE1, _("IP: if_configure(%u)\n"), linkid);
/* Check for the interface in the cache */
(void) snprintf(cached_name, sizeof (cached_name), "%s/%u",
RCM_LINK_PREFIX, linkid);
- /* Check if the interface is new or was previously offlined */
+ /* Check if the interface is new or was not previously offlined */
(void) mutex_lock(&cache_lock);
if (((node = cache_lookup(NULL, cached_name, CACHE_REFRESH)) != NULL) &&
(!(node->ip_cachestate & CACHE_IF_OFFLINED))) {
@@ -2663,76 +2299,69 @@ if_configure(datalink_id_t linkid)
return (-1);
}
- /* Scan IPv4 configuration first */
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV4, ifinst);
- cfgfile[MAXPATHLEN - 1] = '\0';
-
+ /*
+ * Scan the IPv4 and IPv6 hostname files to see if (a) they exist
+ * and (b) if either one places the interface into an IPMP group.
+ */
+ (void) snprintf(cfgfile, MAXPATHLEN, CFGFILE_FMT_IPV4, ifinst);
rcm_log_message(RCM_TRACE1, "IP: Scanning %s\n", cfgfile);
- if (stat(cfgfile, &statbuf) == 0) {
- af |= CONFIG_AF_INET;
- if (isgrouped(cfgfile)) {
- ipmp++;
- }
+ if ((hostfp = fopen(cfgfile, "r")) != NULL) {
+ if (isgrouped(cfgfile))
+ ipmp = B_TRUE;
}
- /* Scan IPv6 configuration details */
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV6, ifinst);
- cfgfile[MAXPATHLEN - 1] = '\0';
+ (void) snprintf(cfgfile, MAXPATHLEN, CFGFILE_FMT_IPV6, ifinst);
rcm_log_message(RCM_TRACE1, "IP: Scanning %s\n", cfgfile);
- if (stat(cfgfile, &statbuf) == 0) {
- af |= CONFIG_AF_INET6;
- if ((ipmp == 0) && isgrouped(cfgfile)) {
- ipmp++;
- }
+ if ((host6fp = fopen(cfgfile, "r")) != NULL) {
+ if (!ipmp && isgrouped(cfgfile))
+ ipmp = B_TRUE;
}
- if (af & CONFIG_AF_INET) {
- if (if_ipmp_config(ifinst, CONFIG_AF_INET, ipmp) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: IPv4 Post-attach failed (%s)\n"), ifinst);
- return (-1);
- }
+ /*
+ * Configure the interface according to its hostname files.
+ */
+ if (hostfp != NULL &&
+ if_config_inst(ifinst, hostfp, AF_INET, ipmp) == -1) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: IPv4 Post-attach failed (%s)\n"), ifinst);
+ goto fail;
}
- if (af & CONFIG_AF_INET6) {
- if (if_ipmp_config(ifinst, CONFIG_AF_INET6, ipmp) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: IPv6 Post-attach failed(%s)\n"), ifinst);
- return (-1);
- }
+ if (host6fp != NULL &&
+ if_config_inst(ifinst, host6fp, AF_INET6, ipmp) == -1) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: IPv6 Post-attach failed (%s)\n"), ifinst);
+ goto fail;
}
+ (void) fclose(hostfp);
+ (void) fclose(host6fp);
rcm_log_message(RCM_TRACE1, "IP: if_configure(%s) success\n", ifinst);
-
return (0);
-
+fail:
+ (void) fclose(hostfp);
+ (void) fclose(host6fp);
+ return (-1);
}
/*
- * isgrouped() - Scans the given config file to see if this is a grouped
- * interface
- * Returns non-zero if true; 0 if false
+ * isgrouped() - Scans the given config file to see if this interface is
+ * using IPMP. Returns B_TRUE or B_FALSE.
*/
-static int
-isgrouped(char *cfgfile)
+static boolean_t
+isgrouped(const char *cfgfile)
{
FILE *fp;
struct stat statb;
- char *buf = NULL;
- char *tokens[MAXARGS]; /* token pointers */
- char tspace[MAXLINE]; /* token space */
- int ntok;
- int group = 0;
-
- if (cfgfile == NULL)
- return (0);
+ char *nlp, *line, *token, *lasts, *buf;
+ boolean_t grouped = B_FALSE;
rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s)\n", cfgfile);
if (stat(cfgfile, &statb) != 0) {
rcm_log_message(RCM_TRACE1,
_("IP: No config file(%s)\n"), cfgfile);
- return (0);
+ return (B_FALSE);
}
/*
@@ -2744,609 +2373,284 @@ isgrouped(char *cfgfile)
if (statb.st_size <= 1) {
rcm_log_message(RCM_TRACE1,
_("IP: Empty config file(%s)\n"), cfgfile);
- return (0);
+ return (B_FALSE);
}
if ((fp = fopen(cfgfile, "r")) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: Cannot open configuration file(%s): %s\n"), cfgfile,
strerror(errno));
- return (0);
+ return (B_FALSE);
}
- if ((buf = calloc(1, statb.st_size)) == NULL) {
+ if ((buf = malloc(statb.st_size)) == NULL) {
rcm_log_message(RCM_ERROR,
- _("IP: calloc failure(%s): %s\n"), cfgfile,
+ _("IP: malloc failure(%s): %s\n"), cfgfile,
strerror(errno));
- (void) fclose(fp);
- return (0);
+ goto out;
}
while (fgets(buf, statb.st_size, fp) != NULL) {
- if (*buf == '\0')
- continue;
-
- tokenize(buf, tokens, tspace, &ntok);
- while (ntok) {
- if (STREQ("group", tokens[ntok - 1])) {
- if (tokens[ntok] != NULL) {
- group++;
- }
+ if ((nlp = strrchr(buf, '\n')) != NULL)
+ *nlp = '\0';
+
+ line = buf;
+ while ((token = strtok_r(line, " \t", &lasts)) != NULL) {
+ line = NULL;
+ if (STREQ("group", token) &&
+ strtok_r(NULL, " \t", &lasts) != NULL) {
+ grouped = B_TRUE;
+ goto out;
}
- ntok--;
}
}
-
+out:
free(buf);
-
(void) fclose(fp);
- if (group <= 0) {
- rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s) non-grouped\n",
- cfgfile);
- return (0);
- } else {
- rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s) grouped\n",
- cfgfile);
- return (1);
- }
-}
+ rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s): %d\n", cfgfile,
+ grouped);
+ return (grouped);
+}
/*
- * if_ipmp_config() - Configure an interface instance as specified by the
+ * if_config_inst() - Configure an interface instance as specified by the
* address family af and if it is grouped (ipmp).
*/
static int
-if_ipmp_config(char *ifinst, int af, int ipmp)
+if_config_inst(const char *ifinst, FILE *hfp, int af, boolean_t ipmp)
{
- char cfgfile[MAXPATHLEN]; /* configuration file */
- FILE *fp;
+ FILE *ifparsefp;
struct stat statb;
- char *buf;
- char *tokens[MAXARGS]; /* list of config attributes */
- char tspace[MAXLINE]; /* token space */
- char syscmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1];
- char grpcmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1];
- char fstr[8]; /* address family string inet or inet6 */
- int nofailover = 0;
- int newattach = 0;
- int cmdvalid = 0;
- int ntok;
- int n;
- int stdif = 0;
-
- if (ifinst == NULL)
- return (0);
+ char *buf = NULL;
+ char *ifparsebuf = NULL;
+ uint_t ifparsebufsize;
+ const char *fstr; /* address family string */
+ boolean_t stdif = B_FALSE;
- rcm_log_message(RCM_TRACE1, "IP: if_ipmp_config(%s) ipmp = %d\n",
+ rcm_log_message(RCM_TRACE1, "IP: if_config_inst(%s) ipmp = %d\n",
ifinst, ipmp);
- if (af & CONFIG_AF_INET) {
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV4,
- ifinst);
- (void) strcpy(fstr, "inet");
- } else if (af & CONFIG_AF_INET6) {
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV6,
- ifinst);
- (void) strcpy(fstr, "inet6");
- } else {
- return (0); /* nothing to do */
- }
-
- cfgfile[MAXPATHLEN - 1] = '\0';
- grpcmd[0] = '\0';
-
- if (stat(cfgfile, &statb) != 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: No config file(%s)\n", ifinst);
- return (0);
+ if (fstat(fileno(hfp), &statb) != 0) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot fstat file(%s)\n"), ifinst);
+ goto fail;
}
- /* Config file exists, plumb in the physical interface */
- if (af & CONFIG_AF_INET6) {
- if (if_getcount(AF_INET6) == 0) {
- /*
- * Configure software loopback driver if this is the
- * first IPv6 interface plumbed
- */
- newattach++;
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s lo0 %s plumb ::1 up", USR_SBIN_IFCONFIG, fstr);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot plumb (%s) %s\n"),
- ifinst, strerror(errno));
- return (-1);
- }
- }
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s plumb up",
- USR_SBIN_IFCONFIG, ifinst, fstr);
- } else {
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s plumb ",
- USR_SBIN_IFCONFIG, ifinst, fstr);
- if (if_getcount(AF_INET) == 0) {
- newattach++;
- }
+ switch (af) {
+ case AF_INET:
+ fstr = "inet";
+ break;
+ case AF_INET6:
+ fstr = "inet6";
+ break;
+ default:
+ assert(0);
}
- rcm_log_message(RCM_TRACE1, "IP: Exec: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot plumb (%s) %s\n"), ifinst, strerror(errno));
- return (-1);
- }
+ /*
+ * The hostname file exists; plumb the physical interface.
+ */
+ if (!ifconfig(ifinst, fstr, "plumb", B_FALSE))
+ goto fail;
- /* Check if config file is empty, if so, nothing else to do */
- if (statb.st_size == 0) {
+ /* Skip static configuration if the hostname file is empty */
+ if (statb.st_size <= 1) {
rcm_log_message(RCM_TRACE1,
- "IP: Zero size config file(%s)\n", ifinst);
- return (0);
+ _("IP: Zero size hostname file(%s)\n"), ifinst);
+ goto configured;
}
- if ((fp = fopen(cfgfile, "r")) == NULL) {
+ if (fseek(hfp, 0, SEEK_SET) == -1) {
rcm_log_message(RCM_ERROR,
- _("IP: Open error(%s): %s\n"), cfgfile, strerror(errno));
- return (-1);
+ _("IP: Cannot rewind hostname file(%s): %s\n"), ifinst,
+ strerror(errno));
+ goto fail;
}
+ /*
+ * Size both buffers for the worst case: the whole file on one line.
+ * This over-allocates, but hostname files are small, so it suffices.
+ */
if ((buf = calloc(1, statb.st_size)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: calloc(%s): %s\n"), ifinst, strerror(errno));
- (void) fclose(fp);
- return (-1);
+ goto fail;
}
- /* a single line with one token implies a classical if */
- if (fgets(buf, statb.st_size, fp) != NULL) {
- tokenize(buf, tokens, tspace, &ntok);
- if (ntok == 1) {
- rcm_log_message(RCM_TRACE1, "IP: Standard interface\n");
- stdif++;
- }
- }
- if (fseek(fp, 0L, SEEK_SET) == -1) {
- rcm_log_message(RCM_ERROR, _("IP: fseek: %s\n"),
- strerror(errno));
- return (-1);
+ ifparsebufsize = statb.st_size + sizeof (SBIN_IFPARSE " -s inet6 ");
+ if ((ifparsebuf = calloc(1, ifparsebufsize)) == NULL) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: calloc(%s): %s\n"), ifinst, strerror(errno));
+ goto fail;
}
/*
- * Process the config command
- * This loop also handles multiple logical interfaces that may
- * be configured on a single line
+ * For IPv4, determine whether the hostname file consists of a single
+ * line. We need to handle these specially since they should
+ * automatically be suffixed with "netmask + broadcast + up".
*/
- while (fgets(buf, statb.st_size, fp) != NULL) {
- nofailover = 0;
- cmdvalid = 0;
+ if (af == AF_INET &&
+ fgets(buf, statb.st_size, hfp) != NULL &&
+ fgets(buf, statb.st_size, hfp) == NULL) {
+ rcm_log_message(RCM_TRACE1, "IP: one-line hostname file\n");
+ stdif = B_TRUE;
+ }
- if (*buf == '\0')
- continue;
+ if (fseek(hfp, 0L, SEEK_SET) == -1) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot rewind hostname file(%s): %s\n"), ifinst,
+ strerror(errno));
+ goto fail;
+ }
- tokenize(buf, tokens, tspace, &ntok);
- if (ntok <= 0)
+ /*
+ * Loop through the file one line at a time and feed it to ifconfig.
+ * If the interface is using IPMP, then we use /sbin/ifparse -s to
+ * weed out all of the data addresses, since those are already on the
+ * IPMP meta-interface.
+ */
+ while (fgets(buf, statb.st_size, hfp) != NULL) {
+ if (ntok(buf) == 0)
continue;
- /* Reset the config command */
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s ",
- USR_SBIN_IFCONFIG, ifinst, fstr);
-
- /* No parsing if this is first interface of its kind */
- if (newattach) {
- (void) strcat(syscmd, buf);
- /* Classic if */
- if ((af & CONFIG_AF_INET) && (stdif == 1)) {
- (void) strcat(syscmd, CFG_CMDS_STD);
- }
- rcm_log_message(RCM_TRACE1, "IP: New: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Error: %s (%s): %s\n"),
- syscmd, ifinst, strerror(errno));
- }
+ if (!ipmp) {
+ (void) ifconfig(ifinst, fstr, buf, stdif);
continue;
}
- /* Parse the tokens to determine nature of the interface */
- for (n = 0; n < ntok; n++) {
- /* Handle pathological failover cases */
- if (STREQ("-failover", tokens[n]))
- nofailover++;
- if (STREQ("failover", tokens[n]))
- nofailover--;
-
- /* group attribute requires special processing */
- if (STREQ("group", tokens[n])) {
- if (tokens[n + 1] != NULL) {
- (void) snprintf(grpcmd, sizeof (grpcmd),
- "%s %s %s %s %s", USR_SBIN_IFCONFIG,
- ifinst, fstr,
- tokens[n], tokens[n + 1]);
- n++; /* skip next token */
- continue;
- }
- }
-
- /* Execute buffered command ? */
- if (STREQ("set", tokens[n]) ||
- STREQ("addif", tokens[n]) ||
- STREQ("removeif", tokens[n]) ||
- (n == (ntok -1))) {
-
- /* config command complete ? */
- if (n == (ntok -1)) {
- ADDSPACE(syscmd);
- (void) strcat(syscmd, tokens[n]);
- cmdvalid++;
- }
-
- if (!cmdvalid) {
- ADDSPACE(syscmd);
- (void) strcat(syscmd, tokens[n]);
- cmdvalid++;
- continue;
- }
- /* Classic if ? */
- if ((af & CONFIG_AF_INET) && (stdif == 1)) {
- (void) strcat(syscmd, CFG_CMDS_STD);
- }
-
- if (nofailover > 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: Interim exec: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: %s fail(%s): %s\n"),
- syscmd, ifinst,
- strerror(errno));
- }
- } else {
- /* Have mpathd configure the address */
- if (if_mpathd_configure(syscmd, ifinst,
- af, ipmp) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: %s fail(%s): %s\n"),
- syscmd, ifinst,
- strerror(errno));
- }
- }
-
- /* Reset config command */
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s %s ", USR_SBIN_IFCONFIG, ifinst,
- fstr);
- nofailover = 0;
- cmdvalid = 0;
- }
- /*
- * Note: No explicit command validation is required
- * since ifconfig to does it for us
- */
- ADDSPACE(syscmd);
- (void) strcat(syscmd, tokens[n]);
- cmdvalid++;
- }
- }
-
- free(buf);
- (void) fclose(fp);
-
- /*
- * The group name needs to be set after all the test/nofailover
- * addresses have been configured. Otherwise, if IPMP detects that the
- * interface is failed, the addresses will be moved to a working
- * interface before the '-failover' flag can be set.
- */
- if (grpcmd[0] != '\0') {
- rcm_log_message(RCM_TRACE1, "IP: set group name: %s\n", grpcmd);
- if (rcm_exec_cmd(grpcmd) != 0) {
- rcm_log_message(RCM_ERROR, _("IP: %s fail(%s): %s\n"),
- grpcmd, ifinst, strerror(errno));
+ (void) snprintf(ifparsebuf, ifparsebufsize, SBIN_IFPARSE
+ " -s %s %s", fstr, buf);
+ if ((ifparsefp = popen(ifparsebuf, "r")) == NULL) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: cannot configure %s: popen \"%s\" "
+ "failed: %s\n"), ifinst, buf, strerror(errno));
+ goto fail;
}
- }
- rcm_log_message(RCM_TRACE1, "IP: if_ipmp_config(%s) success\n", ifinst);
-
- return (0);
-}
-
-/*
- * if_mpathd_configure() - Determine configuration disposition of the interface
- */
-static int
-if_mpathd_configure(char *syscmd, char *ifinst, int af, int ipmp)
-{
- char *tokens[MAXARGS];
- char tspace[MAXLINE];
- int ntok;
- char *addr;
- char *from_lifname;
- mpathd_cmd_t mpdcmd;
- int n;
-
- rcm_log_message(RCM_TRACE1, "IP: if_mpathd_configure(%s): %s\n",
- ifinst, syscmd);
-
- tokenize(syscmd, tokens, tspace, &ntok);
- if (ntok <= 0)
- return (0);
-
- addr = tokens[3]; /* by default, third token is valid address */
- for (n = 0; n < ntok; n++) {
- if (STREQ("set", tokens[n]) ||
- STREQ("addif", tokens[n])) {
- addr = tokens[n+1];
- if (addr == NULL) { /* invalid format */
- return (-1);
- } else
- break;
+ while (fgets(buf, statb.st_size, ifparsefp) != NULL) {
+ if (ntok(buf) > 0)
+ (void) ifconfig(ifinst, fstr, buf, stdif);
}
- }
- /* Check std. commands or no failed over address */
- if (STREQ("removeif", addr) || STREQ("group", addr) ||
- ((from_lifname = get_mpathd_dest(addr, af)) == NULL)) {
- rcm_log_message(RCM_TRACE1,
- "IP: No failed-over host, exec %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
+ if (pclose(ifparsefp) == -1) {
rcm_log_message(RCM_ERROR,
- _("IP: %s failed(%s): %s\n"),
- syscmd, ifinst, strerror(errno));
- return (-1);
+ _("IP: cannot configure %s: pclose \"%s\" "
+ "failed: %s\n"), ifinst, buf, strerror(errno));
+ goto fail;
}
- return (0);
- }
-
- /* Check for non-IPMP failover scenarios */
- if ((ipmp <= 0) && (from_lifname != NULL)) {
- /* Address already hosted on another NIC, return */
- rcm_log_message(RCM_TRACE1,
- "IP: Non-IPMP failed-over host(%s): %s\n",
- ifinst, addr);
- return (0);
}
+configured:
/*
- * Valid failed-over host; have mpathd set the original index
+ * Bring up the interface (it may already be up)
+ *
+ * Technically, since the boot scripts only unconditionally bring up
+ * IPv6 interfaces, we should only unconditionally bring up IPv6 here.
+ * However, if we don't bring up IPv4, and a legacy IPMP configuration
+ * without test addresses is being used, we will never bring the
+ * interface up even though we would've at boot. One fix is to check
+ * if the IPv4 hostname file contains data addresses that we would've
+ * brought up, but there's no simple way to do that. Given that it's
+ * rare to have persistent IP configuration for an interface that
+ * leaves it down, we cheap out and always bring it up for IPMP.
*/
- mpdcmd.cmd_command = MI_SETOINDEX;
- (void) strcpy(mpdcmd.from_lifname, from_lifname);
- (void) strcpy(mpdcmd.to_pifname, ifinst);
- if (af & CONFIG_AF_INET6) {
- mpdcmd.addr_family = AF_INET6;
- } else {
- mpdcmd.addr_family = AF_INET;
- }
-
- /* Send command to in.mpathd(1M) */
- rcm_log_message(RCM_TRACE1,
- "IP: Attempting setoindex from (%s) to (%s) ....\n",
- from_lifname, ifinst);
-
- if (mpathd_send_cmd(&mpdcmd) < 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: mpathd set original index unsuccessful: %s\n",
- strerror(errno));
- return (-1);
- }
-
- rcm_log_message(RCM_TRACE1,
- "IP: setoindex success (%s) to (%s)\n",
- from_lifname, ifinst);
-
- return (0);
-}
-
-/*
- * get_mpathd_dest() - Return current destination for lif; caller is
- * responsible to free memory allocated for address
- */
-static char *
-get_mpathd_dest(char *addr, int family)
-{
- int sock;
- char *buf;
- struct lifnum lifn;
- struct lifconf lifc;
- struct lifreq *lifrp;
- sa_family_t af = AF_INET; /* IPv4 by default */
- int i;
- struct lifreq lifreq;
- struct sockaddr_in *sin;
- struct sockaddr_in6 *sin6;
- struct hostent *hp;
- char *ifname = NULL;
- char *prefix = NULL;
- char addrstr[INET6_ADDRSTRLEN];
- char ifaddr[INET6_ADDRSTRLEN];
- int err;
-
- if (addr == NULL) {
- return (NULL);
- }
-
- rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s)\n", addr);
-
- if (family & CONFIG_AF_INET6) {
- af = AF_INET6;
- } else {
- af = AF_INET;
- }
-
- if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: failure opening %s socket: %s\n"),
- af == AF_INET6 ? "IPv6" : "IPv4", strerror(errno));
- return (NULL);
- }
-
- lifn.lifn_family = af;
- lifn.lifn_flags = 0;
- if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCLGIFNUM failed: %s\n"),
- strerror(errno));
- (void) close(sock);
- return (NULL);
- }
-
- if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
- rcm_log_message(RCM_ERROR, _("IP: calloc: %s\n"),
- strerror(errno));
- (void) close(sock);
- return (NULL);
- }
-
- lifc.lifc_family = af;
- lifc.lifc_flags = 0;
- lifc.lifc_len = sizeof (struct lifreq) * lifn.lifn_count;
- lifc.lifc_buf = buf;
-
- if (ioctl(sock, SIOCGLIFCONF, (char *)&lifc) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFCONF failed: %s\n"),
- strerror(errno));
- free(buf);
- (void) close(sock);
- return (NULL);
- }
+ if ((af == AF_INET6 || ipmp) && !ifconfig(ifinst, fstr, "up", B_FALSE))
+ goto fail;
- /* Filter out prefix address from netmask */
- (void) strcpy(ifaddr, addr);
- if ((prefix = strchr(ifaddr, '/')) != NULL) {
- *prefix = '\0'; /* We care about the address part only */
- }
+ /*
+ * For IPv4, if a DHCP configuration file exists, have DHCP configure
+ * the interface. As with the boot scripts, this is done after the
+ * hostname files are processed so that configuration in those files
+ * (such as IPMP group names) will be applied first.
+ */
+ if (af == AF_INET) {
+ char dhcpfile[MAXPATHLEN];
+ char *dhcpbuf;
+ off_t i, dhcpsize;
- /* Check for aliases */
- hp = getipnodebyname(ifaddr, af, AI_DEFAULT, &err);
- if (hp) {
- if (inet_ntop(af, (void *)hp->h_addr_list[0],
- ifaddr, sizeof (ifaddr)) == NULL) {
- /* Restore original address and use it */
- (void) strcpy(ifaddr, addr);
- if ((prefix = strchr(ifaddr, '/')) != NULL) {
- *prefix = '\0';
- }
- }
- freehostent(hp);
- }
- rcm_log_message(RCM_TRACE2, "IP: ifaddr(%s) = %s\n", addr, ifaddr);
+ (void) snprintf(dhcpfile, MAXPATHLEN, DHCPFILE_FMT, ifinst);
+ if (stat(dhcpfile, &statb) == -1)
+ goto out;
- /* now search the interfaces */
- lifrp = lifc.lifc_req;
- for (i = 0; i < lifn.lifn_count; i++, lifrp++) {
- (void) strcpy(lifreq.lifr_name, lifrp->lifr_name);
- /* Get the interface address for this interface */
- if (ioctl(sock, SIOCGLIFADDR, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFADDR: %s\n"), strerror(errno));
- free(buf);
- (void) close(sock);
- return (NULL);
- }
-
- if (af == AF_INET6) {
- sin6 = (struct sockaddr_in6 *)&lifreq.lifr_addr;
- if (inet_ntop(AF_INET6, (void *)&sin6->sin6_addr,
- addrstr, sizeof (addrstr)) == NULL) {
- continue;
- }
- } else {
- sin = (struct sockaddr_in *)&lifreq.lifr_addr;
- if (inet_ntop(AF_INET, (void *)&sin->sin_addr,
- addrstr, sizeof (addrstr)) == NULL) {
- continue;
- }
+ if ((dhcpbuf = copylist(dhcpfile, &dhcpsize)) == NULL) {
+ rcm_log_message(RCM_ERROR, _("IP: cannot read "
+ "(%s): %s\n"), dhcpfile, strerror(errno));
+ goto fail;
}
- if (STREQ(addrstr, ifaddr)) {
- /* Allocate memory to hold interface name */
- if ((ifname = (char *)malloc(LIFNAMSIZ)) == NULL) {
- rcm_log_message(RCM_ERROR,
- _("IP: malloc: %s\n"), strerror(errno));
- free(buf);
- (void) close(sock);
- return (NULL);
- }
-
- /* Copy the interface name */
- /*
- * (void) memcpy(ifname, lifrp->lifr_name,
- * sizeof (ifname));
- * ifname[sizeof (ifname) - 1] = '\0';
- */
- (void) strcpy(ifname, lifrp->lifr_name);
- break;
+ /*
+ * The copylist() API converts \n's to \0's, but we want them
+ * to be spaces.
+ */
+ if (dhcpsize > 0) {
+ for (i = 0; i < dhcpsize; i++)
+ if (dhcpbuf[i] == '\0')
+ dhcpbuf[i] = ' ';
+ dhcpbuf[dhcpsize - 1] = '\0';
}
+ (void) ifconfig(ifinst, CFG_DHCP_CMD, dhcpbuf, B_FALSE);
+ free(dhcpbuf);
}
-
- (void) close(sock);
+out:
+ free(ifparsebuf);
free(buf);
-
- if (ifname == NULL)
- rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s): none\n",
- addr);
- else
- rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s): %s\n",
- addr, ifname);
-
- return (ifname);
-}
-
-static int
-if_getcount(int af)
-{
- int sock;
- struct lifnum lifn;
-
- rcm_log_message(RCM_TRACE1, "IP: if_getcount\n");
-
- if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: failure opening %s socket: %s\n"),
- af == AF_INET6 ? "IPv6" : "IPv4", strerror(errno));
- return (-1);
- }
-
- lifn.lifn_family = af;
- lifn.lifn_flags = 0;
- if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCLGIFNUM failed: %s\n"),
- strerror(errno));
- (void) close(sock);
- return (-1);
- }
- (void) close(sock);
-
- rcm_log_message(RCM_TRACE1, "IP: if_getcount success: %d\n",
- lifn.lifn_count);
-
- return (lifn.lifn_count);
+ rcm_log_message(RCM_TRACE1, "IP: if_config_inst(%s) success\n", ifinst);
+ return (0);
+fail:
+ free(ifparsebuf);
+ free(buf);
+ rcm_log_message(RCM_ERROR, "IP: if_config_inst(%s) failure\n", ifinst);
+ return (-1);
}
/*
- * tokenize() - turn a command line into tokens; caller is responsible to
- * provide enough memory to hold all tokens
+ * ntok() - count the number of tokens in the provided buffer.
*/
-static void
-tokenize(char *line, char **tokens, char *tspace, int *ntok)
+static uint_t
+ntok(const char *cp)
{
- char *cp;
- char *sp;
+ uint_t ntok = 0;
- sp = tspace;
- cp = line;
- for (*ntok = 0; *ntok < MAXARGS; (*ntok)++) {
- tokens[*ntok] = sp;
+ for (;;) {
while (ISSPACE(*cp))
cp++;
+
if (ISEOL(*cp))
break;
+
do {
- *sp++ = *cp++;
+ cp++;
} while (!ISSPACE(*cp) && !ISEOL(*cp));
- *sp++ = '\0';
+ ntok++;
+ }
+ return (ntok);
+}
+
+static boolean_t
+ifconfig(const char *ifinst, const char *fstr, const char *buf, boolean_t stdif)
+{
+ char syscmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1];
+ int status;
+
+ (void) snprintf(syscmd, sizeof (syscmd), SBIN_IFCONFIG " %s %s %s",
+ ifinst, fstr, buf);
+
+ if (stdif)
+ (void) strlcat(syscmd, CFG_CMDS_STD, sizeof (syscmd));
+
+ rcm_log_message(RCM_TRACE1, "IP: Exec: %s\n", syscmd);
+ if ((status = rcm_exec_cmd(syscmd)) != 0) {
+ if (WIFEXITED(status)) {
+ rcm_log_message(RCM_ERROR, _("IP: \"%s\" failed with "
+ "exit status %d\n"), syscmd, WEXITSTATUS(status));
+ } else {
+ rcm_log_message(RCM_ERROR, _("IP: Error: %s: %s\n"),
+ syscmd, strerror(errno));
+ }
+ return (B_FALSE);
}
+ return (B_TRUE);
}
diff --git a/usr/src/cmd/svc/milestone/net-init b/usr/src/cmd/svc/milestone/net-init
index 26b295dce9..7f0804af67 100644
--- a/usr/src/cmd/svc/milestone/net-init
+++ b/usr/src/cmd/svc/milestone/net-init
@@ -20,11 +20,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This is the second phase of TCP/IP configuration. The first part is
# run by the svc:/network/physical service and includes configuring the
# interfaces and setting the machine's hostname. The svc:/network/initial
@@ -52,10 +50,11 @@ if [ -f /etc/inet/ipaddrsel.conf ]; then
fi
#
-# Now that /usr is mounted, see if in.mpathd needs to be started by firing it
-# up in "adopt" mode; if there are no interfaces it needs to manage, it will
-# automatically exit. Note that it may already be running if we're not
-# executing as part of system boot.
+# If explicit IPMP groups are being used, in.mpathd will already be started.
+# However, if TRACK_INTERFACES_ONLY_WITH_GROUPS=no and no explicit IPMP
+# groups have been configured, then it still needs to be started. So, fire
+# it up in "adopt" mode; if there are no interfaces it needs to manage, it
+# will automatically exit.
#
/usr/bin/pgrep -x -u 0 -z `smf_zonename` in.mpathd >/dev/null 2>&1 || \
/usr/lib/inet/in.mpathd -a
diff --git a/usr/src/cmd/svc/milestone/net-loopback b/usr/src/cmd/svc/milestone/net-loopback
index 3bd5a0f525..d07afd4ada 100644
--- a/usr/src/cmd/svc/milestone/net-loopback
+++ b/usr/src/cmd/svc/milestone/net-loopback
@@ -20,10 +20,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
. /lib/svc/share/smf_include.sh
@@ -36,14 +35,6 @@
smf_configure_ip || exit $SMF_EXIT_OK
#
-# Cause ifconfig to not automatically start in.mpathd when IPMP groups are
-# configured. This is not strictly necessary but makes it so that in.mpathd
-# will always be started explicitly from /lib/svc/method/net-init (the
-# svc:/network/initial service), when we're sure that /usr is mounted.
-#
-SUNW_NO_MPATHD=; export SUNW_NO_MPATHD
-
-#
# Before any interfaces are configured, we need to set the system
# default IP forwarding behavior. This will be the setting for
# interfaces that don't modify the per-interface setting with the
diff --git a/usr/src/cmd/svc/milestone/net-physical b/usr/src/cmd/svc/milestone/net-physical
index 8530806768..bc74c2a206 100644
--- a/usr/src/cmd/svc/milestone/net-physical
+++ b/usr/src/cmd/svc/milestone/net-physical
@@ -20,7 +20,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T.
@@ -38,22 +38,9 @@
#
smf_configure_ip || exit $SMF_EXIT_OK
-# Print warnings to console
-warn_failed_ifs() {
- echo "Failed to $1 interface(s): $2" >/dev/msglog
-}
-
# Make sure that the libraries essential to this stage of booting can be found.
LD_LIBRARY_PATH=/lib; export LD_LIBRARY_PATH
-#
-# Cause ifconfig to not automatically start in.mpathd when IPMP groups are
-# configured. This is not strictly necessary but makes it so that in.mpathd
-# will always be started explicitly from /etc/init.d/inetinit, when we're
-# sure that /usr is mounted.
-#
-SUNW_NO_MPATHD=; export SUNW_NO_MPATHD
-
smf_netstrategy
if smf_is_globalzone; then
@@ -127,13 +114,18 @@ if [ "$interface_names" != "/etc/hostname.*[0-9]" ]; then
IFS="$ORIGIFS"
while [ $# -ge 2 ]; do
shift
- if [ $# -gt 1 -a "$2" != "/etc/hostname" ]; then
- while [ $# -gt 1 -a "$1" != "/etc/hostname" ]; do
- shift
- done
- else
- inet_list="$inet_list $1"
+ intf_name=$1
+ while [ $# -gt 1 -a "$2" != "/etc/hostname" ]; do
+ intf_name="$intf_name.$2"
shift
+ done
+ shift
+
+ read one rest < /etc/hostname.$intf_name
+ if [ "$one" = ipmp ]; then
+ ipmp_list="$ipmp_list $intf_name"
+ else
+ inet_list="$inet_list $intf_name"
fi
done
fi
@@ -151,17 +143,38 @@ if [ "$interface_names" != "/etc/hostname6.*[0-9]" ]; then
IFS="$ORIGIFS"
while [ $# -ge 2 ]; do
shift
- if [ $# -gt 1 -a "$2" != "/etc/hostname6" ]; then
- while [ $# -gt 1 -a "$1" != "/etc/hostname6" ]; do
- shift
- done
- else
- inet6_list="$inet6_list $1"
+ intf_name=$1
+ while [ $# -gt 1 -a "$2" != "/etc/hostname6" ]; do
+ intf_name="$intf_name.$2"
shift
+ done
+ shift
+
+ read one rest < /etc/hostname6.$intf_name
+ if [ "$one" = ipmp ]; then
+ ipmp6_list="$ipmp6_list $intf_name"
+ else
+ inet6_list="$inet6_list $intf_name"
fi
done
fi
+#
+# Create all of the IPv4 IPMP interfaces.
+#
+if [ -n "$ipmp_list" ]; then
+ set -- $ipmp_list
+ while [ $# -gt 0 ]; do
+ if /sbin/ifconfig $1 ipmp; then
+ ipmp_created="$ipmp_created $1"
+ else
+ ipmp_failed="$ipmp_failed $1"
+ fi
+ shift
+ done
+ [ -n "$ipmp_failed" ] && warn_failed_ifs "create IPv4 IPMP" \
+ "$ipmp_failed"
+fi
#
# Step through the IPv4 interface list and try to plumb every interface.
@@ -178,7 +191,7 @@ if [ -n "$inet_list" ]; then
fi
shift
done
- [ -n "$inet_failed" ] && warn_failed_ifs "plumb IPv4" $inet_failed
+ [ -n "$inet_failed" ] && warn_failed_ifs "plumb IPv4" "$inet_failed"
fi
# Run autoconf to connect to a WLAN if the interface is a wireless one
@@ -209,7 +222,24 @@ if [ -n "$inet6_list" ]; then
fi
shift
done
- [ -n "$inet6_failed" ] && warn_failed_ifs "plumb IPv6" $inet6_failed
+ [ -n "$inet6_failed" ] && warn_failed_ifs "plumb IPv6" "$inet6_failed"
+fi
+
+#
+# Create all of the IPv6 IPMP interfaces.
+#
+if [ -n "$ipmp6_list" ]; then
+ set -- $ipmp6_list
+ while [ $# -gt 0 ]; do
+ if /sbin/ifconfig $1 inet6 ipmp; then
+ ipmp6_created="$ipmp6_created $1"
+ else
+ ipmp6_failed="$ipmp6_failed $1"
+ fi
+ shift
+ done
+ [ -n "$ipmp6_failed" ] && warn_failed_ifs "create IPv6 IPMP" \
+ "$ipmp6_failed"
fi
if smf_is_globalzone; then
@@ -224,49 +254,24 @@ if smf_is_globalzone; then
fi
#
-# Process the /etc/hostname.* files of plumbed IPv4 interfaces. If an
-# /etc/hostname file is not present or is empty, the ifconfig auto-dhcp
-# / auto-revarp command will attempt to set the address, later.
+# Process the /etc/hostname[6].* files for IPMP interfaces. Processing these
+# before non-IPMP interfaces avoids accidental implicit IPMP group creation.
+#
+[ -n "$ipmp_created" ] && if_configure inet "IPMP" $ipmp_created
+[ -n "$ipmp6_created" ] && if_configure inet6 "IPMP" $ipmp6_created
+
#
-# If /etc/hostname.lo0 exists the loop below will do additional
-# configuration of lo0.
+# Process the /etc/hostname[6].* files for non-IPMP interfaces.
#
-if [ -n "$inet_plumbed" ]; then
- i4s_fail=
- echo "configuring IPv4 interfaces:\c"
- set -- $inet_plumbed
- while [ $# -gt 0 ]; do
- inet_process_hostname /sbin/ifconfig $1 inet \
- </etc/hostname.$1 >/dev/null
- [ $? != 0 ] && i4s_fail="$i4s_fail $1"
- echo " $1\c"
- shift
- done
- echo "."
- [ -n "$i4s_fail" ] && warn_failed_ifs "configure IPv4" $i4s_fail
-fi
+[ -n "$inet_plumbed" ] && if_configure inet "" $inet_plumbed
+[ -n "$inet6_plumbed" ] && if_configure inet6 "" $inet6_plumbed
#
-# Process the /etc/hostname6.* files of plumbed IPv6 interfaces. After
-# processing the hostname6 file, bring the interface up. If
-# /etc/hostname6.lo0 exists the loop below will do additional
-# configuration of lo0.
+# For the IPv4 and IPv6 interfaces that failed to plumb, find (or create)
+# IPMP meta-interfaces to host their data addresses.
#
-if [ -n "$inet6_plumbed" ]; then
- i6_fail=
- echo "configuring IPv6 interfaces:\c"
- set -- $inet6_plumbed
- while [ $# -gt 0 ]; do
- inet6_process_hostname /sbin/ifconfig $1 inet6 \
- </etc/hostname6.$1 >/dev/null &&
- /sbin/ifconfig $1 inet6 up
- [ $? != 0 ] && i6_fail="$i6_fail $1"
- echo " $1\c"
- shift
- done
- echo "."
- [ -n "$i6_fail" ] && warn_failed_ifs "configure IPv6" $i6_fail
-fi
+[ -n "$inet_failed" ] && move_addresses inet
+[ -n "$inet6_failed" ] && move_addresses inet6
# Run DHCP if requested. Skip boot-configured interface.
interface_names="`echo /etc/dhcp.*[0-9] 2>/dev/null`"
@@ -326,7 +331,7 @@ if [ "$interface_names" != '/etc/dhcp.*[0-9]' ]; then
done
IFS="$ORIGIFS"
unset ORIGIFS
- [ -n "$i4d_fail" ] && warn_failed_ifs "configure IPv4 DHCP" $i4d_fail
+ [ -n "$i4d_fail" ] && warn_failed_ifs "configure IPv4 DHCP" "$i4d_fail"
fi
# In order to avoid bringing up the interfaces that have
@@ -338,14 +343,6 @@ if [ "$_INIT_NET_STRATEGY" = "rarp" -o -z "$hostname" ]; then
fi
#
-# Process IPv4 and IPv6 interfaces that failed to plumb. Find an
-# alternative interface to host the addresses.
-#
-[ -n "$inet_failed" ] && move_addresses inet
-
-[ -n "$inet6_failed" ] && move_addresses inet6
-
-#
# If the /etc/defaultrouter file exists, process it now so that the next
# stage of booting will have access to NFS.
#
diff --git a/usr/src/cmd/svc/shell/net_include.sh b/usr/src/cmd/svc/shell/net_include.sh
index 51c87a40a8..71dc6a8256 100644
--- a/usr/src/cmd/svc/shell/net_include.sh
+++ b/usr/src/cmd/svc/shell/net_include.sh
@@ -20,13 +20,18 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T.
# All rights reserved.
#
+# Print warnings to console
+warn_failed_ifs() {
+ echo "Failed to $1 interface(s):$2" >/dev/msglog
+}
+
#
# shcat file
# Simulates cat in sh so it doesn't need to be on the root filesystem.
@@ -41,20 +46,28 @@ shcat() {
}
#
-# Inet_list, list of IPv4 interfaces.
-# Inet_plumbed, list of plumbed IPv4 interfaces.
-# Inet_failed, list of IPv4 interfaces that failed to plumb.
-# Inet6_list, list of IPv6 interfaces.
-# Inet6_plumbed, list of plumbed IPv6 interfaces.
-# Inet6_failed, list of IPv6 interfaces that failed to plumb.
+# inet_list list of IPv4 interfaces.
+# inet6_list list of IPv6 interfaces.
+# ipmp_list list of IPMP IPv4 interfaces.
+# ipmp6_list list of IPMP IPv6 interfaces.
+# inet_plumbed list of plumbed IPv4 interfaces.
+# inet6_plumbed list of plumbed IPv6 interfaces.
+# ipmp_created list of created IPMP IPv4 interfaces.
+# ipmp6_created list of created IPMP IPv6 interfaces.
+# inet_failed list of IPv4 interfaces that failed to plumb.
+# inet6_failed list of IPv6 interfaces that failed to plumb.
+# ipmp_failed list of IPMP IPv4 interfaces that failed to be created.
+# ipmp6_failed list of IPMP IPv6 interfaces that failed to be created.
#
unset inet_list inet_plumbed inet_failed \
- inet6_list inet6_plumbed inet6_failed
+ inet6_list inet6_plumbed inet6_failed \
+ ipmp_list ipmp_created ipmp_failed \
+ ipmp6_list ipmp6_created ipmp6_failed
+
#
# get_physical interface
#
-# Return physical interface corresponding to the given logical
-# interface.
+# Return physical interface corresponding to the given interface.
#
get_physical()
{
@@ -70,7 +83,7 @@ get_physical()
# get_logical interface
#
# Return logical interface number. Zero will be returned
-# if there is no explicit logical device number.
+# if there is no explicit logical number.
#
get_logical()
{
@@ -89,19 +102,18 @@ get_logical()
#
# if_comp if1 if2
#
-# Compare Interfaces. Do the physical interface names and logical interface
+# Compare interfaces. Do the physical interface names and logical interface
# numbers match?
#
if_comp()
{
- [ "`get_physical $1`" = "`get_physical $2`" ] && \
- [ `get_logical $1` -eq `get_logical $2` ]
+ physical_comp $1 $2 && [ `get_logical $1` -eq `get_logical $2` ]
}
-
+
#
# physical_comp if1 if2
#
-# Do the two devices share a physical interface?
+# Do the two interfaces share a physical interface?
#
physical_comp()
{
@@ -129,19 +141,110 @@ in_list()
}
#
-# get_group_from_hostname interface type
+# get_inactive_ifname groupname
+#
+# Return the name of an inactive interface in `groupname', if one exists.
+#
+get_inactive_ifname()
+{
+ ORIGIFS="$IFS"
+ /sbin/ipmpstat -gP -o groupname,interfaces |
+ while IFS=: read groupname ifnames; do
+ #
+ # Skip other IPMP groups.
+ #
+ [ "$groupname" != "$1" ] && continue
+
+ #
+ # Standby interfaces are always enclosed in ()'s, so look
+ # for the first interface name starting with a "(", and
+ # strip those off.
+ #
+ IFS=" "
+ for ifname in $ifnames; do
+ case "$ifname" in
+ '('*) IFS="()"
+ echo $ifname
+ IFS="$ORIGIFS"
+ return
+ ;;
+ *) ;;
+ esac
+ done
+ done
+ IFS="$ORIGIFS"
+}
+
+#
+# get_groupifname groupname
+#
+# Return the IPMP meta-interface name for the group, if it exists.
+#
+get_groupifname()
+{
+ /sbin/ipmpstat -gP -o groupname,group | while IFS=: read name ifname; do
+ if [ "$name" = "$1" ]; then
+ echo "$ifname"
+ return
+ fi
+ done
+}
+
+#
+# create_ipmp ifname groupname type
+#
+# Helper function for create_groupifname() that returns zero if it's able
+# to create an IPMP interface of the specified type and place it in the
+# specified group, or non-zero otherwise.
+#
+create_ipmp()
+{
+ /sbin/ifconfig $1 >/dev/null 2>&1 && return 1
+ /sbin/ifconfig $1 inet6 >/dev/null 2>&1 && return 1
+ /sbin/ifconfig $1 $3 ipmp group $2 2>/dev/null
+}
+
+#
+# create_groupifname groupname type
+#
+# Create an IPMP meta-interface name for the group. We only use this
+# function if all of the interfaces in the group failed at boot and there
+# were no /etc/hostname[6].<if> files for the IPMP meta-interface.
+#
+create_groupifname()
+{
+ #
+ # This is a horrible way to count from 0 to 999, but in sh and
+ # without necessarily having /usr mounted, what else can we do?
+ #
+ for a in "" 1 2 3 4 5 6 7 8 9; do
+ for b in 0 1 2 3 4 5 6 7 8 9; do
+ for c in 0 1 2 3 4 5 6 7 8 9; do
+ # strip leading zeroes
+ [ "$a" = "" ] && [ "$b" = 0 ] && b=""
+ if create_ipmp ipmp$a$b$c $1 $2; then
+ echo ipmp$a$b$c
+ return
+ fi
+ done
+ done
+ done
+}
+
+#
+# get_hostname_ipmpinfo interface type keyword [ keyword ... ]
#
-# Return all group settings from hostname file for a given interface.
+# Return all requested IPMP keywords from hostname file for a given interface.
#
# Example:
-# get_group_from_hostname hme0 inet
+# get_hostname_ipmpinfo hme0 inet keyword [ keyword ... ]
#
-get_group_from_hostname()
+get_hostname_ipmpinfo()
{
case "$2" in
- inet) file=/etc/hostname.$1
+ inet) file=/etc/hostname.$1
;;
- inet6) file=/etc/hostname6.$1
+ inet6) file=/etc/hostname6.$1
;;
*)
return
@@ -150,16 +253,21 @@ get_group_from_hostname()
[ -r "$file" ] || return
+ type=$2
+ shift 2
+
#
- # Read through the hostname file looking for group settings
- # There may be several group settings in the file. It is up
- # to the caller to pick the right one (i.e. the last one).
+ # Read through the hostname file looking for the specified
+ # keywords. Since there may be several keywords that cancel
+ # each other out, the caller must post-process as appropriate.
#
while read line; do
[ -z "$line" ] && continue
- /sbin/ifparse -s "$2" $line
- done < "$file" | while read one two three; do
- [ "$one" = "group" ] && echo "$two"
+ /sbin/ifparse -s "$type" $line
+ done < "$file" | while read one two; do
+ for keyword in "$@"; do
+ [ "$one" = "$keyword" ] && echo "$one $two"
+ done
done
}
@@ -174,7 +282,6 @@ get_group_from_hostname()
get_group_for_type()
{
physical=`get_physical $1`
-
type=$2
group=""
@@ -183,184 +290,77 @@ get_group_for_type()
# the reason for the second while loop.
#
shift 2
- while [ $# -gt 0 ]; do
- if if_comp "$physical" $1; then
- get_group_from_hostname $1 $type
+ for ifname in "$@"; do
+ if if_comp "$physical" $ifname; then
+ get_hostname_ipmpinfo $ifname $type group
fi
- shift
done | while :; do
- read next || {
+ read keyword grname || {
echo "$group"
break
}
- group="$next"
+ group="$grname"
done
}
#
-# get_group interface [ configured | failed ]
-#
-# If there is both an inet and inet6 version of an interface, the group
-# could be set in either set of hostname files.
-#
-# Inet6 is configured after inet, so if the group is set in both
-# sets of hostname files, the inet6 file wins.
-#
-# The "configured" argument should be used to get the group for
-# an interface that has been plumbed into the stack and configured. Use
-# the "failed" argument to get the group for an interface that failed to
-# plumb.
-#
-get_group()
-{
- group=""
-
- case "$2" in
- configured)
- group=`get_group_for_type $1 inet6 $inet6_plumbed`
- ;;
- failed)
- group=`get_group_for_type $1 inet6 $inet6_list`
- ;;
- *)
- return
- ;;
- esac
-
- if [ -z "$group" ]; then
- if [ "$2" = configured ]; then
- group=`get_group_for_type $1 inet $inet_plumbed`
- else
- group=`get_group_for_type $1 inet $inet_list`
- fi
- fi
-
- echo $group
-}
-
-#
-# get_standby_from_hostname interface type
-#
-# Return any "standby" or "-standby" flags in the hostname file.
-#
-# Example:
-# get_standby_from_hostname hme0 inet6
-#
-#
-get_standby_from_hostname()
-{
- case "$2" in
- inet) file=/etc/hostname.$1
- ;;
- inet6) file=/etc/hostname6.$1
- ;;
- *)
- return
- ;;
- esac
-
- [ -r "$file" ] || return
-
- #
- # There may be several instances of the "standby" and
- # "-standby" flags in the hostname file. It is up to
- # the caller to pick the correct one.
- #
- while read line; do
- [ -z "$line" ] && continue
- /sbin/ifparse -s "$2" $line
- done < "$file" | while read one two; do
- [ "$one" = "standby" ] || [ "$one" = "-standby" ] \
- && echo "$one"
- done
-}
-
-#
-# get_standby_for_type interface type plumbed_list
+# get_standby_for_type interface type list
#
# Look through the set of hostname files associated with the same physical
-# interface as "interface", and determine whether they would configure
-# the interface as a standby interface.
+# interface as "interface", and print the standby value ("standby",
+# "-standby", or nothing). Only hostname files associated with the
+# physical interface or logical interface zero can set this flag.
#
get_standby_for_type()
{
-
physical=`get_physical $1`
type=$2
- final=""
-
#
- # The last "standby" or "-standby" flag is the one that counts,
- # which is the reason for the second while loop.
+ # The last setting of "standby" or "-standby" is the one that
+ # counts, which is the reason for the second while loop.
#
shift 2
- while [ $# -gt 0 ]; do
- if [ "`get_physical $1`" = "$physical" ]; then
- get_standby_from_hostname $1 $type
+ for ifname in "$@"; do
+ if if_comp "$physical" $ifname; then
+ get_hostname_ipmpinfo $ifname $type standby -standby
fi
- shift
done | while :; do
- read next || {
- echo "$final"
+ read keyword || {
+ echo "$iftype"
break
}
- final="$next"
+ iftype="$keyword"
done
}
#
-# is_standby interface
+# get_group interface
#
-# Determine whether a configured interface is a standby interface.
-#
-# Both the inet and inet6 hostname file sets must be checked.
-# If "standby" or "-standby" is set in the inet6 hostname file set,
-# don't bother looking at the inet set.
+# If there is both an inet and inet6 version of an interface, the group
+# could be set in either set of hostname files. Since inet6 is configured
+# after inet, if there's a setting in both files, inet6 wins.
#
-is_standby()
+get_group()
{
- standby=`get_standby_for_type $1 inet6 $inet6_plumbed`
-
- if [ -z "$standby" ]; then
- standby=`get_standby_for_type $1 inet $inet_plumbed`
- fi
-
- # The return value is the value of the following test.
- [ "$standby" = "standby" ]
+ group=`get_group_for_type $1 inet6 $inet6_list`
+ [ -z "$group" ] && group=`get_group_for_type $1 inet $inet_list`
+ echo $group
}
#
-# get_alternate interface plumbed_list
-#
-# Look for a plumbed interface in the same group as "interface".
-# A standby interface is preferred over a non-standby interface.
+# is_standby interface
#
-# Example:
-# get_alternate hme0 $inet_plumbed
+# If there is both an inet and inet6 version of an interface, the
+# "standby" or "-standby" flag could be set in either set of hostname
+# files. Since inet6 is configured after inet, if there's a setting in
+# both files, inet6 wins.
#
-get_alternate()
+is_standby()
{
- mygroup=`get_group $1 failed`
- [ -z "$mygroup" ] && return
-
- maybe=""
-
- shift
- while [ $# -gt 0 ]; do
- group=`get_group $1 configured`
- if [ "$group" = "$mygroup" ]; then
- if is_standby $1; then
- get_physical $1
- return
- else
- [ -z "$maybe" ] && maybe=$1
- fi
- fi
- shift
- done
-
- get_physical $maybe
+ standby=`get_standby_for_type $1 inet6 $inet6_list`
+ [ -z "$standby" ] && standby=`get_standby_for_type $1 inet $inet_list`
+ [ "$standby" = "standby" ]
}
#
@@ -394,7 +394,7 @@ doDHCPhostname()
#
# If there is only one line in a hostname file we assume it contains
# the old style address which results in the interface being brought up
-# and the netmask and broadcast address being set.
+# and the netmask and broadcast address being set ($inet_oneline_epilogue).
#
# If there are multiple lines we assume the file contains a list of
# commands to the processor with neither the implied bringing up of the
@@ -403,6 +403,8 @@ doDHCPhostname()
# Return non-zero if any command fails so that the caller may alert
# users to errors in the configuration.
#
+inet_oneline_epilogue="netmask + broadcast + up"
+
inet_process_hostname()
{
if doDHCPhostname $2; then
@@ -418,7 +420,7 @@ inet_process_hostname()
ifcmds=""
retval=0
- while read line; do
+ while read one rest; do
if [ -n "$ifcmds" ]; then
#
# This handles the first N-1
@@ -427,7 +429,14 @@ inet_process_hostname()
$* $ifcmds || retval=$?
multiple_lines=true
fi
- ifcmds="$line"
+
+ #
+ # Strip out the "ipmp" keyword if it's the
+ # first token, since it's used to control
+ # interface creation, not configuration.
+ #
+ [ "$one" = ipmp ] && one=
+ ifcmds="$one $rest"
done
#
@@ -437,8 +446,8 @@ inet_process_hostname()
#
[ -z "$ifcmds" ] && return $retval
if [ $multiple_lines = false ]; then
- # The traditional single-line hostname file.
- ifcmds="$ifcmds netmask + broadcast + up"
+ # The traditional one-line hostname file.
+ ifcmds="$ifcmds $inet_oneline_epilogue"
fi
#
@@ -470,7 +479,13 @@ inet_process_hostname()
inet6_process_hostname()
{
retval=0
- while read ifcmds; do
+ while read one rest; do
+ #
+ # See comment in inet_process_hostname for details.
+ #
+ [ "$one" = ipmp ] && one=
+ ifcmds="$one $rest"
+
if [ -n "$ifcmds" ]; then
$* $ifcmds || retval=$?
fi
@@ -479,10 +494,9 @@ inet6_process_hostname()
}
#
-# Process interfaces that failed to plumb. Find an alternative
-# interface to host the addresses. For IPv6, only static addresses
-# defined in hostname6 files are moved, autoconfigured addresses are
-# not moved.
+# Process interfaces that failed to plumb. Find the IPMP meta-interface
+# that should host the addresses. For IPv6, only static addresses defined
+# in hostname6 files are moved, autoconfigured addresses are not moved.
#
# Example:
# move_addresses inet6
@@ -491,35 +505,43 @@ move_addresses()
{
type="$1"
eval "failed=\"\$${type}_failed\""
- eval "plumbed=\"\$${type}_plumbed\""
eval "list=\"\$${type}_list\""
- process_hostname="${type}_process_hostname"
+ process_func="${type}_process_hostname"
processed=""
if [ "$type" = inet ]; then
- echo "moving addresses from failed IPv4 interfaces:\c"
+ typedesc="IPv4"
zaddr="0.0.0.0"
hostpfx="/etc/hostname"
else
- echo "moving addresses from failed IPv6 interfaces:\c"
+ typedesc="IPv6"
zaddr="::"
hostpfx="/etc/hostname6"
fi
- set -- $failed
- while [ $# -gt 0 ]; do
- in_list if_comp $1 $processed && { shift; continue; }
-
- alternate="`get_alternate $1 $plumbed`"
- if [ -z "$alternate" ]; then
- in_list physical_comp $1 $processed || {
- echo " $1 (couldn't move, no" \
- "alternative interface)\c"
- processed="$processed $1"
+ echo "Moving addresses from missing ${typedesc} interface(s):\c" \
+ >/dev/msglog
+
+ for ifname in $failed; do
+ in_list if_comp $ifname $processed && continue
+
+ group=`get_group $ifname`
+ if [ -z "$group" ]; then
+ in_list physical_comp $ifname $processed || {
+ echo " $ifname (not moved -- not" \
+ "in an IPMP group)\c" >/dev/msglog
+ processed="$processed $ifname"
}
- shift
continue
fi
+
+ #
+ # Look up the IPMP meta-interface name. If one doesn't exist,
+ # create it.
+ #
+ grifname=`get_groupifname $group`
+ [ -z "$grifname" ] && grifname=`create_groupifname $group $type`
+
#
# The hostname files are processed twice. In the first
# pass, we are looking for all commands that apply
@@ -528,7 +550,7 @@ move_addresses()
# whether the address represents a failover address
# or not until we've read all the files associated with the
# interface.
-
+ #
# In the first pass through the hostname files, all
# additional logical interface commands are removed.
# The remaining commands are concatenated together and
@@ -541,19 +563,18 @@ move_addresses()
# the embedded "set" command set the address later.
#
/sbin/ifparse -f $type `
- for item in $list; do
- if_comp $1 $item && \
- $process_hostname /sbin/ifparse \
- $type < $hostpfx.$item
- done | while read three four; do
- [ "$three" != addif ] && \
- echo "$three $four \c"
- done` | while read one two; do
- [ -z "$one" ] && continue
- line="addif $zaddr $one $two"
- /sbin/ifconfig $alternate $type \
- -standby $line >/dev/null
- done
+ for item in $list; do
+ if_comp $ifname $item && $process_func \
+ /sbin/ifparse $type < $hostpfx.$item
+ done | while read three four; do
+ [ "$three" != addif ] && echo "$three $four \c"
+ done` | while read one two; do
+ [ -z "$one" ] && continue
+ [ "$one $two" = "$inet_oneline_epilogue" ] && \
+ continue
+ line="addif $zaddr $one $two"
+ /sbin/ifconfig $grifname $type $line >/dev/null
+ done
#
# In the second pass, look for the "addif" commands
@@ -561,22 +582,75 @@ move_addresses()
# commands are not valid in logical interface hostname
# files.
#
- if [ "$1" = "`get_physical $1`" ]; then
- $process_hostname /sbin/ifparse -f $type \
- <$hostpfx.$1 | while read one two; do
- [ "$one" = addif ] && \
- /sbin/ifconfig $alternate $type -standby \
- addif $two >/dev/null
+ if [ "$ifname" = "`get_physical $ifname`" ]; then
+ $process_func /sbin/ifparse -f $type < $hostpfx.$ifname \
+ | while read one two; do
+ [ "$one" = addif ] && \
+ /sbin/ifconfig $grifname $type \
+ addif $two >/dev/null
done
fi
- in_list physical_comp $1 $processed || {
- echo " $1 (moved to $alternate)\c"
- processed="$processed $1"
+ #
+ # Check if this was an active interface in the group. If so, activate
+ # another IP interface (if possible) by clearing its "standby" flag.
+ #
+ is_standby $ifname || inactive=`get_inactive_ifname $group`
+ [ -n "$inactive" ] && /sbin/ifconfig $inactive $type -standby
+
+ in_list physical_comp $ifname $processed || {
+ processed="$processed $ifname"
+ echo " $ifname (moved to $grifname\c" > /dev/msglog
+ if [ -n "$inactive" ]; then
+ echo " and cleared 'standby' on\c" > /dev/msglog
+ echo " $inactive to compensate\c" > /dev/msglog
+ fi
+ echo ")\c" > /dev/msglog
}
+ inactive=""
+ done
+ echo "." >/dev/msglog
+}
+
+#
+# if_configure type class interface_list
+#
+# Configure all of the interfaces of type `type' (e.g., "inet6") in
+# `interface_list' according to their /etc/hostname[6].* files. `class'
+# describes the class of interface (e.g., "IPMP"), as a diagnostic aid.
+# For inet6 interfaces, the interface is also brought up.
+#
+if_configure()
+{
+ fail=
+ type=$1
+ class=$2
+ process_func=${type}_process_hostname
+ shift 2
+
+ if [ "$type" = inet ]; then
+ desc="IPv4"
+ hostpfx="/etc/hostname"
+ else
+ desc="IPv6"
+ hostpfx="/etc/hostname6"
+ fi
+ [ -n "$class" ] && desc="$class $desc"
+
+ echo "configuring $desc interfaces:\c"
+ while [ $# -gt 0 ]; do
+ $process_func /sbin/ifconfig $1 $type < $hostpfx.$1 >/dev/null
+ if [ $? != 0 ]; then
+ fail="$fail $1"
+ elif [ "$type" = inet6 ]; then
+ /sbin/ifconfig $1 inet6 up || fail="$fail $1"
+ fi
+ echo " $1\c"
shift
done
echo "."
+
+ [ -n "$fail" ] && warn_failed_ifs "configure $desc" "$fail"
}
#
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 46b2b5a958..dc90957dfa 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -615,13 +615,10 @@ const struct ioc {
{ (uint_t)SIOCSIPSECONFIG, "SIOCSIPSECONFIG", NULL },
{ (uint_t)SIOCDIPSECONFIG, "SIOCDIPSECONFIG", NULL },
{ (uint_t)SIOCLIPSECONFIG, "SIOCLIPSECONFIG", NULL },
- { (uint_t)SIOCLIFFAILOVER, "SIOCLIFFAILOVER", "lifreq" },
- { (uint_t)SIOCLIFFAILBACK, "SIOCLIFFAILBACK", "lifreq" },
- { (uint_t)SIOCSIPMPFAILBACK, "SIOCSIPMPFAILBACK", NULL },
+ { (uint_t)SIOCGLIFBINDING, "SIOCGLIFBINDING", "lifreq" },
{ (uint_t)SIOCSLIFGROUPNAME, "SIOCSLIFGROUPNAME", "lifreq" },
{ (uint_t)SIOCGLIFGROUPNAME, "SIOCGLIFGROUPNAME", "lifreq" },
- { (uint_t)SIOCGLIFOINDEX, "SIOCGLIFOINDEX", "lifreq" },
- { (uint_t)SIOCSLIFOINDEX, "SIOCSLIFOINDEX", "lifreq" },
+ { (uint_t)SIOCGLIFGROUPINFO, "SIOCGLIFGROUPINFO", "lifgroupinfo" },
{ (uint_t)SIOCGDSTINFO, "SIOCGDSTINFO", NULL },
{ (uint_t)SIOCGIP6ADDRPOLICY, "SIOCGIP6ADDRPOLICY", NULL },
{ (uint_t)SIOCSIP6ADDRPOLICY, "SIOCSIP6ADDRPOLICY", NULL },
diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c
index edc610559d..8165f64f99 100644
--- a/usr/src/cmd/truss/print.c
+++ b/usr/src/cmd/truss/print.c
@@ -19,16 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#define _SYSCALL32 /* make 32-bit compat headers visible */
#include <stdio.h>
@@ -73,6 +70,7 @@
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/sctp.h>
+#include <net/route.h>
#include <sys/utrap.h>
#include <sys/lgrp_user.h>
#include <sys/door.h>
@@ -1749,6 +1747,8 @@ prt_sol(private_t *pri, int raw, long val)
{
if (val == SOL_SOCKET) {
outstring(pri, "SOL_SOCKET");
+ } else if (val == SOL_ROUTE) {
+ outstring(pri, "SOL_ROUTE");
} else {
const struct protoent *p;
struct protoent res;
@@ -1826,6 +1826,18 @@ sol_optname(private_t *pri, long val)
#undef CBSIZE
}
+const char *
+route_optname(private_t *pri, long val)
+{
+ switch (val) {
+ case RT_AWARE:
+ return ("RT_AWARE");
+ default:
+ (void) snprintf(pri->code_buf, sizeof (pri->code_buf),
+ "0x%lx", val);
+ return (pri->code_buf);
+ }
+}
const char *
tcp_optname(private_t *pri, long val)
@@ -1918,6 +1930,8 @@ prt_son(private_t *pri, int raw, long val)
switch (pri->sys_args[1]) {
case SOL_SOCKET: outstring(pri, sol_optname(pri, val));
break;
+ case SOL_ROUTE: outstring(pri, route_optname(pri, val));
+ break;
case IPPROTO_TCP: outstring(pri, tcp_optname(pri, val));
break;
case IPPROTO_UDP: outstring(pri, udp_optname(pri, val));
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index 72b6ce5c76..fb8f540cb5 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -2397,6 +2397,7 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
*/
char buffer[INET6_ADDRSTRLEN];
void *addr;
+ const char *nomatch = "no matching subnet found in netmasks(4)";
if (af == AF_INET)
addr = &((struct sockaddr_in *)
@@ -2405,14 +2406,23 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
addr = &((struct sockaddr_in6 *)
(&lifr.lifr_addr))->sin6_addr;
- /* Find out what netmask interface is going to be using */
+ /*
+ * Find out what netmask the interface is going to be using.
+ * If we just brought up an IPMP data address on an underlying
+ * interface above, the address will have already migrated, so
+ * the SIOCGLIFNETMASK won't be able to find it (but we need
+ * to bring the address up to get the actual netmask). Just
+ * omit printing the actual netmask in this corner-case.
+ */
if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
- inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
- goto bad;
- zerror(zlogp, B_FALSE,
- "WARNING: %s: no matching subnet found in netmasks(4) for "
- "%s; using default of %s.",
- lifr.lifr_name, addrstr4, buffer);
+ inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) {
+ zerror(zlogp, B_FALSE, "WARNING: %s; using default.",
+ nomatch);
+ } else {
+ zerror(zlogp, B_FALSE,
+ "WARNING: %s: %s: %s; using default of %s.",
+ lifr.lifr_name, nomatch, addrstr4, buffer);
+ }
}
/*