summary refs log tree commit diff
path: root/usr/src/cmd
diff options
context:
space:
mode:
author meem <Peter.Memishian@Sun.COM> 2009-01-06 20:16:25 -0500
committer meem <Peter.Memishian@Sun.COM> 2009-01-06 20:16:25 -0500
commit e11c3f44f531fdff80941ce57c065d2ae861cefc (patch)
tree e921e957d727a9596275a1119fd627ef2ecca47d /usr/src/cmd
parent 732675dd38771d280fdc276731344e9652071753 (diff)
download illumos-gate-e11c3f44f531fdff80941ce57c065d2ae861cefc.tar.gz
PSARC/2007/272 Project Clearview: IPMP Rearchitecture
PSARC/2008/773 IPQoS if_groupname Selector Removal
PSARC/2009/001 Move in.mpathd into /lib/inet
6783149 Clearview IPMP Rearchitecture
4472956 libipmp should provide administrative interfaces
4494577 ipmp is opaque - there's no way to get current status
4509788 IPMP's usage of interface flags is not backward compatible
4509869 IPMP's address move mechanism needs to be transparent to applications
4531232 "in.rdiscd: sendto: Bad file number" seen during IPMP DR
4533876 new instances of interfaces under ipmp are generated with each dr/op
4699003 in.mpathd should squawk if interfaces in a group have the same hwaddr
4704937 SUNW_ip_rcm.so is sloppy with strings
4713308 IPMP shouldn't failover unconfigured logical interfaces
4785694 non-homogeneous IPMP group does not do failback
4850407 if_mpadm and IPMP DR failure
5015757 ip can panic with ASSERT(attach_ill == ipif->ipif_ill) failure
5086201 in.ndpd's phyint_reach_random() spews "SIOCSLIFLNKINFO Invalid argument"
6184000 routes cannot be created on failed interfaces
6246564 if_mpadm -r <ifname> doesn't bring up IPv6 link-local data address
6359058 SIOCLIFFAILBACK repeatedly fails with EAGAIN; in.mpathd fills logs
6359536 enabling STANDBY on an interface with no test address acts oddly
6378487 in.dhcpd doesn't work well in an IPMP setup
6462335 cannot offline to IPMP interfaces that have no probe targets
6516992 in.routed spews "Address already in use" during IPMP address move
6518460 ip_rcm`update_pif() must remain calm when logical interfaces disappear
6549957 failed IP interfaces at boot may go unreported
6591186 rpcbind can't deal with indirect calls if all addresses are deprecated
6667143 NCE_F_UNSOL_ADV broken
6698480 IGMP version not retained during IPMP failover
6726235 IPMP group failure can sometimes lead to an extra failover
6726645 in.routed skips DEPRECATED addresses even when no others exist
6738310 ip_ndp_recover() checks IPIF_CONDEMNED on the wrong ipif flags field
6739454 system panics at sdpib`sdp_rts_announce
6739531 IPv6 DAD doesn't work well with IPMP
6740719 in.mpathd may fail to switch to router target mode
6743260 ipif_resolver_up() can fail and leave ARP bringup pending
6746613 ip's DL_NOTE_SDU_SIZE logic mishandles ill_max_frag < ill_max_mtu
6748145 in.ndpd's IPv6 link-local hardware address mappings can go stale
6753560 ilg_delete_all() can race with ill_delete_tail() when ilg_ill changes
6755987 stillborn IFF_POINTOPOINT in.mpathd logic should be hauled out
6775126 SUBDIRS ipsecutils element does not in order be
6775811 NCEs can get stuck in ND_INCOMPLETE if ARP fails when IPMP is in-use
6777496 receive-side ILL_HCKSUM_CAPABLE checks look at the wrong ill
6781488 IPSQ timer restart logic can deadlock under stress
6781883 ip_ndp_find_solicitation() can be passed adverts, and has API issues
6784852 RPCIB, SDP, and RDS all break when vanity naming is used
6786048 IPv6 ND probes create IREs with incorrect source addresses
6786091 I_PLINK handling in IP must not request reentry via ipsq_try_enter()
6786711 IPQoS if_groupname selector needs to go
6787091 assertion failure in ipcl_conn_cleanup() due to non-NULL conn_ilg
6789235 INADDR_ANY ILMs can trigger an assertion failure in IPMP environments
6789502 ipif_resolver_up() calls after ipif_ndp_up() clobber ipif_addr_ready
6789718 ip6.tun0 cannot be plumbed in a non-global-zone post-6745288
6789732 libdlpi may get stuck in i_dlpi_strgetmsg()
6789870 ipif6_dup_recovery() may operate on a freed ipif, corrupting memory
6789874 ipnet_nicevent_cb() may call taskq_dispatch() on a bogus taskq
6790310 in.mpathd may core with "phyint_inst_timer: invalid state 4"
--HG--
rename : usr/src/lib/libinetutil/common/inetutil4.c => usr/src/lib/libinetutil/common/inetutil.c
rename : usr/src/uts/common/inet/vni/vni.c => usr/src/uts/common/inet/dlpistub/dlpistub.c
rename : usr/src/uts/common/inet/vni/vni.conf => usr/src/uts/common/inet/dlpistub/dlpistub.conf
rename : usr/src/uts/common/inet/vni/vni_impl.h => usr/src/uts/common/inet/dlpistub/dlpistub_impl.h
rename : usr/src/uts/intel/vni/Makefile => usr/src/uts/intel/dlpistub/Makefile
rename : usr/src/uts/sparc/vni/Makefile => usr/src/uts/sparc/dlpistub/Makefile
Diffstat (limited to 'usr/src/cmd')
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c 16
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c 7
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c 206
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h 9
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c 9
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c 7
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c 22
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c 5
-rw-r--r-- usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c 263
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile 45
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h 14
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c 1013
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c 1210
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c 1331
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h 130
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c 236
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c 66
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c 183
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h 11
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c 10
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/Makefile 18
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c 717
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile 5
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h 8
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c 1265
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h 5
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c 27
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h 13
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c 35
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile 48
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c 1498
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl 106
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types 10
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c 53
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c 4
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c 76
-rw-r--r-- usr/src/cmd/devfsadm/misc_link.c 4
-rw-r--r-- usr/src/cmd/mdb/common/modules/ip/ip.c 7
-rw-r--r-- usr/src/cmd/rcm_daemon/Makefile.com 4
-rw-r--r-- usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c 14
-rw-r--r-- usr/src/cmd/rcm_daemon/common/ip_rcm.c 1798
-rw-r--r-- usr/src/cmd/svc/milestone/net-init 13
-rw-r--r-- usr/src/cmd/svc/milestone/net-loopback 11
-rw-r--r-- usr/src/cmd/svc/milestone/net-physical 147
-rw-r--r-- usr/src/cmd/svc/shell/net_include.sh 510
-rw-r--r-- usr/src/cmd/truss/codes.c 9
-rw-r--r-- usr/src/cmd/truss/print.c 22
-rw-r--r-- usr/src/cmd/zoneadmd/vplat.c 26
48 files changed, 6262 insertions, 4984 deletions
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
index 34bb772632..5a4779cfa5 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -133,6 +133,7 @@ main(int argc, char **argv)
boolean_t is_verbose;
int ipc_fd;
int c;
+ int aware = RTAW_UNDER_IPMP;
struct rlimit rl;
debug_level = df_get_int("", B_FALSE, DF_DEBUG_LEVEL);
@@ -301,6 +302,17 @@ main(int argc, char **argv)
dhcpmsg(MSG_ERR, "cannot open routing socket");
return (EXIT_FAILURE);
}
+
+ /*
+ * We're IPMP-aware and can manage IPMP test addresses, so issue
+ * RT_AWARE to get routing socket messages for interfaces under IPMP.
+ */
+ if (setsockopt(rtsock_fd, SOL_ROUTE, RT_AWARE, &aware,
+ sizeof (aware)) == -1) {
+ dhcpmsg(MSG_ERR, "cannot set RT_AWARE on routing socket");
+ return (EXIT_FAILURE);
+ }
+
if (iu_register_event(eh, rtsock_fd, POLLIN, rtsock_event, 0) == -1) {
dhcpmsg(MSG_ERR, "cannot register routing socket for messages");
return (EXIT_FAILURE);
@@ -1182,7 +1194,7 @@ check_lif(dhcp_lif_t *lif, const struct ifa_msghdr *ifam, int msglen)
lif->lif_name);
lif_mark_decline(lif, "duplicate address");
close_ip_lif(lif);
- (void) open_ip_lif(lif, INADDR_ANY);
+ (void) open_ip_lif(lif, INADDR_ANY, B_TRUE);
}
dad_wait = lif->lif_dad_wait;
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
index 4637ecc346..6cfce9f0a9 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* BOUND state of the DHCP client state machine.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/socket.h>
#include <sys/types.h>
#include <string.h>
@@ -358,7 +356,8 @@ dhcp_bound_complete(dhcp_smach_t *dsmp)
lif = dsmp->dsm_lif;
if (router_list != NULL &&
(router_list->len % sizeof (ipaddr_t)) == 0 &&
- strchr(lif->lif_name, ':') == NULL) {
+ strchr(lif->lif_name, ':') == NULL &&
+ !lif->lif_pif->pif_under_ipmp) {
dsmp->dsm_nrouters = router_list->len / sizeof (ipaddr_t);
dsmp->dsm_routers = malloc(router_list->len);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
index 0cfdad40e3..5d2d5fb99e 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -76,6 +76,7 @@ insert_pif(const char *pname, boolean_t isv6, int *error)
{
dhcp_pif_t *pif;
struct lifreq lifr;
+ lifgroupinfo_t lifgr;
dlpi_handle_t dh = NULL;
int fd = isv6 ? v6_sock_fd : v4_sock_fd;
@@ -127,12 +128,60 @@ insert_pif(const char *pname, boolean_t isv6, int *error)
}
/*
- * For IPv4, use DLPI to determine the hardware type, hardware
- * address, and hardware address length.
+ * Check if the pif is in an IPMP group. Interfaces using IPMP don't
+ * have dedicated hardware addresses, and get their hardware type from
+ * the SIOCGLIFGROUPINFO ioctl rather than DLPI.
*/
- if (!isv6) {
- int rc;
- dlpi_info_t dlinfo;
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) == -1) {
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_pif: SIOCGLIFGROUPNAME for %s", pname);
+ goto failure;
+ }
+
+ if (lifr.lifr_groupname[0] != '\0') {
+ (void) strlcpy(lifgr.gi_grname, lifr.lifr_groupname,
+ LIFGRNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPINFO, &lifgr) == -1) {
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_pif: SIOCGLIFGROUPINFO for %s",
+ lifgr.gi_grname);
+ goto failure;
+ }
+
+ pif->pif_hwtype = dlpi_arptype(lifgr.gi_mactype);
+ pif->pif_under_ipmp = (strcmp(pname, lifgr.gi_grifname) != 0);
+ (void) strlcpy(pif->pif_grifname, lifgr.gi_grifname, LIFNAMSIZ);
+
+ /*
+ * For IPMP underlying interfaces, stash the interface index
+ * of the IPMP meta-interface; we'll use it to send/receive
+ * traffic. This is both necessary (since IP_BOUND_IF for
+ * non-unicast traffic won't work on underlying interfaces)
+ * and preferred (since a test address lease will be able to
+ * be maintained as long as another interface in the group is
+ * still functioning).
+ */
+ if (pif->pif_under_ipmp) {
+ (void) strlcpy(lifr.lifr_name, pif->pif_grifname,
+ LIFNAMSIZ);
+
+ if (ioctl(fd, SIOCGLIFINDEX, &lifr) == -1) {
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_pif: SIOCGLIFINDEX "
+ "for %s", lifr.lifr_name);
+ goto failure;
+ }
+ pif->pif_grindex = lifr.lifr_index;
+ }
+ }
+
+ /*
+ * For IPv4, if the hardware type is still unknown, use DLPI to
+ * determine it, the hardware address, and hardware address length.
+ */
+ if (!isv6 && pif->pif_hwtype == 0) {
+ int rc;
+ dlpi_info_t dlinfo;
if ((rc = dlpi_open(pname, &dh, 0)) != DLPI_SUCCESS) {
dhcpmsg(MSG_ERROR, "insert_pif: dlpi_open: %s",
@@ -661,11 +710,12 @@ verify_lif(const dhcp_lif_t *lif)
boolean_t isv6;
int fd;
struct lifreq lifr;
+ dhcp_pif_t *pif = lif->lif_pif;
(void) memset(&lifr, 0, sizeof (struct lifreq));
(void) strlcpy(lifr.lifr_name, lif->lif_name, LIFNAMSIZ);
- isv6 = lif->lif_pif->pif_isv6;
+ isv6 = pif->pif_isv6;
fd = isv6 ? v6_sock_fd : v4_sock_fd;
if (ioctl(fd, SIOCGLIFFLAGS, &lifr) == -1) {
@@ -689,43 +739,41 @@ verify_lif(const dhcp_lif_t *lif)
}
/*
- * Special case: if the interface has gone down as a duplicate, then
- * this alone does _not_ mean that we're abandoning it just yet. Allow
- * the state machine to handle this normally by trying to get a new
- * lease.
- */
- if ((lifr.lifr_flags & (IFF_UP|IFF_DUPLICATE)) == IFF_DUPLICATE) {
- dhcpmsg(MSG_DEBUG, "verify_lif: duplicate address on %s",
- lif->lif_name);
- return (B_TRUE);
- }
-
- /*
- * If the user has torn down or started up the interface manually, then
- * abandon the lease.
- */
- if ((lif->lif_flags ^ lifr.lifr_flags) & IFF_UP) {
- dhcpmsg(MSG_DEBUG, "verify_lif: user has %s %s",
- lifr.lifr_flags & IFF_UP ? "started up" : "shut down",
- lif->lif_name);
- return (B_FALSE);
- }
-
- /*
* Check for delete and recreate.
*/
if (ioctl(fd, SIOCGLIFINDEX, &lifr) == -1) {
- dhcpmsg(MSG_ERR, "verify_lif: SIOCGLIFINDEX failed on %s",
- lif->lif_name);
+ if (errno != ENXIO) {
+ dhcpmsg(MSG_ERR, "verify_lif: SIOCGLIFINDEX failed "
+ "on %s", lif->lif_name);
+ }
return (B_FALSE);
}
- if (lifr.lifr_index != lif->lif_pif->pif_index) {
+ if (lifr.lifr_index != pif->pif_index) {
dhcpmsg(MSG_DEBUG,
"verify_lif: ifindex on %s changed: %u to %u",
- lif->lif_name, lif->lif_pif->pif_index, lifr.lifr_index);
+ lif->lif_name, pif->pif_index, lifr.lifr_index);
return (B_FALSE);
}
+ if (pif->pif_under_ipmp) {
+ (void) strlcpy(lifr.lifr_name, pif->pif_grifname, LIFNAMSIZ);
+
+ if (ioctl(fd, SIOCGLIFINDEX, &lifr) == -1) {
+ if (errno != ENXIO) {
+ dhcpmsg(MSG_ERR, "verify_lif: SIOCGLIFINDEX "
+ "failed on %s", lifr.lifr_name);
+ }
+ return (B_FALSE);
+ }
+
+ if (lifr.lifr_index != pif->pif_grindex) {
+ dhcpmsg(MSG_DEBUG, "verify_lif: IPMP group ifindex "
+ "on %s changed: %u to %u", lifr.lifr_name,
+ pif->pif_grindex, lifr.lifr_index);
+ return (B_FALSE);
+ }
+ }
+
/*
* If the IP address, netmask, or broadcast address have changed, or
* the interface has been unplumbed, then we act like there has been an
@@ -934,6 +982,13 @@ plumb_lif(dhcp_pif_t *pif, const in6_addr_t *addr)
lifr.lifr_name);
goto failure;
}
+
+ /*
+ * See comment in set_lif_dhcp().
+ */
+ if (pif->pif_under_ipmp && !(lifr.lifr_flags & IFF_NOFAILOVER))
+ lifr.lifr_flags |= IFF_NOFAILOVER | IFF_DEPRECATED;
+
lifr.lifr_flags |= IFF_UP | IFF_DHCPRUNNING;
if (ioctl(v6_sock_fd, SIOCSLIFFLAGS, &lifr) == -1) {
dhcpmsg(MSG_ERR, "plumb_lif: SIOCSLIFFLAGS %s",
@@ -1060,8 +1115,9 @@ set_lif_dhcp(dhcp_lif_t *lif, boolean_t is_adopting)
int fd;
int err;
struct lifreq lifr;
+ dhcp_pif_t *pif = lif->lif_pif;
- fd = lif->lif_pif->pif_isv6 ? v6_sock_fd : v4_sock_fd;
+ fd = pif->pif_isv6 ? v6_sock_fd : v4_sock_fd;
(void) strlcpy(lifr.lifr_name, lif->lif_name, LIFNAMSIZ);
@@ -1098,6 +1154,17 @@ set_lif_dhcp(dhcp_lif_t *lif, boolean_t is_adopting)
"set on %s", lif->lif_name);
}
} else {
+ /*
+ * If the lif is on an interface under IPMP, IFF_NOFAILOVER
+ * must be set or the kernel will prevent us from setting
+ * IFF_DHCPRUNNING (since the subsequent IFF_UP would lead to
+ * migration). We set IFF_DEPRECATED too since the kernel
+ * will set it automatically when setting IFF_NOFAILOVER,
+ * causing our lif_flags value to grow stale.
+ */
+ if (pif->pif_under_ipmp && !(lifr.lifr_flags & IFF_NOFAILOVER))
+ lifr.lifr_flags |= IFF_NOFAILOVER | IFF_DEPRECATED;
+
lifr.lifr_flags |= IFF_DHCPRUNNING;
if (ioctl(fd, SIOCSLIFFLAGS, &lifr) == -1) {
dhcpmsg(MSG_ERR, "set_lif_dhcp: SIOCSLIFFLAGS for %s",
@@ -1207,6 +1274,13 @@ clear_lif_deprecated(dhcp_lif_t *lif)
return (B_FALSE);
}
+ /*
+ * Don't try to clear IFF_DEPRECATED if this is a test address,
+ * since IPMP's use of IFF_DEPRECATED is not compatible with ours.
+ */
+ if (lifr.lifr_flags & IFF_NOFAILOVER)
+ return (B_TRUE);
+
if (!(lifr.lifr_flags & IFF_DEPRECATED))
return (B_TRUE);
@@ -1226,16 +1300,19 @@ clear_lif_deprecated(dhcp_lif_t *lif)
*
* input: dhcp_lif_t *: the logical interface to operate on
* in_addr_t: the address the socket will be bound to (in hbo)
+ * boolean_t: B_TRUE if the address should be brought up (if needed)
* output: boolean_t: B_TRUE if the socket was opened successfully.
*/
boolean_t
-open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
+open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo, boolean_t bringup)
{
const char *errmsg;
struct lifreq lifr;
int on = 1;
uchar_t ttl = 255;
+ uint32_t ifindex;
+ dhcp_pif_t *pif = lif->lif_pif;
if (lif->lif_sock_ip_fd != -1) {
dhcpmsg(MSG_WARNING, "open_ip_lif: socket already open on %s",
@@ -1270,7 +1347,7 @@ open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
}
if (setsockopt(lif->lif_sock_ip_fd, IPPROTO_IP, IP_DHCPINIT_IF,
- &lif->lif_pif->pif_index, sizeof (int)) == -1) {
+ &pif->pif_index, sizeof (int)) == -1) {
errmsg = "cannot set IP_DHCPINIT_IF";
goto failure;
}
@@ -1288,23 +1365,40 @@ open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
goto failure;
}
- if (setsockopt(lif->lif_sock_ip_fd, IPPROTO_IP, IP_BOUND_IF,
- &lif->lif_pif->pif_index, sizeof (int)) == -1) {
+ ifindex = pif->pif_under_ipmp ? pif->pif_grindex : pif->pif_index;
+ if (setsockopt(lif->lif_sock_ip_fd, IPPROTO_IP, IP_BOUND_IF, &ifindex,
+ sizeof (int)) == -1) {
errmsg = "cannot set IP_BOUND_IF";
goto failure;
}
- /*
- * Make sure at least one lif on the interface we used in IP_BOUND_IF
- * is IFF_UP so that we can send and receive IP packets.
- */
(void) strlcpy(lifr.lifr_name, lif->lif_name, LIFNAMSIZ);
if (ioctl(v4_sock_fd, SIOCGLIFFLAGS, &lifr) == -1) {
errmsg = "cannot get interface flags";
goto failure;
}
- if (!(lifr.lifr_flags & IFF_UP)) {
+ /*
+ * If the lif is part of an interface under IPMP, IFF_NOFAILOVER must
+ * be set or the kernel will prevent us from setting IFF_DHCPRUNNING
+ * (since the subsequent IFF_UP would lead to migration). We set
+ * IFF_DEPRECATED too since the kernel will set it automatically when
+ * setting IFF_NOFAILOVER, causing our lif_flags value to grow stale.
+ */
+ if (pif->pif_under_ipmp && !(lifr.lifr_flags & IFF_NOFAILOVER)) {
+ lifr.lifr_flags |= IFF_NOFAILOVER | IFF_DEPRECATED;
+ if (ioctl(v4_sock_fd, SIOCSLIFFLAGS, &lifr) == -1) {
+ errmsg = "cannot set IFF_NOFAILOVER";
+ goto failure;
+ }
+ }
+ lif->lif_flags = lifr.lifr_flags;
+
+ /*
+ * If this is initial bringup, make sure the address we're acquiring a
+ * lease on is IFF_UP.
+ */
+ if (bringup && !(lifr.lifr_flags & IFF_UP)) {
/*
* Start from a clean slate.
*/
@@ -1330,6 +1424,30 @@ open_ip_lif(dhcp_lif_t *lif, in_addr_t addr_hbo)
((struct sockaddr_in *)&lifr.lifr_addr)->sin_addr.s_addr;
}
+ /*
+ * Usually, bringing up the address we're acquiring a lease on is
+ * sufficient to allow packets to be sent and received via the
+ * IP_BOUND_IF we did earlier. However, if we're acquiring a lease on
+ * an underlying IPMP interface, the group interface will be used for
+ * sending and receiving IP packets via IP_BOUND_IF. Thus, ensure at
+ * least one address on the group interface is IFF_UP.
+ */
+ if (bringup && pif->pif_under_ipmp) {
+ (void) strlcpy(lifr.lifr_name, pif->pif_grifname, LIFNAMSIZ);
+ if (ioctl(v4_sock_fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ errmsg = "cannot get IPMP group interface flags";
+ goto failure;
+ }
+
+ if (!(lifr.lifr_flags & IFF_UP)) {
+ lifr.lifr_flags |= IFF_UP;
+ if (ioctl(v4_sock_fd, SIOCSLIFFLAGS, &lifr) == -1) {
+ errmsg = "cannot bring up IPMP group interface";
+ goto failure;
+ }
+ }
+ }
+
lif->lif_packet_id = iu_register_event(eh, lif->lif_sock_ip_fd, POLLIN,
dhcp_packet_lif, lif);
if (lif->lif_packet_id == -1) {
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
index a59e3ea68d..46cf30bedb 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef INTERFACE_H
#define INTERFACE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Interface.[ch] encapsulate all of the agent's knowledge of network
* interfaces from the DHCP agent's perspective. See interface.c for
@@ -66,6 +64,9 @@ struct dhcp_pif_s {
boolean_t pif_running; /* interface is running */
uint_t pif_hold_count; /* reference count */
char pif_name[LIFNAMSIZ];
+ char pif_grifname[LIFNAMSIZ];
+ uint32_t pif_grindex; /* interface index for pif_grifname */
+ boolean_t pif_under_ipmp; /* is an ipmp underlying interface */
};
struct dhcp_lif_s {
@@ -182,7 +183,7 @@ dhcp_lif_t *attach_lif(const char *, boolean_t, int *);
int set_lif_dhcp(dhcp_lif_t *, boolean_t);
void set_lif_deprecated(dhcp_lif_t *);
boolean_t clear_lif_deprecated(dhcp_lif_t *);
-boolean_t open_ip_lif(dhcp_lif_t *, in_addr_t);
+boolean_t open_ip_lif(dhcp_lif_t *, in_addr_t, boolean_t);
void close_ip_lif(dhcp_lif_t *);
void lif_mark_decline(dhcp_lif_t *, const char *);
boolean_t schedule_lif_timer(dhcp_lif_t *, dhcp_timer_t *,
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c
index 8a32b55ea5..a763530436 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/packet.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <string.h>
#include <sys/types.h>
#include <stdlib.h>
@@ -970,7 +968,10 @@ send_pkt_internal(dhcp_smach_t *dsmp)
ipi6->ipi6_addr = lif->lif_v6addr;
else
ipi6->ipi6_addr = my_in6addr_any;
- ipi6->ipi6_ifindex = lif->lif_pif->pif_index;
+ if (lif->lif_pif->pif_under_ipmp)
+ ipi6->ipi6_ifindex = lif->lif_pif->pif_grindex;
+ else
+ ipi6->ipi6_ifindex = lif->lif_pif->pif_index;
cmsg->cmsg_len = (char *)(ipi6 + 1) - (char *)cmsg;
/*
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
index a8c05de986..78da07aebf 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* REQUESTING state of the client state machine.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdlib.h>
#include <string.h>
#include <search.h>
@@ -1008,7 +1006,8 @@ dhcp_acknak_global(iu_eh_t *ehp, int fd, short events, iu_event_id_t id,
for (dsmp = lookup_smach_by_xid(xid, NULL, isv6); dsmp != NULL;
dsmp = lookup_smach_by_xid(xid, dsmp, isv6)) {
pif = dsmp->dsm_lif->lif_pif;
- if (pif->pif_index == plp->ifindex)
+ if (pif->pif_index == plp->ifindex ||
+ pif->pif_under_ipmp && pif->pif_grindex == plp->ifindex)
break;
}
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c
index 9ae7fd7aba..852b428551 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.c
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* This module contains core functions for managing DHCP state machine
* instances.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdlib.h>
#include <search.h>
#include <string.h>
@@ -151,7 +149,7 @@ insert_smach(dhcp_lif_t *lif, int *error)
/*
* With IPv4 DHCP, we use a socket per lif.
*/
- if (!open_ip_lif(lif, INADDR_ANY)) {
+ if (!open_ip_lif(lif, INADDR_ANY, B_TRUE)) {
dhcpmsg(MSG_ERR, "unable to open socket for %s",
lif->lif_name);
/* This will also dispose of the LIF */
@@ -696,14 +694,15 @@ set_smach_state(dhcp_smach_t *dsmp, DHCPSTATE state)
if (is_bound_state(dsmp->dsm_state)) {
if (!is_bound_state(state)) {
close_ip_lif(lif);
- if (!open_ip_lif(lif, INADDR_ANY))
+ if (!open_ip_lif(lif, INADDR_ANY,
+ B_FALSE))
return (B_FALSE);
}
} else {
if (is_bound_state(state)) {
close_ip_lif(lif);
if (!open_ip_lif(lif,
- ntohl(lif->lif_addr)))
+ ntohl(lif->lif_addr), B_FALSE))
return (B_FALSE);
}
}
@@ -952,11 +951,14 @@ no_specified_id:
* unable to parse it. We need to determine if a Client ID is required
* and, if so, generate one.
*
- * If it's IPv4 and not a logical interface, then we need to preserve
- * backward-compatibility by avoiding new-fangled DUID/IAID
- * construction.
+ * If it's IPv4, not in an IPMP group, and not a logical interface,
+ * then we need to preserve backward-compatibility by avoiding
+ * new-fangled DUID/IAID construction. (Note: even for IPMP test
+ * addresses, we construct a DUID/IAID since we may renew a lease for
+ * an IPMP test address on any functioning IP interface in the group.)
*/
- if (!pif->pif_isv6 && strchr(dsmp->dsm_name, ':') == NULL) {
+ if (!pif->pif_isv6 && pif->pif_grifname[0] == '\0' &&
+ strchr(dsmp->dsm_name, ':') == NULL) {
if (pif->pif_hwtype == ARPHRD_IB) {
/*
* This comes from the DHCP over IPoIB specification.
diff --git a/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c b/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c
index 47e1202b32..d73722cc55 100644
--- a/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c
+++ b/usr/src/cmd/cmd-inet/sbin/ifparse/ifparse.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -8,8 +8,6 @@
* specifies the terms and conditions for redistribution.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Ifparse splits up an ifconfig command line, and was written for use
* with the networking boot scripts; see $SRC/cmd/svc/shell/net_include.sh
@@ -184,6 +182,7 @@ struct cmd {
{ "auto-revarp", 0, AF_INET, PARSEFIXED},
{ "plumb", 0, AF_ANY, PARSENOW },
{ "unplumb", 0, AF_ANY, PARSENOW },
+ { "ipmp", 0, AF_ANY, PARSELOG0 },
{ "subnet", NEXTARG, AF_ANY, 0 },
{ "token", NEXTARG, AF_INET6, PARSELOG0 },
{ "tsrc", NEXTARG, AF_ANY, PARSELOG0 },
diff --git a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
index b9a02b54e7..2d115e221b 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
+++ b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,8 +29,6 @@
* MROUTING Revision 3.5
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* simple netstat based on snmp/mib-2 interface to the TCP/IP stack
*
@@ -221,6 +219,7 @@ static char *plural(int n);
static char *pluraly(int n);
static char *plurales(int n);
static void process_filter(char *arg);
+static char *ifindex2str(uint_t, char *);
static boolean_t family_selected(int family);
static void usage(char *);
@@ -680,8 +679,14 @@ mibget(int sd)
tor->OPT_offset = sizeof (struct T_optmgmt_req);
tor->OPT_length = sizeof (struct opthdr);
tor->MGMT_flags = T_CURRENT;
+
+
+ /*
+ * Note: we use the special level value below so that IP will return
+ * us information concerning IRE_MARK_TESTHIDDEN routes.
+ */
req = (struct opthdr *)&tor[1];
- req->level = MIB2_IP; /* any MIB2_xxx value ok here */
+ req->level = EXPER_IP_AND_TESTHIDDEN;
req->name = 0;
req->len = 0;
@@ -712,7 +717,7 @@ mibget(int sd)
stderr);
i = 0;
for (last_item = first_item; last_item;
- last_item = last_item->next_item)
+ last_item = last_item->next_item)
(void) printf("%d %4d %5d %d\n",
++i,
last_item->group,
@@ -1707,19 +1712,19 @@ mib_get_constants(mib_item_t *item)
ipRouteAttributeSize = ip->ipRouteAttributeSize;
transportMLPSize = ip->transportMLPSize;
assert(IS_P2ALIGNED(ipAddrEntrySize,
- sizeof (mib2_ipAddrEntry_t *)) &&
- IS_P2ALIGNED(ipRouteEntrySize,
- sizeof (mib2_ipRouteEntry_t *)) &&
- IS_P2ALIGNED(ipNetToMediaEntrySize,
- sizeof (mib2_ipNetToMediaEntry_t *)) &&
- IS_P2ALIGNED(ipMemberEntrySize,
- sizeof (ip_member_t *)) &&
- IS_P2ALIGNED(ipGroupSourceEntrySize,
- sizeof (ip_grpsrc_t *)) &&
- IS_P2ALIGNED(ipRouteAttributeSize,
- sizeof (mib2_ipAttributeEntry_t *)) &&
- IS_P2ALIGNED(transportMLPSize,
- sizeof (mib2_transportMLPEntry_t *)));
+ sizeof (mib2_ipAddrEntry_t *)));
+ assert(IS_P2ALIGNED(ipRouteEntrySize,
+ sizeof (mib2_ipRouteEntry_t *)));
+ assert(IS_P2ALIGNED(ipNetToMediaEntrySize,
+ sizeof (mib2_ipNetToMediaEntry_t *)));
+ assert(IS_P2ALIGNED(ipMemberEntrySize,
+ sizeof (ip_member_t *)));
+ assert(IS_P2ALIGNED(ipGroupSourceEntrySize,
+ sizeof (ip_grpsrc_t *)));
+ assert(IS_P2ALIGNED(ipRouteAttributeSize,
+ sizeof (mib2_ipAttributeEntry_t *)));
+ assert(IS_P2ALIGNED(transportMLPSize,
+ sizeof (mib2_transportMLPEntry_t *)));
break;
}
case EXPER_DVMRP: {
@@ -1728,8 +1733,9 @@ mib_get_constants(mib_item_t *item)
vifctlSize = mrts->mrts_vifctlSize;
mfcctlSize = mrts->mrts_mfcctlSize;
assert(IS_P2ALIGNED(vifctlSize,
- sizeof (struct vifclt *)) &&
- IS_P2ALIGNED(mfcctlSize, sizeof (struct mfcctl *)));
+ sizeof (struct vifclt *)));
+ assert(IS_P2ALIGNED(mfcctlSize,
+ sizeof (struct mfcctl *)));
break;
}
case MIB2_IP6: {
@@ -1745,17 +1751,17 @@ mib_get_constants(mib_item_t *item)
ipv6GroupSourceEntrySize =
ip6->ipv6GroupSourceEntrySize;
assert(IS_P2ALIGNED(ipv6IfStatsEntrySize,
- sizeof (mib2_ipv6IfStatsEntry_t *)) &&
- IS_P2ALIGNED(ipv6AddrEntrySize,
- sizeof (mib2_ipv6AddrEntry_t *)) &&
- IS_P2ALIGNED(ipv6RouteEntrySize,
- sizeof (mib2_ipv6RouteEntry_t *)) &&
- IS_P2ALIGNED(ipv6NetToMediaEntrySize,
- sizeof (mib2_ipv6NetToMediaEntry_t *)) &&
- IS_P2ALIGNED(ipv6MemberEntrySize,
- sizeof (ipv6_member_t *)) &&
- IS_P2ALIGNED(ipv6GroupSourceEntrySize,
- sizeof (ipv6_grpsrc_t *)));
+ sizeof (mib2_ipv6IfStatsEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6AddrEntrySize,
+ sizeof (mib2_ipv6AddrEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6RouteEntrySize,
+ sizeof (mib2_ipv6RouteEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6NetToMediaEntrySize,
+ sizeof (mib2_ipv6NetToMediaEntry_t *)));
+ assert(IS_P2ALIGNED(ipv6MemberEntrySize,
+ sizeof (ipv6_member_t *)));
+ assert(IS_P2ALIGNED(ipv6GroupSourceEntrySize,
+ sizeof (ipv6_grpsrc_t *)));
break;
}
case MIB2_ICMP6: {
@@ -1774,9 +1780,9 @@ mib_get_constants(mib_item_t *item)
tcpConnEntrySize = tcp->tcpConnTableSize;
tcp6ConnEntrySize = tcp->tcp6ConnTableSize;
assert(IS_P2ALIGNED(tcpConnEntrySize,
- sizeof (mib2_tcpConnEntry_t *)) &&
- IS_P2ALIGNED(tcp6ConnEntrySize,
- sizeof (mib2_tcp6ConnEntry_t *)));
+ sizeof (mib2_tcpConnEntry_t *)));
+ assert(IS_P2ALIGNED(tcp6ConnEntrySize,
+ sizeof (mib2_tcp6ConnEntry_t *)));
break;
}
case MIB2_UDP: {
@@ -1785,9 +1791,9 @@ mib_get_constants(mib_item_t *item)
udpEntrySize = udp->udpEntrySize;
udp6EntrySize = udp->udp6EntrySize;
assert(IS_P2ALIGNED(udpEntrySize,
- sizeof (mib2_udpEntry_t *)) &&
- IS_P2ALIGNED(udp6EntrySize,
- sizeof (mib2_udp6Entry_t *)));
+ sizeof (mib2_udpEntry_t *)));
+ assert(IS_P2ALIGNED(udp6EntrySize,
+ sizeof (mib2_udp6Entry_t *)));
break;
}
case MIB2_SCTP: {
@@ -1843,7 +1849,6 @@ stat_report(mib_item_t *item)
{
int jtemp = 0;
char ifname[LIFNAMSIZ + 1];
- char *ifnamep;
/* 'for' loop 1: */
for (; item; item = item->next_item) {
@@ -1891,12 +1896,10 @@ stat_report(mib_item_t *item)
bzero(&sum6, sizeof (sum6));
/* 'for' loop 2a: */
for (ip6 = (mib2_ipv6IfStatsEntry_t *)item->valp;
- (char *)ip6 < (char *)item->valp
- + item->length;
+ (char *)ip6 < (char *)item->valp + item->length;
/* LINTED: (note 1) */
ip6 = (mib2_ipv6IfStatsEntry_t *)((char *)ip6 +
ipv6IfStatsEntrySize)) {
-
if (ip6->ipv6IfIndex == 0) {
/*
* The "unknown interface" ip6
@@ -1905,19 +1908,10 @@ stat_report(mib_item_t *item)
sum_ip6_stats(ip6, &sum6);
continue; /* 'for' loop 2a */
}
- ifnamep = if_indextoname(
- ip6->ipv6IfIndex,
- ifname);
- if (ifnamep == NULL) {
- (void) printf(
- "Invalid ifindex %d\n",
- ip6->ipv6IfIndex);
- continue; /* 'for' loop 2a */
- }
-
if (Aflag) {
(void) printf("\nIPv6 for %s\n",
- ifnamep);
+ ifindex2str(ip6->ipv6IfIndex,
+ ifname));
print_ip6_stats(ip6);
}
sum_ip6_stats(ip6, &sum6);
@@ -1935,15 +1929,10 @@ stat_report(mib_item_t *item)
break;
bzero(&sum6, sizeof (sum6));
/* 'for' loop 2b: */
- for (icmp6 =
- (mib2_ipv6IfIcmpEntry_t *)item->valp;
- (char *)icmp6 < (char *)item->valp
- + item->length;
- icmp6 =
- /* LINTED: (note 1) */
- (mib2_ipv6IfIcmpEntry_t *)((char *)icmp6
- + ipv6IfIcmpEntrySize)) {
-
+ for (icmp6 = (mib2_ipv6IfIcmpEntry_t *)item->valp;
+ (char *)icmp6 < (char *)item->valp + item->length;
+ icmp6 = (void *)((char *)icmp6 +
+ ipv6IfIcmpEntrySize)) {
if (icmp6->ipv6IfIcmpIfIndex == 0) {
/*
* The "unknown interface" icmp6
@@ -1952,19 +1941,10 @@ stat_report(mib_item_t *item)
sum_icmp6_stats(icmp6, &sum6);
continue; /* 'for' loop 2b: */
}
- ifnamep = if_indextoname(
- icmp6->ipv6IfIcmpIfIndex, ifname);
- if (ifnamep == NULL) {
- (void) printf(
- "Invalid ifindex %d\n",
- icmp6->ipv6IfIcmpIfIndex);
- continue; /* 'for' loop 2b: */
- }
-
if (Aflag) {
- (void) printf(
- "\nICMPv6 for %s\n",
- ifnamep);
+ (void) printf("\nICMPv6 for %s\n",
+ ifindex2str(
+ icmp6->ipv6IfIcmpIfIndex, ifname));
print_icmp6_stats(icmp6);
}
sum_icmp6_stats(icmp6, &sum6);
@@ -2369,51 +2349,49 @@ print_mrt_stats(struct mrtstat *mrts)
{
(void) puts("DVMRP multicast routing:");
(void) printf(" %10u hit%s - kernel forwarding cache hits\n",
- mrts->mrts_mfc_hits, PLURAL(mrts->mrts_mfc_hits));
+ mrts->mrts_mfc_hits, PLURAL(mrts->mrts_mfc_hits));
(void) printf(" %10u miss%s - kernel forwarding cache misses\n",
- mrts->mrts_mfc_misses, PLURALES(mrts->mrts_mfc_misses));
+ mrts->mrts_mfc_misses, PLURALES(mrts->mrts_mfc_misses));
(void) printf(" %10u packet%s potentially forwarded\n",
- mrts->mrts_fwd_in, PLURAL(mrts->mrts_fwd_in));
+ mrts->mrts_fwd_in, PLURAL(mrts->mrts_fwd_in));
(void) printf(" %10u packet%s actually sent out\n",
- mrts->mrts_fwd_out, PLURAL(mrts->mrts_fwd_out));
+ mrts->mrts_fwd_out, PLURAL(mrts->mrts_fwd_out));
(void) printf(" %10u upcall%s - upcalls made to mrouted\n",
- mrts->mrts_upcalls, PLURAL(mrts->mrts_upcalls));
+ mrts->mrts_upcalls, PLURAL(mrts->mrts_upcalls));
(void) printf(" %10u packet%s not sent out due to lack of resources\n",
- mrts->mrts_fwd_drop, PLURAL(mrts->mrts_fwd_drop));
+ mrts->mrts_fwd_drop, PLURAL(mrts->mrts_fwd_drop));
(void) printf(" %10u datagram%s with malformed tunnel options\n",
- mrts->mrts_bad_tunnel, PLURAL(mrts->mrts_bad_tunnel));
+ mrts->mrts_bad_tunnel, PLURAL(mrts->mrts_bad_tunnel));
(void) printf(" %10u datagram%s with no room for tunnel options\n",
- mrts->mrts_cant_tunnel, PLURAL(mrts->mrts_cant_tunnel));
+ mrts->mrts_cant_tunnel, PLURAL(mrts->mrts_cant_tunnel));
(void) printf(" %10u datagram%s arrived on wrong interface\n",
- mrts->mrts_wrong_if, PLURAL(mrts->mrts_wrong_if));
+ mrts->mrts_wrong_if, PLURAL(mrts->mrts_wrong_if));
(void) printf(" %10u datagram%s dropped due to upcall Q overflow\n",
- mrts->mrts_upq_ovflw, PLURAL(mrts->mrts_upq_ovflw));
+ mrts->mrts_upq_ovflw, PLURAL(mrts->mrts_upq_ovflw));
(void) printf(" %10u datagram%s cleaned up by the cache\n",
- mrts->mrts_cache_cleanups, PLURAL(mrts->mrts_cache_cleanups));
+ mrts->mrts_cache_cleanups, PLURAL(mrts->mrts_cache_cleanups));
(void) printf(" %10u datagram%s dropped selectively by ratelimiter\n",
- mrts->mrts_drop_sel, PLURAL(mrts->mrts_drop_sel));
+ mrts->mrts_drop_sel, PLURAL(mrts->mrts_drop_sel));
(void) printf(" %10u datagram%s dropped - bucket Q overflow\n",
- mrts->mrts_q_overflow, PLURAL(mrts->mrts_q_overflow));
+ mrts->mrts_q_overflow, PLURAL(mrts->mrts_q_overflow));
(void) printf(" %10u datagram%s dropped - larger than bkt size\n",
- mrts->mrts_pkt2large, PLURAL(mrts->mrts_pkt2large));
+ mrts->mrts_pkt2large, PLURAL(mrts->mrts_pkt2large));
(void) printf("\nPIM multicast routing:\n");
(void) printf(" %10u datagram%s dropped - bad version number\n",
- mrts->mrts_pim_badversion, PLURAL(mrts->mrts_pim_badversion));
+ mrts->mrts_pim_badversion, PLURAL(mrts->mrts_pim_badversion));
(void) printf(" %10u datagram%s dropped - bad checksum\n",
- mrts->mrts_pim_rcv_badcsum, PLURAL(mrts->mrts_pim_rcv_badcsum));
+ mrts->mrts_pim_rcv_badcsum, PLURAL(mrts->mrts_pim_rcv_badcsum));
(void) printf(" %10u datagram%s dropped - bad register packets\n",
- mrts->mrts_pim_badregisters,
- PLURAL(mrts->mrts_pim_badregisters));
+ mrts->mrts_pim_badregisters, PLURAL(mrts->mrts_pim_badregisters));
(void) printf(
- " %10u datagram%s potentially forwarded - register packets\n",
- mrts->mrts_pim_regforwards, PLURAL(mrts->mrts_pim_regforwards));
+ " %10u datagram%s potentially forwarded - register packets\n",
+ mrts->mrts_pim_regforwards, PLURAL(mrts->mrts_pim_regforwards));
(void) printf(" %10u datagram%s dropped - register send drops\n",
- mrts->mrts_pim_regsend_drops,
- PLURAL(mrts->mrts_pim_regsend_drops));
+ mrts->mrts_pim_regsend_drops, PLURAL(mrts->mrts_pim_regsend_drops));
(void) printf(" %10u datagram%s dropped - packet malformed\n",
- mrts->mrts_pim_malformed, PLURAL(mrts->mrts_pim_malformed));
+ mrts->mrts_pim_malformed, PLURAL(mrts->mrts_pim_malformed));
(void) printf(" %10u datagram%s dropped - no memory to forward\n",
- mrts->mrts_pim_nomemory, PLURAL(mrts->mrts_pim_nomemory));
+ mrts->mrts_pim_nomemory, PLURAL(mrts->mrts_pim_nomemory));
}
static void
@@ -2674,7 +2652,7 @@ if_report(mib_item_t *item, char *matchname,
"Ierrs", "Opkts", "Oerrs",
"Collis", "Queue");
- first = B_FALSE;
+ first = B_FALSE;
}
if_report_ip4(ap, ifname,
logintname, &stat, B_TRUE);
@@ -2717,7 +2695,7 @@ if_report(mib_item_t *item, char *matchname,
+ item->length;
ap++) {
(void) octetstr(&ap->ipAdEntIfIndex,
- 'a', ifname, sizeof (ifname));
+ 'a', ifname, sizeof (ifname));
(void) strtok(ifname, ":");
if (matchname) {
@@ -3387,7 +3365,7 @@ dhcp_walk_interfaces(uint_t flags_on, uint_t flags_off, int af,
*/
(void) memset(&lifn, 0, sizeof (lifn));
lifn.lifn_family = af;
- lifn.lifn_flags = LIFC_ALLZONES | LIFC_NOXMIT;
+ lifn.lifn_flags = LIFC_ALLZONES | LIFC_NOXMIT | LIFC_UNDER_IPMP;
if (ioctl(sock_fd, SIOCGLIFNUM, &lifn) == -1)
n_ifs = LIFN_GUARD_VALUE;
else
@@ -3471,7 +3449,6 @@ group_report(mib_item_t *item)
ip_grpsrc_t *ips;
ipv6_member_t *ipmp6;
ipv6_grpsrc_t *ips6;
- char *ifnamep;
boolean_t first, first_src;
/* 'for' loop 1: */
@@ -3604,7 +3581,7 @@ group_report(mib_item_t *item)
(char *)ipmp6 < (char *)v6grp->valp + v6grp->length;
/* LINTED: (note 1) */
ipmp6 = (ipv6_member_t *)((char *)ipmp6 +
- ipv6MemberEntrySize)) {
+ ipv6MemberEntrySize)) {
if (first) {
(void) puts("Group Memberships: "
"IPv6");
@@ -3615,15 +3592,8 @@ group_report(mib_item_t *item)
first = B_FALSE;
}
- ifnamep = if_indextoname(
- ipmp6->ipv6GroupMemberIfIndex, ifname);
- if (ifnamep == NULL) {
- (void) printf("Invalid ifindex %d\n",
- ipmp6->ipv6GroupMemberIfIndex);
- continue;
- }
(void) printf("%-5s %-27s %5u\n",
- ifnamep,
+ ifindex2str(ipmp6->ipv6GroupMemberIfIndex, ifname),
pr_addr6(&ipmp6->ipv6GroupMemberAddress,
abuf, sizeof (abuf)),
ipmp6->ipv6GroupMemberRefCnt);
@@ -3784,7 +3754,6 @@ ndp_report(mib_item_t *item)
char xbuf[STR_EXPAND * OCTET_LENGTH + 1];
mib2_ipv6NetToMediaEntry_t *np6;
char ifname[LIFNAMSIZ + 1];
- char *ifnamep;
boolean_t first;
if (!(family_selected(AF_INET6)))
@@ -3820,13 +3789,6 @@ ndp_report(mib_item_t *item)
first = B_FALSE;
}
- ifnamep = if_indextoname(np6->ipv6NetToMediaIfIndex,
- ifname);
- if (ifnamep == NULL) {
- (void) printf("Invalid ifindex %d\n",
- np6->ipv6NetToMediaIfIndex);
- continue; /* 'for' loop 2 */
- }
switch (np6->ipv6NetToMediaState) {
case ND_INCOMPLETE:
state = "INCOMPLETE";
@@ -3865,7 +3827,7 @@ ndp_report(mib_item_t *item)
break;
}
(void) printf("%-5s %-17s %-7s %-12s %-27s\n",
- ifnamep,
+ ifindex2str(np6->ipv6NetToMediaIfIndex, ifname),
octetstr(&np6->ipv6NetToMediaPhysAddress, 'h',
xbuf, sizeof (xbuf)),
type,
@@ -4472,7 +4434,7 @@ ire_report_item_v6(const mib2_ipv6RouteEntry_t *rp6, boolean_t first,
(void) printf("%-27s %-27s %-5s %5u%c %5u %3u "
"%-5s %6u %6u %s\n",
pr_prefix6(&rp6->ipv6RouteDest,
- rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
+ rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
IN6_IS_ADDR_UNSPECIFIED(&rp6->ipv6RouteNextHop) ?
" --" :
pr_addr6(&rp6->ipv6RouteNextHop, gwbuf, sizeof (gwbuf)),
@@ -4489,7 +4451,7 @@ ire_report_item_v6(const mib2_ipv6RouteEntry_t *rp6, boolean_t first,
} else {
(void) printf("%-27s %-27s %-5s %3u %7u %-5s %s\n",
pr_prefix6(&rp6->ipv6RouteDest,
- rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
+ rp6->ipv6RoutePfxLength, dstbuf, sizeof (dstbuf)),
IN6_IS_ADDR_UNSPECIFIED(&rp6->ipv6RouteNextHop) ?
" --" :
pr_addr6(&rp6->ipv6RouteNextHop, gwbuf, sizeof (gwbuf)),
@@ -4690,9 +4652,9 @@ tcp_report_item_v4(const mib2_tcpConnEntry_t *tp, boolean_t first,
(void) printf("%-20s\n%-20s %5u %08x %08x %5u %08x %08x "
"%5u %5u %s\n",
pr_ap(tp->tcpConnLocalAddress,
- tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap(tp->tcpConnRemAddress,
- tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
+ tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
tp->tcpConnEntryInfo.ce_swnd,
tp->tcpConnEntryInfo.ce_snxt,
tp->tcpConnEntryInfo.ce_suna,
@@ -4710,9 +4672,9 @@ tcp_report_item_v4(const mib2_tcpConnEntry_t *tp, boolean_t first,
(void) printf("%-20s %-20s %5u %6d %5u %6d %s\n",
pr_ap(tp->tcpConnLocalAddress,
- tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp->tcpConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap(tp->tcpConnRemAddress,
- tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
+ tp->tcpConnRemPort, "tcp", fname, sizeof (fname)),
tp->tcpConnEntryInfo.ce_swnd,
(sq >= 0) ? sq : 0,
tp->tcpConnEntryInfo.ce_rwnd,
@@ -4756,9 +4718,9 @@ tcp_report_item_v6(const mib2_tcp6ConnEntry_t *tp6, boolean_t first,
(void) printf("%-33s\n%-33s %5u %08x %08x %5u %08x %08x "
"%5u %5u %-11s %s\n",
pr_ap6(&tp6->tcp6ConnLocalAddress,
- tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap6(&tp6->tcp6ConnRemAddress,
- tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
+ tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
tp6->tcp6ConnEntryInfo.ce_swnd,
tp6->tcp6ConnEntryInfo.ce_snxt,
tp6->tcp6ConnEntryInfo.ce_suna,
@@ -4777,9 +4739,9 @@ tcp_report_item_v6(const mib2_tcp6ConnEntry_t *tp6, boolean_t first,
(void) printf("%-33s %-33s %5u %6d %5u %6d %-11s %s\n",
pr_ap6(&tp6->tcp6ConnLocalAddress,
- tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
+ tp6->tcp6ConnLocalPort, "tcp", lname, sizeof (lname)),
pr_ap6(&tp6->tcp6ConnRemAddress,
- tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
+ tp6->tcp6ConnRemPort, "tcp", fname, sizeof (fname)),
tp6->tcp6ConnEntryInfo.ce_swnd,
(sq >= 0) ? sq : 0,
tp6->tcp6ConnEntryInfo.ce_rwnd,
@@ -5112,7 +5074,7 @@ sctp_pr_addr(int type, char *name, int namelen, const in6_addr_t *addr,
* displaying.
*/
switch (type) {
- case MIB2_SCTP_ADDR_V4:
+ case MIB2_SCTP_ADDR_V4:
/* v4 */
v6addr = *addr;
@@ -5124,7 +5086,7 @@ sctp_pr_addr(int type, char *name, int namelen, const in6_addr_t *addr,
}
break;
- case MIB2_SCTP_ADDR_V6:
+ case MIB2_SCTP_ADDR_V6:
/* v6 */
if (port > 0) {
(void) pr_ap6(addr, port, "sctp", name, namelen);
@@ -5133,7 +5095,7 @@ sctp_pr_addr(int type, char *name, int namelen, const in6_addr_t *addr,
}
break;
- default:
+ default:
(void) snprintf(name, namelen, "<unknown addr type>");
break;
}
@@ -5379,7 +5341,7 @@ mrt_report(mib_item_t *item)
case EXPER_DVMRP_MRT:
if (Dflag)
(void) printf("%u records for ipMfcTable:\n",
- item->length/sizeof (struct vifctl));
+ item->length/sizeof (struct vifctl));
if (item->length/sizeof (struct vifctl) == 0) {
(void) puts("\nMulticast Forwarding Cache is "
"empty");
@@ -5402,10 +5364,10 @@ mrt_report(mib_item_t *item)
abuf, sizeof (abuf)));
(void) printf("%-15.15s %6s %3u ",
pr_net(mfccp->mfcc_mcastgrp.s_addr,
- mfccp->mfcc_mcastgrp.s_addr,
- abuf, sizeof (abuf)),
+ mfccp->mfcc_mcastgrp.s_addr,
+ abuf, sizeof (abuf)),
pktscale((int)mfccp->mfcc_pkt_cnt),
- mfccp->mfcc_parent);
+ mfccp->mfcc_parent);
for (vifi = 0; vifi < MAXVIFS; ++vifi) {
if (mfccp->mfcc_ttls[vifi]) {
@@ -5468,7 +5430,7 @@ kmem_cache_stats(char *title, char *name, int prefix, int64_t *total_bytes)
strncmp(ksp->ks_name, "streams_dblk", 12) == 0) {
(void) safe_kstat_read(kc, ksp, NULL);
total_buf_inuse -=
- kstat_named_value(ksp, "buf_constructed");
+ kstat_named_value(ksp, "buf_constructed");
continue; /* 'for' loop 1 */
}
@@ -5501,7 +5463,7 @@ kmem_cache_stats(char *title, char *name, int prefix, int64_t *total_bytes)
if (buf_size == 0) {
(void) printf("%-22s [couldn't find statistics for %s]\n",
- title, name);
+ title, name);
return;
}
@@ -5511,7 +5473,7 @@ kmem_cache_stats(char *title, char *name, int prefix, int64_t *total_bytes)
(void) snprintf(buf, sizeof (buf), "%s", title);
(void) printf("%-22s %6d %9d %11lld %11d\n", buf,
- total_buf_inuse, total_buf_max, total_alloc, total_alloc_fail);
+ total_buf_inuse, total_buf_max, total_alloc, total_alloc_fail);
}
static void
@@ -5534,7 +5496,7 @@ m_report(void)
kmem_cache_stats("qband", "qband_cache", 0, &total_bytes);
(void) printf("\n%lld Kbytes allocated for streams data\n",
- total_bytes / 1024);
+ total_bytes / 1024);
(void) putchar('\n');
(void) fflush(stdout);
@@ -5967,7 +5929,7 @@ portname(uint_t port, char *proto, char *dst, uint_t dstlen)
sp = getservbyport(htons(port), proto);
if (sp || port == 0)
(void) snprintf(dst, dstlen, "%.*s", MAXHOSTNAMELEN,
- sp ? sp->s_name : "*");
+ sp ? sp->s_name : "*");
else
(void) snprintf(dst, dstlen, "%d", port);
dst[dstlen - 1] = 0;
@@ -6161,8 +6123,8 @@ process_filter(char *arg)
*/
if (hp->h_addr_list[0] != NULL &&
/* LINTED: (note 1) */
- IN6_IS_ADDR_V4MAPPED((in6_addr_t
- *)hp->h_addr_list[0])) {
+ IN6_IS_ADDR_V4MAPPED((in6_addr_t *)
+ hp->h_addr_list[0])) {
maxv = IP_ABITS;
} else {
maxv = IPV6_ABITS;
@@ -6226,6 +6188,21 @@ family_selected(int family)
}
/*
+ * Convert the interface index to a string using the buffer `ifname', which
+ * must be at least LIFNAMSIZ bytes, and return `ifname'. We first try to map
+ * the index to a name. If that fails (e.g., because we're inside a zone that
+ * cannot access the interface in question), just return "if#<num>".
+ */
+static char *
+ifindex2str(uint_t ifindex, char *ifname)
+{
+ if (if_indextoname(ifindex, ifname) == NULL)
+ (void) snprintf(ifname, LIFNAMSIZ, "if#%u", ifindex);
+
+ return (ifname);
+}
+
+/*
* print the usage line
*/
static void
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile
index f0c4c03250..f3ce9fae4b 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/Makefile
@@ -19,51 +19,58 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
-
-PROG = in.mpathd
-OBJS = mpd_tables.o mpd_main.o mpd_probe.o
-SRCS = $(OBJS:%.o=%.c)
-DEFAULTFILES = mpathd.dfl
+PROG = in.mpathd
+ROOTFS_PROG = $(PROG)
+OBJS = mpd_tables.o mpd_main.o mpd_probe.o
+SRCS = $(OBJS:%.o=%.c)
+DEFAULTFILES = mpathd.dfl
include ../../../Makefile.cmd
-POFILE = $(PROG).po
-POFILES = $(SRCS:%.c=%.po)
+ROOTCMDDIR = $(ROOT)/lib/inet
+
+POFILE = $(PROG).po
+POFILES = $(SRCS:%.c=%.po)
-C99MODE= $(C99_ENABLE)
+C99MODE = $(C99_ENABLE)
#
# We need access to the ancillary data features which are only available
# via the SUS standards. Further, C99 support requires SUSv3 or higher.
#
CPPFLAGS += -D_XOPEN_SOURCE=600 -D__EXTENSIONS__
-LDLIBS += -lsocket -lnsl -lsysevent -lnvpair -lipmp -lc
+LDLIBS += -lsocket -lnsl -lsysevent -lnvpair -lipmp -linetutil -ldlpi
+LINTFLAGS += -erroff=E_INCONS_ARG_DECL2 -erroff=E_INCONS_ARG_USED2
-LINTFLAGS += -erroff=E_FUNC_DECL_VAR_ARG2 -erroff=E_INCONS_VAL_TYPE_DECL2 \
- -erroff=E_FUNC_USED_VAR_ARG2 -erroff=E_INCONS_ARG_DECL2 \
- -erroff=E_NAME_USED_NOT_DEF2 -erroff=E_INCONS_ARG_USED2 \
- -errtags=yes
+#
+# Instrument in.mpathd with CTF data to ease debugging.
+#
+CTFCONVERT_HOOK = && $(CTFCONVERT_O)
+CTFMERGE_HOOK = && $(CTFMERGE) -L VERSION -o $@ $(OBJS)
+$(OBJS) := CFLAGS += $(CTF_FLAGS)
.KEEP_STATE:
all: $(PROG)
$(PROG): $(OBJS)
- $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS) $(CTFMERGE_HOOK)
$(POST_PROCESS)
include ../Makefile.lib
+$(ROOTLIBINETPROG):
+ $(RM) $@; $(SYMLINK) ../../../lib/inet/$(PROG) $@
+
$(ROOTSBINPROG):
- $(RM) $@; $(SYMLINK) ../usr/lib/inet/$(PROG) $@
+ $(RM) $@; $(SYMLINK) ../lib/inet/$(PROG) $@
-install: all $(ROOTLIBINETPROG) $(ROOTSBINPROG) $(ROOTETCDEFAULTFILES)
+install: all $(ROOTLIBINETPROG) $(ROOTSBINPROG) $(ROOTCMD) \
+ $(ROOTETCDEFAULTFILES)
clean:
$(RM) $(OBJS)
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h
index 9b07e2a7a3..e7cb096bf7 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_defs.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _MPD_DEFS_H
#define _MPD_DEFS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -78,12 +76,13 @@ extern "C" {
#include <locale.h>
#include <deflt.h>
+#include <libdlpi.h>
+#include <libinetutil.h>
#include <libnvpair.h>
#include <libsysevent.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/ipmp.h>
-#include <zone.h>
#include <ipmp_mpathd.h>
#include <ipmp_query_impl.h>
@@ -92,7 +91,7 @@ extern "C" {
/* Debug flags */
#define D_ALL 0xffff /* enable all debug */
#define D_PROBE 0x0001 /* probe mechanism */
-#define D_FAILOVER 0x0002 /* failover mechanism */
+#define D_FAILREP 0x0002 /* failure/repair mechanism */
#define D_PHYINT 0x0004 /* phyint table */
#define D_LOGINT 0x0008 /* logint table */
#define D_TARGET 0x0010 /* target table */
@@ -199,10 +198,8 @@ extern int user_failure_detection_time; /* User specified fdt */
extern int ifsock_v4; /* IPv4 socket for ioctls */
extern int ifsock_v6; /* IPv6 socket for ioctls */
-extern boolean_t full_scan_required; /* Do full scans */
-
extern int debug; /* debug option */
-
+extern boolean_t cleanup_started; /* true if we're shutting down */
extern boolean_t handle_link_notifications;
/*
@@ -212,6 +209,7 @@ extern void timer_schedule(uint_t delay);
extern void logmsg(int pri, const char *fmt, ...);
extern void logperror(const char *str);
extern int poll_add(int fd);
+extern int poll_remove(int fd);
extern uint64_t getcurrentsec(void);
extern uint_t getcurrenttime(void);
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c
index aa6a99fb9c..e1e22e12d4 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mpd_defs.h"
#include "mpd_tables.h"
@@ -46,7 +44,6 @@ static int lsock_v6; /* Listen socket to detect mpathd */
static int mibfd = -1; /* fd to get mib info */
static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */
-boolean_t full_scan_required = _B_FALSE;
static uint_t last_initifs_time; /* Time when initifs was last run */
static char **argv0; /* Saved for re-exec on SIGHUP */
boolean_t handle_link_notifications = _B_TRUE;
@@ -58,10 +55,6 @@ static void check_if_removed(struct phyint_instance *pii);
static void select_test_ifs(void);
static void ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
static void ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
-static void router_add_v4(mib2_ipRouteEntry_t *rp1,
- struct in_addr nexthop_v4);
-static void router_add_v6(mib2_ipv6RouteEntry_t *rp1,
- struct in6_addr nexthop_v6);
static void router_add_common(int af, char *ifname,
struct in6_addr nexthop);
static void init_router_targets();
@@ -74,17 +67,17 @@ static void check_addr_unique(struct phyint_instance *,
static void init_host_targets(void);
static void dup_host_targets(struct phyint_instance *desired_pii);
static void loopback_cmd(int sock, int family);
-static int poll_remove(int fd);
static boolean_t daemonize(void);
static int closefunc(void *, int);
static unsigned int process_cmd(int newfd, union mi_commands *mpi);
static unsigned int process_query(int fd, mi_query_t *miq);
+static unsigned int send_addrinfo(int fd, ipmp_addrinfo_t *adinfop);
static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
static unsigned int send_result(int fd, unsigned int error, int syserror);
-struct local_addr *laddr_list = NULL;
+addrlist_t *localaddrs;
/*
* Return the current time in milliseconds (from an arbitrary reference)
@@ -153,7 +146,7 @@ retry:
/*
* Remove fd from the set being polled. Returns 0 if ok; -1 if failed.
*/
-static int
+int
poll_remove(int fd)
{
int i;
@@ -205,17 +198,11 @@ pii_process(int af, char *name, struct phyint_instance **pii_p)
break;
case PI_GROUP_CHANGED:
- /*
- * The phyint has changed group.
- */
- restore_phyint(pii->pii_phyint);
- /* FALLTHRU */
-
case PI_IFINDEX_CHANGED:
/*
- * Interface index has changed. Delete and
- * recreate the phyint as it is quite likely
- * the interface has been unplumbed and replumbed.
+ * Interface index or group membership has changed.
+ * Delete the old state and recreate based on the new
+ * state (it may no longer be in a group).
*/
pii_other = phyint_inst_other(pii);
if (pii_other != NULL)
@@ -249,51 +236,26 @@ pii_process(int af, char *name, struct phyint_instance **pii_p)
}
/*
- * This phyint is leaving the group. Try to restore the phyint to its
- * initial state. Return the addresses that belong to other group members,
- * to the group, and take back any addresses owned by this phyint
- */
-void
-restore_phyint(struct phyint *pi)
-{
- if (pi->pi_group == phyint_anongroup)
- return;
-
- /*
- * Move everthing to some other member in the group.
- * The phyint has changed group in the kernel. But we
- * have yet to do it in our tables.
- */
- if (!pi->pi_empty)
- (void) try_failover(pi, FAILOVER_TO_ANY);
- /*
- * Move all addresses owned by 'pi' back to pi, from each
- * of the other members of the group
- */
- (void) try_failback(pi);
-}
-
-/*
* Scan all interfaces to detect changes as well as new and deleted interfaces
*/
static void
initifs()
{
- int n;
+ int i, nlifr;
int af;
char *cp;
char *buf;
- int numifs;
+ int sockfd;
+ uint64_t flags;
struct lifnum lifn;
struct lifconf lifc;
+ struct lifreq lifreq;
struct lifreq *lifr;
struct logint *li;
struct phyint_instance *pii;
struct phyint_instance *next_pii;
- char pi_name[LIFNAMSIZ + 1];
- boolean_t exists;
- struct phyint *pi;
- struct local_addr *next;
+ struct phyint_group *pg, *next_pg;
+ char pi_name[LIFNAMSIZ + 1];
if (debug & D_PHYINT)
logdebug("initifs: Scanning interfaces\n");
@@ -301,13 +263,9 @@ initifs()
last_initifs_time = getcurrenttime();
/*
- * Free the laddr_list before collecting the local addresses.
+ * Free the existing local address list; we'll build a new list below.
*/
- while (laddr_list != NULL) {
- next = laddr_list->next;
- free(laddr_list);
- laddr_list = next;
- }
+ addrlist_free(&localaddrs);
/*
* Mark the interfaces so that we can find phyints and logints
@@ -326,122 +284,142 @@ initifs()
}
}
+ /*
+ * As above, mark groups so that we can detect IPMP interfaces which
+ * have been removed from the kernel. Also, delete the group address
+ * list since we'll iteratively recreate it below.
+ */
+ for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
+ pg->pg_in_use = _B_FALSE;
+ addrlist_free(&pg->pg_addrs);
+ }
+
lifn.lifn_family = AF_UNSPEC;
- lifn.lifn_flags = LIFC_ALLZONES;
+ lifn.lifn_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
+again:
if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
- logperror("initifs: ioctl (get interface numbers)");
+ logperror("initifs: ioctl (get interface count)");
return;
}
- numifs = lifn.lifn_count;
+ /*
+ * Pad the interface count to detect when additional interfaces have
+ * been configured between SIOCGLIFNUM and SIOCGLIFCONF.
+ */
+ lifn.lifn_count += 4;
- buf = (char *)calloc(numifs, sizeof (struct lifreq));
- if (buf == NULL) {
+ if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
logperror("initifs: calloc");
return;
}
lifc.lifc_family = AF_UNSPEC;
- lifc.lifc_flags = LIFC_ALLZONES;
- lifc.lifc_len = numifs * sizeof (struct lifreq);
+ lifc.lifc_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
+ lifc.lifc_len = lifn.lifn_count * sizeof (struct lifreq);
lifc.lifc_buf = buf;
if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
- /*
- * EINVAL is commonly encountered, when things change
- * underneath us rapidly, (eg. at boot, when new interfaces
- * are plumbed successively) and the kernel finds the buffer
- * size we passed as too small. We will retry again
- * when we see the next routing socket msg, or at worst after
- * IF_SCAN_INTERVAL ms.
- */
- if (errno != EINVAL) {
- logperror("initifs: ioctl"
- " (get interface configuration)");
- }
+ logperror("initifs: ioctl (get interface configuration)");
free(buf);
return;
}
- lifr = (struct lifreq *)lifc.lifc_req;
-
/*
- * For each lifreq returned by SIOGGLIFCONF, call pii_process()
- * and get the state of the corresponding phyint_instance. If it is
- * successful, then call logint_init_from_k() to get the state of the
- * logint.
+ * If every lifr_req slot is taken, then additional interfaces must
+ * have been plumbed between the SIOCGLIFNUM and the SIOCGLIFCONF.
+ * Recalculate to make sure we didn't miss any interfaces.
*/
- for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifr++) {
- int sockfd;
- struct local_addr *taddr;
- struct sockaddr_in *sin;
- struct sockaddr_in6 *sin6;
- struct lifreq lifreq;
+ nlifr = lifc.lifc_len / sizeof (struct lifreq);
+ if (nlifr >= lifn.lifn_count) {
+ free(buf);
+ goto again;
+ }
+ /*
+ * Walk through the lifreqs returned by SIOCGLIFCONF, and refresh the
+ * global list of addresses, phyint groups, phyints, and logints.
+ */
+ for (lifr = lifc.lifc_req, i = 0; i < nlifr; i++, lifr++) {
af = lifr->lifr_addr.ss_family;
-
- /*
- * Collect all local addresses.
- */
sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
- (void) memset(&lifreq, 0, sizeof (lifreq));
- (void) strlcpy(lifreq.lifr_name, lifr->lifr_name,
- sizeof (lifreq.lifr_name));
+ (void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
if (errno != ENXIO)
logperror("initifs: ioctl (SIOCGLIFFLAGS)");
continue;
}
+ flags = lifreq.lifr_flags;
+
+ /*
+ * If the address is IFF_UP, add it to the local address list.
+ * (We ignore addresses that aren't IFF_UP since another node
+ * might legitimately have that address IFF_UP.)
+ */
+ if (flags & IFF_UP) {
+ (void) addrlist_add(&localaddrs, lifr->lifr_name, flags,
+ &lifr->lifr_addr);
+ }
/*
- * Add the interface address to laddr_list.
- * Another node might have the same IP address which is up.
- * In that case, it is appropriate to use the address as a
- * target, even though it is also configured (but not up) on
- * the local system.
- * Hence,the interface address is not added to laddr_list
- * unless it is IFF_UP.
+ * If this address is on an IPMP meta-interface, update our
+ * phyint_group information (either by recording that group
+ * still exists or creating a new group), and track what
+ * group the address is part of.
*/
- if (lifreq.lifr_flags & IFF_UP) {
- taddr = malloc(sizeof (struct local_addr));
- if (taddr == NULL) {
- logperror("initifs: malloc");
+ if (flags & IFF_IPMP) {
+ if (ioctl(sockfd, SIOCGLIFGROUPNAME, &lifreq) == -1) {
+ if (errno != ENXIO)
+ logperror("initifs: ioctl "
+ "(SIOCGLIFGROUPNAME)");
continue;
}
- if (af == AF_INET) {
- sin = (struct sockaddr_in *)&lifr->lifr_addr;
- IN6_INADDR_TO_V4MAPPED(&sin->sin_addr,
- &taddr->addr);
- } else {
- sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
- taddr->addr = sin6->sin6_addr;
+
+ pg = phyint_group_lookup(lifreq.lifr_groupname);
+ if (pg == NULL) {
+ pg = phyint_group_create(lifreq.lifr_groupname);
+ if (pg == NULL) {
+ logerr("initifs: cannot create group "
+ "%s\n", lifreq.lifr_groupname);
+ continue;
+ }
+ phyint_group_insert(pg);
+ }
+ pg->pg_in_use = _B_TRUE;
+
+ /*
+ * Add this to the group's list of data addresses.
+ */
+ if (!addrlist_add(&pg->pg_addrs, lifr->lifr_name, flags,
+ &lifr->lifr_addr)) {
+ logerr("initifs: insufficient memory to track "
+ "data address information for %s\n",
+ lifr->lifr_name);
}
- taddr->next = laddr_list;
- laddr_list = taddr;
+ continue;
}
/*
- * Need to pass a phyint name to pii_process. Insert the
- * null where the ':' IF_SEPARATOR is found in the logical
- * name.
+ * This isn't an address on an IPMP meta-interface, so it's
+ * either on an underlying interface or not related to any
+ * group. Update our phyint and logint information (via
+ * pii_process() and logint_init_from_k()) -- but first,
+ * convert the logint name to a phyint name so we can call
+ * pii_process().
*/
(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
*cp = '\0';
- exists = pii_process(af, pi_name, &pii);
- if (exists) {
+ if (pii_process(af, pi_name, &pii)) {
/* The phyint is fine. So process the logint */
logint_init_from_k(pii, lifr->lifr_name);
check_addr_unique(pii, &lifr->lifr_addr);
}
-
}
-
free(buf);
/*
- * Scan for phyints and logints that have disappeared from the
+ * Scan for groups, phyints and logints that have disappeared from the
* kernel, and delete them.
*/
for (pii = phyint_instances; pii != NULL; pii = next_pii) {
@@ -449,70 +427,31 @@ initifs()
check_if_removed(pii);
}
+ for (pg = phyint_groups; pg != NULL; pg = next_pg) {
+ next_pg = pg->pg_next;
+ if (!pg->pg_in_use) {
+ phyint_group_delete(pg);
+ continue;
+ }
+ /*
+ * Refresh the group's state. This is necessary since the
+ * group's state is defined by the set of usable interfaces in
+ * the group, and an interface is considered unusable if all
+ * of its addresses are down. When an address goes down/up,
+ * the RTM_DELADDR/RTM_NEWADDR brings us through here.
+ */
+ phyint_group_refresh_state(pg);
+ }
+
/*
* Select a test address for sending probes on each phyint instance
*/
select_test_ifs();
/*
- * Handle link up/down notifications from the NICs.
+ * Handle link up/down notifications.
*/
process_link_state_changes();
-
- for (pi = phyints; pi != NULL; pi = pi->pi_next) {
- /*
- * If this is a case of group failure, we don't have much
- * to do until the group recovers again.
- */
- if (GROUP_FAILED(pi->pi_group))
- continue;
-
- /*
- * Try/Retry any pending failovers / failbacks, that did not
- * not complete, or that could not be initiated previously.
- * This implements the 3 invariants described in the big block
- * comment at the beginning of probe.c
- */
- if (pi->pi_flags & IFF_INACTIVE) {
- if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
- (void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
- } else {
- struct phyint_instance *pii;
-
- /*
- * Skip LINK UP interfaces which are not capable
- * of probing.
- */
- pii = pi->pi_v4;
- if (pii == NULL ||
- (LINK_UP(pi) && !PROBE_CAPABLE(pii))) {
- pii = pi->pi_v6;
- if (pii == NULL ||
- (LINK_UP(pi) && !PROBE_CAPABLE(pii)))
- continue;
- }
-
- /*
- * It is possible that the phyint has started
- * receiving packets, after it has been marked
- * PI_FAILED. Don't initiate failover, if the
- * phyint has started recovering. failure_state()
- * captures this check. A similar logic is used
- * for failback/repair case.
- */
- if (pi->pi_state == PI_FAILED && !pi->pi_empty &&
- (failure_state(pii) == PHYINT_FAILURE)) {
- (void) try_failover(pi, FAILOVER_NORMAL);
- } else if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
- if (try_failback(pi) != IPMP_FAILURE) {
- (void) change_lif_flags(pi, IFF_FAILED,
- _B_FALSE);
- /* Per state diagram */
- pi->pi_empty = 0;
- }
- }
- }
- }
}
/*
@@ -569,7 +508,7 @@ check_addr_unique(struct phyint_instance *ourpii, struct sockaddr_storage *ss)
* The probe socket is closed on each interface instance, and the
* interface state set to PI_OFFLINE.
*/
-static void
+void
stop_probing(struct phyint *pi)
{
struct phyint_instance *pii;
@@ -631,7 +570,6 @@ select_test_ifs(void)
struct logint *li;
struct logint *probe_logint;
boolean_t target_scan_reqd = _B_FALSE;
- struct target *tg;
int rating;
if (debug & D_PHYINT)
@@ -645,8 +583,8 @@ select_test_ifs(void)
probe_logint = NULL;
/*
- * An interface that is offline, should not be probed.
- * Offline interfaces should always in PI_OFFLINE state,
+ * An interface that is offline should not be probed.
+ * IFF_OFFLINE interfaces should always be PI_OFFLINE
* unless some other entity has set the offline flag.
*/
if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
@@ -659,6 +597,15 @@ select_test_ifs(void)
stop_probing(pii->pii_phyint);
}
continue;
+ } else {
+ /*
+ * If something cleared IFF_OFFLINE (e.g., by accident
+ * because the SIOCGLIFFLAGS/SIOCSLIFFLAGS sequence is
+ * inherently racy), the phyint may still be offline.
+ * Just ignore it.
+ */
+ if (pii->pii_phyint->pi_state == PI_OFFLINE)
+ continue;
}
li = pii->pii_probe_logint;
@@ -776,17 +723,6 @@ select_test_ifs(void)
phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
}
- if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
- tg = pii->pii_targets;
- if (tg != NULL)
- target_delete(tg);
- assert(pii->pii_targets == NULL);
- assert(pii->pii_target_next == NULL);
- assert(pii->pii_ntargets == 0);
- target_create(pii, probe_logint->li_dstaddr,
- _B_TRUE);
- }
-
/*
* If no targets are currently known for this phyint
* we need to call init_router_targets. Since
@@ -806,15 +742,16 @@ select_test_ifs(void)
}
/*
- * Check the interface list for any interfaces that are marked
- * PI_FAILED but no longer enabled to send probes, and call
- * phyint_check_for_repair() to see if the link now indicates that the
- * interface should be repaired. Also see the state diagram in
+ * Scan the interface list for any interfaces that are PI_FAILED or
+ * PI_NOTARGETS but no longer enabled to send probes, and call
+ * phyint_check_for_repair() to see if the link state indicates that
+ * the interface should be repaired. Also see the state diagram in
* mpd_probe.c.
*/
for (pi = phyints; pi != NULL; pi = pi->pi_next) {
- if (pi->pi_state == PI_FAILED &&
- !PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) {
+ if ((!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) &&
+ (pi->pi_state == PI_FAILED ||
+ pi->pi_state == PI_NOTARGETS)) {
phyint_check_for_repair(pi);
}
}
@@ -875,15 +812,14 @@ check_testconfig(void)
pi->pi_v6->pii_probe_logint->li_dupaddr)
li = pi->pi_v6->pii_probe_logint;
- if (li != NULL) {
- if (!pi->pi_duptaddrmsg_printed) {
- (void) pr_addr(li->li_phyint_inst->pii_af,
- li->li_addr, abuf, sizeof (abuf));
- logerr("Test address %s is not unique in "
- "group; disabling probe-based failure "
- "detection on %s\n", abuf, pi->pi_name);
- pi->pi_duptaddrmsg_printed = 1;
- }
+ if (li != NULL && li->li_dupaddr) {
+ if (pi->pi_duptaddrmsg_printed)
+ continue;
+ logerr("Test address %s is not unique in group; "
+ "disabling probe-based failure detection on %s\n",
+ pr_addr(li->li_phyint_inst->pii_af,
+ li->li_addr, abuf, sizeof (abuf)), pi->pi_name);
+ pi->pi_duptaddrmsg_printed = 1;
continue;
}
@@ -915,10 +851,10 @@ check_config(void)
boolean_t v6_in_group;
/*
- * All phyints of a group must be homogenous to ensure that
- * failover or failback can be done. If any phyint in a group
- * has IPv4 plumbed, check that all phyints have IPv4 plumbed.
- * Do a similar check for IPv6.
+ * All phyints of a group must be homogeneous to ensure that they can
+ * take over for one another. If any phyint in a group has IPv4
+ * plumbed, check that all phyints have IPv4 plumbed. Do a similar
+ * check for IPv6.
*/
for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
if (pg == phyint_anongroup)
@@ -949,9 +885,9 @@ check_config(void)
if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) {
if (!pi->pi_cfgmsg_printed) {
- logerr("NIC %s of group %s is"
- " not plumbed for IPv4 and may"
- " affect failover capability\n",
+ logerr("IP interface %s in group %s is"
+ " not plumbed for IPv4, affecting"
+ " IPv4 connectivity\n",
pi->pi_name,
pi->pi_group->pg_name);
pi->pi_cfgmsg_printed = 1;
@@ -959,9 +895,9 @@ check_config(void)
} else if (v6_in_group == _B_TRUE &&
pi->pi_v6 == NULL) {
if (!pi->pi_cfgmsg_printed) {
- logerr("NIC %s of group %s is"
- " not plumbed for IPv6 and may"
- " affect failover capability\n",
+ logerr("IP interface %s in group %s is"
+ " not plumbed for IPv6, affecting"
+ " IPv6 connectivity\n",
pi->pi_name,
pi->pi_group->pg_name);
pi->pi_cfgmsg_printed = 1;
@@ -974,10 +910,10 @@ check_config(void)
* error recovery message
*/
if (pi->pi_cfgmsg_printed) {
- logerr("NIC %s is now consistent with "
- "group %s and failover capability "
- "is restored\n", pi->pi_name,
- pi->pi_group->pg_name);
+ logerr("IP interface %s is now"
+ " consistent with group %s "
+ " and connectivity is restored\n",
+ pi->pi_name, pi->pi_group->pg_name);
pi->pi_cfgmsg_printed = 0;
}
}
@@ -1117,8 +1053,8 @@ run_timeouts(void)
static int eventpipe_read = -1; /* Used for synchronous signal delivery */
static int eventpipe_write = -1;
-static boolean_t cleanup_started = _B_FALSE;
- /* Don't write to eventpipe if in cleanup */
+boolean_t cleanup_started = _B_FALSE; /* true if we're going away */
+
/*
* Ensure that signals are processed synchronously with the rest of
* the code by just writing a one character signal number on the pipe.
@@ -1228,7 +1164,7 @@ in_signal(int fd)
"Number of probes sent %lld\n"
"Number of probe acks received %lld\n"
"Number of probes/acks lost %lld\n"
- "Number of valid unacknowled probes %lld\n"
+ "Number of valid unacknowledged probes %lld\n"
"Number of ambiguous probe acks received %lld\n",
AF_STR(pii->pii_af), pii->pii_name,
sent, acked, lost, unacked, unknown);
@@ -1321,12 +1257,20 @@ setup_rtsock(int af)
{
int s;
int flags;
+ int aware = RTAW_UNDER_IPMP;
s = socket(PF_ROUTE, SOCK_RAW, af);
if (s == -1) {
logperror("setup_rtsock: socket PF_ROUTE");
exit(1);
}
+
+ if (setsockopt(s, SOL_ROUTE, RT_AWARE, &aware, sizeof (aware)) == -1) {
+ logperror("setup_rtsock: setsockopt RT_AWARE");
+ (void) close(s);
+ exit(1);
+ }
+
if ((flags = fcntl(s, F_GETFL, 0)) < 0) {
logperror("setup_rtsock: fcntl F_GETFL");
(void) close(s);
@@ -1347,8 +1291,7 @@ setup_rtsock(int af)
/*
* Process an RTM_IFINFO message received on a routing socket.
* The return value indicates whether a full interface scan is required.
- * Link up/down notifications from the NICs are reflected in the
- * IFF_RUNNING flag.
+ * Link up/down notifications are reflected in the IFF_RUNNING flag.
* If just the state of the IFF_RUNNING interface flag has changed, a
* a full interface scan isn't required.
*/
@@ -1400,7 +1343,7 @@ process_rtm_ifinfo(if_msghdr_t *ifm, int type)
/*
* We want to try and avoid doing a full interface scan for
- * link state notifications from the NICs, as indicated
+ * link state notifications from the datalink layer, as indicated
* by the state of the IFF_RUNNING flag. If just the
* IFF_RUNNING flag has changed state, the link state changes
* are processed without a full scan.
@@ -1441,25 +1384,7 @@ process_rtm_ifinfo(if_msghdr_t *ifm, int type)
* types.
*/
if ((old_flags ^ pii->pii_flags) & IFF_STANDBY)
- phyint_newtype(pi);
-
- /*
- * If IFF_INACTIVE has been set, then no data addresses should be
- * hosted on the interface. If IFF_INACTIVE has been cleared, then
- * move previously failed-over addresses back to it, provided it is
- * not failed. For details, see the state diagram in mpd_probe.c.
- */
- if ((old_flags ^ pii->pii_flags) & IFF_INACTIVE) {
- if (pii->pii_flags & IFF_INACTIVE) {
- if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
- (void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
- } else {
- if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
- pi->pi_empty = 0;
- (void) try_failback(pi);
- }
- }
- }
+ phyint_changed(pi);
/* Has just the IFF_RUNNING flag changed state ? */
if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
@@ -1620,22 +1545,24 @@ update_router_list(int fd)
t_scalar_t prim;
tor = (struct T_optmgmt_req *)&buf;
-
tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
tor->OPT_offset = sizeof (struct T_optmgmt_req);
tor->OPT_length = sizeof (struct opthdr);
tor->MGMT_flags = T_CURRENT;
+ /*
+ * Note: we use the special level value below so that IP will return
+ * us information concerning IRE_MARK_TESTHIDDEN routes.
+ */
req = (struct opthdr *)&tor[1];
- req->level = MIB2_IP; /* any MIB2_xxx value ok here */
+ req->level = EXPER_IP_AND_TESTHIDDEN;
req->name = 0;
req->len = 0;
ctlbuf.buf = (char *)&buf;
ctlbuf.len = tor->OPT_length + tor->OPT_offset;
ctlbuf.maxlen = sizeof (buf);
- flags = 0;
- if (putmsg(fd, &ctlbuf, NULL, flags) == -1) {
+ if (putmsg(fd, &ctlbuf, NULL, 0) == -1) {
logperror("update_router_list: putmsg(ctl)");
return (_B_FALSE);
}
@@ -1689,7 +1616,8 @@ update_router_list(int fd)
case T_OPTMGMT_ACK:
toa = &buf.uprim.optmgmt_ack;
optp = (struct opthdr *)&toa[1];
- if (ctlbuf.len < sizeof (struct T_optmgmt_ack)) {
+ if (ctlbuf.len < (sizeof (struct T_optmgmt_ack) +
+ sizeof (struct opthdr))) {
logerr("update_router_list: ctlbuf.len %d\n",
ctlbuf.len);
return (_B_FALSE);
@@ -1707,7 +1635,7 @@ update_router_list(int fd)
return (_B_FALSE);
}
- /* Process the T_OPGMGMT_ACK below */
+ /* Process the T_OPTMGMT_ACK below */
assert(prim == T_OPTMGMT_ACK);
switch (status) {
@@ -1717,9 +1645,8 @@ update_router_list(int fd)
* message. If this is the last message i.e EOD,
* return, else process the next T_OPTMGMT_ACK msg.
*/
- if ((ctlbuf.len == sizeof (struct T_optmgmt_ack) +
- sizeof (struct opthdr)) && optp->len == 0 &&
- optp->name == 0 && optp->level == 0) {
+ if (optp->len == 0 && optp->name == 0 &&
+ optp->level == 0) {
/*
* This is the EOD message. Return
*/
@@ -1747,17 +1674,14 @@ update_router_list(int fd)
databuf.len = 0;
flags = 0;
for (;;) {
- status = getmsg(fd, NULL, &databuf, &flags);
- if (status >= 0) {
+ if (getmsg(fd, NULL, &databuf, &flags) >= 0)
break;
- } else if (errno == EINTR) {
+ if (errno == EINTR)
continue;
- } else {
- logperror("update_router_list:"
- " getmsg(data)");
- free(databuf.buf);
- return (_B_FALSE);
- }
+
+ logperror("update_router_list: getmsg(data)");
+ free(databuf.buf);
+ return (_B_FALSE);
}
if (optp->level == MIB2_IP &&
@@ -1777,18 +1701,35 @@ update_router_list(int fd)
/* NOTREACHED */
}
+
+/*
+ * Convert octet string `octp' to a phyint name and store it in `ifname'.
+ */
+static void
+oct2ifname(const Octet_t *octp, char *ifname, size_t ifsize)
+{
+ char *cp;
+ size_t len = MIN(octp->o_length, ifsize - 1);
+
+ (void) strncpy(ifname, octp->o_bytes, len);
+ ifname[len] = '\0';
+
+ if ((cp = strchr(ifname, IF_SEPARATOR)) != NULL)
+ *cp = '\0';
+}
+
/*
- * Examine the IPv4 routing table, for default routers. For each default
- * router, populate the list of targets of each phyint that is on the same
- * link as the default router
+ * Examine the IPv4 routing table `buf' for possible targets. For each
+ * possible target, if it's on the same subnet as an interface route, pass
+ * it to router_add_common() for further consideration.
*/
static void
ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
{
- mib2_ipRouteEntry_t *rp;
- mib2_ipRouteEntry_t *rp1;
- struct in_addr nexthop_v4;
- mib2_ipRouteEntry_t *endp;
+ char ifname[LIFNAMSIZ];
+ mib2_ipRouteEntry_t *rp, *rp1, *endp;
+ struct in_addr nexthop_v4;
+ struct in6_addr nexthop;
if (len == 0)
return;
@@ -1797,75 +1738,40 @@ ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
endp = buf + (len / sizeof (mib2_ipRouteEntry_t));
/*
- * Loop thru the routing table entries. Process any IRE_DEFAULT,
- * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
- * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
- * This is a potential target for probing, which we try to add
- * to the list of probe targets.
+ * Scan the routing table entries for any IRE_OFFSUBNET entries, and
+ * cross-reference them with the interface routes to determine if
+ * they're possible probe targets.
*/
for (rp = buf; rp < endp; rp++) {
if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
continue;
- /* Get the nexthop address. */
+ /* Get the nexthop address. */
nexthop_v4.s_addr = rp->ipRouteNextHop;
/*
- * Get the nexthop address. Then determine the outgoing
- * interface, by examining all interface IREs, and picking the
- * match. We don't look at the interface specified in the route
- * because we need to add the router target on all matching
- * interfaces anyway; the goal is to avoid falling back to
- * multicast when some interfaces are in the same subnet but
- * not in the same group.
+ * Rescan the routing table looking for interface routes that
+ * are on the same subnet, and try to add them. If they're
+ * not relevant (e.g., the interface route isn't part of an
+ * IPMP group), router_add_common() will discard them.
*/
for (rp1 = buf; rp1 < endp; rp1++) {
- if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE)) {
+ if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE) ||
+ rp1->ipRouteIfIndex.o_length == 0)
continue;
- }
- /*
- * Determine the interface IRE that matches the nexthop.
- * i.e. (IRE addr & IRE mask) == (nexthop & IRE mask)
- */
- if ((rp1->ipRouteDest & rp1->ipRouteMask) ==
- (nexthop_v4.s_addr & rp1->ipRouteMask)) {
- /*
- * We found the interface ire
- */
- router_add_v4(rp1, nexthop_v4);
- }
+ if ((rp1->ipRouteDest & rp1->ipRouteMask) !=
+ (nexthop_v4.s_addr & rp1->ipRouteMask))
+ continue;
+
+ oct2ifname(&rp1->ipRouteIfIndex, ifname, LIFNAMSIZ);
+ IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
+ router_add_common(AF_INET, ifname, nexthop);
}
}
}
void
-router_add_v4(mib2_ipRouteEntry_t *rp1, struct in_addr nexthop_v4)
-{
- char *cp;
- char ifname[LIFNAMSIZ + 1];
- struct in6_addr nexthop;
- int len;
-
- if (debug & D_TARGET)
- logdebug("router_add_v4()\n");
-
- len = MIN(rp1->ipRouteIfIndex.o_length, sizeof (ifname) - 1);
- (void) memcpy(ifname, rp1->ipRouteIfIndex.o_bytes, len);
- ifname[len] = '\0';
-
- if (ifname[0] == '\0')
- return;
-
- cp = strchr(ifname, IF_SEPARATOR);
- if (cp != NULL)
- *cp = '\0';
-
- IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
- router_add_common(AF_INET, ifname, nexthop);
-}
-
-void
router_add_common(int af, char *ifname, struct in6_addr nexthop)
{
struct phyint_instance *pii;
@@ -1906,16 +1812,17 @@ router_add_common(int af, char *ifname, struct in6_addr nexthop)
}
/*
- * Examine the IPv6 routing table, for default routers. For each default
- * router, populate the list of targets of each phyint that is on the same
- * link as the default router
+ * Examine the IPv6 routing table `buf' for possible link-local targets, and
+ * pass any contenders to router_add_common() for further consideration.
*/
static void
ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
{
- mib2_ipv6RouteEntry_t *rp;
- mib2_ipv6RouteEntry_t *endp;
- struct in6_addr nexthop_v6;
+ struct lifreq lifr;
+ char ifname[LIFNAMSIZ];
+ char grname[LIFGRNAMSIZ];
+ mib2_ipv6RouteEntry_t *rp, *rp1, *endp;
+ struct in6_addr nexthop_v6;
if (debug & D_TARGET)
logdebug("ire_process_v6(len %d)\n", len);
@@ -1927,62 +1834,51 @@ ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
endp = buf + (len / sizeof (mib2_ipv6RouteEntry_t));
/*
- * Loop thru the routing table entries. Process any IRE_DEFAULT,
- * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
- * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
- * This is a potential target for probing, which we try to add
- * to the list of probe targets.
+ * Scan the routing table entries for any IRE_OFFSUBNET entries, and
+ * cross-reference them with the interface routes to determine if
+ * they're possible probe targets.
*/
for (rp = buf; rp < endp; rp++) {
- if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET))
+ if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET) ||
+ !IN6_IS_ADDR_LINKLOCAL(&rp->ipv6RouteNextHop))
continue;
- /*
- * We have the outgoing interface in ipv6RouteIfIndex
- * if ipv6RouteIfindex.o_length is non-zero. The outgoing
- * interface must be present for link-local addresses. Since
- * we use only link-local addreses for probing, we don't
- * consider the case when the outgoing interface is not
- * known and we need to scan interface ires
- */
+ /* Get the nexthop address. */
nexthop_v6 = rp->ipv6RouteNextHop;
- if (rp->ipv6RouteIfIndex.o_length != 0) {
- /*
- * We already have the outgoing interface
- * in ipv6RouteIfIndex.
- */
- router_add_v6(rp, nexthop_v6);
- }
- }
-}
-
-void
-router_add_v6(mib2_ipv6RouteEntry_t *rp1, struct in6_addr nexthop_v6)
-{
- char ifname[LIFNAMSIZ + 1];
- char *cp;
- int len;
-
- if (debug & D_TARGET)
- logdebug("router_add_v6()\n");
-
- len = MIN(rp1->ipv6RouteIfIndex.o_length, sizeof (ifname) - 1);
- (void) memcpy(ifname, rp1->ipv6RouteIfIndex.o_bytes, len);
- ifname[len] = '\0';
+ /*
+ * The interface name should always exist for link-locals;
+ * we use it to map this entry to an IPMP group name.
+ */
+ if (rp->ipv6RouteIfIndex.o_length == 0)
+ continue;
- if (ifname[0] == '\0')
- return;
+ oct2ifname(&rp->ipv6RouteIfIndex, lifr.lifr_name, LIFNAMSIZ);
+ if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) == -1 ||
+ strlcpy(grname, lifr.lifr_groupname, LIFGRNAMSIZ) == 0) {
+ continue;
+ }
- cp = strchr(ifname, IF_SEPARATOR);
- if (cp != NULL)
- *cp = '\0';
+ /*
+ * Rescan the list of routes for interface routes, and add the
+ * above target to any interfaces in the same IPMP group.
+ */
+ for (rp1 = buf; rp1 < endp; rp1++) {
+ if (!(rp1->ipv6RouteInfo.re_ire_type & IRE_INTERFACE) ||
+ rp1->ipv6RouteIfIndex.o_length == 0) {
+ continue;
+ }
+ oct2ifname(&rp1->ipv6RouteIfIndex, ifname, LIFNAMSIZ);
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
- router_add_common(AF_INET6, ifname, nexthop_v6);
+ if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) != -1 &&
+ strcmp(lifr.lifr_groupname, grname) == 0) {
+ router_add_common(AF_INET6, ifname, nexthop_v6);
+ }
+ }
+ }
}
-
-
/*
* Build a list of target routers, by scanning the routing tables.
* It is assumed that interface routes exist, to reach the routers.
@@ -2001,11 +1897,9 @@ init_router_targets(void)
for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
pi = pii->pii_phyint;
/*
- * Exclude ptp and host targets. Set tg_in_use to false,
- * only for router targets.
+ * Set tg_in_use to false only for router targets.
*/
- if (!pii->pii_targets_are_routers ||
- (pi->pi_flags & IFF_POINTOPOINT))
+ if (!pii->pii_targets_are_routers)
continue;
for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
@@ -2026,15 +1920,21 @@ init_router_targets(void)
}
for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
- if (!pii->pii_targets_are_routers ||
- (pi->pi_flags & IFF_POINTOPOINT))
+ pi = pii->pii_phyint;
+ if (!pii->pii_targets_are_routers)
continue;
for (tg = pii->pii_targets; tg != NULL; tg = next_tg) {
next_tg = tg->tg_next;
- if (!tg->tg_in_use) {
+ /*
+ * If the group has failed, it's likely the route was
+ * removed by an application affected by that failure.
+ * In that case, we keep the target so that we can
+ * reliably repair, at which point we'll refresh the
+ * target list again.
+ */
+ if (!tg->tg_in_use && !GROUP_FAILED(pi->pi_group))
target_delete(tg);
- }
}
}
}
@@ -2140,7 +2040,7 @@ getdefault(char *name)
* Command line options below
*/
boolean_t failback_enabled = _B_TRUE; /* failback enabled/disabled */
-boolean_t track_all_phyints = _B_FALSE; /* option to track all NICs */
+boolean_t track_all_phyints = _B_FALSE; /* track all IP interfaces */
static boolean_t adopt = _B_FALSE;
static boolean_t foreground = _B_FALSE;
@@ -2149,6 +2049,7 @@ main(int argc, char *argv[])
{
int i;
int c;
+ struct phyint *pi;
struct phyint_instance *pii;
char *value;
@@ -2173,14 +2074,15 @@ main(int argc, char *argv[])
if (user_failure_detection_time <= 0) {
user_failure_detection_time = FAILURE_DETECTION_TIME;
logerr("Invalid failure detection time %s, assuming "
- "default %d\n", value, user_failure_detection_time);
+ "default of %d ms\n", value,
+ user_failure_detection_time);
} else if (user_failure_detection_time <
MIN_FAILURE_DETECTION_TIME) {
user_failure_detection_time =
MIN_FAILURE_DETECTION_TIME;
logerr("Too small failure detection time of %s, "
- "assuming minimum %d\n", value,
+ "assuming minimum of %d ms\n", value,
user_failure_detection_time);
}
free(value);
@@ -2211,9 +2113,9 @@ main(int argc, char *argv[])
*/
value = getdefault("FAILBACK");
if (value != NULL) {
- if (strncasecmp(value, "yes", 3) == 0)
+ if (strcasecmp(value, "yes") == 0)
failback_enabled = _B_TRUE;
- else if (strncasecmp(value, "no", 2) == 0)
+ else if (strcasecmp(value, "no") == 0)
failback_enabled = _B_FALSE;
else
logerr("Invalid value for FAILBACK %s\n", value);
@@ -2229,9 +2131,9 @@ main(int argc, char *argv[])
*/
value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
if (value != NULL) {
- if (strncasecmp(value, "yes", 3) == 0)
+ if (strcasecmp(value, "yes") == 0)
track_all_phyints = _B_FALSE;
- else if (strncasecmp(value, "no", 2) == 0)
+ else if (strcasecmp(value, "no") == 0)
track_all_phyints = _B_TRUE;
else
logerr("Invalid value for "
@@ -2340,12 +2242,6 @@ main(int argc, char *argv[])
initifs();
- /* Inform kernel whether failback is enabled or disabled */
- if (ioctl(ifsock_v4, SIOCSIPMPFAILBACK, (int *)&failback_enabled) < 0) {
- logperror("main: ioctl (SIOCSIPMPFAILBACK)");
- exit(1);
- }
-
/*
* If we're operating in "adopt" mode and no interfaces need to be
* tracked, shut down (ifconfig(1M) will restart us on demand if
@@ -2379,6 +2275,7 @@ main(int argc, char *argv[])
process_rtsock(rtsock_v4, rtsock_v6);
break;
}
+
for (pii = phyint_instances; pii != NULL;
pii = pii->pii_next) {
if (pollfds[i].fd == pii->pii_probe_sock) {
@@ -2389,15 +2286,21 @@ main(int argc, char *argv[])
break;
}
}
+
+ for (pi = phyints; pi != NULL; pi = pi->pi_next) {
+ if (pi->pi_notes != 0 &&
+ pollfds[i].fd == dlpi_fd(pi->pi_dh)) {
+ (void) dlpi_recv(pi->pi_dh, NULL, NULL,
+ NULL, NULL, 0, NULL);
+ break;
+ }
+ }
+
if (pollfds[i].fd == lsock_v4)
loopback_cmd(lsock_v4, AF_INET);
else if (pollfds[i].fd == lsock_v6)
loopback_cmd(lsock_v6, AF_INET6);
}
- if (full_scan_required) {
- initifs();
- full_scan_required = _B_FALSE;
- }
}
/* NOTREACHED */
return (EXIT_SUCCESS);
@@ -2481,29 +2384,23 @@ static struct {
{ "MI_PING", sizeof (uint32_t) },
{ "MI_OFFLINE", sizeof (mi_offline_t) },
{ "MI_UNDO_OFFLINE", sizeof (mi_undo_offline_t) },
- { "MI_SETOINDEX", sizeof (mi_setoindex_t) },
{ "MI_QUERY", sizeof (mi_query_t) }
};
/*
- * Commands received over the loopback interface come here. Currently
- * the agents that send commands are ifconfig, if_mpadm and the RCM IPMP
- * module. ifconfig only makes a connection, and closes it to check if
- * in.mpathd is running.
- * if_mpadm sends commands in the format specified by the mpathd_interface
- * structure.
+ * Commands received over the loopback interface come here (via libipmp).
*/
static void
loopback_cmd(int sock, int family)
{
int newfd;
ssize_t len;
+ boolean_t is_priv = _B_FALSE;
struct sockaddr_storage peer;
struct sockaddr_in *peer_sin;
struct sockaddr_in6 *peer_sin6;
socklen_t peerlen;
union mi_commands mpi;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
char abuf[INET6_ADDRSTRLEN];
uint_t cmd;
int retval;
@@ -2528,10 +2425,11 @@ loopback_cmd(int sock, int family)
return;
}
peer_sin = (struct sockaddr_in *)&peer;
- if ((ntohs(peer_sin->sin_port) >= IPPORT_RESERVED) ||
- (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK)) {
- (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
- abuf, sizeof (abuf));
+ is_priv = ntohs(peer_sin->sin_port) < IPPORT_RESERVED;
+ (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
+ abuf, sizeof (abuf));
+
+ if (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK) {
logerr("Attempt to connect from addr %s port %d\n",
abuf, ntohs(peer_sin->sin_port));
(void) close(newfd);
@@ -2551,11 +2449,10 @@ loopback_cmd(int sock, int family)
* talking to us.
*/
peer_sin6 = (struct sockaddr_in6 *)&peer;
- if ((ntohs(peer_sin6->sin6_port) >= IPPORT_RESERVED) ||
- (!IN6_ARE_ADDR_EQUAL(&peer_sin6->sin6_addr,
- &loopback_addr))) {
- (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
- sizeof (abuf));
+ is_priv = ntohs(peer_sin6->sin6_port) < IPPORT_RESERVED;
+ (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
+ sizeof (abuf));
+ if (!IN6_IS_ADDR_LOOPBACK(&peer_sin6->sin6_addr)) {
logerr("Attempt to connect from addr %s port %d\n",
abuf, ntohs(peer_sin6->sin6_port));
(void) close(newfd);
@@ -2575,15 +2472,6 @@ loopback_cmd(int sock, int family)
len = read(newfd, &mpi, sizeof (mpi));
/*
- * ifconfig does not send any data. Just tests to see if mpathd
- * is already running.
- */
- if (len <= 0) {
- (void) close(newfd);
- return;
- }
-
- /*
* In theory, we can receive any sized message for a stream socket,
* but we don't expect that to happen for a small message over a
* loopback connection.
@@ -2591,6 +2479,8 @@ loopback_cmd(int sock, int family)
if (len < sizeof (uint32_t)) {
logerr("loopback_cmd: bad command format or read returns "
"partial data %d\n", len);
+ (void) close(newfd);
+ return;
}
cmd = mpi.mi_command;
@@ -2600,6 +2490,16 @@ loopback_cmd(int sock, int family)
return;
}
+ /*
+ * Only MI_PING and MI_QUERY can come from unprivileged sources.
+ */
+ if (!is_priv && (cmd != MI_QUERY && cmd != MI_PING)) {
+ logerr("Unprivileged request from %s for privileged "
+ "command %s\n", abuf, commands[cmd].name);
+ (void) close(newfd);
+ return;
+ }
+
if (len < commands[cmd].size) {
logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
commands[cmd].name, commands[cmd].size, len);
@@ -2615,179 +2515,46 @@ loopback_cmd(int sock, int family)
(void) close(newfd);
}
-extern int global_errno; /* set by failover() or failback() */
-
/*
- * Process the offline, undo offline and set original index commands,
- * received from if_mpadm(1M)
+ * Process the commands received via libipmp.
*/
static unsigned int
process_cmd(int newfd, union mi_commands *mpi)
{
- uint_t nif = 0;
- uint32_t cmd;
struct phyint *pi;
- struct phyint *pi2;
- struct phyint_group *pg;
- boolean_t success;
- int error;
struct mi_offline *mio;
struct mi_undo_offline *miu;
- struct lifreq lifr;
- int ifsock;
- struct mi_setoindex *mis;
+ unsigned int retval;
- cmd = mpi->mi_command;
+ switch (mpi->mi_command) {
+ case MI_PING:
+ return (send_result(newfd, IPMP_SUCCESS, 0));
- switch (cmd) {
case MI_OFFLINE:
mio = &mpi->mi_ocmd;
- /*
- * Lookup the interface that needs to be offlined.
- * If it does not exist, return a suitable error.
- */
+
pi = phyint_lookup(mio->mio_ifname);
if (pi == NULL)
- return (send_result(newfd, IPMP_FAILURE, EINVAL));
-
- /*
- * Verify that the minimum redundancy requirements are met.
- * The multipathing group must have at least the specified
- * number of functional interfaces after offlining the
- * requested interface. Otherwise return a suitable error.
- */
- pg = pi->pi_group;
- nif = 0;
- if (pg != phyint_anongroup) {
- for (nif = 0, pi2 = pg->pg_phyint; pi2 != NULL;
- pi2 = pi2->pi_pgnext) {
- if ((pi2->pi_state == PI_RUNNING) ||
- (pg->pg_groupfailed &&
- !(pi2->pi_flags & IFF_OFFLINE)))
- nif++;
- }
- }
- if (nif < mio->mio_min_redundancy)
- return (send_result(newfd, IPMP_EMINRED, 0));
+ return (send_result(newfd, IPMP_EUNKIF, 0));
- /*
- * The order of operation is to set IFF_OFFLINE, followed by
- * failover. Setting IFF_OFFLINE ensures that no new ipif's
- * can be created. Subsequent failover moves everything on
- * the OFFLINE interface to some other functional interface.
- */
- success = change_lif_flags(pi, IFF_OFFLINE, _B_TRUE);
- if (success) {
- if (!pi->pi_empty) {
- error = try_failover(pi, FAILOVER_NORMAL);
- if (error != 0) {
- if (!change_lif_flags(pi, IFF_OFFLINE,
- _B_FALSE)) {
- logerr("process_cmd: couldn't"
- " clear OFFLINE flag on"
- " %s\n", pi->pi_name);
- /*
- * Offline interfaces should
- * not be probed.
- */
- stop_probing(pi);
- }
- return (send_result(newfd, error,
- global_errno));
- }
- }
- } else {
+ retval = phyint_offline(pi, mio->mio_min_redundancy);
+ if (retval == IPMP_FAILURE)
return (send_result(newfd, IPMP_FAILURE, errno));
- }
- /*
- * The interface is now Offline, so stop probing it.
- * Note that if_mpadm(1M) will down the test addresses,
- * after receiving a success reply from us. The routing
- * socket message will then make us close the socket used
- * for sending probes. But it is more logical that an
- * offlined interface must not be probed, even if it has
- * test addresses.
- */
- stop_probing(pi);
- return (send_result(newfd, IPMP_SUCCESS, 0));
+ return (send_result(newfd, retval, 0));
case MI_UNDO_OFFLINE:
miu = &mpi->mi_ucmd;
- /*
- * Undo the offline command. As usual lookup the interface.
- * Send an error if it does not exist or is not offline.
- */
- pi = phyint_lookup(miu->miu_ifname);
- if (pi == NULL || pi->pi_state != PI_OFFLINE)
- return (send_result(newfd, IPMP_FAILURE, EINVAL));
-
- /*
- * Reset the state of the interface based on the current link
- * state; if this phyint subsequently acquires a test address,
- * the state will be updated later as a result of the probes.
- */
- if (LINK_UP(pi))
- phyint_chstate(pi, PI_RUNNING);
- else
- phyint_chstate(pi, PI_FAILED);
-
- if (pi->pi_state == PI_RUNNING) {
- /*
- * Note that the success of MI_UNDO_OFFLINE is not
- * contingent on actually failing back; in the odd
- * case where we cannot do it here, we will try again
- * in initifs() since pi->pi_full will still be zero.
- */
- if (do_failback(pi) != IPMP_SUCCESS) {
- logdebug("process_cmd: cannot failback from "
- "%s during MI_UNDO_OFFLINE\n", pi->pi_name);
- }
- }
-
- /*
- * Clear the IFF_OFFLINE flag. We have to do this last
- * because do_failback() relies on it being set to decide
- * when to display messages.
- */
- (void) change_lif_flags(pi, IFF_OFFLINE, _B_FALSE);
-
- /*
- * Give the requestor time to configure test addresses
- * before complaining that they're missing.
- */
- pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME;
-
- return (send_result(newfd, IPMP_SUCCESS, 0));
-
- case MI_SETOINDEX:
- mis = &mpi->mi_scmd;
- /* Get the socket for doing ioctls */
- ifsock = (mis->mis_iftype == AF_INET) ? ifsock_v4 : ifsock_v6;
-
- /*
- * Get index of new original interface.
- * The index is returned in lifr.lifr_index.
- */
- (void) strlcpy(lifr.lifr_name, mis->mis_new_pifname,
- sizeof (lifr.lifr_name));
+ pi = phyint_lookup(miu->miu_ifname);
+ if (pi == NULL)
+ return (send_result(newfd, IPMP_EUNKIF, 0));
- if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0)
+ retval = phyint_undo_offline(pi);
+ if (retval == IPMP_FAILURE)
return (send_result(newfd, IPMP_FAILURE, errno));
- /*
- * Set new original interface index.
- * The new index was put into lifr.lifr_index by the
- * SIOCGLIFINDEX ioctl.
- */
- (void) strlcpy(lifr.lifr_name, mis->mis_lifname,
- sizeof (lifr.lifr_name));
-
- if (ioctl(ifsock, SIOCSLIFOINDEX, (char *)&lifr) < 0)
- return (send_result(newfd, IPMP_FAILURE, errno));
-
- return (send_result(newfd, IPMP_SUCCESS, 0));
+ return (send_result(newfd, retval, 0));
case MI_QUERY:
return (process_query(newfd, &mpi->mi_qcmd));
@@ -2806,6 +2573,8 @@ process_cmd(int newfd, union mi_commands *mpi)
static unsigned int
process_query(int fd, mi_query_t *miq)
{
+ ipmp_addrinfo_t *adinfop;
+ ipmp_addrinfolist_t *adlp;
ipmp_groupinfo_t *grinfop;
ipmp_groupinfolist_t *grlp;
ipmp_grouplist_t *grlistp;
@@ -2815,6 +2584,19 @@ process_query(int fd, mi_query_t *miq)
unsigned int retval;
switch (miq->miq_inforeq) {
+ case IPMP_ADDRINFO:
+ retval = getgraddrinfo(miq->miq_grname, &miq->miq_addr,
+ &adinfop);
+ if (retval != IPMP_SUCCESS)
+ return (send_result(fd, retval, errno));
+
+ retval = send_result(fd, IPMP_SUCCESS, 0);
+ if (retval == IPMP_SUCCESS)
+ retval = send_addrinfo(fd, adinfop);
+
+ ipmp_freeaddrinfo(adinfop);
+ return (retval);
+
case IPMP_GROUPLIST:
retval = getgrouplist(&grlistp);
if (retval != IPMP_SUCCESS)
@@ -2829,7 +2611,7 @@ process_query(int fd, mi_query_t *miq)
case IPMP_GROUPINFO:
miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
- retval = getgroupinfo(miq->miq_ifname, &grinfop);
+ retval = getgroupinfo(miq->miq_grname, &grinfop);
if (retval != IPMP_SUCCESS)
return (send_result(fd, retval, errno));
@@ -2854,6 +2636,11 @@ process_query(int fd, mi_query_t *miq)
return (retval);
case IPMP_SNAP:
+ /*
+ * Before taking the snapshot, sync with the kernel.
+ */
+ initifs();
+
retval = getsnap(&snap);
if (retval != IPMP_SUCCESS)
return (send_result(fd, retval, errno));
@@ -2883,6 +2670,13 @@ process_query(int fd, mi_query_t *miq)
if (retval != IPMP_SUCCESS)
goto out;
}
+
+ adlp = snap->sn_adinfolistp;
+ for (; adlp != NULL; adlp = adlp->adl_next) {
+ retval = send_addrinfo(fd, adlp->adl_adinfop);
+ if (retval != IPMP_SUCCESS)
+ goto out;
+ }
out:
ipmp_snap_free(snap);
return (retval);
@@ -2902,14 +2696,20 @@ static unsigned int
send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
{
ipmp_iflist_t *iflistp = grinfop->gr_iflistp;
+ ipmp_addrlist_t *adlistp = grinfop->gr_adlistp;
unsigned int retval;
retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop);
if (retval != IPMP_SUCCESS)
return (retval);
- return (ipmp_writetlv(fd, IPMP_IFLIST,
- IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp));
+ retval = ipmp_writetlv(fd, IPMP_IFLIST,
+ IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp);
+ if (retval != IPMP_SUCCESS)
+ return (retval);
+
+ return (ipmp_writetlv(fd, IPMP_ADDRLIST,
+ IPMP_ADDRLIST_SIZE(adlistp->al_naddr), adlistp));
}
/*
@@ -2919,7 +2719,31 @@ send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
static unsigned int
send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
{
- return (ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop));
+ ipmp_addrlist_t *adlist4p = ifinfop->if_targinfo4.it_targlistp;
+ ipmp_addrlist_t *adlist6p = ifinfop->if_targinfo6.it_targlistp;
+ unsigned int retval;
+
+ retval = ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop);
+ if (retval != IPMP_SUCCESS)
+ return (retval);
+
+ retval = ipmp_writetlv(fd, IPMP_ADDRLIST,
+ IPMP_ADDRLIST_SIZE(adlist4p->al_naddr), adlist4p);
+ if (retval != IPMP_SUCCESS)
+ return (retval);
+
+ return (ipmp_writetlv(fd, IPMP_ADDRLIST,
+ IPMP_ADDRLIST_SIZE(adlist6p->al_naddr), adlist6p));
+}
+
+/*
+ * Send the address information pointed to by `adinfop' on file descriptor
+ * `fd'. Returns an IPMP error code.
+ */
+static unsigned int
+send_addrinfo(int fd, ipmp_addrinfo_t *adinfop)
+{
+ return (ipmp_writetlv(fd, IPMP_ADDRINFO, sizeof (*adinfop), adinfop));
}
/*
@@ -3109,3 +2933,32 @@ close_probe_socket(struct phyint_instance *pii, boolean_t polled)
pii->pii_probe_sock = -1;
pii->pii_basetime_inited = 0;
}
+
+boolean_t
+addrlist_add(addrlist_t **addrsp, const char *name, uint64_t flags,
+ struct sockaddr_storage *ssp)
+{
+ addrlist_t *addrp;
+
+ if ((addrp = malloc(sizeof (addrlist_t))) == NULL)
+ return (_B_FALSE);
+
+ (void) strlcpy(addrp->al_name, name, LIFNAMSIZ);
+ addrp->al_flags = flags;
+ addrp->al_addr = *ssp;
+ addrp->al_next = *addrsp;
+ *addrsp = addrp;
+ return (_B_TRUE);
+}
+
+void
+addrlist_free(addrlist_t **addrsp)
+{
+ addrlist_t *addrp, *next_addrp;
+
+ for (addrp = *addrsp; addrp != NULL; addrp = next_addrp) {
+ next_addrp = addrp->al_next;
+ free(addrp);
+ }
+ *addrsp = NULL;
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c
index a2ff76a983..cf327fbaff 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -20,8 +20,6 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mpd_defs.h"
#include "mpd_tables.h"
@@ -45,7 +43,7 @@ struct pr_icmp
uint16_t pr_icmp_cksum; /* checksum field */
uint16_t pr_icmp_id; /* Identification */
uint16_t pr_icmp_seq; /* sequence number */
- uint32_t pr_icmp_timestamp; /* Time stamp */
+ uint64_t pr_icmp_timestamp; /* Time stamp (in ns) */
uint32_t pr_icmp_mtype; /* Message type */
};
@@ -58,11 +56,12 @@ static struct in_addr all_nodes_mcast_v4 = { { { 0xe0, 0x0, 0x0, 0x1 } } };
static hrtime_t last_fdt_bumpup_time; /* When FDT was bumped up last */
-static void *find_ancillary(struct msghdr *msg, int cmsg_type);
-static void pi_set_crtt(struct target *tg, int m,
+static void *find_ancillary(struct msghdr *msg, int cmsg_level,
+ int cmsg_type);
+static void pi_set_crtt(struct target *tg, int64_t m,
boolean_t is_probe_uni);
static void incoming_echo_reply(struct phyint_instance *pii,
- struct pr_icmp *reply, struct in6_addr fromaddr);
+ struct pr_icmp *reply, struct in6_addr fromaddr, struct timeval *recv_tvp);
static void incoming_rtt_reply(struct phyint_instance *pii,
struct pr_icmp *reply, struct in6_addr fromaddr);
static void incoming_mcast_reply(struct phyint_instance *pii,
@@ -78,13 +77,11 @@ static void probe_success_info(struct phyint_instance *pii,
struct target *cur_tg, struct probe_success_count *psinfo);
static boolean_t phyint_repaired(struct phyint *pi);
-static int failover(struct phyint *from, struct phyint *to);
-static int failback(struct phyint *from, struct phyint *to);
-static struct phyint *get_failover_dst(struct phyint *pi, int failover_type);
-
static boolean_t highest_ack_tg(uint16_t seq, struct target *tg);
static int in_cksum(ushort_t *addr, int len);
static void reset_snxt_basetimes(void);
+static int ns2ms(int64_t ns);
+static int64_t tv2ns(struct timeval *);
/*
* CRTT - Conservative Round Trip Time Estimate
@@ -104,7 +101,7 @@ static void reset_snxt_basetimes(void);
* Phyint state diagram
*
* The state of a phyint that is capable of being probed, is completely
- * specified by the 5-tuple <pi_state, pg_groupfailed, I, pi_empty, pi_full>.
+ * specified by the 3-tuple <pi_state, pg_state, I>.
*
* A phyint starts in either PI_RUNNING or PI_FAILED, depending on the state
* of the link (according to the driver). If the phyint is also configured
@@ -117,8 +114,8 @@ static void reset_snxt_basetimes(void);
* state, which indicates that the link is apparently functional but that
* in.mpathd is unable to send probes to verify functionality (in this case,
* in.mpathd makes the optimistic assumption that the interface is working
- * correctly and thus does not perform a failover, but reports the interface
- * as IPMP_IF_UNKNOWN through the async events and query interfaces).
+ * correctly and thus does not mark the interface FAILED, but reports it as
+ * IPMP_IF_UNKNOWN through the async events and query interfaces).
*
* At any point, a phyint may be administratively marked offline via if_mpadm.
* In this case, the interface always transitions to PI_OFFLINE, regardless
@@ -131,8 +128,11 @@ static void reset_snxt_basetimes(void);
* PI_RUNNING: The failure detection logic says the phyint is good.
* PI_FAILED: The failure detection logic says the phyint has failed.
*
- * pg_groupfailed - Group failure, all interfaces in the group have failed.
- * The pi_state may be either PI_FAILED or PI_NOTARGETS.
+ * pg_state - PG_OK, PG_DEGRADED, or PG_FAILED.
+ * PG_OK: All interfaces in the group are OK.
+ * PG_DEGRADED: Some interfaces in the group are unusable.
+ * PG_FAILED: All interfaces in the group are unusable.
+ *
* In the case of router targets, we assume that the current list of
* targets obtained from the routing table, is still valid, so the
* phyint stat is PI_FAILED. In the case of host targets, we delete the
@@ -140,144 +140,46 @@ static void reset_snxt_basetimes(void);
* target list. So the phyints are in the PI_NOTARGETS state.
*
* I - value of (pi_flags & IFF_INACTIVE)
- * IFF_INACTIVE: No failovers have been done to this phyint, from
- * other phyints. This phyint is inactive. Phyint can be a Standby.
- * When failback has been disabled (FAILOVER=no configured),
- * phyint can also be a non-STANDBY. In this case IFF_INACTIVE
- * is set when phyint subsequently recovers after a failure.
- *
- * pi_empty
- * This phyint has failed over successfully to another phyint, and
- * this phyint is currently "empty". It does not host any addresses or
- * multicast membership etc. This is the state of a phyint after a
- * failover from the phyint has completed successfully and no subsequent
- * 'failover to' or 'failback to' has occurred on the phyint.
- * IP guarantees that no new logicals will be hosted nor any multicast
- * joins permitted on the phyint, since the phyint is either failed or
- * inactive. pi_empty is set implies the phyint is either failed or
- * inactive.
- *
- * pi_full
- * The phyint hosts all of its own addresses that it "owns". If the
- * phyint was previously failed or inactive, failbacks to the phyint
- * has completed successfully. i.e. No more failbacks to this phyint
- * can produce any change in system state whatsoever.
- *
- * Not all 32 possible combinations of the above 5-tuple are possible.
- * Furthermore some of the above combinations are transient. They may occur
- * only because the failover or failback did not complete successfully. The
- * failover/failback will be retried and eventually a stable state will be
- * reached.
- *
- * I is tracked by IP. pi_state, pi_empty and pi_full are tracked by mpathd.
- * The following are the state machines. 'from' and 'to' are the src and
- * dst of the failover/failback, below
- *
- * pi_empty state machine
- * ---------------------------------------------------------------------------
- * Event State -> New State
- * ---------------------------------------------------------------------------
- * successful completion from.pi_empty = 0 -> from.pi_empty = 1
- * of failover
+ * IFF_INACTIVE: This phyint will not send or receive packets.
+ * Usually, inactive is tied to standby interfaces that are not yet
+ * needed (e.g., no non-standby interfaces in the group have failed).
+ * When failback has been disabled (FAILBACK=no configured), phyint can
+ * also be a non-STANDBY. In this case IFF_INACTIVE is set when phyint
+ * subsequently recovers after a failure.
*
- * Initiate failover to.pi_empty = X -> to.pi_empty = 0
+ * Not all combinations of the above 3-tuple can occur.
*
- * Initiate failback to.pi_empty = X -> to.pi_empty = 0
- *
- * group failure pi_empty = X -> pi_empty = 0
- * ---------------------------------------------------------------------------
- *
- * pi_full state machine
- * ---------------------------------------------------------------------------
- * Event State -> New State
- * ---------------------------------------------------------------------------
- * successful completion to.pi_full = 0 -> to.pi_full = 1
- * of failback from
- * each of the other phyints
- *
- * Initiate failover from.pi_full = X -> from.pi_full = 0
- *
- * group failure pi_full = X -> pi_full = 0
- * ---------------------------------------------------------------------------
+ * I is tracked by IP. pi_state is tracked by mpathd.
*
* pi_state state machine
* ---------------------------------------------------------------------------
* Event State New State
* Action:
* ---------------------------------------------------------------------------
- * NIC failure (PI_RUNNING, I == 0) -> (PI_FAILED, I == 0)
+ * IP interface failure (PI_RUNNING, I == 0) -> (PI_FAILED, I == 0)
* detection : set IFF_FAILED on this phyint
- * : failover from this phyint to another
*
- * NIC failure (PI_RUNNING, I == 1) -> (PI_FAILED, I == 0)
+ * IP interface failure (PI_RUNNING, I == 1) -> (PI_FAILED, I == 0)
* detection : set IFF_FAILED on this phyint
*
- * NIC repair (PI_FAILED, I == 0, FAILBACK=yes)
+ * IP interface repair (PI_FAILED, I == 0, FAILBACK=yes)
* detection -> (PI_RUNNING, I == 0)
- * : to.pi_empty = 0
* : clear IFF_FAILED on this phyint
- * : failback to this phyint if enabled
*
- * NIC repair (PI_FAILED, I == 0, FAILBACK=no)
+ * IP interface repair (PI_FAILED, I == 0, FAILBACK=no)
* detection -> (PI_RUNNING, I == 1)
- * : to.pi_empty = 0
* : clear IFF_FAILED on this phyint
* : if failback is disabled set I == 1
*
* Group failure (perform on all phyints in the group)
* detection PI_RUNNING PI_FAILED
* (Router targets) : set IFF_FAILED
- * : clear pi_empty and pi_full
*
* Group failure (perform on all phyints in the group)
* detection PI_RUNNING PI_NOTARGETS
* (Host targets) : set IFF_FAILED
- * : clear pi_empty and pi_full
* : delete the target list on all phyints
* ---------------------------------------------------------------------------
- *
- * I state machine
- * ---------------------------------------------------------------------------
- * Event State Action:
- * ---------------------------------------------------------------------------
- * Turn on I pi_empty == 0, STANDBY : failover from standby
- *
- * Turn off I PI_RUNNING, STANDBY : pi_empty = 0
- * pi_full == 0 : failback to this if enabled
- * ---------------------------------------------------------------------------
- *
- * Assertions: (Read '==>' as implies)
- *
- * (pi_empty == 1) ==> (I == 1 || pi_state == PI_FAILED)
- * (pi_empty == 1) ==> (pi_full == 0)
- * (pi_full == 1) ==> (pi_empty == 0)
- *
- * Invariants
- *
- * pg_groupfailed = 0 &&
- * 1. (I == 1, pi_empty == 0) ==> initiate failover from standby
- * 2. (I == 0, PI_FAILED, pi_empty == 0) ==> initiate failover from phyint
- * 3. (I == 0, PI_RUNNING, pi_full == 0) ==> initiate failback to phyint
- *
- * 1. says that an inactive standby, that is not empty, has to be failed
- * over. For a standby to be truly inactive, it should not host any
- * addresses. So we move them to some other phyint. Usually we catch the
- * turn on of IFF_INACTIVE, and perform this action. However if the failover
- * did not complete successfully, then subsequently we have lost the edge
- * trigger, and this invariant kicks in and completes the action.
- *
- * 2. says that any failed phyint that is not empty must be failed over.
- * Usually we do the failover when we detect NIC failure. However if the
- * failover does not complete successfully, this invariant kicks in and
- * completes the failover. We exclude inactive standby which is covered by 1.
- *
- * 3. says that any running phyint that is not full must be failed back.
- * Usually we do the failback when we detect NIC repair. However if the
- * failback does not complete successfully, this invariant kicks in and
- * completes the failback. Note that we don't want to failback to an inactive
- * standby.
- *
- * The invariants 1 - 3 and the actions are in initifs().
*/
struct probes_missed probes_missed;
@@ -295,7 +197,7 @@ struct probes_missed probes_missed;
* not less than the current CRTT. pii_probes[] stores data
* about these probes. These packets consume sequence number space.
*
- * PROBE_RTT: This type is used to make only rtt measurments. Normally these
+ * PROBE_RTT: This type is used to make only rtt measurements. Normally these
* are not used. Under heavy network load, the rtt may go up very high,
* due to a spike, or may appear to go high, due to extreme scheduling
* delays. Once the network stress is removed, mpathd takes long time to
@@ -310,17 +212,19 @@ struct probes_missed probes_missed;
* no targets are known. The packet is multicast to the all hosts addr.
*/
static void
-probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
+probe(struct phyint_instance *pii, uint_t probe_type, hrtime_t start_hrtime)
{
+ hrtime_t sent_hrtime;
+ struct timeval sent_tv;
struct pr_icmp probe_pkt; /* Probe packet */
- struct sockaddr_in6 whereto6; /* target address IPv6 */
- struct sockaddr_in whereto; /* target address IPv4 */
+ struct sockaddr_storage targ; /* target address */
+ uint_t targaddrlen; /* target address length */
int pr_ndx; /* probe index in pii->pii_probes[] */
boolean_t sent = _B_TRUE;
if (debug & D_TARGET) {
- logdebug("probe(%s %s %d %u)\n", AF_STR(pii->pii_af),
- pii->pii_name, probe_type, cur_time);
+ logdebug("probe(%s %s %d %lld)\n", AF_STR(pii->pii_af),
+ pii->pii_name, probe_type, start_hrtime);
}
assert(pii->pii_probe_sock != -1);
@@ -339,7 +243,7 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
* network byte order at initialization itself.
*/
probe_pkt.pr_icmp_id = pii->pii_icmpid;
- probe_pkt.pr_icmp_timestamp = htonl(cur_time);
+ probe_pkt.pr_icmp_timestamp = htonll(start_hrtime);
probe_pkt.pr_icmp_mtype = htonl(probe_type);
/*
@@ -349,38 +253,34 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
assert(probe_type == PROBE_MULTI || ((pii->pii_target_next != NULL) &&
pii->pii_rtt_target_next != NULL));
+ bzero(&targ, sizeof (targ));
+ targ.ss_family = pii->pii_af;
+
if (pii->pii_af == AF_INET6) {
- bzero(&whereto6, sizeof (whereto6));
- whereto6.sin6_family = AF_INET6;
+ struct in6_addr *addr6;
+
+ addr6 = &((struct sockaddr_in6 *)&targ)->sin6_addr;
+ targaddrlen = sizeof (struct sockaddr_in6);
if (probe_type == PROBE_MULTI) {
- whereto6.sin6_addr = all_nodes_mcast_v6;
+ *addr6 = all_nodes_mcast_v6;
} else if (probe_type == PROBE_UNI) {
- whereto6.sin6_addr = pii->pii_target_next->tg_address;
- } else {
- /* type is PROBE_RTT */
- whereto6.sin6_addr =
- pii->pii_rtt_target_next->tg_address;
- }
- if (sendto(pii->pii_probe_sock, (char *)&probe_pkt,
- sizeof (probe_pkt), 0, (struct sockaddr *)&whereto6,
- sizeof (whereto6)) != sizeof (probe_pkt)) {
- logperror_pii(pii, "probe: probe sendto");
- sent = _B_FALSE;
+ *addr6 = pii->pii_target_next->tg_address;
+ } else { /* type is PROBE_RTT */
+ *addr6 = pii->pii_rtt_target_next->tg_address;
}
} else {
- bzero(&whereto, sizeof (whereto));
- whereto.sin_family = AF_INET;
+ struct in_addr *addr4;
+
+ addr4 = &((struct sockaddr_in *)&targ)->sin_addr;
+ targaddrlen = sizeof (struct sockaddr_in);
if (probe_type == PROBE_MULTI) {
- whereto.sin_addr = all_nodes_mcast_v4;
+ *addr4 = all_nodes_mcast_v4;
} else if (probe_type == PROBE_UNI) {
IN6_V4MAPPED_TO_INADDR(
- &pii->pii_target_next->tg_address,
- &whereto.sin_addr);
- } else {
- /* type is PROBE_RTT */
+ &pii->pii_target_next->tg_address, addr4);
+ } else { /* type is PROBE_RTT */
IN6_V4MAPPED_TO_INADDR(
- &pii->pii_rtt_target_next->tg_address,
- &whereto.sin_addr);
+ &pii->pii_rtt_target_next->tg_address, addr4);
}
/*
@@ -388,12 +288,18 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
*/
probe_pkt.pr_icmp_cksum =
in_cksum((ushort_t *)&probe_pkt, (int)sizeof (probe_pkt));
- if (sendto(pii->pii_probe_sock, (char *)&probe_pkt,
- sizeof (probe_pkt), 0, (struct sockaddr *)&whereto,
- sizeof (whereto)) != sizeof (probe_pkt)) {
- logperror_pii(pii, "probe: probe sendto");
- sent = _B_FALSE;
- }
+ }
+
+ /*
+ * Use the current time as the time we sent. Not atomic, but the best
+ * we can do from here.
+ */
+ sent_hrtime = gethrtime();
+ (void) gettimeofday(&sent_tv, NULL);
+ if (sendto(pii->pii_probe_sock, &probe_pkt, sizeof (probe_pkt), 0,
+ (struct sockaddr *)&targ, targaddrlen) != sizeof (probe_pkt)) {
+ logperror_pii(pii, "probe: probe sendto");
+ sent = _B_FALSE;
}
/*
@@ -415,9 +321,13 @@ probe(struct phyint_instance *pii, uint_t probe_type, uint_t cur_time)
pii->pii_cum_stats.acked++;
pii->pii_cum_stats.sent++;
- pii->pii_probes[pr_ndx].pr_status = PR_UNACKED;
+ pii->pii_probes[pr_ndx].pr_id = pii->pii_snxt;
+ pii->pii_probes[pr_ndx].pr_tv_sent = sent_tv;
+ pii->pii_probes[pr_ndx].pr_hrtime_sent = sent_hrtime;
+ pii->pii_probes[pr_ndx].pr_hrtime_start = start_hrtime;
pii->pii_probes[pr_ndx].pr_target = pii->pii_target_next;
- pii->pii_probes[pr_ndx].pr_time_sent = cur_time;
+ probe_chstate(&pii->pii_probes[pr_ndx], pii, PR_UNACKED);
+
pii->pii_probe_next = PROBE_INDEX_NEXT(pii->pii_probe_next);
pii->pii_target_next = target_next(pii->pii_target_next);
assert(pii->pii_target_next != NULL);
@@ -448,33 +358,42 @@ in_data(struct phyint_instance *pii)
{
struct sockaddr_in from;
struct in6_addr fromaddr;
- uint_t fromlen;
- static uint_t in_packet[(IP_MAXPACKET + 1)/4];
+ static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
+ static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
struct ip *ip;
int iphlen;
int len;
char abuf[INET_ADDRSTRLEN];
- struct pr_icmp *reply;
+ struct msghdr msg;
+ struct iovec iov;
+ struct pr_icmp *reply;
+ struct timeval *recv_tvp;
if (debug & D_PROBE) {
logdebug("in_data(%s %s)\n",
AF_STR(pii->pii_af), pii->pii_name);
}
+ iov.iov_base = (char *)in_packet;
+ iov.iov_len = sizeof (in_packet);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_name = (struct sockaddr *)&from;
+ msg.msg_namelen = sizeof (from);
+ msg.msg_control = ancillary_data;
+ msg.msg_controllen = sizeof (ancillary_data);
+
/*
* Poll has already told us that a message is waiting,
* on this socket. Read it now. We should not block.
*/
- fromlen = sizeof (from);
- len = recvfrom(pii->pii_probe_sock, (char *)in_packet,
- sizeof (in_packet), 0, (struct sockaddr *)&from, &fromlen);
- if (len < 0) {
- logperror_pii(pii, "in_data: recvfrom");
+ if ((len = recvmsg(pii->pii_probe_sock, &msg, 0)) < 0) {
+ logperror_pii(pii, "in_data: recvmsg");
return;
}
/*
- * If the NIC has indicated the link is down, don't go
+ * If the datalink has indicated the link is down, don't go
* any further.
*/
if (LINK_DOWN(pii->pii_phyint))
@@ -483,6 +402,15 @@ in_data(struct phyint_instance *pii)
/* Get the printable address for error reporting */
(void) inet_ntop(AF_INET, &from.sin_addr, abuf, sizeof (abuf));
+ /* Ignore packets > 64k or control buffers that don't fit */
+ if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
+ if (debug & D_PKTBAD) {
+ logdebug("Truncated message: msg_flags 0x%x from %s\n",
+ msg.msg_flags, abuf);
+ }
+ return;
+ }
+
/* Make sure packet contains at least minimum ICMP header */
ip = (struct ip *)in_packet;
iphlen = ip->ip_hl << 2;
@@ -528,10 +456,17 @@ in_data(struct phyint_instance *pii)
return;
}
+ recv_tvp = find_ancillary(&msg, SOL_SOCKET, SCM_TIMESTAMP);
+ if (recv_tvp == NULL) {
+ logtrace("message without timestamp from %s on %s\n",
+ abuf, pii->pii_name);
+ return;
+ }
+
IN6_INADDR_TO_V4MAPPED(&from.sin_addr, &fromaddr);
if (reply->pr_icmp_mtype == htonl(PROBE_UNI))
/* Unicast probe reply */
- incoming_echo_reply(pii, reply, fromaddr);
+ incoming_echo_reply(pii, reply, fromaddr, recv_tvp);
else if (reply->pr_icmp_mtype == htonl(PROBE_MULTI)) {
/* Multicast reply */
incoming_mcast_reply(pii, reply, fromaddr);
@@ -543,7 +478,6 @@ in_data(struct phyint_instance *pii)
reply->pr_icmp_mtype, abuf, pii->pii_name);
return;
}
-
}
/*
@@ -559,8 +493,9 @@ in6_data(struct phyint_instance *pii)
char abuf[INET6_ADDRSTRLEN];
struct msghdr msg;
struct iovec iov;
- uchar_t *opt;
+ void *opt;
struct pr_icmp *reply;
+ struct timeval *recv_tvp;
if (debug & D_PROBE) {
logdebug("in6_data(%s %s)\n",
@@ -577,12 +512,12 @@ in6_data(struct phyint_instance *pii)
msg.msg_controllen = sizeof (ancillary_data);
if ((len = recvmsg(pii->pii_probe_sock, &msg, 0)) < 0) {
- logperror_pii(pii, "in6_data: recvfrom");
+ logperror_pii(pii, "in6_data: recvmsg");
return;
}
/*
- * If the NIC has indicated that the link is down, don't go
+ * If the datalink has indicated that the link is down, don't go
* any further.
*/
if (LINK_DOWN(pii->pii_phyint))
@@ -623,13 +558,14 @@ in6_data(struct phyint_instance *pii)
"%s on %s\n", abuf, pii->pii_name);
return;
}
- opt = find_ancillary(&msg, IPV6_RTHDR);
+ opt = find_ancillary(&msg, IPPROTO_IPV6, IPV6_RTHDR);
if (opt != NULL) {
/* Can't allow routing headers in probe replies */
logtrace("message with routing header from %s on %s\n",
abuf, pii->pii_name);
return;
}
+
if (reply->pr_icmp_code != 0) {
logtrace("probe reply code: %d from %s on %s\n",
reply->pr_icmp_code, abuf, pii->pii_name);
@@ -640,8 +576,16 @@ in6_data(struct phyint_instance *pii)
len, abuf, pii->pii_name);
return;
}
+
+ recv_tvp = find_ancillary(&msg, SOL_SOCKET, SCM_TIMESTAMP);
+ if (recv_tvp == NULL) {
+ logtrace("message without timestamp from %s on %s\n",
+ abuf, pii->pii_name);
+ return;
+ }
+
if (reply->pr_icmp_mtype == htonl(PROBE_UNI)) {
- incoming_echo_reply(pii, reply, from.sin6_addr);
+ incoming_echo_reply(pii, reply, from.sin6_addr, recv_tvp);
} else if (reply->pr_icmp_mtype == htonl(PROBE_MULTI)) {
incoming_mcast_reply(pii, reply, from.sin6_addr);
} else if (reply->pr_icmp_mtype == htonl(PROBE_RTT)) {
@@ -663,11 +607,9 @@ static void
incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
struct in6_addr fromaddr)
{
- int m; /* rtt measurment in ms */
- uint32_t cur_time; /* in ms from some arbitrary point */
+ int64_t m; /* rtt measurement in ns */
char abuf[INET6_ADDRSTRLEN];
struct target *target;
- uint32_t pr_icmp_timestamp;
struct phyint_group *pg;
/* Get the printable address for error reporting */
@@ -683,10 +625,7 @@ incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
if (target == NULL)
return;
- pr_icmp_timestamp = ntohl(reply->pr_icmp_timestamp);
- cur_time = getcurrenttime();
- m = (int)(cur_time - pr_icmp_timestamp);
-
+ m = (int64_t)(gethrtime() - ntohll(reply->pr_icmp_timestamp));
/* Invalid rtt. It has wrapped around */
if (m < 0)
return;
@@ -754,29 +693,30 @@ incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
*/
static void
incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
- struct in6_addr fromaddr)
+ struct in6_addr fromaddr, struct timeval *recv_tvp)
{
- int m; /* rtt measurment in ms */
- uint32_t cur_time; /* in ms from some arbitrary point */
+ int64_t m; /* rtt measurement in ns */
+ hrtime_t cur_hrtime; /* in ns from some arbitrary point */
char abuf[INET6_ADDRSTRLEN];
int pr_ndx;
struct target *target;
boolean_t exception;
- uint32_t pr_icmp_timestamp;
+ uint64_t pr_icmp_timestamp;
uint16_t pr_icmp_seq;
+ struct probe_stats *pr_statp;
struct phyint_group *pg = pii->pii_phyint->pi_group;
/* Get the printable address for error reporting */
(void) pr_addr(pii->pii_af, fromaddr, abuf, sizeof (abuf));
if (debug & D_PROBE) {
- logdebug("incoming_echo_reply: %s %s %s seq %u\n",
+ logdebug("incoming_echo_reply: %s %s %s seq %u recv_tvp %lld\n",
AF_STR(pii->pii_af), pii->pii_name, abuf,
- ntohs(reply->pr_icmp_seq));
+ ntohs(reply->pr_icmp_seq), tv2ns(recv_tvp));
}
- pr_icmp_timestamp = ntohl(reply->pr_icmp_timestamp);
- pr_icmp_seq = ntohs(reply->pr_icmp_seq);
+ pr_icmp_timestamp = ntohll(reply->pr_icmp_timestamp);
+ pr_icmp_seq = ntohs(reply->pr_icmp_seq);
/* Reject out of window probe replies */
if (SEQ_GE(pr_icmp_seq, pii->pii_snxt) ||
@@ -786,15 +726,16 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
pii->pii_cum_stats.unknown++;
return;
}
- cur_time = getcurrenttime();
- m = (int)(cur_time - pr_icmp_timestamp);
+
+ cur_hrtime = gethrtime();
+ m = (int64_t)(cur_hrtime - pr_icmp_timestamp);
if (m < 0) {
/*
* This is a ridiculously high value of rtt. rtt has wrapped
* around. Log a message, and ignore the rtt.
*/
- logerr("incoming_echo_reply: rtt wraparound cur_time %u reply "
- "timestamp %u\n", cur_time, pr_icmp_timestamp);
+ logerr("incoming_echo_reply: rtt wraparound cur_hrtime %lld "
+ "reply timestamp %lld\n", cur_hrtime, pr_icmp_timestamp);
}
/*
@@ -868,10 +809,10 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
* debugger, or the system was hung or too busy for a
* substantial time that we didn't get a chance to run.
*/
- if ((m < 0) || (m > PROBE_STATS_COUNT * pg->pg_probeint)) {
+ if ((m < 0) || (ns2ms(m) > PROBE_STATS_COUNT * pg->pg_probeint)) {
/*
- * If the probe corresponding to this receieved response
- * was truly sent 'm' ms. ago, then this response must
+ * If the probe corresponding to this received response
+ * was truly sent 'm' ns. ago, then this response must
* have been rejected by the sequence number checks. The
* fact that it has passed the sequence number checks
* means that the measured rtt is wrong. We were probably
@@ -947,7 +888,7 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
* adjusts pii->pii_target_next
*/
target_delete(target);
- probe(pii, PROBE_MULTI, cur_time);
+ probe(pii, PROBE_MULTI, cur_hrtime);
}
} else {
/*
@@ -999,8 +940,12 @@ incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
}
}
out:
- pii->pii_probes[pr_ndx].pr_status = PR_ACKED;
- pii->pii_probes[pr_ndx].pr_time_acked = cur_time;
+ pr_statp = &pii->pii_probes[pr_ndx];
+ pr_statp->pr_hrtime_ackproc = cur_hrtime;
+ pr_statp->pr_hrtime_ackrecv = pr_statp->pr_hrtime_sent +
+ (tv2ns(recv_tvp) - tv2ns(&pr_statp->pr_tv_sent));
+
+ probe_chstate(pr_statp, pii, PR_ACKED);
/*
* Update pii->pii_rack, i.e. the sequence number of the last received
@@ -1240,13 +1185,13 @@ incoming_mcast_reply(struct phyint_instance *pii, struct pr_icmp *reply,
*
* New scaled average and deviation are passed back via sap and svp
*/
-static int
-compute_crtt(int *sap, int *svp, int m)
+static int64_t
+compute_crtt(int64_t *sap, int64_t *svp, int64_t m)
{
- int sa = *sap;
- int sv = *svp;
- int crtt;
- int saved_m = m;
+ int64_t sa = *sap;
+ int64_t sv = *svp;
+ int64_t crtt;
+ int64_t saved_m = m;
assert(*sap >= -1);
assert(*svp >= 0);
@@ -1285,8 +1230,8 @@ compute_crtt(int *sap, int *svp, int m)
crtt = (sa >> 3) + sv;
if (debug & D_PROBE) {
- logdebug("compute_crtt: m = %d sa = %d, sv = %d -> crtt = "
- "%d\n", saved_m, sa, sv, crtt);
+ logerr("compute_crtt: m = %lld sa = %lld, sv = %lld -> "
+ "crtt = %lld\n", saved_m, sa, sv, crtt);
}
*sap = sa;
@@ -1300,22 +1245,22 @@ compute_crtt(int *sap, int *svp, int m)
}
static void
-pi_set_crtt(struct target *tg, int m, boolean_t is_probe_uni)
+pi_set_crtt(struct target *tg, int64_t m, boolean_t is_probe_uni)
{
struct phyint_instance *pii = tg->tg_phyint_inst;
int probe_interval = pii->pii_phyint->pi_group->pg_probeint;
- int sa = tg->tg_rtt_sa;
- int sv = tg->tg_rtt_sd;
+ int64_t sa = tg->tg_rtt_sa;
+ int64_t sv = tg->tg_rtt_sd;
int new_crtt;
int i;
if (debug & D_PROBE)
- logdebug("pi_set_crtt: target - m %d\n", m);
+ logdebug("pi_set_crtt: target - m %lld\n", m);
/* store the round trip time, in case we need to defer computation */
tg->tg_deferred[tg->tg_num_deferred] = m;
- new_crtt = compute_crtt(&sa, &sv, m);
+ new_crtt = ns2ms(compute_crtt(&sa, &sv, m));
/*
* If this probe's round trip time would singlehandedly cause an
@@ -1342,8 +1287,8 @@ pi_set_crtt(struct target *tg, int m, boolean_t is_probe_uni)
}
for (i = 0; i <= tg->tg_num_deferred; i++) {
- tg->tg_crtt = compute_crtt(&tg->tg_rtt_sa,
- &tg->tg_rtt_sd, tg->tg_deferred[i]);
+ tg->tg_crtt = ns2ms(compute_crtt(&tg->tg_rtt_sa,
+ &tg->tg_rtt_sd, tg->tg_deferred[i]));
}
tg->tg_num_deferred = 0;
@@ -1373,13 +1318,13 @@ pi_set_crtt(struct target *tg, int m, boolean_t is_probe_uni)
* If not found return NULL.
*/
static void *
-find_ancillary(struct msghdr *msg, int cmsg_type)
+find_ancillary(struct msghdr *msg, int cmsg_level, int cmsg_type)
{
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
cmsg = CMSG_NXTHDR(msg, cmsg)) {
- if (cmsg->cmsg_level == IPPROTO_IPV6 &&
+ if (cmsg->cmsg_level == cmsg_level &&
cmsg->cmsg_type == cmsg_type) {
return (CMSG_DATA(cmsg));
}
@@ -1388,107 +1333,194 @@ find_ancillary(struct msghdr *msg, int cmsg_type)
}
/*
- * See if a previously failed interface has started working again.
+ * Try to activate another INACTIVE interface in the same group as `pi'.
+ * Prefer STANDBY INACTIVE to just INACTIVE.
*/
void
-phyint_check_for_repair(struct phyint *pi)
+phyint_activate_another(struct phyint *pi)
{
- if (phyint_repaired(pi)) {
- if (pi->pi_group == phyint_anongroup) {
- logerr("NIC repair detected on %s\n", pi->pi_name);
- } else {
- logerr("NIC repair detected on %s of group %s\n",
- pi->pi_name, pi->pi_group->pg_name);
- }
+ struct phyint *pi2;
+ struct phyint *inactivepi = NULL;
- /*
- * If the interface is offline, just clear the FAILED flag,
- * delaying the state change and failback operation until it
- * is brought back online.
- */
- if (pi->pi_state == PI_OFFLINE) {
- (void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
- return;
- }
+ if (pi->pi_group == phyint_anongroup)
+ return;
- if (pi->pi_flags & IFF_STANDBY) {
- (void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
- } else {
- if (try_failback(pi) != IPMP_FAILURE) {
- (void) change_lif_flags(pi,
- IFF_FAILED, _B_FALSE);
- /* Per state diagram */
- pi->pi_empty = 0;
+ for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (pi == pi2 || pi2->pi_state != PI_RUNNING ||
+ !(pi2->pi_flags & IFF_INACTIVE))
+ continue;
+
+ inactivepi = pi2;
+ if (pi2->pi_flags & IFF_STANDBY)
+ break;
+ }
+
+ if (inactivepi != NULL)
+ (void) change_pif_flags(inactivepi, 0, IFF_INACTIVE);
+}
+
+/*
+ * Transition a phyint back to PI_RUNNING (from PI_FAILED or PI_OFFLINE). The
+ * caller must ensure that the transition is appropriate. Clears IFF_OFFLINE
+ * or IFF_FAILED, as appropriate. Also sets IFF_INACTIVE on this or other
+ * interfaces as appropriate (see comment below). Finally, also updates the
+ * phyint's group state to account for the change.
+ */
+void
+phyint_transition_to_running(struct phyint *pi)
+{
+ struct phyint *pi2;
+ struct phyint *actstandbypi = NULL;
+ uint_t nactive = 0, nnonstandby = 0;
+ boolean_t onlining = (pi->pi_state == PI_OFFLINE);
+ uint64_t set, clear;
+
+ /*
+ * The interface is running again, but should it or another interface
+ * in the group end up INACTIVE? There are three cases:
+ *
+ * 1. If it's a STANDBY interface, it should end up INACTIVE if
+ * the group is operating at capacity (i.e., there are at least as
+ * many active interfaces as non-STANDBY interfaces in the group).
+ * No other interfaces should be changed.
+ *
+ * 2. If it's a non-STANDBY interface and we're onlining it or
+ * FAILBACK is enabled, then it should *not* end up INACTIVE.
+ * Further, if the group is above capacity as a result of this
+ * interface, then an active STANDBY interface in the group should
+ * end up INACTIVE.
+ *
+ * 3. If it's a non-STANDBY interface, we're repairing it, and
+ * FAILBACK is disabled, then it should end up INACTIVE *unless*
+ * the group was failed (in which case we have no choice but to
+ * use it). No other interfaces should be changed.
+ */
+ if (pi->pi_group != phyint_anongroup) {
+ pi2 = pi->pi_group->pg_phyint;
+ for (; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (!(pi2->pi_flags & IFF_STANDBY))
+ nnonstandby++;
+
+ if (pi2->pi_state == PI_RUNNING) {
+ if (!(pi2->pi_flags & IFF_INACTIVE)) {
+ nactive++;
+ if (pi2->pi_flags & IFF_STANDBY)
+ actstandbypi = pi2;
+ }
}
}
+ }
- phyint_chstate(pi, PI_RUNNING);
+ set = 0;
+ clear = (onlining ? IFF_OFFLINE : IFF_FAILED);
- if (GROUP_FAILED(pi->pi_group)) {
- /*
- * This is the 1st phyint to receive a response
- * after group failure.
- */
- logerr("At least 1 interface (%s) of group %s has "
- "repaired\n", pi->pi_name, pi->pi_group->pg_name);
- phyint_group_chstate(pi->pi_group, PG_RUNNING);
- }
+ if (pi->pi_flags & IFF_STANDBY) { /* case 1 */
+ if (nactive >= nnonstandby)
+ set |= IFF_INACTIVE;
+ else
+ clear |= IFF_INACTIVE;
+ } else if (onlining || failback_enabled) { /* case 2 */
+ if (nactive >= nnonstandby && actstandbypi != NULL)
+ (void) change_pif_flags(actstandbypi, IFF_INACTIVE, 0);
+ } else if (!GROUP_FAILED(pi->pi_group)) { /* case 3 */
+ set |= IFF_INACTIVE;
+ }
+ (void) change_pif_flags(pi, set, clear);
+
+ phyint_chstate(pi, PI_RUNNING);
+
+ /*
+ * Update the group state to account for the change.
+ */
+ phyint_group_refresh_state(pi->pi_group);
+}
+
+/*
+ * See if a previously failed interface has started working again.
+ */
+void
+phyint_check_for_repair(struct phyint *pi)
+{
+ if (!phyint_repaired(pi))
+ return;
+
+ if (pi->pi_group == phyint_anongroup) {
+ logerr("IP interface repair detected on %s\n", pi->pi_name);
+ } else {
+ logerr("IP interface repair detected on %s of group %s\n",
+ pi->pi_name, pi->pi_group->pg_name);
}
+
+ /*
+ * If the interface is PI_OFFLINE, it can't be made PI_RUNNING yet.
+ * So just clear IFF_FAILED and defer phyint_transition_to_running()
+ * until it is brought back online.
+ */
+ if (pi->pi_state == PI_OFFLINE) {
+ (void) change_pif_flags(pi, 0, IFF_FAILED);
+ return;
+ }
+
+ phyint_transition_to_running(pi); /* calls phyint_chstate() */
}
/*
- * See if a previously functioning interface has failed, or if the
- * whole group of interfaces has failed.
+ * See if an interface has failed, or if the whole group of interfaces has
+ * failed.
*/
static void
phyint_inst_check_for_failure(struct phyint_instance *pii)
{
- struct phyint *pi;
- struct phyint *pi2;
-
- pi = pii->pii_phyint;
+ struct phyint *pi = pii->pii_phyint;
+ struct phyint *pi2;
+ boolean_t was_active;
switch (failure_state(pii)) {
case PHYINT_FAILURE:
- (void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
+ was_active = ((pi->pi_flags & IFF_INACTIVE) == 0);
+
+ (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE);
if (pi->pi_group == phyint_anongroup) {
- logerr("NIC failure detected on %s\n", pii->pii_name);
+ logerr("IP interface failure detected on %s\n",
+ pii->pii_name);
} else {
- logerr("NIC failure detected on %s of group %s\n",
- pii->pii_name, pi->pi_group->pg_name);
+ logerr("IP interface failure detected on %s of group"
+ " %s\n", pii->pii_name, pi->pi_group->pg_name);
}
+
/*
- * Do the failover, unless the interface is offline (in
- * which case we've already failed over).
+ * If the interface is offline, the state change will be
+ * noted when it comes back online.
*/
if (pi->pi_state != PI_OFFLINE) {
+ /*
+ * If the failed interface was active, activate
+ * another INACTIVE interface in the group if
+ * possible. (If the interface is PI_OFFLINE,
+ * we already activated another.)
+ */
+ if (was_active)
+ phyint_activate_another(pi);
+
phyint_chstate(pi, PI_FAILED);
reset_crtt_all(pi);
- if (!(pi->pi_flags & IFF_INACTIVE))
- (void) try_failover(pi, FAILOVER_NORMAL);
}
break;
case GROUP_FAILURE:
- logerr("All Interfaces in group %s have failed\n",
- pi->pi_group->pg_name);
- for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL;
- pi2 = pi2->pi_pgnext) {
- if (pi2->pi_flags & IFF_OFFLINE)
+ pi2 = pi->pi_group->pg_phyint;
+ for (; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ (void) change_pif_flags(pi2, IFF_FAILED, IFF_INACTIVE);
+ if (pi2->pi_state == PI_OFFLINE) /* see comment above */
continue;
- (void) change_lif_flags(pi2, IFF_FAILED, _B_TRUE);
- reset_crtt_all(pi2);
+ reset_crtt_all(pi2);
/*
- * In the case of host targets, we
- * would have flushed the targets,
- * and gone to PI_NOTARGETS state.
+ * In the case of host targets, we would have flushed
+ * the targets, and gone to PI_NOTARGETS state.
*/
if (pi2->pi_state == PI_RUNNING)
phyint_chstate(pi2, PI_FAILED);
-
- pi2->pi_empty = 0;
- pi2->pi_full = 0;
}
break;
@@ -1519,7 +1551,8 @@ phyint_inst_timer(struct phyint_instance *pii)
hrtime_t cur_hrtime;
int probe_interval = pii->pii_phyint->pi_group->pg_probeint;
- cur_time = getcurrenttime();
+ cur_hrtime = gethrtime();
+ cur_time = ns2ms(cur_hrtime);
if (debug & D_TIMER) {
logdebug("phyint_inst_timer(%s %s)\n",
@@ -1621,7 +1654,7 @@ phyint_inst_timer(struct phyint_instance *pii)
* the failure detection (fd) probe timer has not yet fired.
* Need to send only an rtt probe. The probe type is PROBE_RTT.
*/
- probe(pii, PROBE_RTT, cur_time);
+ probe(pii, PROBE_RTT, cur_hrtime);
return (interval);
}
/*
@@ -1651,7 +1684,7 @@ phyint_inst_timer(struct phyint_instance *pii)
* We can have at most, the latest 2 probes that we sent, in
* the PR_UNACKED state. All previous probes sent, are either
* PR_LOST or PR_ACKED. An unacknowledged probe is considered
- * timed out if the probe's time_sent + the CRTT < currenttime.
+ * timed out if the probe's time_start + the CRTT < currenttime.
* For each of the last 2 probes, examine whether it has timed
* out. If so, mark it PR_LOST. The probe stats is a circular array.
*/
@@ -1686,16 +1719,15 @@ phyint_inst_timer(struct phyint_instance *pii)
* not available use group's probe interval,
* which is a worst case estimate.
*/
+ timeout = ns2ms(pr_statp->pr_hrtime_start);
if (cur_tg->tg_crtt != 0) {
- timeout = pr_statp->pr_time_sent +
- cur_tg->tg_crtt;
+ timeout += cur_tg->tg_crtt;
} else {
- timeout = pr_statp->pr_time_sent +
- probe_interval;
+ timeout += probe_interval;
}
if (TIME_LT(timeout, cur_time)) {
- pr_statp->pr_status = PR_LOST;
pr_statp->pr_time_lost = timeout;
+ probe_chstate(pr_statp, pii, PR_LOST);
} else if (i == 1) {
/*
* We are forced to consider this probe
@@ -1711,8 +1743,8 @@ phyint_inst_timer(struct phyint_instance *pii)
* when the timer fires, we find 2 valid
* unacked probes, and they are yet to timeout
*/
- pr_statp->pr_status = PR_LOST;
pr_statp->pr_time_lost = cur_time;
+ probe_chstate(pr_statp, pii, PR_LOST);
} else {
/*
* Only the most recent probe can enter
@@ -1740,16 +1772,15 @@ phyint_inst_timer(struct phyint_instance *pii)
* The timer has fired. Take appropriate action depending
* on the current state of the phyint.
*
- * PI_RUNNING state - Failure detection and failover
- * PI_FAILED state - Repair detection and failback
+ * PI_RUNNING state - Failure detection
+ * PI_FAILED state - Repair detection
*/
switch (pii->pii_phyint->pi_state) {
case PI_FAILED:
/*
* If the most recent probe (excluding unacked probes that
* are yet to time out) has been acked, check whether the
- * phyint is now repaired. If the phyint is repaired, then
- * attempt failback, unless it is an inactive standby.
+ * phyint is now repaired.
*/
if (pii->pii_rack + valid_unack_count + 1 == pii->pii_snxt) {
phyint_check_for_repair(pii->pii_phyint);
@@ -1760,10 +1791,8 @@ phyint_inst_timer(struct phyint_instance *pii)
/*
* It's possible our probes have been lost because of a
* spanning-tree mandated quiet period on the switch. If so,
- * ignore the lost probes and consider the interface to still
- * be functioning.
+ * ignore the lost probes.
*/
- cur_hrtime = gethrtime();
if (pii->pii_fd_hrtime - cur_hrtime > 0)
break;
@@ -1771,8 +1800,7 @@ phyint_inst_timer(struct phyint_instance *pii)
/*
* We have 1 or more failed probes (excluding unacked
* probes that are yet to time out). Determine if the
- * phyint has failed. If so attempt a failover,
- * unless it is an inactive standby
+ * phyint has failed.
*/
phyint_inst_check_for_failure(pii);
}
@@ -1790,16 +1818,16 @@ phyint_inst_timer(struct phyint_instance *pii)
* was called, the target list may be empty.
*/
if (pii->pii_target_next != NULL) {
- probe(pii, PROBE_UNI, cur_time);
+ probe(pii, PROBE_UNI, cur_hrtime);
/*
* If we have just the one probe target, and we're not using
* router targets, try to find another as we presently have
* no resilience.
*/
if (!pii->pii_targets_are_routers && pii->pii_ntargets == 1)
- probe(pii, PROBE_MULTI, cur_time);
+ probe(pii, PROBE_MULTI, cur_hrtime);
} else {
- probe(pii, PROBE_MULTI, cur_time);
+ probe(pii, PROBE_MULTI, cur_hrtime);
}
return (interval);
}
@@ -1859,8 +1887,8 @@ process_link_state_down(struct phyint *pi)
/*
* Clear the probe statistics arrays, we don't want the repair
- * detection logic relying on probes that were succesful prior
- * to the link going down.
+ * detection logic relying on probes that were successful prior
+ * to the link going down.
*/
if (PROBE_CAPABLE(pi->pi_v4))
clear_pii_probe_stats(pi->pi_v4);
@@ -2016,7 +2044,7 @@ phyint_inst_probe_failure_state(struct phyint_instance *pii, uint_t *tff)
pii->pii_target_next = target_next(cur_tg);
} else {
target_delete(cur_tg);
- probe(pii, PROBE_MULTI, getcurrenttime());
+ probe(pii, PROBE_MULTI, gethrtime());
}
return (PHYINT_OK);
}
@@ -2065,13 +2093,13 @@ failure_state(struct phyint_instance *pii)
struct probe_success_count psinfo;
uint_t pi2_tls; /* time last success */
uint_t pi_tff; /* time first fail */
- struct phyint *pi2;
+ struct phyint *pi2;
struct phyint *pi;
struct phyint_instance *pii2;
struct phyint_group *pg;
- boolean_t alone;
+ int retval;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("phyint_failed(%s)\n", pii->pii_name);
pi = pii->pii_phyint;
@@ -2082,24 +2110,13 @@ failure_state(struct phyint_instance *pii)
return (PHYINT_OK);
/*
- * At this point, the link is down, or the phyint is suspect,
- * as it has lost NUM_PROBE_FAILS or more probes. If the phyint
- * does not belong to any group, or is the only member of the
- * group capable of being probed, return PHYINT_FAILURE.
+ * At this point, the link is down, or the phyint is suspect, as it
+ * has lost NUM_PROBE_FAILS or more probes. If the phyint does not
+ * belong to any group, this is a PHYINT_FAILURE. Otherwise, continue
+ * on to determine whether this should be considered a PHYINT_FAILURE
+ * or GROUP_FAILURE.
*/
- alone = _B_TRUE;
- if (pg != phyint_anongroup) {
- for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
- if (pi2 == pi)
- continue;
- if (PROBE_CAPABLE(pi2->pi_v4) ||
- PROBE_CAPABLE(pi2->pi_v6)) {
- alone = _B_FALSE;
- break;
- }
- }
- }
- if (alone)
+ if (pg == phyint_anongroup)
return (PHYINT_FAILURE);
/*
@@ -2116,6 +2133,7 @@ failure_state(struct phyint_instance *pii)
* after it was received, so there is no point looking at the tls
* of other phyints.
*/
+ retval = GROUP_FAILURE;
for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
/* Exclude ourself from comparison */
if (pi2 == pi)
@@ -2123,76 +2141,86 @@ failure_state(struct phyint_instance *pii)
if (LINK_DOWN(pi)) {
/*
- * We use FLAGS_TO_LINK_STATE() to test the
- * flags directly, rather then LINK_UP() or
- * LINK_DOWN(), as we may not have got round
- * to processing the link state for the other
- * phyints in the group yet.
+ * We use FLAGS_TO_LINK_STATE() to test the flags
+ * directly, rather than LINK_UP() or LINK_DOWN(), as
+ * we may not have got round to processing the link
+ * state for the other phyints in the group yet.
*
- * The check for PI_RUNNING and group
- * failure handles the case when the
- * group begins to recover. The first
- * phyint to recover should not trigger
- * a failover from the soon-to-recover
- * other phyints to the first recovered
- * phyint. PI_RUNNING will be set, and
- * pg_groupfailed cleared only after
- * receipt of NUM_PROBE_REPAIRS, by
- * which time the other phyints should
- * have received at least 1 packet,
- * and so will not have NUM_PROBE_FAILS.
+ * The check for PI_RUNNING and group failure handles
+ * the case when the group begins to recover.
+ * PI_RUNNING will be set, and group failure cleared
+ * only after receipt of NUM_PROBE_REPAIRS, by which
+ * time the other phyints should have received at
+ * least 1 packet, and so will not have NUM_PROBE_FAILS.
*/
if ((pi2->pi_state == PI_RUNNING) &&
- !GROUP_FAILED(pg) && FLAGS_TO_LINK_STATE(pi2))
- return (PHYINT_FAILURE);
- } else {
- /*
- * Need to compare against both IPv4 and
- * IPv6 instances.
- */
- pii2 = pi2->pi_v4;
- if (pii2 != NULL) {
- probe_success_info(pii2, NULL, &psinfo);
- if (psinfo.ps_tls_valid) {
- pi2_tls = psinfo.ps_tls;
- /*
- * See comment above regarding check
- * for PI_RUNNING and group failure.
- */
- if (TIME_GT(pi2_tls, pi_tff) &&
- (pi2->pi_state == PI_RUNNING) &&
- !GROUP_FAILED(pg) &&
- FLAGS_TO_LINK_STATE(pi2))
- return (PHYINT_FAILURE);
+ !GROUP_FAILED(pg) && FLAGS_TO_LINK_STATE(pi2)) {
+ retval = PHYINT_FAILURE;
+ break;
+ }
+ continue;
+ }
+
+ if (LINK_DOWN(pi2))
+ continue;
+
+ /*
+ * If there's no probe-based failure detection on this
+ * interface, and its link is still up, then it's still
+ * working and thus the group has not failed.
+ */
+ if (!PROBE_ENABLED(pi2->pi_v4) && !PROBE_ENABLED(pi2->pi_v6)) {
+ retval = PHYINT_FAILURE;
+ break;
+ }
+
+ /*
+ * Need to compare against both IPv4 and IPv6 instances.
+ */
+ pii2 = pi2->pi_v4;
+ if (pii2 != NULL) {
+ probe_success_info(pii2, NULL, &psinfo);
+ if (psinfo.ps_tls_valid) {
+ pi2_tls = psinfo.ps_tls;
+ /*
+ * See comment above regarding check
+ * for PI_RUNNING and group failure.
+ */
+ if (TIME_GT(pi2_tls, pi_tff) &&
+ (pi2->pi_state == PI_RUNNING) &&
+ !GROUP_FAILED(pg) &&
+ FLAGS_TO_LINK_STATE(pi2)) {
+ retval = PHYINT_FAILURE;
+ break;
}
}
+ }
- pii2 = pi2->pi_v6;
- if (pii2 != NULL) {
- probe_success_info(pii2, NULL, &psinfo);
- if (psinfo.ps_tls_valid) {
- pi2_tls = psinfo.ps_tls;
- /*
- * See comment above regarding check
- * for PI_RUNNING and group failure.
- */
- if (TIME_GT(pi2_tls, pi_tff) &&
- (pi2->pi_state == PI_RUNNING) &&
- !GROUP_FAILED(pg) &&
- FLAGS_TO_LINK_STATE(pi2))
- return (PHYINT_FAILURE);
+ pii2 = pi2->pi_v6;
+ if (pii2 != NULL) {
+ probe_success_info(pii2, NULL, &psinfo);
+ if (psinfo.ps_tls_valid) {
+ pi2_tls = psinfo.ps_tls;
+ /*
+ * See comment above regarding check
+ * for PI_RUNNING and group failure.
+ */
+ if (TIME_GT(pi2_tls, pi_tff) &&
+ (pi2->pi_state == PI_RUNNING) &&
+ !GROUP_FAILED(pg) &&
+ FLAGS_TO_LINK_STATE(pi2)) {
+ retval = PHYINT_FAILURE;
+ break;
}
}
}
}
/*
- * Change the group state to PG_FAILED if it's not already.
+ * Update the group state to account for the changes.
*/
- if (!GROUP_FAILED(pg))
- phyint_group_chstate(pg, PG_FAILED);
-
- return (GROUP_FAILURE);
+ phyint_group_refresh_state(pg);
+ return (retval);
}
/*
@@ -2215,7 +2243,7 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
uint_t timeout;
struct target *tg;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("probe_success_info(%s)\n", pii->pii_name);
bzero(psinfo, sizeof (*psinfo));
@@ -2248,10 +2276,11 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
* not available use the value of the group's probe
* interval which is a worst case estimate.
*/
+ timeout = ns2ms(pr_statp->pr_hrtime_start);
if (tg->tg_crtt != 0) {
- timeout = pr_statp->pr_time_sent + tg->tg_crtt;
+ timeout += tg->tg_crtt;
} else {
- timeout = pr_statp->pr_time_sent +
+ timeout +=
pii->pii_phyint->pi_group->pg_probeint;
}
@@ -2261,7 +2290,7 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
* recent consecutive successes.
*/
pr_statp->pr_time_lost = timeout;
- pr_statp->pr_status = PR_LOST;
+ probe_chstate(pr_statp, pii, PR_LOST);
pi_found_failure = _B_TRUE;
if (cur_tg != NULL && tg == cur_tg) {
/*
@@ -2292,7 +2321,8 @@ probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
* the most recent probe success.
*/
if (!psinfo->ps_tls_valid) {
- psinfo->ps_tls = pr_statp->pr_time_acked;
+ psinfo->ps_tls =
+ ns2ms(pr_statp->pr_hrtime_ackproc);
psinfo->ps_tls_valid = _B_TRUE;
}
break;
@@ -2339,7 +2369,7 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
uint_t timeout;
struct target *tg;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("probe_fail_info(%s)\n", pii->pii_name);
bzero(pfinfo, sizeof (*pfinfo));
@@ -2377,10 +2407,11 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
* not available use the group's probe interval,
* which is a worst case estimate.
*/
+ timeout = ns2ms(pr_statp->pr_hrtime_start);
if (tg->tg_crtt != 0) {
- timeout = pr_statp->pr_time_sent + tg->tg_crtt;
+ timeout += tg->tg_crtt;
} else {
- timeout = pr_statp->pr_time_sent +
+ timeout +=
pii->pii_phyint->pi_group->pg_probeint;
}
@@ -2388,7 +2419,7 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
break;
pr_statp->pr_time_lost = timeout;
- pr_statp->pr_status = PR_LOST;
+ probe_chstate(pr_statp, pii, PR_LOST);
/* FALLTHRU */
case PR_LOST:
@@ -2421,6 +2452,19 @@ probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
}
/*
+ * Change the state of probe `pr' on phyint_instance `pii' to state `state'.
+ */
+void
+probe_chstate(struct probe_stats *pr, struct phyint_instance *pii, int state)
+{
+ if (pr->pr_status == state)
+ return;
+
+ pr->pr_status = state;
+ (void) probe_state_event(pr, pii);
+}
+
+/*
* Check if the phyint has been repaired. If no test address has been
* configured, then consider the interface repaired if the link is up (unless
* the link is flapping; see below). Otherwise, look for proof of probes
@@ -2436,7 +2480,7 @@ phyint_repaired(struct phyint *pi)
int pr_ndx;
uint_t cur_time;
- if (debug & D_FAILOVER)
+ if (debug & D_FAILREP)
logdebug("phyint_repaired(%s)\n", pi->pi_name);
if (LINK_DOWN(pi))
@@ -2458,7 +2502,7 @@ phyint_repaired(struct phyint *pi)
}
if (!pi->pi_lfmsg_printed) {
logerr("The link has come up on %s more than %d times "
- "in the last minute; disabling failback until it "
+ "in the last minute; disabling repair until it "
"stabilizes\n", pi->pi_name, LINK_UP_PERMIN);
pi->pi_lfmsg_printed = 1;
}
@@ -2490,354 +2534,41 @@ phyint_repaired(struct phyint *pi)
}
/*
- * Try failover from phyint 'pi' to a suitable destination.
- */
-int
-try_failover(struct phyint *pi, int failover_type)
-{
- struct phyint *dst;
- int err;
-
- if (debug & D_FAILOVER)
- logdebug("try_failover(%s %d)\n", pi->pi_name, failover_type);
-
- /*
- * Attempt to find a failover destination 'dst'.
- * dst will be null if any of the following is true
- * Phyint is not part of a group OR
- * Phyint is the only member of a group OR
- * No suitable failover dst was available
- */
- dst = get_failover_dst(pi, failover_type);
- if (dst == NULL)
- return (IPMP_EMINRED);
-
- dst->pi_empty = 0; /* Per state diagram */
- pi->pi_full = 0; /* Per state diagram */
-
- err = failover(pi, dst);
-
- if (debug & D_FAILOVER) {
- logdebug("failed over from %s to %s ret %d\n",
- pi->pi_name, dst->pi_name, err);
- }
- if (err == 0) {
- pi->pi_empty = 1; /* Per state diagram */
- /*
- * we don't want to print out this message if a
- * phyint is leaving the group, nor for failover from
- * standby
- */
- if (failover_type == FAILOVER_NORMAL) {
- logerr("Successfully failed over from NIC %s to NIC "
- "%s\n", pi->pi_name, dst->pi_name);
- }
- return (0);
- } else {
- /*
- * The failover did not succeed. We must retry the failover
- * only after resyncing our state based on the kernel's.
- * For eg. either the src or the dst might have been unplumbed
- * causing this failure. initifs() will be called again,
- * from main, since full_scan_required has been set to true
- * by failover();
- */
- return (IPMP_FAILURE);
- }
-}
-
-/*
- * global_errno captures the errno value, if failover() or failback()
- * fails. This is sent to if_mpadm(1M).
- */
-int global_errno;
-
-/*
- * Attempt failover from phyint 'from' to phyint 'to'.
- * IP moves everything from phyint 'from' to phyint 'to'.
- */
-static int
-failover(struct phyint *from, struct phyint *to)
-{
- struct lifreq lifr;
- int ret;
-
- if (debug & D_FAILOVER) {
- logdebug("failing over from %s to %s\n",
- from->pi_name, to->pi_name);
- }
-
- /*
- * Perform the failover. Both IPv4 and IPv6 are failed over
- * using a single ioctl by passing in AF_UNSPEC family.
- */
- lifr.lifr_addr.ss_family = AF_UNSPEC;
- (void) strncpy(lifr.lifr_name, from->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_movetoindex = to->pi_ifindex;
-
- ret = ioctl(ifsock_v4, SIOCLIFFAILOVER, (caddr_t)&lifr);
- if (ret < 0) {
- global_errno = errno;
- logperror("failover: ioctl (failover)");
- }
-
- /*
- * Set full_scan_required to true. This will make us read
- * the state from the kernel in initifs() and update our tables,
- * to reflect the current state after the failover. If the
- * failover has failed it will then reissue the failover.
- */
- full_scan_required = _B_TRUE;
- return (ret);
-}
-
-/*
- * phyint 'pi' has recovered. Attempt failback from every phyint in the same
- * group as phyint 'pi' that is a potential failback source, to phyint 'pi'.
- * Return values:
- * IPMP_SUCCESS: Failback successful from each of the other
- * phyints in the group.
- * IPMP_EFBPARTIAL: Failback successful from some of the other
- * phyints in the group.
- * IPMP_FAILURE: Failback syscall failed with some error.
- *
- * Note that failback is attempted regardless of the setting of the
- * failback_enabled flag.
- */
-int
-do_failback(struct phyint *pi)
-{
- struct phyint *from;
- boolean_t done;
- boolean_t partial;
- boolean_t attempted_failback = _B_FALSE;
-
- if (debug & D_FAILOVER)
- logdebug("do_failback(%s)\n", pi->pi_name);
-
- /* If this phyint is not part of a named group, return. */
- if (pi->pi_group == phyint_anongroup) {
- pi->pi_full = 1;
- return (IPMP_SUCCESS);
- }
-
- /*
- * Attempt failback from every phyint in the group to 'pi'.
- * The reason for doing this, instead of only from the
- * phyint to which we did the failover is given below.
- *
- * After 'pi' failed, if any app. tries to join on a multicast
- * address (IPv6), on the failed phyint, IP picks any arbitrary
- * non-failed phyint in the group, instead of the failed phyint,
- * in.mpathd is not aware of this. Thus failing back only from the
- * interface to which 'pi' failed over, will failback the ipif's
- * but not the ilm's. So we need to failback from all members of
- * the phyint group
- */
- done = _B_TRUE;
- partial = _B_FALSE;
- for (from = pi->pi_group->pg_phyint; from != NULL;
- from = from->pi_pgnext) {
- /* Exclude ourself as a failback src */
- if (from == pi)
- continue;
-
- /*
- * If the 'from' phyint has IPv4 plumbed, the 'to'
- * phyint must also have IPv4 plumbed. Similar check
- * for IPv6. IP makes the same check. Otherwise the
- * failback will fail.
- */
- if ((from->pi_v4 != NULL && pi->pi_v4 == NULL) ||
- (from->pi_v6 != NULL && pi->pi_v6 == NULL)) {
- partial = _B_TRUE;
- continue;
- }
-
- pi->pi_empty = 0; /* Per state diagram */
- attempted_failback = _B_TRUE;
- if (failback(from, pi) != 0) {
- done = _B_FALSE;
- break;
- }
- }
-
- /*
- * We are done. No more phyint from which we can src the failback
- */
- if (done) {
- if (!partial)
- pi->pi_full = 1; /* Per state diagram */
- /*
- * Don't print out a message unless there is a
- * transition from FAILED to RUNNING. For eg.
- * we don't want to print out this message if a
- * phyint is leaving the group, or at startup
- */
- if (attempted_failback && (pi->pi_flags &
- (IFF_FAILED | IFF_OFFLINE))) {
- logerr("Successfully failed back to NIC %s\n",
- pi->pi_name);
- }
- return (partial ? IPMP_EFBPARTIAL : IPMP_SUCCESS);
- }
-
- return (IPMP_FAILURE);
-}
-
-/*
- * This function is similar to do_failback() above, but respects the
- * failback_enabled flag for phyints in named groups.
- */
-int
-try_failback(struct phyint *pi)
-{
- if (debug & D_FAILOVER)
- logdebug("try_failback(%s)\n", pi->pi_name);
-
- if (pi->pi_group != phyint_anongroup && !failback_enabled)
- return (IPMP_EFBDISABLED);
-
- return (do_failback(pi));
-}
-
-/*
- * Failback everything from phyint 'from' that has the same ifindex
- * as phyint to's ifindex.
- */
-static int
-failback(struct phyint *from, struct phyint *to)
-{
- struct lifreq lifr;
- int ret;
-
- if (debug & D_FAILOVER)
- logdebug("failback(%s %s)\n", from->pi_name, to->pi_name);
-
- lifr.lifr_addr.ss_family = AF_UNSPEC;
- (void) strncpy(lifr.lifr_name, from->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_movetoindex = to->pi_ifindex;
-
- ret = ioctl(ifsock_v4, SIOCLIFFAILBACK, (caddr_t)&lifr);
- if (ret < 0) {
- global_errno = errno;
- logperror("failback: ioctl (failback)");
- }
-
- /*
- * Set full_scan_required to true. This will make us read
- * the state from the kernel in initifs() and update our tables,
- * to reflect the current state after the failback. If the
- * failback has failed it will then reissue the failback.
- */
- full_scan_required = _B_TRUE;
-
- return (ret);
-}
-
-/*
- * Select a target phyint for failing over from 'pi'.
- * In the normal case i.e. failover_type is FAILOVER_NORMAL, the preferred
- * target phyint is chosen as follows,
- * 1. Pick any inactive standby interface.
- * 2. If no inactive standby is available, select any phyint in the
- * same group that has the least number of logints, (excluding
- * IFF_NOFAILOVER and !IFF_UP logints)
- * If we are failing over from a standby, failover_type is
- * FAILOVER_TO_NONSTANDBY, and we won't pick a standby for the destination.
- * If a phyint is leaving the group, then failover_type is FAILOVER_TO_ANY,
- * and we won't return NULL, as long as there is at least 1 other phyint
- * in the group.
- */
-static struct phyint *
-get_failover_dst(struct phyint *pi, int failover_type)
-{
- struct phyint *maybe = NULL;
- struct phyint *pi2;
- struct phyint *last_choice = NULL;
-
- if (pi->pi_group == phyint_anongroup)
- return (NULL);
-
- /*
- * Loop thru the phyints in the group, and pick the preferred
- * phyint for the target.
- */
- for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
- /* Exclude ourself and offlined interfaces */
- if (pi2 == pi || pi2->pi_state == PI_OFFLINE)
- continue;
-
- /*
- * The chosen target phyint must have IPv4 instance
- * plumbed, if the src phyint has IPv4 plumbed. Similarly
- * for IPv6.
- */
- if ((pi2->pi_v4 == NULL && pi->pi_v4 != NULL) ||
- (pi2->pi_v6 == NULL && pi->pi_v6 != NULL))
- continue;
-
- /* The chosen target must be PI_RUNNING. */
- if (pi2->pi_state != PI_RUNNING) {
- last_choice = pi2;
- continue;
- }
-
- if ((pi2->pi_flags & (IFF_STANDBY | IFF_INACTIVE)) &&
- (failover_type != FAILOVER_TO_NONSTANDBY)) {
- return (pi2);
- } else {
- if (maybe == NULL)
- maybe = pi2;
- else if (logint_upcount(pi2) < logint_upcount(maybe))
- maybe = pi2;
- }
- }
- if (maybe == NULL && failover_type == FAILOVER_TO_ANY)
- return (last_choice);
- else
- return (maybe);
-}
-
-/*
* Used to set/clear phyint flags, by making a SIOCSLIFFLAGS call.
*/
boolean_t
-change_lif_flags(struct phyint *pi, uint64_t flags, boolean_t setfl)
+change_pif_flags(struct phyint *pi, uint64_t set, uint64_t clear)
{
int ifsock;
struct lifreq lifr;
uint64_t old_flags;
- if (debug & D_FAILOVER) {
- logdebug("change_lif_flags(%s): flags %llx setfl %d\n",
- pi->pi_name, flags, (int)setfl);
+ if (debug & D_FAILREP) {
+ logdebug("change_pif_flags(%s): set %llx clear %llx\n",
+ pi->pi_name, set, clear);
}
- if (pi->pi_v4 != NULL) {
+ if (pi->pi_v4 != NULL)
ifsock = ifsock_v4;
- } else {
+ else
ifsock = ifsock_v6;
- }
/*
* Get the current flags from the kernel, and set/clear the
* desired phyint flags. Since we set only phyint flags, we can
* do it on either IPv4 or IPv6 instance.
*/
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
+
if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO)
- logperror("change_lif_flags: ioctl (get flags)");
+ logperror("change_pif_flags: ioctl (get flags)");
return (_B_FALSE);
}
old_flags = lifr.lifr_flags;
- if (setfl)
- lifr.lifr_flags |= flags;
- else
- lifr.lifr_flags &= ~flags;
+ lifr.lifr_flags |= set;
+ lifr.lifr_flags &= ~clear;
if (old_flags == lifr.lifr_flags) {
/* No change in the flags. No need to send ioctl */
@@ -2846,7 +2577,7 @@ change_lif_flags(struct phyint *pi, uint64_t flags, boolean_t setfl)
if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO)
- logperror("change_lif_flags: ioctl (set flags)");
+ logperror("change_pif_flags: ioctl (set flags)");
return (_B_FALSE);
}
@@ -2854,15 +2585,13 @@ change_lif_flags(struct phyint *pi, uint64_t flags, boolean_t setfl)
* Keep pi_flags in synch. with actual flags. Assumes flags are
* phyint flags.
*/
- if (setfl)
- pi->pi_flags |= flags;
- else
- pi->pi_flags &= ~flags;
+ pi->pi_flags |= set;
+ pi->pi_flags &= ~clear;
- if (pi->pi_v4)
+ if (pi->pi_v4 != NULL)
pi->pi_v4->pii_flags = pi->pi_flags;
- if (pi->pi_v6)
+ if (pi->pi_v6 != NULL)
pi->pi_v6->pii_flags = pi->pi_flags;
return (_B_TRUE);
@@ -2928,18 +2657,31 @@ reset_snxt_basetimes(void)
* and it is up, it is not possible to detect the interface failure.
* SIOCTMYADDR also doesn't consider local zone address as own address.
* So, we choose to use SIOCGLIFCONF to collect the local addresses, and they
- * are stored in laddr_list.
+ * are stored in `localaddrs'.
*/
-
boolean_t
own_address(struct in6_addr addr)
{
- struct local_addr *taddr = laddr_list;
+ addrlist_t *addrp;
+ struct sockaddr_storage ss;
+ int af = IN6_IS_ADDR_V4MAPPED(&addr) ? AF_INET : AF_INET6;
- for (; taddr != NULL; taddr = taddr->next) {
- if (IN6_ARE_ADDR_EQUAL(&addr, &taddr->addr)) {
+ addr2storage(af, &addr, &ss);
+ for (addrp = localaddrs; addrp != NULL; addrp = addrp->al_next) {
+ if (sockaddrcmp(&ss, &addrp->al_addr))
return (_B_TRUE);
- }
}
return (_B_FALSE);
}
+
+/*
+ * Convert a duration of `ns' nanoseconds to milliseconds. The division is
+ * an integer division, so any fractional millisecond is dropped.
+ */
+static int
+ns2ms(int64_t ns)
+{
+ int64_t ns_per_ms = NANOSEC / MILLISEC;
+
+ return (ns / ns_per_ms);
+}
+
+/*
+ * Convert the time held in `tvp' to nanoseconds. Each field is widened to
+ * int64_t before it is scaled, so the multiplication is always carried out
+ * in 64 bits regardless of how wide time_t, suseconds_t, or the NANOSEC
+ * constant happen to be on the platform being compiled for.
+ */
+static int64_t
+tv2ns(struct timeval *tvp)
+{
+ return ((int64_t)tvp->tv_sec * NANOSEC +
+ (int64_t)tvp->tv_usec * 1000);
+}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c
index b56648cf12..def08d39ce 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mpd_defs.h"
#include "mpd_tables.h"
@@ -47,11 +45,7 @@ static void phyint_inst_print(struct phyint_instance *pii);
static void phyint_insert(struct phyint *pi, struct phyint_group *pg);
static void phyint_delete(struct phyint *pi);
-
-static void phyint_group_insert(struct phyint_group *pg);
-static void phyint_group_delete(struct phyint_group *pg);
-static struct phyint_group *phyint_group_lookup(const char *pg_name);
-static struct phyint_group *phyint_group_create(const char *pg_name);
+static boolean_t phyint_is_usable(struct phyint *pi);
static void logint_print(struct logint *li);
static void logint_insert(struct phyint_instance *pii, struct logint *li);
@@ -68,16 +62,13 @@ static void reset_pii_probes(struct phyint_instance *pii, struct target *tg);
static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii);
static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii);
-static void ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask);
-static boolean_t prefix_equal(struct in6_addr p1, struct in6_addr p2,
- int prefix_len);
-
static int phyint_state_event(struct phyint_group *pg, struct phyint *pi);
static int phyint_group_state_event(struct phyint_group *pg);
static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t);
static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi,
ipmp_if_op_t op);
+static int logint_upcount(struct phyint *pi);
static uint64_t gensig(void);
/* Initialize any per-file global state. Returns 0 on success, -1 on failure */
@@ -110,6 +101,183 @@ phyint_lookup(const char *name)
return (pi);
}
+/*
+ * Lookup a phyint in the group that has the same hardware address as `pi', or
+ * NULL if there's none. If `online_only' is set, then only online phyints
+ * are considered when matching. Otherwise, phyints that had been offlined
+ * due to a duplicate hardware address will also be considered.
+ *
+ * `pi' itself is never returned. If `pi' belongs to the anonymous group,
+ * NULL is returned without searching. Two hardware addresses are treated
+ * as the same when both their lengths and their bytes are equal. If more
+ * than one phyint matches, the first match found while walking the group's
+ * pg_phyint list is returned.
+ */
+static struct phyint *
+phyint_lookup_hwaddr(struct phyint *pi, boolean_t online_only)
+{
+ struct phyint *pi2;
+
+ if (pi->pi_group == phyint_anongroup)
+ return (NULL);
+
+ for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (pi2 == pi)
+ continue;
+
+ /*
+ * NOTE: even when online_only is B_FALSE, we ignore phyints
+ * that are administratively offline (rather than offline
+ * because they're dups); when they're brought back online,
+ * they'll be flagged as dups if need be.
+ */
+ if (pi2->pi_state == PI_OFFLINE &&
+ (online_only || !pi2->pi_hwaddrdup))
+ continue;
+
+ if (pi2->pi_hwaddrlen == pi->pi_hwaddrlen &&
+ bcmp(pi2->pi_hwaddr, pi->pi_hwaddr, pi->pi_hwaddrlen) == 0)
+ return (pi2);
+ }
+ return (NULL);
+}
+
+/*
+ * Respond to DLPI notifications. Currently, this only processes physical
+ * address changes for the phyint passed via `arg' by onlining or offlining
+ * phyints in the group.
+ *
+ * `dnip' describes the notification; any note other than DL_NOTE_PHYS_ADDR
+ * is ignored. `dh' is not used. This is the callback that
+ * phyint_link_init() hands to dlpi_enabnotify().
+ */
+/* ARGSUSED */
+static void
+phyint_link_notify(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg)
+{
+ struct phyint *pi = arg;
+ struct phyint *oduppi = NULL, *duppi = NULL;
+
+ assert((dnip->dni_note & pi->pi_notes) != 0);
+
+ if (dnip->dni_note != DL_NOTE_PHYS_ADDR)
+ return;
+
+ assert(dnip->dni_physaddrlen <= DLPI_PHYSADDR_MAX);
+
+ /*
+ * If our hardware address hasn't changed, there's nothing to do.
+ * Otherwise, `oduppi' is looked up while pi_hwaddr still holds the
+ * old address and `duppi' is looked up after the new address has
+ * been recorded, so they name a phyint sharing our old and our new
+ * address, respectively (or NULL).
+ */
+ if (pi->pi_hwaddrlen == dnip->dni_physaddrlen &&
+ bcmp(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen) == 0)
+ return;
+
+ oduppi = phyint_lookup_hwaddr(pi, _B_FALSE);
+ pi->pi_hwaddrlen = dnip->dni_physaddrlen;
+ (void) memcpy(pi->pi_hwaddr, dnip->dni_physaddr, pi->pi_hwaddrlen);
+ duppi = phyint_lookup_hwaddr(pi, _B_FALSE);
+
+ if (oduppi != NULL || pi->pi_hwaddrdup) {
+ /*
+ * Our old hardware address was a duplicate. If we'd been
+ * offlined because of it, and our new hardware address is not
+ * a duplicate, then bring us online. Otherwise, `oduppi'
+ * must've been the one brought offline; bring it online.
+ */
+ if (pi->pi_hwaddrdup) {
+ if (duppi == NULL)
+ (void) phyint_undo_offline(pi);
+ } else {
+ assert(oduppi->pi_hwaddrdup);
+ (void) phyint_undo_offline(oduppi);
+ }
+ }
+
+ if (duppi != NULL && !pi->pi_hwaddrdup) {
+ /*
+ * Our new hardware address was a duplicate and we're not
+ * yet flagged as a duplicate; bring us offline. The `minred'
+ * argument of 0 means the offline is not refused for lack of
+ * other usable interfaces.
+ */
+ pi->pi_hwaddrdup = _B_TRUE;
+ (void) phyint_offline(pi, 0);
+ }
+}
+
+/*
+ * Initialize information about the underlying link for `pi', and set us
+ * up to be notified about future changes. Returns _B_TRUE on success.
+ *
+ * Opening the link, fetching its current hardware address, and binding to
+ * DLPI_ANY_SAP must all succeed. If any of them fails, the error is logged,
+ * the DLPI handle (if open) is closed, pi_dh is left NULL, and _B_FALSE is
+ * returned. Failing to enable either kind of notification is not treated
+ * as an error; it only leaves the corresponding bits clear in pi_notes.
+ */
+boolean_t
+phyint_link_init(struct phyint *pi)
+{
+ int retval;
+ uint_t notes;
+ const char *errmsg;
+ dlpi_notifyid_t id;
+
+ pi->pi_notes = 0;
+ retval = dlpi_open(pi->pi_name, &pi->pi_dh, 0);
+ if (retval != DLPI_SUCCESS) {
+ pi->pi_dh = NULL;
+ errmsg = "cannot open";
+ goto failed;
+ }
+
+ pi->pi_hwaddrlen = DLPI_PHYSADDR_MAX;
+ retval = dlpi_get_physaddr(pi->pi_dh, DL_CURR_PHYS_ADDR, pi->pi_hwaddr,
+ &pi->pi_hwaddrlen);
+ if (retval != DLPI_SUCCESS) {
+ errmsg = "cannot get hardware address";
+ goto failed;
+ }
+
+ retval = dlpi_bind(pi->pi_dh, DLPI_ANY_SAP, NULL);
+ if (retval != DLPI_SUCCESS) {
+ errmsg = "cannot bind to DLPI_ANY_SAP";
+ goto failed;
+ }
+
+ /*
+ * Check if the link supports DLPI link state notifications. For
+ * historical reasons, the actual changes are tracked through routing
+ * sockets, so we immediately disable the notification upon success.
+ * The link state bits are still recorded in pi_notes in that case.
+ */
+ notes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN;
+ retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id);
+ if (retval == DLPI_SUCCESS) {
+ (void) dlpi_disabnotify(pi->pi_dh, id, NULL);
+ pi->pi_notes |= notes;
+ }
+
+ /*
+ * Enable notification of hardware address changes to keep pi_hwaddr
+ * up-to-date and track if we need to offline/undo-offline phyints.
+ * DL_NOTE_PHYS_ADDR is recorded in pi_notes only if poll_add() on
+ * the DLPI file descriptor also succeeds.
+ */
+ notes = DL_NOTE_PHYS_ADDR;
+ retval = dlpi_enabnotify(pi->pi_dh, notes, phyint_link_notify, pi, &id);
+ if (retval == DLPI_SUCCESS && poll_add(dlpi_fd(pi->pi_dh)) == 0)
+ pi->pi_notes |= notes;
+
+ return (_B_TRUE);
+failed:
+ logerr("%s: %s: %s\n", pi->pi_name, errmsg, dlpi_strerror(retval));
+ if (pi->pi_dh != NULL) {
+ dlpi_close(pi->pi_dh);
+ pi->pi_dh = NULL;
+ }
+ return (_B_FALSE);
+}
+
+/*
+ * Close use of link on `pi'.
+ *
+ * If DL_NOTE_PHYS_ADDR is set in pi_notes, the DLPI file descriptor is
+ * first passed to poll_remove() and that bit is cleared. The DLPI handle
+ * is then closed and pi_dh is set to NULL.
+ */
+void
+phyint_link_close(struct phyint *pi)
+{
+ if (pi->pi_notes & DL_NOTE_PHYS_ADDR) {
+ (void) poll_remove(dlpi_fd(pi->pi_dh));
+ pi->pi_notes &= ~DL_NOTE_PHYS_ADDR;
+ }
+
+ /*
+ * NOTE: apart from DL_NOTE_PHYS_ADDR above, we don't clear pi_notes
+ * here so that iflinkstate() can still properly report the link
+ * state even when offline (which is possible since we use
+ * IFF_RUNNING to track link state).
+ */
+ dlpi_close(pi->pi_dh);
+ pi->pi_dh = NULL;
+}
+
/* Return the phyint instance with the given name and the given family */
struct phyint_instance *
phyint_inst_lookup(int af, char *name)
@@ -128,7 +296,7 @@ phyint_inst_lookup(int af, char *name)
return (PHYINT_INSTANCE(pi, af));
}
-static struct phyint_group *
+struct phyint_group *
phyint_group_lookup(const char *pg_name)
{
struct phyint_group *pg;
@@ -173,6 +341,9 @@ phyint_insert(struct phyint *pi, struct phyint_group *pg)
pi->pi_pgnext->pi_pgprev = pi;
pg->pg_phyint = pi;
+ /* Refresh the group state now that this phyint has been added */
+ phyint_group_refresh_state(pg);
+
pg->pg_sig++;
(void) phyint_group_member_event(pg, pi, IPMP_IF_ADD);
}
@@ -214,24 +385,24 @@ phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex,
}
/*
- * Record the phyint values. Also insert the phyint into the
- * phyint group by calling phyint_insert().
+ * Record the phyint values.
*/
(void) strlcpy(pi->pi_name, pi_name, sizeof (pi->pi_name));
pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME;
pi->pi_ifindex = ifindex;
- pi->pi_icmpid =
- htons(((getpid() & 0xFF) << 8) | (pi->pi_ifindex & 0xFF));
+ pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF));
+
/*
- * We optimistically start in the PI_RUNNING state. Later (in
- * process_link_state_changes()), we will readjust this to match the
+ * If the interface is offline, we set the state to PI_OFFLINE.
+ * Otherwise, we optimistically start in the PI_RUNNING state. Later
+ * (in process_link_state_changes()), we will adjust this to match the
* current state of the link. Further, if test addresses are
* subsequently assigned, we will transition to PI_NOTARGETS and then
- * either PI_RUNNING or PI_FAILED, depending on the result of the test
- * probes.
+ * to either PI_RUNNING or PI_FAILED depending on the probe results.
*/
- pi->pi_state = PI_RUNNING;
+ pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING;
pi->pi_flags = PHYINT_FLAGS(flags);
+
/*
* Initialise the link state. The link state is initialised to
* up, so that if the link is down when IPMP starts monitoring
@@ -241,19 +412,17 @@ phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex,
*/
INIT_LINK_STATE(pi);
+ if (!phyint_link_init(pi)) {
+ free(pi);
+ return (NULL);
+ }
+
/*
* Insert the phyint in the list of all phyints, and the
* list of phyint group members
*/
phyint_insert(pi, pg);
- /*
- * If we are joining a failed group, mark the interface as
- * failed.
- */
- if (GROUP_FAILED(pg))
- (void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
-
return (pi);
}
@@ -313,15 +482,14 @@ phyint_chstate(struct phyint *pi, enum pi_state state)
return;
pi->pi_state = state;
- pi->pi_group->pg_sig++;
- (void) phyint_state_event(pi->pi_group, pi);
+ phyint_changed(pi);
}
/*
- * Note that the type of phyint `pi' has changed.
+ * Note that `pi' has changed state.
*/
void
-phyint_newtype(struct phyint *pi)
+phyint_changed(struct phyint *pi)
{
pi->pi_group->pg_sig++;
(void) phyint_state_event(pi->pi_group, pi);
@@ -331,7 +499,7 @@ phyint_newtype(struct phyint *pi)
* Insert the phyint group in the linked list of all phyint groups
* at the head of the list
*/
-static void
+void
phyint_group_insert(struct phyint_group *pg)
{
pg->pg_next = phyint_groups;
@@ -347,7 +515,7 @@ phyint_group_insert(struct phyint_group *pg)
/*
* Create a new phyint group called 'name'.
*/
-static struct phyint_group *
+struct phyint_group *
phyint_group_create(const char *name)
{
struct phyint_group *pg;
@@ -363,9 +531,16 @@ phyint_group_create(const char *name)
(void) strlcpy(pg->pg_name, name, sizeof (pg->pg_name));
pg->pg_sig = gensig();
-
pg->pg_fdt = user_failure_detection_time;
pg->pg_probeint = user_probe_interval;
+ pg->pg_in_use = _B_TRUE;
+
+ /*
+ * Normal groups always start in the PG_FAILED state since they
+ * have no active interfaces. In contrast, anonymous groups are
+ * heterogeneous and thus always PG_OK.
+ */
+ pg->pg_state = (name[0] == '\0' ? PG_OK : PG_FAILED);
return (pg);
}
@@ -378,10 +553,20 @@ phyint_group_chstate(struct phyint_group *pg, enum pg_state state)
{
assert(pg != phyint_anongroup);
+ /*
+ * To simplify things, some callers always set a given state
+ * regardless of the previous state of the group (e.g., setting
+ * PG_DEGRADED when it's already set). We shouldn't bother
+ * generating an event or consuming a signature for these, since
+ * the actual state of the group is unchanged.
+ */
+ if (pg->pg_state == state)
+ return;
+
+ pg->pg_state = state;
+
switch (state) {
case PG_FAILED:
- pg->pg_groupfailed = 1;
-
/*
* We can never know with certainty that a group has
* failed. It is possible that all known targets have
@@ -392,16 +577,15 @@ phyint_group_chstate(struct phyint_group *pg, enum pg_state state)
* hosts, we have to discover it by multicast. So flush
* all the host targets. The next probe will send out a
* multicast echo request. If this is a group failure, we
- * will still not see any response, otherwise we will
- * clear the pg_groupfailed flag after we get
- * NUM_PROBE_REPAIRS consecutive unicast replies on any
- * phyint.
+ * will still not see any response, otherwise the group
+ * will be repaired after we get NUM_PROBE_REPAIRS
+ * consecutive unicast replies on any phyint.
*/
target_flush_hosts(pg);
break;
- case PG_RUNNING:
- pg->pg_groupfailed = 0;
+ case PG_OK:
+ case PG_DEGRADED:
break;
default:
@@ -432,7 +616,6 @@ phyint_inst_init_from_k(int af, char *pi_name)
struct lifreq lifr;
struct phyint *pi;
struct phyint_instance *pii;
- boolean_t pg_created;
boolean_t pi_created;
struct phyint_group *pg;
@@ -441,7 +624,6 @@ retry:
pi = NULL;
pg = NULL;
pi_created = _B_FALSE;
- pg_created = _B_FALSE;
if (debug & D_PHYINT) {
logdebug("phyint_inst_init_from_k(%s %s)\n",
@@ -454,11 +636,11 @@ retry:
ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
/*
- * Get the interface flags. Ignore loopback and multipoint
- * interfaces.
+ * Get the interface flags. Ignore virtual interfaces, IPMP
+ * meta-interfaces, point-to-point interfaces, and interfaces
+ * that can't support multicast.
*/
- (void) strncpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
+ (void) strlcpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO) {
logperror("phyint_inst_init_from_k:"
@@ -467,7 +649,8 @@ retry:
return (NULL);
}
flags = lifr.lifr_flags;
- if (!(flags & IFF_MULTICAST) || (flags & IFF_LOOPBACK))
+ if (!(flags & IFF_MULTICAST) ||
+ (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)))
return (NULL);
/*
@@ -493,8 +676,7 @@ retry:
}
return (NULL);
}
- (void) strncpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));
- pg_name[sizeof (pg_name) - 1] = '\0';
+ (void) strlcpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));
/*
* If the phyint is not part of any group, pg_name is the
@@ -503,12 +685,13 @@ retry:
*/
if (pg_name[0] == '\0' && !track_all_phyints) {
/*
- * If the IFF_FAILED or IFF_OFFLINE flags are set, reset
- * them. These flags shouldn't be set if IPMP isn't
- * tracking the interface.
+ * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are
+ * set, reset them. These flags shouldn't be set if in.mpathd
+ * isn't tracking the interface.
*/
- if ((flags & (IFF_FAILED | IFF_OFFLINE)) != 0) {
- lifr.lifr_flags = flags & ~(IFF_FAILED | IFF_OFFLINE);
+ if ((flags & (IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE))) {
+ lifr.lifr_flags = flags &
+ ~(IFF_FAILED | IFF_INACTIVE | IFF_OFFLINE);
if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO) {
logperror("phyint_inst_init_from_k:"
@@ -520,21 +703,20 @@ retry:
}
/*
- * We need to create a new phyint instance. A phyint instance
- * belongs to a phyint, and the phyint belongs to a phyint group.
- * So we first lookup the 'parents' and if they don't exist then
- * we create them.
+ * We need to create a new phyint instance. We may also need to
+ * create the group if e.g. the SIOCGLIFCONF loop in initifs() found
+ * an underlying interface before it found its IPMP meta-interface.
+ * Note that we keep any created groups even if
+ * phyint_inst_init_from_k() fails since a group's existence is not
+ * dependent on the ability of in.mpathd to track the group's interfaces.
*/
- pg = phyint_group_lookup(pg_name);
- if (pg == NULL) {
- pg = phyint_group_create(pg_name);
- if (pg == NULL) {
- logerr("phyint_inst_init_from_k:"
- " unable to create group %s\n", pg_name);
+ if ((pg = phyint_group_lookup(pg_name)) == NULL) {
+ if ((pg = phyint_group_create(pg_name)) == NULL) {
+ logerr("phyint_inst_init_from_k: cannot create group "
+ "%s\n", pg_name);
return (NULL);
}
phyint_group_insert(pg);
- pg_created = _B_TRUE;
}
/*
@@ -546,8 +728,6 @@ retry:
if (pi == NULL) {
logerr("phyint_inst_init_from_k:"
" unable to create phyint %s\n", pi_name);
- if (pg_created)
- phyint_group_delete(pg);
return (NULL);
}
pi_created = _B_TRUE;
@@ -564,8 +744,6 @@ retry:
* while we are yet to update our tables. Do it now.
*/
if (pi->pi_ifindex != ifindex) {
- if (pg_created)
- phyint_group_delete(pg);
phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af)));
goto retry;
}
@@ -577,9 +755,6 @@ retry:
* changed, while we are yet to update our tables. Do it now.
*/
if (strcmp(pi->pi_group->pg_name, pg_name) != 0) {
- if (pg_created)
- phyint_group_delete(pg);
- restore_phyint(pi);
phyint_inst_delete(PHYINT_INSTANCE(pi,
AF_OTHER(af)));
goto retry;
@@ -594,16 +769,25 @@ retry:
if (pii == NULL) {
logerr("phyint_inst_init_from_k: unable to create"
"phyint inst %s\n", pi->pi_name);
- if (pi_created) {
- /*
- * Deleting the phyint will delete the phyint group
- * if this is the last phyint in the group.
- */
+ if (pi_created)
phyint_delete(pi);
- }
+
return (NULL);
}
+ if (pi_created) {
+ /*
+ * If this phyint does not have a unique hardware address in its
+ * group, offline it. (The change_pif_flags() implementation
+ * requires that we defer this until after the phyint_instance
+ * is created.)
+ */
+ if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) {
+ pi->pi_hwaddrdup = _B_TRUE;
+ (void) phyint_offline(pi, 0);
+ }
+ }
+
return (pii);
}
@@ -677,16 +861,16 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
{
icmp6_filter_t filter;
int hopcount = 1;
- int int_op;
+ int off = 0;
+ int on = 1;
struct sockaddr_in6 testaddr;
/*
* Open a raw socket with ICMPv6 protocol.
*
- * Use IPV6_DONTFAILOVER_IF to make sure that probes go out
- * on the specified phyint only, and are not subject to load
- * balancing. Bind to the src address chosen will ensure that
- * the responses are received only on the specified phyint.
+ * Use IPV6_BOUND_IF to make sure that probes are sent and received on
+ * the specified phyint only. Bind to the test address to ensure that
+ * the responses are sent to the specified phyint.
*
* Set the hopcount to 1 so that probe packets are not routed.
* Disable multicast loopback. Set the receive filter to
@@ -696,7 +880,7 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
if (pii->pii_probe_sock < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: socket");
return (_B_FALSE);
-}
+ }
bzero(&testaddr, sizeof (testaddr));
testaddr.sin6_family = AF_INET6;
@@ -709,14 +893,17 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- /*
- * IPV6_DONTFAILOVER_IF option takes precedence over setting
- * IP_MULTICAST_IF. So we don't set IPV6_MULTICAST_IF again.
- */
- if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_DONTFAILOVER_IF,
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_IF,
(char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
- " IPV6_DONTFAILOVER_IF");
+ " IPV6_MULTICAST_IF");
+ return (_B_FALSE);
+ }
+
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_BOUND_IF,
+ &pii->pii_ifindex, sizeof (uint_t)) < 0) {
+ logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
+ " IPV6_BOUND_IF");
return (_B_FALSE);
}
@@ -734,9 +921,8 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- int_op = 0; /* used to turn off option */
if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
- (char *)&int_op, sizeof (int_op)) < 0) {
+ (char *)&off, sizeof (off)) < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
" IPV6_MULTICAST_LOOP");
return (_B_FALSE);
@@ -755,15 +941,22 @@ phyint_inst_v6_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- /* Enable receipt of ancillary data */
- int_op = 1;
+ /* Enable receipt of hoplimit */
if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
- (char *)&int_op, sizeof (int_op)) < 0) {
+ &on, sizeof (on)) < 0) {
logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
" IPV6_RECVHOPLIMIT");
return (_B_FALSE);
}
+ /* Enable receipt of timestamp */
+ if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP,
+ &on, sizeof (on)) < 0) {
+ logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
+ " SO_TIMESTAMP");
+ return (_B_FALSE);
+ }
+
return (_B_TRUE);
}
@@ -775,20 +968,20 @@ static boolean_t
phyint_inst_v4_sockinit(struct phyint_instance *pii)
{
struct sockaddr_in testaddr;
- char char_op;
+ char char_off = 0;
int ttl = 1;
char char_ttl = 1;
+ int on = 1;
/*
* Open a raw socket with ICMPv4 protocol.
*
- * Use IP_DONTFAILOVER_IF to make sure that probes go out
- * on the specified phyint only, and are not subject to load
- * balancing. Bind to the src address chosen will ensure that
- * the responses are received only on the specified phyint.
+ * Use IP_BOUND_IF to make sure that probes are sent and received on
+ * the specified phyint only. Bind to the test address to ensure that
+ * the responses are sent to the specified phyint.
*
* Set the ttl to 1 so that probe packets are not routed.
- * Disable multicast loopback.
+ * Disable multicast loopback. Enable receipt of timestamp.
*/
pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP);
if (pii->pii_probe_sock < 0) {
@@ -808,14 +1001,17 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- /*
- * IP_DONTFAILOVER_IF option takes precedence over setting
- * IP_MULTICAST_IF. So we don't set IP_MULTICAST_IF again.
- */
- if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_DONTFAILOVER_IF,
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_BOUND_IF,
+ &pii->pii_ifindex, sizeof (uint_t)) < 0) {
+ logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
+ " IP_BOUND_IF");
+ return (_B_FALSE);
+ }
+
+ if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_IF,
(char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) {
logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
- " IP_DONTFAILOVER");
+ " IP_MULTICAST_IF");
return (_B_FALSE);
}
@@ -826,9 +1022,8 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
- char_op = 0; /* used to turn off option */
if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP,
- (char *)&char_op, sizeof (char_op)) == -1) {
+ (char *)&char_off, sizeof (char_off)) == -1) {
logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
" IP_MULTICAST_LOOP");
return (_B_FALSE);
@@ -841,6 +1036,13 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
return (_B_FALSE);
}
+ if (setsockopt(pii->pii_probe_sock, SOL_SOCKET, SO_TIMESTAMP, &on,
+ sizeof (on)) < 0) {
+ logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
+ " SO_TIMESTAMP");
+ return (_B_FALSE);
+ }
+
return (_B_TRUE);
}
@@ -848,7 +1050,7 @@ phyint_inst_v4_sockinit(struct phyint_instance *pii)
* Remove the phyint group from the list of 'all phyint groups'
* and free it.
*/
-static void
+void
phyint_group_delete(struct phyint_group *pg)
{
/*
@@ -881,10 +1083,69 @@ phyint_group_delete(struct phyint_group *pg)
phyint_grouplistsig++;
(void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE);
+ addrlist_free(&pg->pg_addrs);
free(pg);
}
/*
+ * Refresh the state of `pg' based on its current members.
+ *
+ * Each phyint on the group's pg_phyint list is tested with
+ * phyint_is_usable(). The group is set (via phyint_group_chstate()) to
+ * PG_FAILED if none are usable, which includes a group with no phyints;
+ * to PG_OK if all are usable; and to PG_DEGRADED otherwise. A message is
+ * logged when the group becomes entirely unusable and again when it next
+ * has a usable interface.
+ */
+void
+phyint_group_refresh_state(struct phyint_group *pg)
+{
+ enum pg_state state;
+ enum pg_state origstate = pg->pg_state;
+ struct phyint *pi, *usablepi;
+ uint_t nif = 0, nusable = 0;
+
+ /*
+ * Anonymous groups never change state.
+ */
+ if (pg == phyint_anongroup)
+ return;
+
+ for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
+ nif++;
+ if (phyint_is_usable(pi)) {
+ nusable++;
+ usablepi = pi;
+ }
+ }
+
+ if (nusable == 0)
+ state = PG_FAILED;
+ else if (nif == nusable)
+ state = PG_OK;
+ else
+ state = PG_DEGRADED;
+
+ phyint_group_chstate(pg, state);
+
+ /*
+ * If we're shutting down, skip logging messages since otherwise our
+ * shutdown housecleaning will make us report that groups are unusable.
+ */
+ if (cleanup_started)
+ return;
+
+ /*
+ * NOTE: We use pg_failmsg_printed rather than origstate since
+ * otherwise at startup we'll log a "now usable" message when the
+ * first usable phyint is added to an empty group.
+ *
+ * `usablepi' is only read when state != PG_FAILED, i.e. when the
+ * loop above found at least one usable phyint and assigned it.
+ */
+ if (state != PG_FAILED && pg->pg_failmsg_printed) {
+ assert(origstate == PG_FAILED);
+ logerr("At least 1 IP interface (%s) in group %s is now "
+ "usable\n", usablepi->pi_name, pg->pg_name);
+ pg->pg_failmsg_printed = _B_FALSE;
+ } else if (origstate != PG_FAILED && state == PG_FAILED) {
+ logerr("All IP interfaces in group %s are now unusable\n",
+ pg->pg_name);
+ pg->pg_failmsg_printed = _B_TRUE;
+ }
+}
+
+/*
* Extract information from the kernel about the desired phyint.
* Look only for properties of the phyint and not properties of logints.
* Take appropriate action on the changes.
@@ -998,28 +1259,16 @@ phyint_inst_update_from_k(struct phyint_instance *pii)
if (pi->pi_v6 != NULL)
pi->pi_v6->pii_flags = pi->pi_flags;
+ /*
+ * Make sure the IFF_FAILED flag is set if and only if we think
+ * the interface should be failed.
+ */
if (pi->pi_flags & IFF_FAILED) {
- /*
- * If we are in the running and full state, we have
- * completed failbacks successfully and we would have
- * expected IFF_FAILED to have been clear. That it is
- * set means there was a race condition. Some other
- * process turned on the IFF_FAILED flag. Since the
- * flag setting is not atomic, i.e. a get ioctl followed
- * by a set ioctl, and since there is no way to set an
- * individual flag bit, this could have occurred.
- */
- if (pi->pi_state == PI_RUNNING && pi->pi_full)
- (void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
+ if (pi->pi_state == PI_RUNNING)
+ (void) change_pif_flags(pi, 0, IFF_FAILED);
} else {
- /*
- * If we are in the failed state, there was a race.
- * we have completed failover successfully because our
- * state is failed and empty. Some other process turned
- * off the IFF_FAILED flag. Same comment as above
- */
- if (pi->pi_state == PI_FAILED && pi->pi_empty)
- (void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
+ if (pi->pi_state == PI_FAILED)
+ (void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE);
}
/* No change in phyint status */
@@ -1028,12 +1277,12 @@ phyint_inst_update_from_k(struct phyint_instance *pii)
/*
* Delete the phyint. Remove it from the list of all phyints, and the
- * list of phyint group members. If the group becomes empty, delete the
- * group also.
+ * list of phyint group members.
*/
static void
phyint_delete(struct phyint *pi)
{
+ struct phyint *pi2;
struct phyint_group *pg = pi->pi_group;
if (debug & D_PHYINT)
@@ -1065,6 +1314,9 @@ phyint_delete(struct phyint *pi)
pi->pi_pgnext = NULL;
pi->pi_pgprev = NULL;
+ /* Refresh the group state now that this phyint has been removed */
+ phyint_group_refresh_state(pg);
+
/* Remove the phyint from the global list of phyints */
if (pi->pi_prev == NULL) {
/* Phyint is the 1st in the list */
@@ -1077,11 +1329,153 @@ phyint_delete(struct phyint *pi)
pi->pi_next = NULL;
pi->pi_prev = NULL;
+ /*
+ * See if another phyint in the group had been offlined because
+ * it was a dup of `pi' -- and if so, online it.
+ */
+ if (!pi->pi_hwaddrdup &&
+ (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) {
+ assert(pi2->pi_hwaddrdup);
+ (void) phyint_undo_offline(pi2);
+ }
+ phyint_link_close(pi);
free(pi);
+}
+
+/*
+ * Offline phyint `pi' if at least `minred' usable interfaces remain in the
+ * group. Returns an IPMP error code.
+ *
+ * Returns IPMP_EMINRED if fewer than `minred' other interfaces would
+ * remain, IPMP_FAILURE if IFF_OFFLINE could not be set through
+ * change_pif_flags(), and IPMP_SUCCESS otherwise. For a phyint in the
+ * anonymous group no other interfaces are counted, so only a `minred' of
+ * 0 can succeed there.
+ */
+int
+phyint_offline(struct phyint *pi, uint_t minred)
+{
+ unsigned int nusable = 0;
+ struct phyint *pi2;
+ struct phyint_group *pg = pi->pi_group;
+
+ /*
+ * Verify that enough usable interfaces in the group would remain.
+ * As a special case, if the group has failed, allow any non-offline
+ * phyints to be offlined.
+ */
+ if (pg != phyint_anongroup) {
+ for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+ if (pi2 == pi)
+ continue;
+ if (phyint_is_usable(pi2) ||
+ (GROUP_FAILED(pg) && pi2->pi_state != PI_OFFLINE))
+ nusable++;
+ }
+ }
+ if (nusable < minred)
+ return (IPMP_EMINRED);
+
+ if (!change_pif_flags(pi, IFF_OFFLINE, 0))
+ return (IPMP_FAILURE);
+
+ /*
+ * The interface is now offline, so stop probing it. Note that
+ * if_mpadm(1M) will down the test addresses, after receiving a
+ * success reply from us. The routing socket message will then make us
+ * close the socket used for sending probes. But it is more logical
+ * that an offlined interface must not be probed, even if it has test
+ * addresses.
+ *
+ * NOTE: stop_probing() also sets PI_OFFLINE.
+ */
+ stop_probing(pi);
+
+ /*
+ * If we're offlining the phyint because it has a duplicate hardware
+ * address, print a warning -- and leave the link open so that we can
+ * be notified of hardware address changes that make it usable again.
+ * Otherwise, close the link so that we won't prevent a detach.
+ */
+ if (pi->pi_hwaddrdup) {
+ logerr("IP interface %s has a hardware address which is not "
+ "unique in group %s; offlining\n", pi->pi_name,
+ pg->pg_name);
+ } else {
+ phyint_link_close(pi);
+ }
+
+ /*
+ * If this phyint was preventing another phyint with a duplicate
+ * hardware address from being online, bring that one online now.
+ */
+ if (!pi->pi_hwaddrdup &&
+ (pi2 = phyint_lookup_hwaddr(pi, _B_FALSE)) != NULL) {
+ assert(pi2->pi_hwaddrdup);
+ (void) phyint_undo_offline(pi2);
+ }
- /* Delete the phyint_group if the last phyint has been deleted */
- if (pg->pg_phyint == NULL)
- phyint_group_delete(pg);
+ /*
+ * If this interface was active, try to activate another INACTIVE
+ * interface in the group.
+ */
+ if (!(pi->pi_flags & IFF_INACTIVE))
+ phyint_activate_another(pi);
+
+ return (IPMP_SUCCESS);
+}
+
+/*
+ * Undo a previous offline of `pi'. Returns an IPMP error code.
+ *
+ * Returns IPMP_FAILURE with errno set to EINVAL if `pi' is not in the
+ * PI_OFFLINE state, or with errno set to EIO if its link information has
+ * to be reinitialized and that fails. Returns IPMP_EHWADDRDUP, with `pi'
+ * flagged as a duplicate and its flags untouched, if another online phyint
+ * in the group has the same hardware address. Returns IPMP_FAILURE if
+ * IFF_OFFLINE cannot be cleared, and IPMP_SUCCESS otherwise.
+ */
+int
+phyint_undo_offline(struct phyint *pi)
+{
+ if (pi->pi_state != PI_OFFLINE) {
+ errno = EINVAL;
+ return (IPMP_FAILURE);
+ }
+
+ /*
+ * If necessary, reinitialize our link information and verify that its
+ * hardware address is still unique across the group.
+ */
+ if (pi->pi_dh == NULL && !phyint_link_init(pi)) {
+ errno = EIO;
+ return (IPMP_FAILURE);
+ }
+
+ if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) {
+ pi->pi_hwaddrdup = _B_TRUE;
+ return (IPMP_EHWADDRDUP);
+ }
+
+ if (pi->pi_hwaddrdup) {
+ logerr("IP interface %s now has a unique hardware address in "
+ "group %s; onlining\n", pi->pi_name, pi->pi_group->pg_name);
+ pi->pi_hwaddrdup = _B_FALSE;
+ }
+
+ if (!change_pif_flags(pi, 0, IFF_OFFLINE))
+ return (IPMP_FAILURE);
+
+ /*
+ * While the interface was offline, it may have failed (e.g. the link
+ * may have gone down). phyint_inst_check_for_failure() will have
+ * already set pi_flags with IFF_FAILED, so we can use that to decide
+ * whether the phyint should transition to running. Note that after
+ * we transition to running, we will start sending probes again (if
+ * test addresses are configured), which may also reveal that the
+ * interface is in fact failed.
+ */
+ if (pi->pi_flags & IFF_FAILED) {
+ phyint_chstate(pi, PI_FAILED);
+ } else {
+ /* calls phyint_chstate() */
+ phyint_transition_to_running(pi);
+ }
+
+ /*
+ * Give the requestor time to configure test addresses before
+ * complaining that they're missing.
+ */
+ pi->pi_taddrthresh = getcurrentsec() + TESTADDR_CONF_TIME;
+
+ return (IPMP_SUCCESS);
}
/*
@@ -1166,11 +1560,10 @@ phyint_inst_print(struct phyint_instance *pii)
}
logdebug("\nPhyint instance: %s %s index %u state %x flags %llx "
- "sock %x in_use %d empty %x full %x\n",
+ "sock %x in_use %d\n",
AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex,
pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock,
- pii->pii_in_use, pii->pii_phyint->pi_empty,
- pii->pii_phyint->pi_full);
+ pii->pii_in_use);
for (li = pii->pii_logint; li != NULL; li = li->li_next)
logint_print(li);
@@ -1211,9 +1604,11 @@ phyint_inst_print(struct phyint_instance *pii)
} else {
logdebug("#%d target NULL ", i);
}
- logdebug("time_sent %u status %d time_ack/lost %u\n",
- pii->pii_probes[i].pr_time_sent,
+ logdebug("time_start %lld status %d "
+ "time_ackproc %lld time_lost %u",
+ pii->pii_probes[i].pr_hrtime_start,
pii->pii_probes[i].pr_status,
+ pii->pii_probes[i].pr_hrtime_ackproc,
pii->pii_probes[i].pr_time_lost);
i = PROBE_INDEX_PREV(i);
} while (i != most_recent);
@@ -1293,7 +1688,6 @@ logint_init_from_k(struct phyint_instance *pii, char *li_name)
struct logint *li;
struct lifreq lifr;
struct in6_addr test_subnet;
- struct in6_addr test_subnet_mask;
struct in6_addr testaddr;
int test_subnet_len;
struct sockaddr_in6 *sin6;
@@ -1373,55 +1767,21 @@ logint_init_from_k(struct phyint_instance *pii, char *li_name)
testaddr = sin6->sin6_addr;
}
- if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
- ptp = _B_TRUE;
- if (ioctl(ifsock, SIOCGLIFDSTADDR, (char *)&lifr) < 0) {
- if (errno != ENXIO) {
- logperror_li(li, "logint_init_from_k:"
- " (get dstaddr)");
- }
- goto error;
- }
- if (pii->pii_af == AF_INET) {
- sin = (struct sockaddr_in *)&lifr.lifr_addr;
- IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &tgaddr);
- } else {
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
- tgaddr = sin6->sin6_addr;
- }
- } else {
- if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) {
- /* Interface may have vanished */
- if (errno != ENXIO) {
- logperror_li(li, "logint_init_from_k:"
- " (get subnet)");
- }
- goto error;
- }
- if (lifr.lifr_subnet.ss_family == AF_INET6) {
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet;
- test_subnet = sin6->sin6_addr;
- test_subnet_len = lifr.lifr_addrlen;
- } else {
- sin = (struct sockaddr_in *)&lifr.lifr_subnet;
- IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet);
- test_subnet_len = lifr.lifr_addrlen +
- (IPV6_ABITS - IP_ABITS);
- }
- (void) ip_index_to_mask_v6(test_subnet_len, &test_subnet_mask);
- }
-
- /*
- * Also record the OINDEX for completeness. This information is
- * not used.
- */
- if (ioctl(ifsock, SIOCGLIFOINDEX, (char *)&lifr) < 0) {
- if (errno != ENXIO) {
- logperror_li(li, "logint_init_from_k:"
- " (get lifoindex)");
- }
+ if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) {
+ /* Interface may have vanished */
+ if (errno != ENXIO)
+ logperror_li(li, "logint_init_from_k: (get subnet)");
goto error;
}
+ if (lifr.lifr_subnet.ss_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet;
+ test_subnet = sin6->sin6_addr;
+ test_subnet_len = lifr.lifr_addrlen;
+ } else {
+ sin = (struct sockaddr_in *)&lifr.lifr_subnet;
+ IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet);
+ test_subnet_len = lifr.lifr_addrlen + (IPV6_ABITS - IP_ABITS);
+ }
/*
* If this is the logint corresponding to the test address used for
@@ -1454,7 +1814,6 @@ logint_init_from_k(struct phyint_instance *pii, char *li_name)
/* Update the logint with the values obtained from the kernel. */
li->li_addr = testaddr;
li->li_in_use = 1;
- li->li_oifindex = lifr.lifr_index;
if (ptp) {
li->li_dstaddr = tgaddr;
li->li_subnet_len = (pii->pii_af == AF_INET) ?
@@ -1530,15 +1889,12 @@ static void
logint_print(struct logint *li)
{
char abuf[INET6_ADDRSTRLEN];
- int af;
-
- af = li->li_phyint_inst->pii_af;
+ int af = li->li_phyint_inst->pii_af;
logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name,
pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len);
- logdebug("\tFlags: %llx in_use %d oifindex %d\n",
- li->li_flags, li->li_in_use, li->li_oifindex);
+ logdebug("\tFlags: %llx in_use %d\n", li->li_flags, li->li_in_use);
}
char *
@@ -1555,6 +1911,33 @@ pr_addr(int af, struct in6_addr addr, char *abuf, int len)
return (abuf);
}
+/*
+ * Convert the [`af',`addr'] pair into a sockaddr of the matching family,
+ * stored in `*ssp'.  in.mpathd keeps every address internally as an
+ * in6_addr (IPv4 ones in v4-mapped form), and this is how that detail is
+ * kept from leaking out.  `af' must be AF_INET or AF_INET6.  Only the leading
+ * sockaddr_in or sockaddr_in6 portion of `*ssp' is zeroed and filled in.
+ */
+void
+addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp)
+{
+	assert(af == AF_INET || af == AF_INET6);
+
+	if (af == AF_INET) {
+		struct sockaddr_in *v4 = (struct sockaddr_in *)ssp;
+
+		(void) memset(v4, 0, sizeof (*v4));
+		v4->sin_family = AF_INET;
+		IN6_V4MAPPED_TO_INADDR(addr, &v4->sin_addr);
+	} else if (af == AF_INET6) {
+		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ssp;
+
+		(void) memset(v6, 0, sizeof (*v6));
+		v6->sin6_family = AF_INET6;
+		v6->sin6_addr = *addr;
+	}
+}
+
/* Lookup target on its address */
struct target *
target_lookup(struct phyint_instance *pii, struct in6_addr addr)
@@ -1686,7 +2069,7 @@ target_select_best(struct phyint_instance *pii)
if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
slow_recovered = tg;
/*
- * Promote the slow_recoverd to unused
+ * Promote the slow_recovered to unused
*/
tg->tg_status = TG_UNUSED;
} else {
@@ -1698,7 +2081,7 @@ target_select_best(struct phyint_instance *pii)
if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
dead_recovered = tg;
/*
- * Promote the dead_recoverd to slow
+ * Promote the dead_recovered to slow
*/
tg->tg_status = TG_SLOW;
tg->tg_latime = now;
@@ -1798,11 +2181,9 @@ target_create(struct phyint_instance *pii, struct in6_addr addr,
/*
* If there are multiple subnets associated with an interface, then
- * add the target to this phyint instance, only if it belongs to the
- * same subnet as the test address. The reason is that interface
- * routes derived from non-test-addresses i.e. non-IFF_NOFAILOVER
- * addresses, will disappear after failover, and the targets will not
- * be reachable from this interface.
+ * add the target to this phyint instance only if it belongs to the
+ * same subnet as the test address. This assures us that we will
+ * be able to reach this target through our routing table.
*/
if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len))
return;
@@ -1906,11 +2287,12 @@ target_add(struct phyint_instance *pii, struct in6_addr addr,
/*
* If the target does not exist, create it; target_create() will set
- * tg_in_use to true. If it exists already, and it is a router
- * target, set tg_in_use to to true, so that init_router_targets()
- * won't delete it
+ * tg_in_use to true. Even if it exists already, if it's a router
+ * target and we'd previously learned of it through multicast, then we
+ * need to recreate it as a router target. Otherwise, just set
+ * tg_in_use to true so that init_router_targets() won't delete it.
*/
- if (tg == NULL)
+ if (tg == NULL || (is_router && !pii->pii_targets_are_routers))
target_create(pii, addr, is_router);
else if (is_router)
tg->tg_in_use = 1;
@@ -2034,16 +2416,17 @@ target_delete(struct target *tg)
* relevant any longer.
*/
assert(pii->pii_targets == NULL);
+ pii->pii_targets_are_routers = _B_FALSE;
clear_pii_probe_stats(pii);
pii_other = phyint_inst_other(pii);
/*
- * If there are no targets on both instances and the interface is
- * online, go back to PI_NOTARGETS state, since we cannot probe this
- * phyint any more. For more details, please see phyint state
- * diagram in mpd_probe.c.
+ * If there are no targets on both instances and the interface would
+ * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state,
+ * since we cannot probe this phyint any more. For more details,
+ * please see phyint state diagram in mpd_probe.c.
*/
- if (!PROBE_CAPABLE(pii_other) &&
+ if (!PROBE_CAPABLE(pii_other) && LINK_UP(pii->pii_phyint) &&
pii->pii_phyint->pi_state != PI_OFFLINE)
phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
}
@@ -2101,9 +2484,11 @@ reset_pii_probes(struct phyint_instance *pii, struct target *tg)
for (i = 0; i < PROBE_STATS_COUNT; i++) {
if (pii->pii_probes[i].pr_target == tg) {
+ if (pii->pii_probes[i].pr_status == PR_UNACKED) {
+ probe_chstate(&pii->pii_probes[i], pii,
+ PR_LOST);
+ }
pii->pii_probes[i].pr_target = NULL;
- if (pii->pii_probes[i].pr_status == PR_UNACKED)
- pii->pii_probes[i].pr_status = PR_LOST;
}
}
@@ -2132,7 +2517,7 @@ target_print(struct target *tg)
af = tg->tg_phyint_inst->pii_af;
logdebug("Target on %s %s addr %s\n"
- "status %d rtt_sa %d rtt_sd %d crtt %d tg_in_use %d\n",
+ "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n",
AF_STR(af), tg->tg_phyint_inst->pii_name,
pr_addr(af, tg->tg_address, abuf, sizeof (abuf)),
tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd,
@@ -2158,35 +2543,16 @@ phyint_inst_print_all(void)
}
/*
- * Convert length for a mask to the mask.
- */
-static void
-ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask)
-{
- int j;
-
- assert(masklen <= IPV6_ABITS);
- bzero((char *)bitmask, sizeof (*bitmask));
-
- /* Make the 'masklen' leftmost bits one */
- for (j = 0; masklen > 8; masklen -= 8, j++)
- bitmask->s6_addr[j] = 0xff;
-
- bitmask->s6_addr[j] = 0xff << (8 - masklen);
-
-}
-
-/*
* Compare two prefixes that have the same prefix length.
* Fails if the prefix length is unreasonable.
*/
-static boolean_t
-prefix_equal(struct in6_addr p1, struct in6_addr p2, int prefix_len)
+boolean_t
+prefix_equal(struct in6_addr p1, struct in6_addr p2, uint_t prefix_len)
{
uchar_t mask;
int j;
- if (prefix_len < 0 || prefix_len > IPV6_ABITS)
+ if (prefix_len > IPV6_ABITS)
return (_B_FALSE);
for (j = 0; prefix_len > 8; prefix_len -= 8, j++)
@@ -2202,35 +2568,25 @@ prefix_equal(struct in6_addr p1, struct in6_addr p2, int prefix_len)
}
/*
- * Get the number of UP logints (excluding IFF_NOFAILOVERs), on both
- * IPv4 and IPv6 put together. The phyint with the least such number
- * will be used as the failover destination, if no standby interface is
- * available
+ * Get the number of UP logints on phyint `pi'.
*/
-int
+static int
logint_upcount(struct phyint *pi)
{
struct logint *li;
- struct phyint_instance *pii;
int count = 0;
- pii = pi->pi_v4;
- if (pii != NULL) {
- for (li = pii->pii_logint; li != NULL; li = li->li_next) {
- if ((li->li_flags &
- (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
+ if (pi->pi_v4 != NULL) {
+ for (li = pi->pi_v4->pii_logint; li != NULL; li = li->li_next) {
+ if (li->li_flags & IFF_UP)
count++;
- }
}
}
- pii = pi->pi_v6;
- if (pii != NULL) {
- for (li = pii->pii_logint; li != NULL; li = li->li_next) {
- if ((li->li_flags &
- (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
+ if (pi->pi_v6 != NULL) {
+ for (li = pi->pi_v6->pii_logint; li != NULL; li = li->li_next) {
+ if (li->li_flags & IFF_UP)
count++;
- }
}
}
@@ -2250,6 +2606,28 @@ phyint_inst_other(struct phyint_instance *pii)
}
/*
+ * Report whether phyint `pi' is functioning: either it is PI_RUNNING, or
+ * it is PI_NOTARGETS and IFF_FAILED is not set in its flags.
+ */
+static boolean_t
+phyint_is_functioning(struct phyint *pi)
+{
+	if (pi->pi_state == PI_RUNNING)
+		return (_B_TRUE);
+
+	/* Any other state except PI_NOTARGETS is never functioning. */
+	if (pi->pi_state != PI_NOTARGETS)
+		return (_B_FALSE);
+
+	return ((pi->pi_flags & IFF_FAILED) ? _B_FALSE : _B_TRUE);
+}
+
+/*
+ * Report whether phyint `pi' is usable: it must have at least one UP
+ * logint and must also be functioning.
+ */
+static boolean_t
+phyint_is_usable(struct phyint *pi)
+{
+	if (logint_upcount(pi) != 0)
+		return (phyint_is_functioning(pi));
+
+	return (_B_FALSE);
+}
+
+/*
* Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'.
* Before sending the event, it prepends the current version of the IPMP
* sysevent API. Returns 0 on success, -1 on failure (in either case,
@@ -2258,16 +2636,18 @@ phyint_inst_other(struct phyint_instance *pii)
static int
post_event(const char *subclass, nvlist_t *nvl)
{
- sysevent_id_t eid;
+ static evchan_t *evchp = NULL;
/*
- * Since sysevents don't work yet in non-global zones, there cannot
- * possibly be any consumers yet, so don't bother trying to generate
- * them. (Otherwise, we'll spew warnings.)
+ * Initialize the event channel if we haven't already done so.
*/
- if (getzoneid() != GLOBAL_ZONEID) {
- nvlist_free(nvl);
- return (0);
+ if (evchp == NULL) {
+ errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evchp, EVCH_CREAT);
+ if (errno != 0) {
+ logerr("cannot create event channel `%s': %s\n",
+ IPMP_EVENT_CHAN, strerror(errno));
+ goto failed;
+ }
}
errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION,
@@ -2278,8 +2658,9 @@ post_event(const char *subclass, nvlist_t *nvl)
goto failed;
}
- if (sysevent_post_event(EC_IPMP, (char *)subclass, SUNW_VENDOR,
- "in.mpathd", nvl, &eid) == -1) {
+ errno = sysevent_evc_publish(evchp, EC_IPMP, subclass, "com.sun",
+ "in.mpathd", nvl, EVCH_NOSLEEP);
+ if (errno != 0) {
logerr("cannot send `%s' event: %s\n", subclass,
strerror(errno));
goto failed;
@@ -2300,6 +2681,8 @@ ifstate(struct phyint *pi)
{
switch (pi->pi_state) {
case PI_NOTARGETS:
+ if (pi->pi_flags & IFF_FAILED)
+ return (IPMP_IF_FAILED);
return (IPMP_IF_UNKNOWN);
case PI_OFFLINE:
@@ -2330,12 +2713,203 @@ iftype(struct phyint *pi)
}
/*
+ * Map phyint `pi' to its external IPMP link state.  If neither the
+ * link-up nor the link-down DLPI notification is enabled in pi_notes the
+ * answer is IPMP_LINK_UNKNOWN; otherwise LINK_DOWN() decides.
+ */
+static ipmp_if_linkstate_t
+iflinkstate(struct phyint *pi)
+{
+	const uint_t linknotes = DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN;
+
+	if ((pi->pi_notes & linknotes) == 0)
+		return (IPMP_LINK_UNKNOWN);
+
+	if (LINK_DOWN(pi))
+		return (IPMP_LINK_DOWN);
+
+	return (IPMP_LINK_UP);
+}
+
+/*
+ * Map phyint `pi' to its external IPMP probe state.  The checks are
+ * ordered: probing not enabled on either instance, then a PI_FAILED
+ * phyint, then neither instance being probe-capable; anything left over is
+ * IPMP_PROBE_OK.
+ */
+static ipmp_if_probestate_t
+ifprobestate(struct phyint *pi)
+{
+	ipmp_if_probestate_t pstate = IPMP_PROBE_OK;
+
+	if (!(PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)))
+		pstate = IPMP_PROBE_DISABLED;
+	else if (pi->pi_state == PI_FAILED)
+		pstate = IPMP_PROBE_FAILED;
+	else if (!(PROBE_CAPABLE(pi->pi_v4) || PROBE_CAPABLE(pi->pi_v6)))
+		pstate = IPMP_PROBE_UNKNOWN;
+
+	return (pstate);
+}
+
+/*
+ * Map phyint instance `pii' to its external IPMP target mode: disabled
+ * when probing is not enabled on it, otherwise routes or multicast
+ * according to pii_targets_are_routers.
+ */
+static ipmp_if_targmode_t
+iftargmode(struct phyint_instance *pii)
+{
+	if (!PROBE_ENABLED(pii))
+		return (IPMP_TARG_DISABLED);
+
+	return (pii->pii_targets_are_routers ?
+	    IPMP_TARG_ROUTES : IPMP_TARG_MULTICAST);
+}
+
+/*
+ * Build the external IPMP flags for phyint `pi'.  DOWN, INACTIVE and
+ * HWADDRDUP are each derived independently; ACTIVE is reported only for a
+ * functioning phyint that has none of those three set.
+ */
+static ipmp_if_flags_t
+ifflags(struct phyint *pi)
+{
+	ipmp_if_flags_t ifl = 0;
+
+	/* No UP logints at all means the interface is down. */
+	if (logint_upcount(pi) == 0)
+		ifl |= IPMP_IFFLAG_DOWN;
+
+	if ((pi->pi_flags & IFF_INACTIVE) != 0)
+		ifl |= IPMP_IFFLAG_INACTIVE;
+
+	if (pi->pi_hwaddrdup)
+		ifl |= IPMP_IFFLAG_HWADDRDUP;
+
+	/* Nothing has been set when we get in here, so plain assignment. */
+	if (phyint_is_functioning(pi) && ifl == 0)
+		ifl = IPMP_IFFLAG_ACTIVE;
+
+	return (ifl);
+}
+
+/*
+ * Fill `*ssp' with the test address in use on phyint instance `pii' and
+ * return `ssp'.  When probing is not enabled on `pii' there is no test
+ * address, and the unspecified address (in6addr_any, tagged AF_INET6) is
+ * stored instead.
+ */
+static struct sockaddr_storage *
+iftestaddr(struct phyint_instance *pii, struct sockaddr_storage *ssp)
+{
+	int af = AF_INET6;
+	const struct in6_addr *addrp = &in6addr_any;
+
+	if (PROBE_ENABLED(pii)) {
+		af = pii->pii_af;
+		addrp = &pii->pii_probe_logint->li_addr;
+	}
+
+	addr2storage(af, addrp, ssp);
+	return (ssp);
+}
+
+/*
* Return the external IPMP group state associated with phyint group `pg'.
*/
static ipmp_group_state_t
groupstate(struct phyint_group *pg)
{
- return (GROUP_FAILED(pg) ? IPMP_GROUP_FAILED : IPMP_GROUP_OK);
+	/* One external state per internal pg_state value. */
+	if (pg->pg_state == PG_FAILED)
+		return (IPMP_GROUP_FAILED);
+	if (pg->pg_state == PG_DEGRADED)
+		return (IPMP_GROUP_DEGRADED);
+	if (pg->pg_state == PG_OK)
+		return (IPMP_GROUP_OK);
+
+	/* Any other value is unexpected: log it and abort. */
+	logerr("groupstate: unknown state %d; aborting\n", pg->pg_state);
+	abort();
+	/* NOTREACHED */
+}
+
+/*
+ * Map the status of probe `ps' to its external IPMP probe state.  An unused
+ * slot is reported the same way as a lost probe.  Any status other than the
+ * four PR_* values is logged and the daemon aborts.
+ */
+static ipmp_probe_state_t
+probestate(struct probe_stats *ps)
+{
+	if (ps->pr_status == PR_UNUSED || ps->pr_status == PR_LOST)
+		return (IPMP_PROBE_LOST);
+	if (ps->pr_status == PR_UNACKED)
+		return (IPMP_PROBE_SENT);
+	if (ps->pr_status == PR_ACKED)
+		return (IPMP_PROBE_ACKED);
+
+	logerr("probestate: unknown state %d; aborting\n", ps->pr_status);
+	abort();
+	/* NOTREACHED */
+}
+
+/*
+ * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr'
+ * on phyint instance `pii'.  Returns 0 on success, -1 on failure.
+ *
+ * The ack receive/process times are reported only for a PR_ACKED probe and
+ * are zero otherwise.  If the probe has no target (pr_target is NULL), the
+ * unspecified address is reported as the target and both RTT attributes are
+ * reported as zero.
+ */
+int
+probe_state_event(struct probe_stats *pr, struct phyint_instance *pii)
+{
+	nvlist_t *nvl;
+	hrtime_t proc_time = 0, recv_time = 0;
+	int64_t rttavg = 0, rttdev = 0;
+	struct sockaddr_storage ss;
+	struct target *tg = pr->pr_target;
+
+	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
+	if (errno != 0) {
+		logperror("cannot create `probe state' event");
+		return (-1);
+	}
+
+	errno = nvlist_add_uint32(nvl, IPMP_PROBE_ID, pr->pr_id);
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_string(nvl, IPMP_IF_NAME, pii->pii_phyint->pi_name);
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_uint32(nvl, IPMP_PROBE_STATE, probestate(pr));
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_hrtime(nvl, IPMP_PROBE_START_TIME,
+	    pr->pr_hrtime_start);
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_hrtime(nvl, IPMP_PROBE_SENT_TIME,
+	    pr->pr_hrtime_sent);
+	if (errno != 0)
+		goto failed;
+
+	/* Ack timestamps are only meaningful once the probe is acked. */
+	if (pr->pr_status == PR_ACKED) {
+		recv_time = pr->pr_hrtime_ackrecv;
+		proc_time = pr->pr_hrtime_ackproc;
+	}
+
+	errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME, recv_time);
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME, proc_time);
+	if (errno != 0)
+		goto failed;
+
+	/*
+	 * The probe may no longer have a target.  Everything derived from
+	 * `tg' is gathered here, under the NULL check, so that the RTT
+	 * attributes below never dereference a NULL target.  The RTT values
+	 * are kept scaled in the target; the divisors undo that scaling.
+	 */
+	if (tg != NULL) {
+		addr2storage(pii->pii_af, &tg->tg_address, &ss);
+		rttavg = tg->tg_rtt_sa / 8;
+		rttdev = tg->tg_rtt_sd / 4;
+	} else {
+		addr2storage(pii->pii_af, &in6addr_any, &ss);
+	}
+
+	errno = nvlist_add_byte_array(nvl, IPMP_PROBE_TARGET, (uchar_t *)&ss,
+	    sizeof (ss));
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, rttavg);
+	if (errno != 0)
+		goto failed;
+
+	errno = nvlist_add_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, rttdev);
+	if (errno != 0)
+		goto failed;
+
+	/* The success path hands `nvl' to post_event(); it is not freed here. */
+	return (post_event(ESC_IPMP_PROBE_STATE, nvl));
+failed:
+	logperror("cannot create `probe state' event");
+	nvlist_free(nvl);
+	return (-1);
}
/*
@@ -2529,10 +3103,15 @@ gensig(void)
unsigned int
getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp)
{
- struct phyint_group *pg;
struct phyint *pi;
+ struct phyint_group *pg;
char (*ifs)[LIFNAMSIZ];
- unsigned int nif, i;
+ unsigned int i, j;
+ unsigned int nif = 0, naddr = 0;
+ lifgroupinfo_t lifgr;
+ addrlist_t *addrp;
+ struct sockaddr_storage *addrs;
+ int fdt = 0;
pg = phyint_group_lookup(grname);
if (pg == NULL)
@@ -2540,39 +3119,143 @@ getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp)
/*
* Tally up the number of interfaces, allocate an array to hold them,
- * and insert their names into the array.
+ * and insert their names into the array. While we're at it, if any
+ * interface is actually enabled to send probes, save the group fdt.
*/
- for (nif = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext)
+ for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext)
nif++;
ifs = alloca(nif * sizeof (*ifs));
for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) {
assert(i < nif);
(void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ);
+ if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6))
+ fdt = pg->pg_fdt;
}
assert(i == nif);
- *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig,
- groupstate(pg), nif, ifs);
+ /*
+ * If this is the anonymous group, there's no other information to
+ * collect (since there's no IPMP interface).
+ */
+ if (pg == phyint_anongroup) {
+ *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt,
+ groupstate(pg), nif, ifs, "", "", "", "", 0, NULL);
+ return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+ }
+
+ /*
+ * Grab some additional information about the group from the kernel.
+ * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name,
+ * we can use ifsock_v4 even for a V6-only group.)
+ */
+ (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ);
+ if (ioctl(ifsock_v4, SIOCGLIFGROUPINFO, &lifgr) == -1) {
+ if (errno == ENOENT)
+ return (IPMP_EUNKGROUP);
+
+ logperror("getgroupinfo: SIOCGLIFGROUPINFO");
+ return (IPMP_FAILURE);
+ }
+
+ /*
+ * Tally up the number of data addresses, allocate an array to hold
+ * them, and insert their values into the array.
+ */
+ for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next)
+ naddr++;
+
+ addrs = alloca(naddr * sizeof (*addrs));
+ i = 0;
+ for (addrp = pg->pg_addrs; addrp != NULL; addrp = addrp->al_next) {
+ /*
+ * It's possible to have duplicate addresses (if some are
+ * down). Weed the dups out to avoid confusing consumers.
+ * (If groups start having tons of addresses, we'll need a
+ * better algorithm here.)
+ */
+ for (j = 0; j < i; j++) {
+ if (sockaddrcmp(&addrs[j], &addrp->al_addr))
+ break;
+ }
+ if (j == i) {
+ assert(i < naddr);
+ addrs[i++] = addrp->al_addr;
+ }
+ }
+ naddr = i;
+
+ *grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig, fdt,
+ groupstate(pg), nif, ifs, lifgr.gi_grifname, lifgr.gi_m4ifname,
+ lifgr.gi_m6ifname, lifgr.gi_bcifname, naddr, addrs);
return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
}
/*
+ * Collect the target information for phyint instance `pii' (reported under
+ * interface name `name') into a dynamically allocated structure returned
+ * through `*targinfopp'.  Returns IPMP_SUCCESS, or IPMP_ENOMEM if the
+ * structure could not be created.  When `pii' is not probe-capable, no
+ * targets are reported.
+ */
+unsigned int
+gettarginfo(struct phyint_instance *pii, const char *name,
+    ipmp_targinfo_t **targinfopp)
+{
+	uint_t nactive = 0;
+	struct target *cur;
+	struct sockaddr_storage testaddr;
+	struct sockaddr_storage *addrs = NULL;
+
+	if (PROBE_CAPABLE(pii)) {
+		addrs = alloca(pii->pii_ntargets * sizeof (*addrs));
+
+		/*
+		 * Walk the whole target list exactly once, beginning at
+		 * pii_target_next and wrapping from the tail back around
+		 * to pii_targets.
+		 */
+		cur = pii->pii_target_next;
+		do {
+			if (cur->tg_status == TG_ACTIVE) {
+				assert(nactive < pii->pii_ntargets);
+				addr2storage(pii->pii_af, &cur->tg_address,
+				    &addrs[nactive]);
+				nactive++;
+			}
+			cur = cur->tg_next;
+			if (cur == NULL)
+				cur = pii->pii_targets;
+		} while (cur != pii->pii_target_next);
+
+		assert(nactive == pii->pii_ntargets);
+	}
+
+	(void) iftestaddr(pii, &testaddr);
+	*targinfopp = ipmp_targinfo_create(name, &testaddr, iftargmode(pii),
+	    nactive, addrs);
+	if (*targinfopp == NULL)
+		return (IPMP_ENOMEM);
+
+	return (IPMP_SUCCESS);
+}
+
+/*
* Store the information associated with interface `ifname' into a dynamically
* allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code.
*/
unsigned int
getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp)
{
+ int retval;
struct phyint *pi;
+ /*
+ * Both start out NULL: the cleanup at `out' frees whichever of these
+ * is non-NULL, and it can be reached before the second gettarginfo()
+ * call has stored anything into targinfo6.
+ */
+ ipmp_targinfo_t *targinfo4 = NULL;
+ ipmp_targinfo_t *targinfo6 = NULL;
pi = phyint_lookup(ifname);
if (pi == NULL)
return (IPMP_EUNKIF);
+ if ((retval = gettarginfo(pi->pi_v4, pi->pi_name, &targinfo4)) != 0 ||
+ (retval = gettarginfo(pi->pi_v6, pi->pi_name, &targinfo6)) != 0)
+ goto out;
+
*ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name,
- ifstate(pi), iftype(pi));
- return (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+ ifstate(pi), iftype(pi), iflinkstate(pi), ifprobestate(pi),
+ ifflags(pi), targinfo4, targinfo6);
+ retval = (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
+out:
+ /* Our target info is released on success and on failure alike. */
+ if (targinfo4 != NULL)
+ ipmp_freetarginfo(targinfo4);
+ if (targinfo6 != NULL)
+ ipmp_freetarginfo(targinfo6);
+ return (retval);
}
/*
@@ -2605,6 +3288,54 @@ getgrouplist(ipmp_grouplist_t **grlistpp)
}
/*
+ * Look up data address `ssp' in group `grname' and return what is known
+ * about it in a dynamically allocated structure through `*adinfopp'.
+ * Returns IPMP_SUCCESS; IPMP_EUNKADDR if either the group or the address
+ * cannot be found; or IPMP_ENOMEM if the structure cannot be created.  (The
+ * name getaddrinfo() would collide with getaddrinfo(3SOCKET).)
+ */
+unsigned int
+getgraddrinfo(const char *grname, struct sockaddr_storage *ssp,
+    ipmp_addrinfo_t **adinfopp)
+{
+	int sock;
+	addrlist_t *cur, *match = NULL;
+	ipmp_addr_state_t state;
+	const char *binding = "";
+	struct lifreq lifr;
+	struct phyint_group *pg = phyint_group_lookup(grname);
+
+	if (pg == NULL)
+		return (IPMP_EUNKADDR);
+
+	/*
+	 * Several entries may carry the same address, since some of them
+	 * may be down.  Remember the most recent match, but stop at the
+	 * first one that is up: an up address is always preferred.
+	 */
+	for (cur = pg->pg_addrs; cur != NULL; cur = cur->al_next) {
+		if (!sockaddrcmp(ssp, &cur->al_addr))
+			continue;
+
+		match = cur;
+		if (match->al_flags & IFF_UP)
+			break;
+	}
+
+	if (match == NULL)
+		return (IPMP_EUNKADDR);
+
+	if (match->al_flags & IFF_UP) {
+		state = IPMP_ADDR_UP;
+
+		/*
+		 * Only an up address is asked for its binding; if the
+		 * ioctl fails the binding is reported as "".
+		 */
+		sock = (ssp->ss_family == AF_INET) ? ifsock_v4 : ifsock_v6;
+		(void) strlcpy(lifr.lifr_name, match->al_name, LIFNAMSIZ);
+		if (ioctl(sock, SIOCGLIFBINDING, &lifr) >= 0)
+			binding = lifr.lifr_binding;
+	} else {
+		state = IPMP_ADDR_DOWN;
+	}
+
+	*adinfopp = ipmp_addrinfo_create(ssp, state, pg->pg_name, binding);
+	if (*adinfopp == NULL)
+		return (IPMP_ENOMEM);
+
+	return (IPMP_SUCCESS);
+}
+
+/*
* Store a snapshot of the IPMP subsystem into a dynamically allocated
* structure pointed to by `*snapp'. Returns an IPMP error code.
*/
@@ -2613,10 +3344,12 @@ getsnap(ipmp_snap_t **snapp)
{
ipmp_grouplist_t *grlistp;
ipmp_groupinfo_t *grinfop;
+ ipmp_addrinfo_t *adinfop;
+ ipmp_addrlist_t *adlistp;
ipmp_ifinfo_t *ifinfop;
ipmp_snap_t *snap;
struct phyint *pi;
- unsigned int i;
+ unsigned int i, j;
int retval;
snap = ipmp_snap_create();
@@ -2627,26 +3360,37 @@ getsnap(ipmp_snap_t **snapp)
* Add group list.
*/
retval = getgrouplist(&snap->sn_grlistp);
- if (retval != IPMP_SUCCESS) {
- ipmp_snap_free(snap);
- return (retval);
- }
+ if (retval != IPMP_SUCCESS)
+ goto failed;
/*
- * Add information for each group in the list.
+ * Add information for each group in the list, along with all of its
+ * data addresses.
*/
grlistp = snap->sn_grlistp;
for (i = 0; i < grlistp->gl_ngroup; i++) {
retval = getgroupinfo(grlistp->gl_groups[i], &grinfop);
- if (retval != IPMP_SUCCESS) {
- ipmp_snap_free(snap);
- return (retval);
- }
+ if (retval != IPMP_SUCCESS)
+ goto failed;
+
retval = ipmp_snap_addgroupinfo(snap, grinfop);
if (retval != IPMP_SUCCESS) {
ipmp_freegroupinfo(grinfop);
- ipmp_snap_free(snap);
- return (retval);
+ goto failed;
+ }
+
+ adlistp = grinfop->gr_adlistp;
+ for (j = 0; j < adlistp->al_naddr; j++) {
+ retval = getgraddrinfo(grinfop->gr_name,
+ &adlistp->al_addrs[j], &adinfop);
+ if (retval != IPMP_SUCCESS)
+ goto failed;
+
+ retval = ipmp_snap_addaddrinfo(snap, adinfop);
+ if (retval != IPMP_SUCCESS) {
+ ipmp_freeaddrinfo(adinfop);
+ goto failed;
+ }
}
}
@@ -2655,18 +3399,19 @@ getsnap(ipmp_snap_t **snapp)
*/
for (pi = phyints; pi != NULL; pi = pi->pi_next) {
retval = getifinfo(pi->pi_name, &ifinfop);
- if (retval != IPMP_SUCCESS) {
- ipmp_snap_free(snap);
- return (retval);
- }
+ if (retval != IPMP_SUCCESS)
+ goto failed;
+
retval = ipmp_snap_addifinfo(snap, ifinfop);
if (retval != IPMP_SUCCESS) {
ipmp_freeifinfo(ifinfop);
- ipmp_snap_free(snap);
- return (retval);
+ goto failed;
}
}
*snapp = snap;
return (IPMP_SUCCESS);
+failed:
+ ipmp_snap_free(snap);
+ return (retval);
}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h
index e4be3ccb30..39da2c3f1b 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _MPD_TABLES_H
#define _MPD_TABLES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -47,20 +45,11 @@ extern "C" {
* switch AND
* (ii) share the same phyint group name.
* Load spreading and failover occur across members of the same phyint group.
- * phyint group members must be homogenous. i.e. if a phyint belonging to a
+ * phyint group members must be homogeneous. i.e. if a phyint belonging to a
* phyint group has a IPv6 protocol instance, then all members of the phyint
* group, must have IPv6 protocol instances. (struct phyint_group)
*/
-/*
- * Parameter passed to try_failover(), indicating the type of failover
- * that is requested.
- */
-#define FAILOVER_NORMAL 1 /* Failover to another phyint */
- /* that is preferably a standby */
-#define FAILOVER_TO_NONSTANDBY 2 /* Failover to non-standby phyint */
-#define FAILOVER_TO_ANY 3 /* Failover to any available phyint */
-
#define MAXDEFERREDRTT 1 /* Maximum number of deferred rtts */
/*
@@ -79,15 +68,9 @@ extern "C" {
#define PI_IOCTL_ERROR 4 /* Some ioctl error */
#define PI_GROUP_CHANGED 5 /* The phyint has changed group. */
-/*
- * Though IFF_POINTOPOINT is a logint property, for the purpose of
- * failover, we treat it as a phyint property. Note that we cannot failover
- * individual logints.
- */
#define PHYINT_FLAGS(flags) \
- (((flags) & (IFF_STANDBY | IFF_INACTIVE | IFF_FAILED | IFF_OFFLINE | \
- IFF_POINTOPOINT | IFF_RUNNING)) | (handle_link_notifications ? \
- 0 : IFF_RUNNING))
+ (((flags) & (IFF_STANDBY | IFF_INACTIVE | IFF_FAILED | IFF_OFFLINE | \
+ IFF_RUNNING)) | (handle_link_notifications ? 0 : IFF_RUNNING))
/* A Phyint can have up to 2 instances, the IPv4 and the IPv6 instance */
#define PHYINT_INSTANCE(pi, af) \
@@ -152,29 +135,32 @@ extern "C" {
* Phyint group states; see below for the phyint group definition.
*/
enum pg_state {
- PG_RUNNING = 1, /* at least one interface in group is working */
- PG_FAILED = 2 /* group has failed completely */
+ PG_OK = 1, /* all interfaces in the group are working */
+ PG_DEGRADED, /* some interfaces in the group are unusable */
+ PG_FAILED /* all interfaces in the group are unusable */
};
/*
* Convenience macro to check if the whole group has failed.
*/
-#define GROUP_FAILED(pg) ((pg)->pg_groupfailed)
+#define GROUP_FAILED(pg) ((pg)->pg_state == PG_FAILED)
/*
* A doubly linked list of all phyint groups in the system.
* A phyint group is identified by its group name.
*/
struct phyint_group {
- char pg_name[LIFNAMSIZ + 1]; /* Phyint group name */
+ char pg_name[LIFGRNAMSIZ]; /* Phyint group name */
struct phyint *pg_phyint; /* List of phyints in this group */
struct phyint_group *pg_next; /* Next phyint group */
struct phyint_group *pg_prev; /* Prev phyint group */
- uint64_t pg_sig; /* Current signature of this group */
- int pg_probeint; /* Interval between probes */
- int pg_fdt; /* Time needed to detect failure */
- uint_t
- pg_groupfailed : 1; /* The whole group has failed */
+ uint64_t pg_sig; /* Current signature of this group */
+ int pg_probeint; /* Interval between probes */
+ int pg_fdt; /* Time needed to detect failure */
+ enum pg_state pg_state; /* Current group state */
+ boolean_t pg_in_use; /* To detect removed groups */
+ struct addrlist *pg_addrs; /* Data addresses in this group */
+ boolean_t pg_failmsg_printed; /* Group failure msg printed */
};
/*
@@ -207,6 +193,11 @@ struct phyint {
uint16_t pi_icmpid; /* icmp id in icmp echo request */
uint64_t pi_taddrthresh; /* time (in secs) to delay logging */
/* about missing test addresses */
+ dlpi_handle_t pi_dh; /* DLPI handle to underlying link */
+ uint_t pi_notes; /* enabled DLPI notifications */
+ uchar_t pi_hwaddr[DLPI_PHYSADDR_MAX]; /* phyint's hw address */
+ size_t pi_hwaddrlen; /* phyint's hw address length */
+
/*
* The pi_whenup array is a circular buffer of the most recent
* times (in milliseconds since some arbitrary point of time in
@@ -217,14 +208,12 @@ struct phyint {
unsigned int pi_whendx;
uint_t
- pi_empty : 1, /* failover done, empty */
- pi_full : 1, /* failback done, full */
- /* More details in probe.c */
pi_taddrmsg_printed : 1, /* testaddr msg printed */
pi_duptaddrmsg_printed : 1, /* dup testaddr msg printed */
pi_cfgmsg_printed : 1, /* bad config msg printed */
pi_lfmsg_printed : 1, /* link-flapping msg printed */
- pi_link_state : 1; /* interface link state */
+ pi_link_state : 1, /* interface link state */
+ pi_hwaddrdup : 1; /* disabled due to dup hw address */
};
/*
@@ -260,19 +249,19 @@ struct phyint_instance {
uint64_t pii_flags; /* Phyint flags from kernel */
struct probe_stats {
- struct target *pr_target; /* Probe Target */
- uint_t pr_time_sent; /* Time probe was sent */
+ uint_t pr_id; /* Full ID of probe */
+ struct target *pr_target; /* Probe Target */
+ uint_t pr_time_lost; /* Time probe declared lost */
+ struct timeval pr_tv_sent; /* Wall time probe was sent */
+ hrtime_t pr_hrtime_start; /* hrtime probe op started */
+ hrtime_t pr_hrtime_sent; /* hrtime probe was sent */
+ hrtime_t pr_hrtime_ackrecv; /* hrtime probe ack received */
+ hrtime_t pr_hrtime_ackproc; /* hrtime probe ack processed */
uint_t pr_status; /* probe status as below */
#define PR_UNUSED 0 /* Probe slot unused */
#define PR_UNACKED 1 /* Probe is unacknowledged */
#define PR_ACKED 2 /* Probe has been acknowledged */
#define PR_LOST 3 /* Probe is declared lost */
- union {
- uint_t tl; /* time probe is declared lost */
- uint_t ta; /* time probe is acked */
- } prt;
-#define pr_time_lost prt.tl
-#define pr_time_acked prt.ta
} pii_probes[PROBE_STATS_COUNT];
uint_t
@@ -319,7 +308,6 @@ struct logint {
struct in6_addr li_subnet; /* prefix / subnet */
uint_t li_subnet_len; /* prefix / subnet length */
uint64_t li_flags; /* IFF_* flags */
- uint_t li_oifindex; /* original ifindex (SIOCGLIFOINDEX) */
uint_t
li_in_use : 1, /* flag to detect deleted logints */
li_dupaddr : 1; /* test address is not unique */
@@ -345,12 +333,12 @@ struct target {
#define TG_DEAD 4 /* Target is not responding */
hrtime_t tg_latime; /* Target's last active time */
- int tg_rtt_sa; /* Scaled round trip time(RTT) avg. */
- int tg_rtt_sd; /* Scaled RTT deviation */
- int tg_crtt; /* Conservative RTT = A + 4D */
+ int64_t tg_rtt_sa; /* Scaled RTT average (in ns) */
+ int64_t tg_rtt_sd; /* Scaled RTT deviation (in ns) */
+ int tg_crtt; /* Conservative RTT = A + 4D (in ms) */
uint32_t
tg_in_use : 1; /* In use flag */
- int tg_deferred[MAXDEFERREDRTT + 1];
+ int64_t tg_deferred[MAXDEFERREDRTT + 1];
/* Deferred rtt data points */
int tg_num_deferred;
/* Number of deferred rtt data points */
@@ -393,19 +381,20 @@ struct probe_success_count
struct probes_missed
{
uint_t pm_nprobes; /* Cumulative number of missed probes */
- uint_t pm_ntimes; /* Total number of occassions */
+ uint_t pm_ntimes; /* Total number of occasions */
};
-struct local_addr
-{
- struct in6_addr addr;
- struct local_addr *next;
-};
+typedef struct addrlist {
+ struct addrlist *al_next; /* next address */
+ char al_name[LIFNAMSIZ]; /* address lif name */
+ uint64_t al_flags; /* address flags */
+ struct sockaddr_storage al_addr; /* address */
+} addrlist_t;
/*
* Globals
*/
-extern struct local_addr *laddr_list;
+extern addrlist_t *localaddrs;
/* List of all local addresses, including local zones */
extern struct phyint *phyints; /* List of all phyints */
extern struct phyint_group *phyint_groups; /* List of all phyint groups */
@@ -428,10 +417,19 @@ extern void phyint_inst_delete(struct phyint_instance *pii);
extern uint_t phyint_inst_timer(struct phyint_instance *pii);
extern boolean_t phyint_inst_sockinit(struct phyint_instance *pii);
-extern void phyint_newtype(struct phyint *pi);
+extern void phyint_changed(struct phyint *pi);
extern void phyint_chstate(struct phyint *pi, enum pi_state state);
extern void phyint_group_chstate(struct phyint_group *pg, enum pg_state state);
+extern struct phyint_group *phyint_group_create(const char *pg_name);
+extern struct phyint_group *phyint_group_lookup(const char *pg_name);
+extern void phyint_group_insert(struct phyint_group *pg);
+extern void phyint_group_delete(struct phyint_group *pg);
+extern void phyint_group_refresh_state(struct phyint_group *pg);
extern void phyint_check_for_repair(struct phyint *pi);
+extern void phyint_transition_to_running(struct phyint *pi);
+extern void phyint_activate_another(struct phyint *pi);
+extern int phyint_offline(struct phyint *pi, unsigned int);
+extern int phyint_undo_offline(struct phyint *pi);
extern void logint_init_from_k(struct phyint_instance *pii, char *li_name);
extern void logint_delete(struct logint *li);
@@ -448,34 +446,40 @@ extern void target_add(struct phyint_instance *pii, struct in6_addr addr,
extern void in_data(struct phyint_instance *pii);
extern void in6_data(struct phyint_instance *pii);
-extern int try_failover(struct phyint *pi, int failover_type);
-extern int try_failback(struct phyint *pi);
-extern int do_failback(struct phyint *pi);
-extern boolean_t change_lif_flags(struct phyint *pi, uint64_t flags,
- boolean_t setfl);
-
extern void logperror_pii(struct phyint_instance *pii, const char *str);
extern void logperror_li(struct logint *li, const char *str);
extern char *pr_addr(int af, struct in6_addr addr, char *abuf, int len);
+extern void addr2storage(int af, const struct in6_addr *addr,
+ struct sockaddr_storage *ssp);
extern void phyint_inst_print_all(void);
+extern boolean_t prefix_equal(struct in6_addr, struct in6_addr, uint_t);
-extern int logint_upcount(struct phyint *pi);
-extern void restore_phyint(struct phyint *pi);
extern void reset_crtt_all(struct phyint *pi);
extern int failure_state(struct phyint_instance *pii);
extern void process_link_state_changes(void);
extern void clear_pii_probe_stats(struct phyint_instance *pii);
extern void start_timer(struct phyint_instance *pii);
+extern void stop_probing(struct phyint *pi);
extern boolean_t own_address(struct in6_addr addr);
+extern boolean_t change_pif_flags(struct phyint *pi, uint64_t set,
+ uint64_t clear);
extern void close_probe_socket(struct phyint_instance *pii, boolean_t flag);
+extern int probe_state_event(struct probe_stats *, struct phyint_instance *);
+extern void probe_chstate(struct probe_stats *, struct phyint_instance *, int);
+extern unsigned int getgraddrinfo(const char *, struct sockaddr_storage *,
+ ipmp_addrinfo_t **);
extern unsigned int getifinfo(const char *, ipmp_ifinfo_t **);
extern unsigned int getgroupinfo(const char *, ipmp_groupinfo_t **);
extern unsigned int getgrouplist(ipmp_grouplist_t **);
extern unsigned int getsnap(ipmp_snap_t **);
+extern boolean_t addrlist_add(addrlist_t **, const char *, uint64_t,
+ struct sockaddr_storage *);
+extern void addrlist_free(addrlist_t **);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
index 27716cabce..703ddcfaad 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
@@ -17,14 +17,11 @@
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ *
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "tables.h"
#include <fcntl.h>
@@ -122,7 +119,7 @@ sendpacket(struct sockaddr_in6 *sin6, int sock, int size, int flags)
char abuf[INET6_ADDRSTRLEN];
cc = sendto(sock, (char *)packet, size, flags,
- (struct sockaddr *)sin6, sizeof (*sin6));
+ (struct sockaddr *)sin6, sizeof (*sin6));
if (cc < 0 || cc != size) {
if (cc < 0) {
logperror("sendpacket: sendto");
@@ -135,6 +132,32 @@ sendpacket(struct sockaddr_in6 *sin6, int sock, int size, int flags)
}
}
+/*
+ * If possible, place an ND_OPT_SOURCE_LINKADDR option for `pi' at `optp'.
+ * Return the number of bytes placed in the option (a multiple of 8), or 0
+ * if `pi' has no link-layer address, in which case `optp' is left untouched.
+ */
+static uint_t
+add_opt_lla(struct phyint *pi, struct nd_opt_lla *optp)
+{
+	uint_t optlen, hwaddrlen;
+	struct lifreq lifr;
+
+	/* No usable link-layer address (lookup failed or empty): bail */
+	if (phyint_get_lla(pi, &lifr) == -1 || lifr.lifr_nd.lnr_hdw_len == 0)
+		return (0);
+
+	hwaddrlen = lifr.lifr_nd.lnr_hdw_len;
+	/* roundup to multiple of 8 and make padding zero */
+	optlen = ((sizeof (struct nd_opt_hdr) + hwaddrlen + 7) / 8) * 8;
+	bzero(optp, optlen);
+	optp->nd_opt_lla_type = ND_OPT_SOURCE_LINKADDR;
+	optp->nd_opt_lla_len = optlen / 8;	/* in units of 8 octets */
+	bcopy(lifr.lifr_nd.lnr_hdw_addr, optp->nd_opt_lla_hdw_addr, hwaddrlen);
+
+	return (optlen);
+}
+
/* Send a Router Solicitation */
static void
solicit(struct sockaddr_in6 *sin6, struct phyint *pi)
@@ -151,24 +174,8 @@ solicit(struct sockaddr_in6 *sin6, struct phyint *pi)
packetlen += sizeof (*rs);
pptr += sizeof (*rs);
- /* Attach any options */
- if (pi->pi_hdw_addr_len != 0) {
- struct nd_opt_lla *lo = (struct nd_opt_lla *)pptr;
- int optlen;
-
- /* roundup to multiple of 8 and make padding zero */
- optlen = ((sizeof (struct nd_opt_hdr) +
- pi->pi_hdw_addr_len + 7) / 8) * 8;
- bzero(pptr, optlen);
-
- lo->nd_opt_lla_type = ND_OPT_SOURCE_LINKADDR;
- lo->nd_opt_lla_len = optlen / 8;
- bcopy((char *)pi->pi_hdw_addr,
- (char *)lo->nd_opt_lla_hdw_addr,
- pi->pi_hdw_addr_len);
- packetlen += optlen;
- pptr += optlen;
- }
+ /* add options */
+ packetlen += add_opt_lla(pi, (struct nd_opt_lla *)pptr);
if (debug & D_PKTOUT) {
print_route_sol("Sending solicitation to ", pi, rs, packetlen,
@@ -224,24 +231,9 @@ advertise(struct sockaddr_in6 *sin6, struct phyint *pi, boolean_t no_prefixes)
return;
}
- /* Attach any options */
- if (pi->pi_hdw_addr_len != 0) {
- struct nd_opt_lla *lo = (struct nd_opt_lla *)pptr;
- int optlen;
-
- /* roundup to multiple of 8 and make padding zero */
- optlen = ((sizeof (struct nd_opt_hdr) +
- pi->pi_hdw_addr_len + 7) / 8) * 8;
- bzero(pptr, optlen);
-
- lo->nd_opt_lla_type = ND_OPT_SOURCE_LINKADDR;
- lo->nd_opt_lla_len = optlen / 8;
- bcopy((char *)pi->pi_hdw_addr,
- (char *)lo->nd_opt_lla_hdw_addr,
- pi->pi_hdw_addr_len);
- packetlen += optlen;
- pptr += optlen;
- }
+ /* add options */
+ packetlen += add_opt_lla(pi, (struct nd_opt_lla *)pptr);
+ pptr = (char *)packet + packetlen;
if (pi->pi_AdvLinkMTU != 0) {
struct nd_opt_mtu *mo = (struct nd_opt_mtu *)pptr;
@@ -1671,10 +1663,10 @@ process_rtsock(int rtsock)
return;
}
- if (ifm->ifm_flags != pi->pi_flags) {
+ if (ifm->ifm_flags != (uint_t)pi->pi_flags) {
if (debug & D_IFSCAN) {
logmsg(LOG_DEBUG, "process_rtsock: clr for "
- "%s old flags 0x%x new flags 0x%x\n",
+ "%s old flags 0x%llx new flags 0x%x\n",
pi->pi_name, pi->pi_flags, ifm->ifm_flags);
}
}
@@ -1825,141 +1817,67 @@ process_mibsock(int mibsock)
}
/*
- * Check whether the address formed by pr->pr_prefix and pi_token
- * exists in the kernel. Cannot call SIOCTMYADDR/ONLINK as it
- * does not check for down addresses. This function should not
- * be called for onlink prefixes.
- */
-static boolean_t
-is_address_present(struct phyint *pi, struct prefix *pr, uint64_t flags)
-{
- int s;
- in6_addr_t addr, *token;
- int i;
- int ret;
- struct sockaddr_in6 sin6;
-
- s = socket(AF_INET6, SOCK_DGRAM, 0);
- if (s < 0) {
- logperror("is_address_present: socket");
- /*
- * By returning B_TRUE, we make the caller delete
- * the prefix from the internal table. In the worst
- * case the next RA will create the prefix.
- */
- return (_B_TRUE);
- }
- if (flags & IFF_TEMPORARY)
- token = &pi->pi_tmp_token;
- else
- token = &pi->pi_token;
- for (i = 0; i < 16; i++) {
- /*
- * prefix_create ensures that pr_prefix has all-zero
- * bits after prefixlen.
- */
- addr.s6_addr[i] = pr->pr_prefix.s6_addr[i] | token->s6_addr[i];
- }
- (void) memset(&sin6, 0, sizeof (struct sockaddr_in6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_addr = addr;
- ret = bind(s, (struct sockaddr *)&sin6, sizeof (struct sockaddr_in6));
- (void) close(s);
- if (ret < 0 && errno == EADDRNOTAVAIL)
- return (_B_FALSE);
- else
- return (_B_TRUE);
-}
-
-/*
* Look if the phyint or one of its prefixes have been removed from
* the kernel and take appropriate action.
- * Uses {pi,pr}_in_use.
+ * Uses pr_in_use and pi{,_kernel}_state.
*/
static void
check_if_removed(struct phyint *pi)
{
- struct prefix *pr;
- struct prefix *next_pr;
+ struct prefix *pr, *next_pr;
/*
- * Detect phyints that have been removed from the kernel.
- * Since we can't recreate it here (would require ifconfig plumb
- * logic) we just terminate use of that phyint.
- */
- if (!(pi->pi_kernel_state & PI_PRESENT) &&
- (pi->pi_state & PI_PRESENT)) {
- logmsg(LOG_ERR, "Interface %s has been removed from kernel. "
- "in.ndpd will no longer use it\n", pi->pi_name);
- /*
- * Clear state so that should the phyint reappear
- * we will start with initial advertisements or
- * solicitations.
- */
- phyint_cleanup(pi);
- }
- /*
* Detect prefixes which are removed.
- *
- * We remove the prefix in all of the following cases :
- *
- * 1) Static prefixes are not the ones we create. So,
- * just remove it from our tables.
- *
- * 2) On-link prefixes potentially move to a different
- * phyint during failover. As it does not have
- * an address, we can't use the logic in is_address_present
- * to detect whether it is present in the kernel or not.
- * Thus when it is manually removed we don't recreate it.
- *
- * 3) If there is a token mis-match and this prefix is not
- * in the kernel, it means we don't need this prefix on
- * this interface anymore. It must have been moved to a
- * different interface by in.mpathd. This normally
- * happens after a failover followed by a failback (or
- * another failover) and we re-read the network
- * configuration. For the failover from A to B, we would
- * have created state on B about A's address, which will
- * not be in use after the subsequent failback. So, we
- * remove that prefix here.
- *
- * 4) If the physical interface is not present, then remove
- * the prefix. In the cases where we are advertising
- * prefixes, the state is kept in advertisement prefix and
- * hence we can delete the prefix.
- *
- * 5) Similar to case (3), when we failover from A to B, the
- * prefix in A will not be in use as it has been moved to B.
- * We will delete it from our tables and recreate it when
- * it fails back. is_address_present makes sure that the
- * address is still valid in kernel.
- *
- * If none of the above is true, we recreate the prefix as it
- * has been manually removed. We do it only when the interface
- * is not FAILED or INACTIVE or OFFLINE.
+ * Static prefixes are just removed from our tables.
+ * Non-static prefixes are recreated i.e. in.ndpd takes precedence
+ * over manually removing prefixes via ifconfig.
*/
for (pr = pi->pi_prefix_list; pr != NULL; pr = next_pr) {
next_pr = pr->pr_next;
if (!pr->pr_in_use) {
- /* Clear PR_AUTO and PR_ONLINK */
+ /* Clear everything except PR_STATIC */
pr->pr_kernel_state &= PR_STATIC;
- if ((pr->pr_state & PR_STATIC) ||
- !(pr->pr_state & PR_AUTO) ||
- !(prefix_token_match(pi, pr, pr->pr_flags)) ||
- (!(pi->pi_kernel_state & PI_PRESENT)) ||
- (is_address_present(pi, pr, pr->pr_flags))) {
+ pr->pr_name[0] = '\0';
+ if (pr->pr_state & PR_STATIC) {
prefix_delete(pr);
- } else if (!(pi->pi_flags &
- (IFF_FAILED|IFF_INACTIVE|IFF_OFFLINE)) &&
- pr->pr_state != pr->pr_kernel_state) {
- pr->pr_name[0] = '\0';
+ } else if (!(pi->pi_kernel_state & PI_PRESENT)) {
+ /*
+ * Ensure that there are no future attempts to
+ * run prefix_update_k since the phyint is gone.
+ */
+ pr->pr_state = pr->pr_kernel_state;
+ } else if (pr->pr_state != pr->pr_kernel_state) {
logmsg(LOG_INFO, "Prefix manually removed "
- "on %s - recreating it!\n",
- pi->pi_name);
+ "on %s; recreating\n", pi->pi_name);
prefix_update_k(pr);
}
}
}
+
+ /*
+ * Detect phyints that have been removed from the kernel, and tear
+ * down any prefixes we created that are associated with that phyint.
+ * (NOTE: IPMP depends on in.ndpd tearing down these prefixes so an
+ * administrator can easily place an IP interface with ADDRCONF'd
+ * addresses into an IPMP group.)
+ */
+ if (!(pi->pi_kernel_state & PI_PRESENT) &&
+ (pi->pi_state & PI_PRESENT)) {
+ logmsg(LOG_ERR, "Interface %s has been removed from kernel. "
+ "in.ndpd will no longer use it\n", pi->pi_name);
+
+ for (pr = pi->pi_prefix_list; pr != NULL; pr = next_pr) {
+ next_pr = pr->pr_next;
+ if (pr->pr_state & PR_AUTO)
+ prefix_delete(pr);
+ }
+
+ /*
+ * Clear state so that should the phyint reappear we will
+ * start with initial advertisements or solicitations.
+ */
+ phyint_cleanup(pi);
+ }
}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
index 5d64a9303d..0a9e1e6a13 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -383,29 +383,12 @@ incoming_ra(struct phyint *pi, struct nd_router_advert *ra, int len,
if (no_loopback && loopback)
return;
- /*
- * If the interface is FAILED or INACTIVE or OFFLINE, don't
- * create any addresses on them. in.mpathd assumes that no new
- * addresses will appear on these. This implies that we
- * won't create any new prefixes advertised by the router
- * on FAILED/INACTIVE/OFFLINE interfaces. When the state changes,
- * the next RA will create the prefix on this interface.
- */
- if (pi->pi_flags & (IFF_FAILED|IFF_INACTIVE|IFF_OFFLINE))
- return;
+ bzero(&lifr, sizeof (lifr));
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
- if (ioctl(pi->pi_sock, SIOCGLIFLNKINFO, (char *)&lifr) < 0) {
- if (errno == ENXIO)
- return;
- logperror_pi(pi, "incoming_ra: SIOCGLIFLNKINFO");
- return;
- }
if (ra->nd_ra_curhoplimit != CURHOP_UNSPECIFIED &&
ra->nd_ra_curhoplimit != pi->pi_CurHopLimit) {
pi->pi_CurHopLimit = ra->nd_ra_curhoplimit;
-
lifr.lifr_ifinfo.lir_maxhops = pi->pi_CurHopLimit;
set_needed = _B_TRUE;
}
@@ -460,7 +443,7 @@ incoming_ra(struct phyint *pi, struct nd_router_advert *ra, int len,
logmsg(LOG_DEBUG,
"incoming_ra: trigger dhcp %s on %s\n",
(ra->nd_ra_flags_reserved & ~pi->pi_ra_flags &
- ND_RA_FLAG_MANAGED) ? "MANAGED" : "OTHER",
+ ND_RA_FLAG_MANAGED) ? "MANAGED" : "OTHER",
pi->pi_name);
}
pi->pi_ra_flags |= ra->nd_ra_flags_reserved;
@@ -999,11 +982,9 @@ incoming_prefix_addrconf_process(struct phyint *pi, struct prefix *pr,
* Delete this prefix structure as kernel
* does not allow duplicated addresses
*/
-
logmsg(LOG_ERR, "incoming_prefix_addrconf_process: "
- "Duplicate prefix %s received on interface %s\n",
- inet_ntop(AF_INET6,
- (void *)&po->nd_opt_pi_prefix, abuf,
+ "Duplicate prefix %s received on interface %s\n",
+ inet_ntop(AF_INET6, &po->nd_opt_pi_prefix, abuf,
sizeof (abuf)), pi->pi_name);
logmsg(LOG_ERR, "incoming_prefix_addrconf_process: "
"Prefix already exists in interface %s\n",
@@ -1129,12 +1110,8 @@ incoming_mtu_opt(struct phyint *pi, uchar_t *opt,
}
pi->pi_LinkMTU = mtu;
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
- if (ioctl(pi->pi_sock, SIOCGLIFLNKINFO, (char *)&lifr) < 0) {
- logperror_pi(pi, "incoming_mtu_opt: SIOCGLIFLNKINFO");
- return;
- }
+ bzero(&lifr, sizeof (lifr));
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
lifr.lifr_ifinfo.lir_maxmtu = pi->pi_LinkMTU;
if (ioctl(pi->pi_sock, SIOCSLIFLNKINFO, (char *)&lifr) < 0) {
logperror_pi(pi, "incoming_mtu_opt: SIOCSLIFLNKINFO");
@@ -1155,33 +1132,33 @@ incoming_lla_opt(struct phyint *pi, uchar_t *opt,
struct sockaddr_in6 *sin6;
int max_content_len;
- if (pi->pi_hdw_addr_len == 0)
+ /*
+ * Get our link-layer address length. We may not have one, in which
+ * case we can just bail.
+ */
+ if (phyint_get_lla(pi, &lifr) != 0)
return;
/*
* Can't remove padding since it is link type specific.
- * However, we check against the length of our link-layer
- * address.
- * Note: assumes that all links have a fixed lengh address.
+ * However, we check against the length of our link-layer address.
+ * Note: assumes that all links have a fixed length address.
*/
max_content_len = lo->nd_opt_lla_len * 8 - sizeof (struct nd_opt_hdr);
- if (max_content_len < pi->pi_hdw_addr_len ||
+ if (max_content_len < lifr.lifr_nd.lnr_hdw_len ||
(max_content_len >= 8 &&
- max_content_len - 7 > pi->pi_hdw_addr_len)) {
+ max_content_len - 7 > lifr.lifr_nd.lnr_hdw_len)) {
char abuf[INET6_ADDRSTRLEN];
(void) inet_ntop(AF_INET6, (void *)&from->sin6_addr,
abuf, sizeof (abuf));
logmsg(LOG_INFO, "lla option from %s on %s too long with bad "
- "physaddr length (%d vs. %d bytes)\n",
- abuf, pi->pi_name,
- max_content_len, pi->pi_hdw_addr_len);
+ "physaddr length (%d vs. %d bytes)\n", abuf, pi->pi_name,
+ max_content_len, lifr.lifr_nd.lnr_hdw_len);
return;
}
- lifr.lifr_nd.lnr_hdw_len = pi->pi_hdw_addr_len;
- bcopy((char *)lo->nd_opt_lla_hdw_addr,
- (char *)lifr.lifr_nd.lnr_hdw_addr,
+ bcopy(lo->nd_opt_lla_hdw_addr, lifr.lifr_nd.lnr_hdw_addr,
lifr.lifr_nd.lnr_hdw_len);
sin6 = (struct sockaddr_in6 *)&lifr.lifr_nd.lnr_addr;
@@ -1196,8 +1173,7 @@ incoming_lla_opt(struct phyint *pi, uchar_t *opt,
lifr.lifr_nd.lnr_state_same_lla = ND_UNCHANGED;
lifr.lifr_nd.lnr_state_diff_lla = ND_STALE;
lifr.lifr_nd.lnr_flags = isrouter;
- (void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
- lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
if (ioctl(pi->pi_sock, SIOCLIFSETND, (char *)&lifr) < 0) {
logperror_pi(pi, "incoming_lla_opt: SIOCLIFSETND");
return;
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
index c8fc6381b7..09e6137965 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "tables.h"
@@ -171,6 +169,7 @@ phyint_init_from_k(struct phyint *pi)
struct ipv6_mreq v6mcastr;
struct lifreq lifr;
int fd;
+ int save_errno;
boolean_t newsock;
uint_t ttl;
struct sockaddr_in6 *sin6;
@@ -297,30 +296,6 @@ start_over:
pi->pi_dst_token = in6addr_any;
}
- /* Get link-layer address */
- if (!(pi->pi_flags & IFF_MULTICAST) ||
- (pi->pi_flags & IFF_POINTOPOINT)) {
- pi->pi_hdw_addr_len = 0;
- } else {
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_nd.lnr_addr;
- bzero(sin6, sizeof (struct sockaddr_in6));
- sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = pi->pi_ifaddr;
-
- if (ioctl(fd, SIOCLIFGETND, (char *)&lifr) < 0) {
- logperror_pi(pi, "phyint_init_from_k: SIOCLIFGETND");
- goto error;
- }
-
- pi->pi_hdw_addr_len = lifr.lifr_nd.lnr_hdw_len;
-
- if (lifr.lifr_nd.lnr_hdw_len != 0) {
- bcopy((char *)lifr.lifr_nd.lnr_hdw_addr,
- (char *)pi->pi_hdw_addr,
- lifr.lifr_nd.lnr_hdw_len);
- }
- }
-
if (newsock) {
icmp6_filter_t filter;
int on = 1;
@@ -360,8 +335,21 @@ start_over:
v6mcastr.ipv6mr_interface = pi->pi_index;
if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_GROUP,
(char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- logperror_pi(pi, "phyint_init_from_k: "
- "setsockopt IPV6_JOIN_GROUP");
+ /*
+ * One benign reason IPV6_JOIN_GROUP could fail is
+ * when `pi' has been placed into an IPMP group and we
+ * haven't yet processed the routing socket message
+ * informing us of its disappearance. As such, if
+ * it's now in a group, don't print an error.
+ */
+ save_errno = errno;
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) == -1 ||
+ lifr.lifr_groupname[0] == '\0') {
+ errno = save_errno;
+ logperror_pi(pi, "phyint_init_from_k: "
+ "setsockopt IPV6_JOIN_GROUP");
+ }
goto error;
}
pi->pi_state |= PI_JOINED_ALLNODES;
@@ -403,8 +391,17 @@ start_over:
v6mcastr.ipv6mr_interface = pi->pi_index;
if (setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_GROUP,
(char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- logperror_pi(pi, "phyint_init_from_k: setsockopt "
- "IPV6_JOIN_GROUP");
+ /*
+ * See IPV6_JOIN_GROUP comment above.
+ */
+ save_errno = errno;
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) == -1 ||
+ lifr.lifr_groupname[0] == '\0') {
+ errno = save_errno;
+ logperror_pi(pi, "phyint_init_from_k: "
+ "setsockopt IPV6_JOIN_GROUP");
+ }
goto error;
}
pi->pi_state |= PI_JOINED_ALLROUTERS;
@@ -569,22 +566,16 @@ phyint_print(struct phyint *pi)
struct adv_prefix *adv_pr;
struct router *dr;
char abuf[INET6_ADDRSTRLEN];
- char llabuf[BUFSIZ];
logmsg(LOG_DEBUG, "Phyint %s index %d state %x, kernel %x, "
"num routers %d\n",
pi->pi_name, pi->pi_index, pi->pi_state, pi->pi_kernel_state,
pi->pi_num_k_routers);
- logmsg(LOG_DEBUG, "\taddress: %s flags %x\n",
+ logmsg(LOG_DEBUG, "\taddress: %s flags %llx\n",
inet_ntop(AF_INET6, (void *)&pi->pi_ifaddr,
abuf, sizeof (abuf)), pi->pi_flags);
- logmsg(LOG_DEBUG, "\tsock %d mtu %d hdw_addr len %d <%s>\n",
- pi->pi_sock, pi->pi_mtu, pi->pi_hdw_addr_len,
- ((pi->pi_hdw_addr_len != 0) ?
- fmt_lla(llabuf, sizeof (llabuf), pi->pi_hdw_addr,
- pi->pi_hdw_addr_len) : "none"));
- logmsg(LOG_DEBUG, "\ttoken: len %d %s\n",
- pi->pi_token_length,
+ logmsg(LOG_DEBUG, "\tsock %d mtu %d\n", pi->pi_sock, pi->pi_mtu);
+ logmsg(LOG_DEBUG, "\ttoken: len %d %s\n", pi->pi_token_length,
inet_ntop(AF_INET6, (void *)&pi->pi_token,
abuf, sizeof (abuf)));
if (pi->pi_TmpAddrsEnabled) {
@@ -632,6 +623,43 @@ phyint_print(struct phyint *pi)
logmsg(LOG_DEBUG, "\n");
}
+
+/*
+ * Store the LLA for the phyint `pi' in `lifrp'.  Returns 0 on success, or
+ * -1 if `pi' has no link-layer address or the SIOCLIFGETND ioctl fails.
+ *
+ * Note that we do not cache the hardware address since there's no reliable
+ * mechanism to determine when it's become stale.
+ */
+int
+phyint_get_lla(struct phyint *pi, struct lifreq *lifrp)
+{
+	struct sockaddr_in6 *sin6;
+
+	/* If this phyint doesn't have a link-layer address, bail */
+	if (!(pi->pi_flags & IFF_MULTICAST) ||
+	    (pi->pi_flags & IFF_POINTOPOINT)) {
+		return (-1);
+	}
+
+	(void) strlcpy(lifrp->lifr_name, pi->pi_name, LIFNAMSIZ);
+	sin6 = (struct sockaddr_in6 *)&(lifrp->lifr_nd.lnr_addr);
+	bzero(sin6, sizeof (*sin6));	/* caller's lifreq may be uninitialized */
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_addr = pi->pi_ifaddr;
+	if (ioctl(pi->pi_sock, SIOCLIFGETND, lifrp) < 0) {
+		/*
+		 * For IPMP interfaces, ESRCH merely indicates that the group
+		 * has no active interfaces (and thus no working hardware
+		 * address), so the packet would never make it out anyway.
+		 */
+		if (!(pi->pi_flags & IFF_IPMP) || errno != ESRCH)
+			logperror_pi(pi, "phyint_get_lla: SIOCLIFGETND");
+		return (-1);
+	}
+	return (0);
+}
+
/*
* Randomize pi->pi_ReachableTime.
* Done periodically when there are no RAs and at a maximum frequency when
@@ -642,20 +670,14 @@ phyint_print(struct phyint *pi)
void
phyint_reach_random(struct phyint *pi, boolean_t set_needed)
{
+ struct lifreq lifr;
+
pi->pi_ReachableTime = GET_RANDOM(
(int)(ND_MIN_RANDOM_FACTOR * pi->pi_BaseReachableTime),
(int)(ND_MAX_RANDOM_FACTOR * pi->pi_BaseReachableTime));
if (set_needed) {
- struct lifreq lifr;
-
- (void) strncpy(lifr.lifr_name, pi->pi_name,
- sizeof (lifr.lifr_name));
- pi->pi_name[sizeof (pi->pi_name) - 1] = '\0';
- if (ioctl(pi->pi_sock, SIOCGLIFLNKINFO, (char *)&lifr) < 0) {
- logperror_pi(pi,
- "phyint_reach_random: SIOCGLIFLNKINFO");
- return;
- }
+ bzero(&lifr, sizeof (lifr));
+ (void) strlcpy(lifr.lifr_name, pi->pi_name, LIFNAMSIZ);
lifr.lifr_ifinfo.lir_reachtime = pi->pi_ReachableTime;
if (ioctl(pi->pi_sock, SIOCSLIFLNKINFO, (char *)&lifr) < 0) {
logperror_pi(pi,
@@ -1386,12 +1408,12 @@ prefix_modify_flags(struct prefix *pr, uint64_t onflags, uint64_t offflags)
(void) strncpy(lifr.lifr_name, pr->pr_name, sizeof (lifr.lifr_name));
lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
if (ioctl(pi->pi_sock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
- logperror_pr(pr, "prefix_modify_flags: SIOCGLIFFLAGS");
- logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx "
- "on 0x%llx off 0x%llx\n",
- pr->pr_physical->pi_name,
- pr->pr_name,
- pr->pr_flags, onflags, offflags);
+ if (errno != ENXIO) {
+ logperror_pr(pr, "prefix_modify_flags: SIOCGLIFFLAGS");
+ logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx"
+ " on 0x%llx off 0x%llx\n", pr->pr_physical->pi_name,
+ pr->pr_name, pr->pr_flags, onflags, offflags);
+ }
return (-1);
}
old_flags = lifr.lifr_flags;
@@ -1399,12 +1421,13 @@ prefix_modify_flags(struct prefix *pr, uint64_t onflags, uint64_t offflags)
lifr.lifr_flags &= ~offflags;
pr->pr_flags = lifr.lifr_flags;
if (ioctl(pi->pi_sock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
- logperror_pr(pr, "prefix_modify_flags: SIOCSLIFFLAGS");
- logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx "
- "new 0x%llx on 0x%llx off 0x%llx\n",
- pr->pr_physical->pi_name,
- pr->pr_name,
- old_flags, lifr.lifr_flags, onflags, offflags);
+ if (errno != ENXIO) {
+ logperror_pr(pr, "prefix_modify_flags: SIOCSLIFFLAGS");
+ logmsg(LOG_ERR, "prefix_modify_flags(%s, %s) old 0x%llx"
+ " new 0x%llx on 0x%llx off 0x%llx\n",
+ pr->pr_physical->pi_name, pr->pr_name,
+ old_flags, lifr.lifr_flags, onflags, offflags);
+ }
return (-1);
}
return (0);
@@ -1540,7 +1563,8 @@ prefix_update_k(struct prefix *pr)
/* Remove logical interface based on pr_name */
lifr.lifr_addr.ss_family = AF_UNSPEC;
- if (ioctl(pi->pi_sock, SIOCLIFREMOVEIF, (char *)&lifr) < 0) {
+ if (ioctl(pi->pi_sock, SIOCLIFREMOVEIF, (char *)&lifr) < 0 &&
+ errno != ENXIO) {
logperror_pr(pr, "prefix_update_k: SIOCLIFREMOVEIF");
}
pr->pr_kernel_state = 0;
@@ -1865,36 +1889,6 @@ prefix_print(struct prefix *pr)
}
/*
- * Does the address formed by pr->pr_prefix and pi->pi_token match
- * pr->pr_address. It does not match if a failover has happened
- * earlier (done by in.mpathd) from a different pi. Should not
- * be called for onlink prefixes.
- */
-boolean_t
-prefix_token_match(struct phyint *pi, struct prefix *pr, uint64_t flags)
-{
- int i;
- in6_addr_t addr, *token;
-
- if (flags & IFF_TEMPORARY)
- token = &pi->pi_tmp_token;
- else
- token = &pi->pi_token;
- for (i = 0; i < 16; i++) {
- /*
- * prefix_create ensures that pr_prefix has all-zero
- * bits after prefixlen.
- */
- addr.s6_addr[i] = pr->pr_prefix.s6_addr[i] | token->s6_addr[i];
- }
- if (IN6_ARE_ADDR_EQUAL(&pr->pr_address, &addr)) {
- return (_B_TRUE);
- } else {
- return (_B_FALSE);
- }
-}
-
-/*
* Lookup advertisement prefix structure that matches the prefix and
* prefix length.
* Assumes that the bits after prefixlen might not be zero.
@@ -2305,8 +2299,7 @@ phyint_print_all(void)
}
void
-phyint_cleanup(pi)
- struct phyint *pi;
+phyint_cleanup(struct phyint *pi)
{
pi->pi_state = 0;
pi->pi_kernel_state = 0;
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
index 409600a402..dfc5414d5d 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _NDPD_TABLES_H
#define _NDPD_TABLES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -58,9 +56,7 @@ struct phyint {
char pi_name[LIFNAMSIZ]; /* Used to identify it */
int pi_sock; /* For sending and receiving */
struct in6_addr pi_ifaddr; /* Local address */
- uint_t pi_flags; /* IFF_* flags */
- uint_t pi_hdw_addr_len;
- uchar_t pi_hdw_addr[ND_MAX_HDW_LEN];
+ uint64_t pi_flags; /* IFF_* flags */
uint_t pi_mtu; /* From SIOCGLIFMTU */
struct in6_addr pi_token;
uint_t pi_token_length;
@@ -256,6 +252,7 @@ extern int phyint_init_from_k(struct phyint *pi);
extern void phyint_delete(struct phyint *pi);
extern uint_t phyint_timer(struct phyint *pi, uint_t elapsed);
extern void phyint_print_all(void);
+extern int phyint_get_lla(struct phyint *pi, struct lifreq *lifrp);
extern void phyint_reach_random(struct phyint *pi,
boolean_t set_needed);
extern void phyint_cleanup(struct phyint *pi);
@@ -280,8 +277,6 @@ extern void prefix_update_k(struct prefix *pr);
extern uint_t prefix_timer(struct prefix *pr, uint_t elapsed);
extern uint_t adv_prefix_timer(struct adv_prefix *adv_pr,
uint_t elapsed);
-extern boolean_t prefix_token_match(struct phyint *pi,
- struct prefix *pr, uint64_t flags);
extern struct prefix *prefix_lookup_addr(struct phyint *pi,
struct in6_addr prefix);
diff --git a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c
index 15db1b7539..b76341e303 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/mDNSUNP.c
@@ -1,3 +1,7 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
/* -*- Mode: C; tab-width: 4 -*-
*
* Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved.
@@ -130,8 +134,6 @@ First checkin
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mDNSUNP.h"
#include "mDNSDebug.h"
@@ -398,13 +400,11 @@ select_src_ifi_info_solaris(int sockfd, int numifs,
continue;
/*
* Avoid address if any of the following flags are set:
- * IFF_NOFAILOVER: IPMP test address for use only by in.mpathd
* IFF_NOXMIT: no packets transmitted over interface
* IFF_NOLOCAL: no address
* IFF_PRIVATE: is not advertised
*/
- if (ifflags & (IFF_NOFAILOVER | IFF_NOXMIT
- | IFF_NOLOCAL | IFF_PRIVATE))
+ if (ifflags & (IFF_NOXMIT | IFF_NOLOCAL | IFF_PRIVATE))
continue;
if (*best_lifr != NULL) {
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/Makefile
index d91d113347..e29c1765ec 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/Makefile
@@ -20,7 +20,7 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -65,12 +65,13 @@ K5TELNETOBJS= in.telnetd.o
SRCS= $(PROGSRCS) $(OTHERSRC)
SUBDIRS= bootconfchk htable ifconfig in.ftpd in.rdisc in.routed \
- in.talkd inetadm inetconv ipqosconf kssl/kssladm kssl/ksslcfg \
- ping routeadm snoop sppptun traceroute wificonfig ipsecutils
+ in.talkd inetadm inetconv ipmpstat ipqosconf ipsecutils \
+ kssl/kssladm kssl/ksslcfg ping routeadm snoop sppptun \
+ traceroute wificonfig
MSGSUBDIRS= bootconfchk htable ifconfig in.ftpd in.routed in.talkd inetadm \
- inetconv ipqosconf kssl/ksslcfg routeadm sppptun snoop \
- wificonfig ipsecutils
+ inetconv ipmpstat ipqosconf ipsecutils kssl/ksslcfg routeadm \
+ sppptun snoop wificonfig
# As programs get lint-clean, add them here and to the 'lint' target.
# Eventually this hack should go away, and all in PROG should be
@@ -83,7 +84,8 @@ LINTCLEAN= 6to4relay arp in.rlogind in.rshd in.telnetd in.tftpd \
# with SUBDIRS. Also (sigh) deal with the commented-out build lines
# for the lint rule.
LINTSUBDIRS= bootconfchk in.rdisc in.routed in.talkd inetadm inetconv \
- ipqosconf ping routeadm sppptun traceroute wificonfig ipsecutils
+ ipmpstat ipqosconf ipsecutils ping routeadm sppptun traceroute \
+ wificonfig
# And as programs are verified not to attempt to write into constants,
# -xstrconst should be used to ensure they stay that way.
CONSTCLEAN=
@@ -144,6 +146,8 @@ LDLIBS += $(K5LIBS)
$(TSNETPROG) := LDLIBS += -ltsnet
in.rarpd := LDLIBS += -linetutil -ldlpi
+if_mpadm := LDLIBS += -linetutil -lipmp
+if_mpadm.po := XGETFLAGS += -a
route := CPPFLAGS += -DNDEBUG
ndd := LDLIBS += -ldladm
gettable in.comsat := LDFLAGS += $(MAPFILE.NGB:%=-M%)
@@ -245,7 +249,7 @@ lint: $(LINTSUBDIRS)
-I$(SRC)/lib/gss_mechs/mech_krb5/include \
-I$(SRC)/lib/pam_modules/krb5 \
in.telnetd.c $(LDLIBS) -lbsm -lpam -lsocket -lnsl
- $(LINT.c) if_mpadm.c $(LDLIBS) -lsocket -lnsl -lipmp
+ $(LINT.c) if_mpadm.c $(LDLIBS) -lsocket -lnsl -lipmp -linetutil
$(LINT.c) ipaddrsel.c $(LDLIBS) -lsocket -lnsl
$(LINT.c) route.c $(LDLIBS) -lsocket -lnsl -ltsnet
$(LINT.c) syncinit.c $(LDLIBS) -ldlpi
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c b/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c
index d4874135fd..7c5d73c796 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/if_mpadm.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,660 +19,250 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <unistd.h>
-#include <stdlib.h>
+#include <errno.h>
+#include <ipmp_admin.h>
+#include <libinetutil.h>
+#include <locale.h>
+#include <net/if.h>
+#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
#include <sys/sockio.h>
-#include <net/if.h>
-#include <errno.h>
-#include <strings.h>
-#include <ipmp_mpathd.h>
-#include <libintl.h>
+#include <sys/types.h>
-static int if_down(int ifsock, struct lifreq *lifr);
-static int if_up(int ifsock, struct lifreq *lifr);
-static void send_cmd(int cmd, char *ifname);
-static int connect_to_mpathd(sa_family_t family);
-static void do_offline(char *ifname);
-static void undo_offline(char *ifname);
-static boolean_t offline_set(char *ifname);
+typedef void offline_func_t(const char *, ipmp_handle_t);
-#define IF_SEPARATOR ':'
-#define MAX_RETRIES 3
+static const char *progname;
+static int sioc4fd, sioc6fd;
+static offline_func_t do_offline, undo_offline;
+static boolean_t set_lifflags(const char *, uint64_t);
+static boolean_t is_offline(const char *);
+static void warn(const char *, ...);
+static void die(const char *, ...);
static void
usage()
{
- (void) fprintf(stderr, "Usage : if_mpadm [-d | -r] <interface_name>\n");
+ (void) fprintf(stderr, "Usage: %s [-d | -r] <interface>\n", progname);
+ exit(1);
}
-static void
-print_mpathd_error_msg(uint32_t error)
+static const char *
+mpadm_errmsg(uint32_t error)
{
switch (error) {
- case MPATHD_MIN_RED_ERROR:
- (void) fprintf(stderr, gettext(
- "Offline failed as there is no other functional "
- "interface available in the multipathing group "
- "for failing over the network access.\n"));
- break;
-
- case MPATHD_FAILBACK_PARTIAL:
- (void) fprintf(stderr, gettext(
- "Offline cannot be undone because multipathing "
- "configuration is not consistent across all the "
- "interfaces in the group.\n"));
- break;
-
+ case IPMP_EUNKIF:
+ return ("not a physical interface or not in an IPMP group");
+ case IPMP_EMINRED:
+ return ("no other functioning interfaces are in its IPMP "
+ "group");
default:
- /*
- * We shouldn't get here. All errors should have a
- * meaningful error message, as shown in the above
- * cases. If we get here, someone has made a mistake.
- */
- (void) fprintf(stderr, gettext(
- "Operation returned an unrecognized error: %u\n"),
- error);
- break;
+ return (ipmp_errmsg(error));
}
}
int
main(int argc, char **argv)
{
- char *ifname;
- int cmd = 0;
+ int retval;
+ ipmp_handle_t handle;
+ offline_func_t *ofuncp = NULL;
+ const char *ifname;
int c;
-#if !defined(TEXT_DOMAIN)
-#define TEXT_DOMAIN "SYS_TEST"
-#endif
+ if ((progname = strrchr(argv[0], '/')) != NULL)
+ progname++;
+ else
+ progname = argv[0];
+
+ (void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
while ((c = getopt(argc, argv, "d:r:")) != EOF) {
switch (c) {
case 'd':
ifname = optarg;
- cmd = MI_OFFLINE;
- if (offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Interface "
- "already offlined\n"));
- exit(1);
- }
+ ofuncp = do_offline;
break;
case 'r':
ifname = optarg;
- cmd = MI_UNDO_OFFLINE;
- if (!offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Interface not "
- "offlined\n"));
- exit(1);
- }
+ ofuncp = undo_offline;
break;
default :
usage();
- exit(1);
}
}
- if (cmd == 0) {
+ if (ofuncp == NULL)
usage();
- exit(1);
- }
/*
- * Send the command to in.mpathd which is generic to
- * both the commands. send_cmd returns only if there
- * is no error.
+ * Create the global V4 and V6 socket ioctl descriptors.
*/
- send_cmd(cmd, ifname);
- if (cmd == MI_OFFLINE) {
- do_offline(ifname);
- } else {
- undo_offline(ifname);
- }
+ sioc4fd = socket(AF_INET, SOCK_DGRAM, 0);
+ sioc6fd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (sioc4fd == -1 || sioc6fd == -1)
+ die("cannot create sockets");
- return (0);
-}
+ if ((retval = ipmp_open(&handle)) != IPMP_SUCCESS)
+ die("cannot create ipmp handle: %s\n", ipmp_errmsg(retval));
-/*
- * Is IFF_OFFLINE set ?
- * Returns B_FALSE on failure and B_TRUE on success.
- */
-boolean_t
-offline_set(char *ifname)
-{
- struct lifreq lifr;
- int s4;
- int s6;
- int ret;
-
- s4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (s4 < 0) {
- perror("socket");
- exit(1);
- }
- s6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (s6 < 0) {
- perror("socket");
- exit(1);
- }
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- ret = ioctl(s4, SIOCGLIFFLAGS, (caddr_t)&lifr);
- if (ret < 0) {
- if (errno != ENXIO) {
- perror("ioctl: SIOCGLIFFLAGS");
- exit(1);
- }
- ret = ioctl(s6, SIOCGLIFFLAGS, (caddr_t)&lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFFLAGS");
- exit(1);
- }
- }
- (void) close(s4);
- (void) close(s6);
- if (lifr.lifr_flags & IFF_OFFLINE)
- return (B_TRUE);
- else
- return (B_FALSE);
+ (*ofuncp)(ifname, handle);
+
+ ipmp_close(handle);
+ (void) close(sioc4fd);
+ (void) close(sioc6fd);
+
+ return (EXIT_SUCCESS);
}
/*
- * Sends the command to in.mpathd. If not successful, prints
- * an error message and exits.
+ * Checks whether IFF_OFFLINE is set on `ifname'.
*/
-void
-send_cmd(int cmd, char *ifname)
+boolean_t
+is_offline(const char *ifname)
{
- struct mi_offline mio;
- struct mi_undo_offline miu;
- struct mi_result me;
- int ret;
- int cmd_len;
- int i;
- int s;
-
- for (i = 0; i < MAX_RETRIES; i++) {
- s = connect_to_mpathd(AF_INET);
- if (s == -1) {
- s = connect_to_mpathd(AF_INET6);
- if (s == -1) {
- (void) fprintf(stderr, gettext("Cannot "
- "establish communication with "
- "in.mpathd.\n"));
- exit(1);
- }
- }
- switch (cmd) {
- case MI_OFFLINE :
- cmd_len = sizeof (struct mi_offline);
- bzero(&mio, cmd_len);
- mio.mio_command = cmd;
- (void) strncpy(mio.mio_ifname, ifname, LIFNAMSIZ);
- mio.mio_min_redundancy = 1;
- ret = write(s, &mio, cmd_len);
- if (ret != cmd_len) {
- /* errno is set only when ret is -1 */
- if (ret == -1)
- perror("write");
- (void) fprintf(stderr, gettext("Failed to "
- "successfully send command to "
- "in.mpathd.\n"));
- exit(1);
- }
- break;
- case MI_UNDO_OFFLINE:
- cmd_len = sizeof (struct mi_undo_offline);
- bzero(&miu, cmd_len);
- miu.miu_command = cmd;
- (void) strncpy(miu.miu_ifname, ifname, LIFNAMSIZ);
- ret = write(s, &miu, cmd_len);
- if (ret != cmd_len) {
- /* errno is set only when ret is -1 */
- if (ret == -1)
- perror("write");
- (void) fprintf(stderr, gettext("Failed to "
- "successfully send command to "
- "in.mpathd.\n"));
- exit(1);
- }
- break;
- default :
- (void) fprintf(stderr, "Unknown command \n");
- exit(1);
- }
+ struct lifreq lifr = { 0 };
- /* Read the result from mpathd */
- ret = read(s, &me, sizeof (me));
- if (ret != sizeof (me)) {
- /* errno is set only when ret is -1 */
- if (ret == -1)
- perror("read");
- (void) fprintf(stderr, gettext("Failed to successfully "
- "read result from in.mpathd.\n"));
- exit(1);
+ (void) strlcpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
+ if (ioctl(sioc4fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ if (errno != ENXIO ||
+ ioctl(sioc6fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ die("cannot get interface flags on %s", ifname);
}
- if (me.me_mpathd_error == 0) {
- if (i != 0) {
- /*
- * We retried at least once. Tell the user
- * that things succeeded now.
- */
- (void) fprintf(stderr,
- gettext("Retry Successful.\n"));
- }
- return; /* Successful */
- }
-
- if (me.me_mpathd_error == MPATHD_SYS_ERROR) {
- if (me.me_sys_error == EAGAIN) {
- (void) close(s);
- (void) sleep(1);
- (void) fprintf(stderr,
- gettext("Retrying ...\n"));
- continue; /* Retry */
- }
- errno = me.me_sys_error;
- perror("if_mpadm");
- } else {
- print_mpathd_error_msg(me.me_mpathd_error);
- }
- exit(1);
}
- /*
- * We come here only if we retry the operation multiple
- * times and did not succeed. Let the user try it again
- * later.
- */
- (void) fprintf(stderr,
- gettext("Device busy. Retry the operation later.\n"));
- exit(1);
+
+ return ((lifr.lifr_flags & IFF_OFFLINE) != 0);
}
static void
-do_offline(char *ifname)
+do_offline(const char *ifname, ipmp_handle_t handle)
{
- struct lifreq lifr;
- struct lifreq *lifcr;
- struct lifnum lifn;
- struct lifconf lifc;
- char *buf;
- int numifs;
- int n;
- char pi_name[LIFNAMSIZ + 1];
- char *cp;
- int ifsock_v4;
- int ifsock_v6;
- int af;
- int ret;
+ ifaddrlistx_t *ifaddrp, *ifaddrs;
+ int retval;
+
+ if (is_offline(ifname))
+ die("interface %s is already offline\n", ifname);
+
+ if ((retval = ipmp_offline(handle, ifname, 1)) != IPMP_SUCCESS)
+ die("cannot offline %s: %s\n", ifname, mpadm_errmsg(retval));
/*
- * Verify whether IFF_OFFLINE is not set as a sanity check.
- */
- if (!offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Operation failed : in.mpathd "
- "has not set IFF_OFFLINE on %s\n"), ifname);
- exit(1);
- }
- /*
- * Get both the sockets as we may need to bring both
- * IPv4 and IPv6 interfaces down.
- */
- ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (ifsock_v4 < 0) {
- perror("socket");
- exit(1);
- }
- ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (ifsock_v6 < 0) {
- perror("socket");
- exit(1);
- }
- /*
- * Get all the logicals for "ifname" and mark them down.
- * There is no easy way of doing this. We get all the
- * interfaces in the system using SICGLIFCONF and mark the
- * ones matching the name down.
+ * Get all the up addresses for `ifname' and bring them down.
*/
- lifn.lifn_family = AF_UNSPEC;
- lifn.lifn_flags = 0;
- if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
- perror("ioctl : SIOCGLIFNUM");
- exit(1);
- }
- numifs = lifn.lifn_count;
-
- buf = calloc(numifs, sizeof (struct lifreq));
- if (buf == NULL) {
- perror("calloc");
- exit(1);
- }
+ if (ifaddrlistx(ifname, IFF_UP, 0, &ifaddrs) == -1)
+ die("cannot get addresses on %s", ifname);
- lifc.lifc_family = AF_UNSPEC;
- lifc.lifc_flags = 0;
- lifc.lifc_len = numifs * sizeof (struct lifreq);
- lifc.lifc_buf = buf;
+ for (ifaddrp = ifaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!(ifaddrp->ia_flags & IFF_OFFLINE))
+ warn("IFF_OFFLINE vanished on %s\n", ifaddrp->ia_name);
- if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
- perror("ioctl : SIOCGLIFCONF");
- exit(1);
+ if (!set_lifflags(ifaddrp->ia_name,
+ ifaddrp->ia_flags & ~IFF_UP))
+ warn("cannot bring down address on %s\n",
+ ifaddrp->ia_name);
}
- lifcr = (struct lifreq *)lifc.lifc_req;
- for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifcr++) {
- af = lifcr->lifr_addr.ss_family;
- (void) strncpy(pi_name, lifcr->lifr_name,
- sizeof (pi_name));
- pi_name[sizeof (pi_name) - 1] = '\0';
- if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
- *cp = '\0';
- if (strcmp(pi_name, ifname) == 0) {
- /* It matches the interface name that was offlined */
- (void) strncpy(lifr.lifr_name, lifcr->lifr_name,
- sizeof (lifr.lifr_name));
- if (af == AF_INET)
- ret = if_down(ifsock_v4, &lifr);
- else
- ret = if_down(ifsock_v6, &lifr);
- if (ret != 0) {
- (void) fprintf(stderr, gettext("Bringing down "
- "the interfaces failed.\n"));
- exit(1);
- }
- }
- }
+ ifaddrlistx_free(ifaddrs);
}
static void
-undo_offline(char *ifname)
+undo_offline(const char *ifname, ipmp_handle_t handle)
{
- struct lifreq lifr;
- struct lifreq *lifcr;
- struct lifnum lifn;
- struct lifconf lifc;
- char *buf;
- int numifs;
- int n;
- char pi_name[LIFNAMSIZ + 1];
- char *cp;
- int ifsock_v4;
- int ifsock_v6;
- int af;
- int ret;
+ ifaddrlistx_t *ifaddrp, *ifaddrs;
+ int retval;
+
+ if (!is_offline(ifname))
+ die("interface %s is not offline\n", ifname);
/*
- * Verify whether IFF_OFFLINE is set as a sanity check.
- */
- if (offline_set(ifname)) {
- (void) fprintf(stderr, gettext("Operation failed : in.mpathd "
- "has not cleared IFF_OFFLINE on %s\n"), ifname);
- exit(1);
- }
- /*
- * Get both the sockets as we may need to bring both
- * IPv4 and IPv6 interfaces UP.
- */
- ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
- if (ifsock_v4 < 0) {
- perror("socket");
- exit(1);
- }
- ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
- if (ifsock_v6 < 0) {
- perror("socket");
- exit(1);
- }
- /*
- * Get all the logicals for "ifname" and mark them up.
- * There is no easy way of doing this. We get all the
- * interfaces in the system using SICGLIFCONF and mark the
- * ones matching the name up.
+ * Get all the down addresses for `ifname' and bring them up.
*/
- lifn.lifn_family = AF_UNSPEC;
- lifn.lifn_flags = 0;
- if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
- perror("ioctl : SIOCGLIFNUM");
- exit(1);
- }
- numifs = lifn.lifn_count;
-
- buf = calloc(numifs, sizeof (struct lifreq));
- if (buf == NULL) {
- perror("calloc");
- exit(1);
- }
+ if (ifaddrlistx(ifname, 0, IFF_UP, &ifaddrs) == -1)
+ die("cannot get addresses for %s", ifname);
- lifc.lifc_family = AF_UNSPEC;
- lifc.lifc_flags = 0;
- lifc.lifc_len = numifs * sizeof (struct lifreq);
- lifc.lifc_buf = buf;
+ for (ifaddrp = ifaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!(ifaddrp->ia_flags & IFF_OFFLINE))
+ warn("IFF_OFFLINE vanished on %s\n", ifaddrp->ia_name);
- if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
- perror("ioctl : SIOCGLIFCONF");
- exit(1);
+ if (!set_lifflags(ifaddrp->ia_name, ifaddrp->ia_flags | IFF_UP))
+ warn("cannot bring up address on %s\n",
+ ifaddrp->ia_name);
}
- lifcr = (struct lifreq *)lifc.lifc_req;
- for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifcr++) {
- af = lifcr->lifr_addr.ss_family;
- (void) strncpy(pi_name, lifcr->lifr_name,
- sizeof (pi_name));
- pi_name[sizeof (pi_name) - 1] = '\0';
- if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
- *cp = '\0';
-
- if (strcmp(pi_name, ifname) == 0) {
- /* It matches the interface name that was offlined */
- (void) strncpy(lifr.lifr_name, lifcr->lifr_name,
- sizeof (lifr.lifr_name));
- if (af == AF_INET)
- ret = if_up(ifsock_v4, &lifr);
- else
- ret = if_up(ifsock_v6, &lifr);
- if (ret != 0) {
- (void) fprintf(stderr, gettext("Bringing up "
- "the interfaces failed.\n"));
- exit(1);
- }
- }
- }
-}
+ ifaddrlistx_free(ifaddrs);
-/*
- * Returns -1 on failure. Returns the socket file descriptor on
- * success.
- */
-static int
-connect_to_mpathd(sa_family_t family)
-{
- int s;
- struct sockaddr_storage ss;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
- int addrlen;
- int ret;
- int on;
-
- s = socket(family, SOCK_STREAM, 0);
- if (s < 0) {
- perror("socket");
- return (-1);
- }
- bzero((char *)&ss, sizeof (ss));
- ss.ss_family = family;
/*
- * Need to bind to a privileged port. For non-root, this
- * will fail. in.mpathd verifies that only commands coming
- * from privileged ports succeed so that the ordinary user
- * can't issue offline commands.
+ * Undo the offline.
*/
- on = 1;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- perror("setsockopt : TCP_ANONPRIVBIND");
- exit(1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = 0;
- sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addrlen = sizeof (struct sockaddr_in);
- break;
- case AF_INET6:
- sin6->sin6_port = 0;
- sin6->sin6_addr = loopback_addr;
- addrlen = sizeof (struct sockaddr_in6);
- break;
- }
- ret = bind(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- perror("bind");
- return (-1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = htons(MPATHD_PORT);
- break;
- case AF_INET6:
- sin6->sin6_port = htons(MPATHD_PORT);
- break;
+ if ((retval = ipmp_undo_offline(handle, ifname)) != IPMP_SUCCESS) {
+ die("cannot undo-offline %s: %s\n", ifname,
+ mpadm_errmsg(retval));
}
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- perror("connect");
- return (-1);
- }
- on = 0;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- perror("setsockopt : TCP_ANONPRIVBIND");
- return (-1);
- }
- return (s);
+
+ /*
+ * Verify that IFF_OFFLINE has been cleared, as a sanity check.
+ */
+ if (is_offline(ifname))
+ warn("in.mpathd has not cleared IFF_OFFLINE on %s\n", ifname);
}
/*
- * Bring down the interface specified by the name lifr->lifr_name.
- *
- * Returns -1 on failure. Returns 0 on success.
+ * Change `lifname' to have `flags' set. Returns B_TRUE on success.
*/
-static int
-if_down(int ifsock, struct lifreq *lifr)
+static boolean_t
+set_lifflags(const char *lifname, uint64_t flags)
{
- int ret;
+ struct lifreq lifr = { 0 };
+ int fd = (flags & IFF_IPV4) ? sioc4fd : sioc6fd;
- ret = ioctl(ifsock, SIOCGLIFFLAGS, (caddr_t)lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFFLAGS");
- return (-1);
- }
+ (void) strlcpy(lifr.lifr_name, lifname, LIFNAMSIZ);
+ lifr.lifr_flags = flags;
- /* IFF_OFFLINE was set to start with. Is it still there ? */
- if (!(lifr->lifr_flags & (IFF_OFFLINE))) {
- (void) fprintf(stderr, gettext("IFF_OFFLINE disappeared on "
- "%s\n"), lifr->lifr_name);
- return (-1);
- }
- lifr->lifr_flags &= ~IFF_UP;
- ret = ioctl(ifsock, SIOCSLIFFLAGS, (caddr_t)lifr);
- if (ret < 0) {
- perror("ioctl: SIOCSLIFFLAGS");
- return (-1);
- }
- return (0);
+ return (ioctl(fd, SIOCSLIFFLAGS, &lifr) >= 0);
}
-/*
- * Bring up the interface specified by the name lifr->lifr_name.
- *
- * Returns -1 on failure. Returns 0 on success.
- */
-static int
-if_up(int ifsock, struct lifreq *lifr)
+/* PRINTFLIKE1 */
+static void
+die(const char *format, ...)
{
- int ret;
- boolean_t zeroaddr = B_FALSE;
- struct sockaddr_in *addr;
-
- ret = ioctl(ifsock, SIOCGLIFADDR, lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFADDR");
- return (-1);
- }
+ va_list alist;
+ char *errstr = strerror(errno);
- addr = (struct sockaddr_in *)&lifr->lifr_addr;
- switch (addr->sin_family) {
- case AF_INET:
- zeroaddr = (addr->sin_addr.s_addr == INADDR_ANY);
- break;
+ format = gettext(format);
+ (void) fprintf(stderr, gettext("%s: fatal: "), progname);
- case AF_INET6:
- zeroaddr = IN6_IS_ADDR_UNSPECIFIED(
- &((struct sockaddr_in6 *)addr)->sin6_addr);
- break;
+ va_start(alist, format);
+ (void) vfprintf(stderr, format, alist);
+ va_end(alist);
- default:
- break;
- }
+ if (strchr(format, '\n') == NULL)
+ (void) fprintf(stderr, ": %s\n", errstr);
- ret = ioctl(ifsock, SIOCGLIFFLAGS, lifr);
- if (ret < 0) {
- perror("ioctl: SIOCGLIFFLAGS");
- return (-1);
- }
- /*
- * Don't affect the state of addresses that failed back.
- *
- * XXX Link local addresses that are not marked IFF_NOFAILOVER
- * will not be brought up. Link local addresses never failover.
- * When the interface was offlined, we brought the link local
- * address down. We will not bring it up now if IFF_NOFAILOVER
- * is not marked. We check for IFF_NOFAILOVER below so that
- * we want to maintain the state of all other addresses as it
- * was before offline. Normally link local addresses are marked
- * IFF_NOFAILOVER and hence this is not an issue. These can
- * be fixed in future with RCM and it is beyond the scope
- * of if_mpadm to maintain state and do this correctly.
- */
- if (!(lifr->lifr_flags & IFF_NOFAILOVER))
- return (0);
+ exit(EXIT_FAILURE);
+}
- /*
- * When a data address associated with the physical interface itself
- * is failed over (e.g., qfe0, rather than qfe0:1), the kernel must
- * fill the ipif data structure for qfe0 with a placeholder entry (the
- * "replacement ipif"). Replacement ipif's cannot be brought IFF_UP
- * (nor would it make any sense to do so), so we must be careful to
- * skip them; thankfully they can be easily identified since they
- * all have a zeroed address.
- */
- if (zeroaddr)
- return (0);
-
- /* IFF_OFFLINE was not set to start with. Is it there ? */
- if (lifr->lifr_flags & IFF_OFFLINE) {
- (void) fprintf(stderr,
- gettext("IFF_OFFLINE set wrongly on %s\n"),
- lifr->lifr_name);
- return (-1);
- }
- lifr->lifr_flags |= IFF_UP;
- ret = ioctl(ifsock, SIOCSLIFFLAGS, lifr);
- if (ret < 0) {
- perror("ioctl: SIOCSLIFFLAGS");
- return (-1);
- }
- return (0);
+/* PRINTFLIKE1 */
+static void
+warn(const char *format, ...)
+{
+ va_list alist;
+ char *errstr = strerror(errno);
+
+ format = gettext(format);
+ (void) fprintf(stderr, gettext("%s: warning: "), progname);
+
+ va_start(alist, format);
+ (void) vfprintf(stderr, format, alist);
+ va_end(alist);
+
+ if (strchr(format, '\n') == NULL)
+ (void) fprintf(stderr, ": %s\n", errstr);
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
index 69e91758ea..e99f2945a7 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
@@ -19,10 +19,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#
PROG = ifconfig
ROOTFS_PROG = $(PROG)
@@ -38,7 +37,7 @@ COMMONSRCS= $(CMDINETCOMMONDIR)/$(COMMONOBJS:%.o=%.c)
SRCS= $(LOCALSRCS) $(COMMONSRCS)
CPPFLAGS += -I$(CMDINETCOMMONDIR) -I$(SRC)/common/net/dhcp
-LDLIBS += -ldhcpagent -linetcfg -ldlpi -ldladm
+LDLIBS += -ldhcpagent -ldlpi -linetutil -linetcfg -lipmp -ldladm
LINTFLAGS += -m
ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%)
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h
index c993baeb02..4aa1aa0ed7 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/defs.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -11,13 +11,12 @@
#ifndef _DEFS_H
#define _DEFS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
#include <errno.h>
+#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
@@ -54,7 +53,10 @@ extern "C" {
#include <assert.h>
#include <ipmp_mpathd.h>
+#include <ipmp_admin.h>
#include <inetcfg.h>
+#include <libinetutil.h>
+#include <alloca.h>
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
index f49fca249c..d5517a4700 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -23,6 +23,7 @@
#define TUN_NAME "tun"
#define ATUN_NAME "atun"
#define TUN6TO4_NAME "6to4tun"
+#define IPMPSTUB (void *)-1
typedef struct if_flags {
uint64_t iff_value;
@@ -67,7 +68,20 @@ static if_flags_t if_flags_tbl[] = {
{ IFF_TEMPORARY, "TEMPORARY" },
{ IFF_FIXEDMTU, "FIXEDMTU" },
{ IFF_VIRTUAL, "VIRTUAL" },
- { IFF_DUPLICATE, "DUPLICATE" }
+ { IFF_DUPLICATE, "DUPLICATE" },
+ { IFF_IPMP, "IPMP"}
+};
+
+typedef struct {
+ const char *ia_app;
+ uint64_t ia_flag;
+ uint_t ia_tries;
+} if_appflags_t;
+
+static const if_appflags_t if_appflags_tbl[] = {
+ { "dhcpagent(1M)", IFF_DHCPRUNNING, 1 },
+ { "in.ndpd(1M)", IFF_ADDRCONF, 3 },
+ { NULL, 0, 0 }
};
static struct lifreq lifr;
@@ -75,7 +89,6 @@ static struct lifreq lifr;
static char name[LIFNAMSIZ];
/* foreach interface saved name */
static char origname[LIFNAMSIZ];
-static char savedname[LIFNAMSIZ]; /* For addif */
static int setaddr;
/*
@@ -89,20 +102,7 @@ static int setaddr;
#define NO_ESP_AALG 256
#define NO_ESP_EALG 256
-/*
- * iface_t
- * used by setifether to create a list of interfaces to mark
- * down-up when changing the ethernet address of an interface
- */
-typedef struct iface {
- struct lifreq lifr;
- struct iface *next; /* pointer to the next list element */
-} iface_t;
-
-static iface_t *logifs = NULL; /* list of logical interfaces */
-static iface_t *phyif = NULL; /* physical interface */
-
-int s;
+int s, s4, s6;
int af = AF_INET; /* default address family */
int debug = 0;
int all = 0; /* setifdhcp() needs to know this */
@@ -113,6 +113,7 @@ int v4compat = 0; /* Compatible printing format */
* Function prototypes for command functions.
*/
static int addif(char *arg, int64_t param);
+static int inetipmp(char *arg, int64_t param);
static int inetplumb(char *arg, int64_t param);
static int inetunplumb(char *arg, int64_t param);
static int removeif(char *arg, int64_t param);
@@ -141,7 +142,7 @@ static int modinsert(char *arg, int64_t param);
static int modremove(char *arg, int64_t param);
static int setifgroupname(char *arg, int64_t param);
static int configinfo(char *arg, int64_t param);
-static void print_config_flags(uint64_t flags);
+static void print_config_flags(int af, uint64_t flags);
static void print_flags(uint64_t flags);
static void print_ifether(char *ifname);
static int set_tun_encap_limit(char *arg, int64_t param);
@@ -150,6 +151,7 @@ static int set_tun_hop_limit(char *arg, int64_t param);
static int setzone(char *arg, int64_t param);
static int setallzones(char *arg, int64_t param);
static int setifsrc(char *arg, int64_t param);
+static int lifnum(const char *ifname);
/*
* Address family specific function prototypes.
@@ -179,19 +181,22 @@ static int settaddr(char *, int (*)(icfg_handle_t,
static void status(void);
static void ifstatus(const char *);
static void usage(void);
-static int strioctl(int s, int cmd, char *buf, int buflen);
+static int strioctl(int s, int cmd, void *buf, int buflen);
static int setifdhcp(const char *caller, const char *ifname,
int argc, char *argv[]);
static int ip_domux2fd(int *, int *, int *, int *, int *);
static int ip_plink(int, int, int, int, int);
static int modop(char *arg, char op);
-static void selectifs(int argc, char *argv[], int af,
- struct lifreq *lifrp);
-static int updownifs(iface_t *ifs, int up);
static int find_all_global_interfaces(struct lifconf *lifcp, char **buf,
int64_t lifc_flags);
static int find_all_zone_interfaces(struct lifconf *lifcp, char **buf,
int64_t lifc_flags);
+static int create_ipmp(const char *grname, int af, const char *ifname,
+ boolean_t implicit);
+static int create_ipmp_peer(int af, const char *ifname);
+static void start_ipmp_daemon(void);
+static boolean_t ifaddr_up(ifaddrlistx_t *ifaddrp);
+static boolean_t ifaddr_down(ifaddrlistx_t *ifaddrp);
#define max(a, b) ((a) < (b) ? (b) : (a))
@@ -251,6 +256,7 @@ struct cmd {
{ "index", NEXTARG, setifindex, 0, AF_ANY },
{ "broadcast", NEXTARG, setifbroadaddr, 0, AF_INET },
{ "auto-revarp", 0, setifrevarp, 1, AF_INET },
+ { "ipmp", 0, inetipmp, 1, AF_ANY },
{ "plumb", 0, inetplumb, 1, AF_ANY },
{ "unplumb", 0, inetunplumb, 0, AF_ANY },
{ "subnet", NEXTARG, setifsubnet, 0, AF_ANY },
@@ -297,22 +303,30 @@ struct cmd {
typedef struct if_config_cmd {
uint64_t iff_flag;
+ int iff_af;
char *iff_name;
} if_config_cmd_t;
+/*
+ * NOTE: print_config_flags() processes this table in order, so we put "up"
+ * last so that we can be sure "-failover" will take effect first. Otherwise,
+ * IPMP test addresses will erroneously migrate to the IPMP interface.
+ */
static if_config_cmd_t if_config_cmd_tbl[] = {
- { IFF_UP, "up" },
- { IFF_NOTRAILERS, "-trailers" },
- { IFF_PRIVATE, "private" },
- { IFF_NOXMIT, "-xmit" },
- { IFF_ANYCAST, "anycast" },
- { IFF_NOLOCAL, "-local" },
- { IFF_DEPRECATED, "deprecated" },
- { IFF_NOFAILOVER, "-failover" },
- { IFF_STANDBY, "standby" },
- { IFF_FAILED, "failed" },
- { IFF_PREFERRED, "preferred" },
- { 0, 0 },
+ { IFF_NOTRAILERS, AF_UNSPEC, "-trailers" },
+ { IFF_PRIVATE, AF_UNSPEC, "private" },
+ { IFF_NOXMIT, AF_UNSPEC, "-xmit" },
+ { IFF_ANYCAST, AF_INET6, "anycast" },
+ { IFF_NOLOCAL, AF_UNSPEC, "-local" },
+ { IFF_DEPRECATED, AF_UNSPEC, "deprecated" },
+ { IFF_NOFAILOVER, AF_UNSPEC, "-failover" },
+ { IFF_STANDBY, AF_UNSPEC, "standby" },
+ { IFF_FAILED, AF_UNSPEC, "failed" },
+ { IFF_PREFERRED, AF_UNSPEC, "preferred" },
+ { IFF_NONUD, AF_INET6, "-nud" },
+ { IFF_NOARP, AF_INET, "-arp" },
+ { IFF_UP, AF_UNSPEC, "up" },
+ { 0, 0, NULL },
};
typedef struct ni {
@@ -345,10 +359,11 @@ struct afswtch *afp; /* the address family being set or asked about */
int
main(int argc, char *argv[])
{
- /* Include IFF_NOXMIT, IFF_TEMPORARY and all zone interfaces */
- int64_t lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
+ int64_t lifc_flags;
char *default_ip_str;
+ lifc_flags = LIFC_NOXMIT|LIFC_TEMPORARY|LIFC_ALLZONES|LIFC_UNDER_IPMP;
+
if (argc < 2) {
usage();
exit(1);
@@ -388,9 +403,10 @@ main(int argc, char *argv[])
}
s = socket(SOCKET_AF(af), SOCK_DGRAM, 0);
- if (s < 0) {
+ s4 = socket(AF_INET, SOCK_DGRAM, 0);
+ s6 = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (s == -1 || s4 == -1 || s6 == -1)
Perror0_exit("socket");
- }
/*
* Special interface names is any combination of these flags.
@@ -1441,39 +1457,38 @@ setifdstaddr(char *addr, int64_t param)
static int
setifflags(char *val, int64_t value)
{
- int phyintlen, origphyintlen;
+ struct lifreq lifrl; /* local lifreq struct */
+ boolean_t bringup = _B_FALSE;
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0)
Perror0_exit("setifflags: SIOCGLIFFLAGS");
- if (value == IFF_NOFAILOVER) {
- /*
- * Fail if '-failover' is set after a prior addif created the
- * alias on a different interface. This can happen when the
- * interface is part of an IPMP group.
- */
- phyintlen = strcspn(name, ":");
- origphyintlen = strcspn(origname, ":");
- if (phyintlen != origphyintlen ||
- strncmp(name, origname, phyintlen) != 0) {
- (void) fprintf(stderr, "ifconfig: can't set -failover "
- "on failed/standby/offlined interface %s\n",
- origname);
- exit(1);
- }
- }
-
if (value < 0) {
value = -value;
+
+ if ((value & IFF_NOFAILOVER) && (lifr.lifr_flags & IFF_UP)) {
+ /*
+ * The kernel does not allow administratively up test
+ * addresses to be converted to data addresses. Bring
+ * the address down first, then bring it up after it's
+ * been converted to a data address.
+ */
+ lifr.lifr_flags &= ~IFF_UP;
+ (void) ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr);
+ bringup = _B_TRUE;
+ }
+
lifr.lifr_flags &= ~value;
- if ((value & IFF_UP) && (lifr.lifr_flags & IFF_DUPLICATE)) {
+ if ((value & (IFF_UP | IFF_NOFAILOVER)) &&
+ (lifr.lifr_flags & IFF_DUPLICATE)) {
/*
* If the user is trying to mark an interface with a
- * duplicate address as "down," then fetch the address
- * and set it. This will cause IP to clear the
- * IFF_DUPLICATE flag and stop the automatic recovery
- * timer.
+ * duplicate address as "down," or convert a duplicate
+ * test address to a data address, then fetch the
+ * address and set it. This will cause IP to clear
+ * the IFF_DUPLICATE flag and stop the automatic
+ * recovery timer.
*/
value = lifr.lifr_flags;
if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) >= 0)
@@ -1483,10 +1498,48 @@ setifflags(char *val, int64_t value)
} else {
lifr.lifr_flags |= value;
}
+
+ /*
+ * If we're about to bring up an underlying physical IPv6 interface in
+ * an IPMP group, ensure the IPv6 IPMP interface is also up. This is
+ * for backward compatibility with legacy configurations in which
+ * there are no explicit hostname files for IPMP interfaces. (For
+ * IPv4, this is automatically handled by the kernel when migrating
+ * the underlying interface's data address to the IPMP interface.)
+ */
+ (void) strlcpy(lifrl.lifr_name, name, LIFNAMSIZ);
+
+ if (lifnum(lifr.lifr_name) == 0 &&
+ (lifr.lifr_flags & (IFF_UP|IFF_IPV6)) == (IFF_UP|IFF_IPV6) &&
+ ioctl(s, SIOCGLIFGROUPNAME, &lifrl) == 0 &&
+ lifrl.lifr_groupname[0] != '\0') {
+ lifgroupinfo_t lifgr;
+
+ (void) strlcpy(lifgr.gi_grname, lifrl.lifr_groupname,
+ LIFGRNAMSIZ);
+ if (ioctl(s, SIOCGLIFGROUPINFO, &lifgr) == -1)
+ Perror0_exit("setifflags: SIOCGLIFGROUPINFO");
+
+ (void) strlcpy(lifrl.lifr_name, lifgr.gi_grifname, LIFNAMSIZ);
+ if (ioctl(s, SIOCGLIFFLAGS, &lifrl) == -1)
+ Perror0_exit("setifflags: SIOCGLIFFLAGS");
+ if (!(lifrl.lifr_flags & IFF_UP)) {
+ lifrl.lifr_flags |= IFF_UP;
+ if (ioctl(s, SIOCSLIFFLAGS, &lifrl) == -1)
+ Perror0_exit("setifflags: SIOCSLIFFLAGS");
+ }
+ }
+
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
+ if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0)
Perror0_exit("setifflags: SIOCSLIFFLAGS");
+
+ if (bringup) {
+ lifr.lifr_flags |= IFF_UP;
+ if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0)
+ Perror0_exit("setifflags: SIOCSLIFFLAGS IFF_UP");
}
+
return (0);
}
@@ -1524,12 +1577,21 @@ setifindex(char *val, int64_t param)
}
+/*
+ * Intentionally empty DLPI notification callback.  setifether() passes it to
+ * dlpi_enabnotify() only to find out whether the link supports
+ * DL_NOTE_PHYS_ADDR, and disables the notification again as soon as the
+ * registration succeeds, so there is nothing to do here.
+ */
/* ARGSUSED */
+static void
+notifycb(dlpi_handle_t dh, dlpi_notifyinfo_t *dnip, void *arg)
+{
+}
+
+/* ARGSUSED */
static int
setifether(char *addr, int64_t param)
{
- uchar_t *ea;
- iface_t *current;
- int maclen;
+ uchar_t *hwaddr;
+ int hwaddrlen;
+ int retval;
+ ifaddrlistx_t *ifaddrp, *ifaddrs = NULL;
+ dlpi_handle_t dh;
+ dlpi_notifyid_t id;
if (addr == NULL) {
ifstatus(name);
@@ -1537,9 +1599,6 @@ setifether(char *addr, int64_t param)
return (0);
}
- phyif = NULL;
- logifs = NULL;
-
/*
* if the IP interface in the arguments is a logical
* interface, exit with an error now.
@@ -1550,79 +1609,68 @@ setifether(char *addr, int64_t param)
exit(1);
}
- ea = _link_aton(addr, &maclen);
- if (ea == NULL) {
- if (maclen == -1)
+ if ((hwaddr = _link_aton(addr, &hwaddrlen)) == NULL) {
+ /*
+ * hwaddr is NULL on this path, so report the string the user
+ * supplied (addr) rather than handing a null pointer to "%s".
+ */
+ if (hwaddrlen == -1)
(void) fprintf(stderr,
"ifconfig: %s: bad address\n", addr);
else
(void) fprintf(stderr, "ifconfig: malloc() failed\n");
exit(1);
}
- (void) strncpy(savedname, name, sizeof (savedname));
+ if ((retval = dlpi_open(name, &dh, 0)) != DLPI_SUCCESS)
+ Perrdlpi_exit("cannot dlpi_open() link", name, retval);
- /*
- * Call selectifs only for the IP interfaces that are ipv4.
- * offflags == IFF_IPV6 because you should not change the
- * Ethernet address of an ipv6 interface
- */
- foreachinterface(selectifs, 0, (char **)NULL, 0, 0, IFF_IPV6, 0);
+ if ((retval = dlpi_bind(dh, DLPI_ANY_SAP, NULL)) != DLPI_SUCCESS)
+ Perrdlpi_exit("cannot dlpi_bind() link", name, retval);
- /* If physical interface not found, exit now */
- if (phyif == NULL) {
- (void) fprintf(stderr,
- "ifconfig: interface %s not found\n", savedname);
- exit(1);
- }
-
- /* Restore */
- (void) strncpy(name, savedname, sizeof (name));
- (void) strncpy(origname, savedname, sizeof (origname));
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
-
- /*
- * close and reopen the socket
- * we don't know which type of socket we have now
- */
- (void) close(s);
- s = socket(SOCKET_AF(AF_UNSPEC), SOCK_DGRAM, 0);
- if (s < 0) {
- Perror0_exit("socket");
- }
-
- /*
- * mark down the logical interfaces first,
- * and then the physical interface
- */
- if (updownifs(logifs, 0) < 0 || updownifs(phyif, 0) < 0) {
- Perror0_exit("mark down interface failed");
+ retval = dlpi_enabnotify(dh, DL_NOTE_PHYS_ADDR, notifycb, NULL, &id);
+ if (retval == DLPI_SUCCESS) {
+ (void) dlpi_disabnotify(dh, id, NULL);
+ } else {
+ /*
+ * This link does not support DL_NOTE_PHYS_ADDR: bring down
+ * all of the addresses to flush the old hardware address
+ * information out of IP.
+ *
+ * NOTE: Skipping this when DL_NOTE_PHYS_ADDR is supported is
+ * more than an optimization: in.mpathd will set IFF_OFFLINE
+ * if it's notified and the new address is a duplicate of
+ * another in the group -- but the flags manipulation in
+ * ifaddr_{down,up}() cannot be atomic and thus might clobber
+ * IFF_OFFLINE, confusing in.mpathd.
+ */
+ if (ifaddrlistx(name, IFF_UP, 0, &ifaddrs) == -1)
+ Perror2_exit(name, "cannot get address list");
+
+ ifaddrp = ifaddrs;
+ for (; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_down(ifaddrp)) {
+ Perror2_exit(ifaddrp->ia_name,
+ "cannot bring down");
+ }
+ }
}
/*
- * Change the physical address
+ * Change the hardware address.
*/
- if (dlpi_set_address(savedname, ea, maclen) == -1) {
+ retval = dlpi_set_physaddr(dh, DL_CURR_PHYS_ADDR, hwaddr, hwaddrlen);
+ if (retval != DLPI_SUCCESS) {
(void) fprintf(stderr,
- "ifconfig: failed setting mac address on %s\n",
- savedname);
+ "ifconfig: failed setting mac address on %s\n", name);
}
+ dlpi_close(dh);
/*
- * if any interfaces were marked down before changing the
- * ethernet address, put them up again.
- * First the physical interface, then the logical ones.
+ * If any addresses were brought down before changing the hardware
+ * address, bring them up again.
*/
- if (updownifs(phyif, 1) < 0 || updownifs(logifs, 1) < 0) {
- Perror0_exit("mark down interface failed");
- }
-
- /* Free the memory allocated by selectifs */
- free(phyif);
- for (current = logifs; current != NULL; current = logifs) {
- logifs = logifs->next;
- free(current);
+ for (ifaddrp = ifaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_up(ifaddrp))
+ Perror2_exit(ifaddrp->ia_name, "cannot bring up");
}
+ ifaddrlistx_free(ifaddrs);
return (0);
}
@@ -1655,8 +1703,8 @@ print_ifether(char *ifname)
}
(void) close(fd);
- /* Virtual interfaces don't have MAC addresses */
- if (lifr.lifr_flags & IFF_VIRTUAL)
+ /* VNI and IPMP interfaces don't have MAC addresses */
+ if (lifr.lifr_flags & (IFF_VIRTUAL|IFF_IPMP))
return;
/*
@@ -1685,104 +1733,6 @@ print_ifether(char *ifname)
}
/*
- * static void selectifs(int argc, char *argv[], int af, struct lifreq *rp)
- *
- * Called inside setifether() to create a list of interfaces to
- * mark down/up when changing the Ethernet address.
- * If the current interface is the physical interface passed
- * as an argument to ifconfig, update phyif.
- * If the current interface is a logical interface associated
- * to the physical interface, add it to the logifs list.
- */
-/* ARGSUSED */
-static void
-selectifs(int argc, char *argv[], int af, struct lifreq *rp)
-{
- char *colonp;
- int length;
- iface_t *current;
-
- /*
- * savedname= name of the IP interface to which you want to
- * change ethernet address
- * name= name of the current IP interface
- */
- colonp = strchr(name, ':');
- if (colonp == NULL)
- length = max(strlen(savedname), strlen(name));
- else
- length = max(strlen(savedname), colonp - name);
- if (strncmp(savedname, name, length) == 0) {
- (void) strcpy(lifr.lifr_name, name);
- if (ioctl(s, SIOCGLIFFLAGS, &lifr) < 0) {
- Perror0("selectifs: SIOCGLIFFLAGS");
- return;
- }
-
- if ((current = malloc(sizeof (iface_t))) == NULL) {
- Perror0_exit("selectifs: malloc failed\n");
- }
-
- if (colonp == NULL) {
- /* this is the physical interface */
- phyif = current;
- bcopy(&lifr, &phyif->lifr, sizeof (struct lifreq));
- phyif->next = NULL;
- } else {
- /* this is a logical interface */
- bcopy(&lifr, &current->lifr, sizeof (struct lifreq));
- current->next = logifs;
- logifs = current;
- }
- }
-}
-
-/*
- * static int updownifs(iface_t *ifs, int up)
- *
- * It takes in input a list of IP interfaces (ifs)
- * and a flag (up).
- * It marks each interface in the list down (up = 0)
- * or up (up > 0). This is done ONLY if the IP
- * interface was originally up.
- *
- * Return values:
- * 0 = everything OK
- * -1 = problem
- */
-static int
-updownifs(iface_t *ifs, int up)
-{
- iface_t *current;
- int ret = 0;
- int save_errno;
- char savename[LIFNAMSIZ];
- uint64_t orig_flags;
-
- for (current = ifs; current != NULL; current = current->next) {
- if (current->lifr.lifr_flags & IFF_UP) {
- orig_flags = current->lifr.lifr_flags;
- if (!up)
- current->lifr.lifr_flags &= ~IFF_UP;
- if (ioctl(s, SIOCSLIFFLAGS, &current->lifr) < 0) {
- save_errno = errno;
- (void) strcpy(savename,
- current->lifr.lifr_name);
- ret = -1;
- }
- if (!up) /* restore the original flags */
- current->lifr.lifr_flags = orig_flags;
- }
- }
-
- if (ret == -1) {
- (void) strcpy(lifr.lifr_name, savename);
- errno = save_errno;
- }
- return (ret);
-}
-
-/*
* static int find_all_global_interfaces(struct lifconf *lifcp, char **buf,
* int64_t lifc_flags)
*
@@ -2109,130 +2059,217 @@ setiftoken(char *addr, int64_t param)
return (0);
}
-/*
- * Return value: 0 on success, -1 on failure.
- */
-static int
-connect_to_mpathd(int family)
-{
- int s;
- struct sockaddr_storage ss;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
- int addrlen;
- int ret;
- int on;
-
- s = socket(family, SOCK_STREAM, 0);
- if (s < 0) {
- Perror0_exit("connect_to_mpathd: socket");
- }
- (void) bzero((char *)&ss, sizeof (ss));
- ss.ss_family = family;
- /*
- * Need to bind to a privileged port. For non-root, this
- * will fail. in.mpathd verifies that only commands coming
- * from privileged ports succeed so that ordinary users
- * can't connect and start talking to in.mpathd
- */
- on = 1;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- Perror0_exit("connect_to_mpathd: setsockopt");
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = 0;
- sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addrlen = sizeof (struct sockaddr_in);
- break;
- case AF_INET6:
- sin6->sin6_port = 0;
- sin6->sin6_addr = loopback_addr;
- addrlen = sizeof (struct sockaddr_in6);
- break;
- }
- ret = bind(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- (void) close(s);
- return (-1);
- }
-
- switch (family) {
- case AF_INET:
- sin->sin_port = htons(MPATHD_PORT);
- break;
- case AF_INET6:
- sin6->sin6_port = htons(MPATHD_PORT);
- break;
- }
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- (void) close(s);
- return (ret);
-}
-
+/*
+ * Place interface `name' into IPMP group `grname' with SIOCSLIFGROUPNAME.
+ * The ioctl is retried in a loop; each errno it can fail with is resolved
+ * (creating or extending the group, dropping an old membership, bringing
+ * addresses down, waiting for application-managed addresses to go away)
+ * before the next attempt.  Addresses brought down along the way are brought
+ * back up before returning.  Returns 0 on success; on failure a diagnostic
+ * is printed and the process exits with status 1.
+ */
/* ARGSUSED */
static int
-setifgroupname(char *grpname, int64_t param)
+setifgroupname(char *grname, int64_t param)
{
+ lifgroupinfo_t lifgr;
+ struct lifreq lifrl;
+ ifaddrlistx_t *ifaddrp, *nextifaddrp;
+ ifaddrlistx_t *ifaddrs = NULL, *downaddrs = NULL;
+ int af;
+
if (debug) {
(void) printf("Setting groupname %s on interface %s\n",
- grpname, name);
- }
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
- (void) strncpy(lifr.lifr_groupname, grpname,
- sizeof (lifr.lifr_groupname));
- if (ioctl(s, SIOCSLIFGROUPNAME, (caddr_t)&lifr) < 0) {
- Perror0_exit("setifgroupname: SIOCSLIFGROUPNAME");
+ grname, name);
}
- /*
- * If the SUNW_NO_MPATHD environment variable is set then don't
- * bother starting up in.mpathd. See PSARC/2002/249 for the
- * depressing details on this bit of stupidity.
- */
- if (getenv("SUNW_NO_MPATHD") != NULL) {
- return (0);
+ (void) strlcpy(lifrl.lifr_name, name, LIFNAMSIZ);
+ (void) strlcpy(lifrl.lifr_groupname, grname, LIFGRNAMSIZ);
+
+ while (ioctl(s, SIOCSLIFGROUPNAME, &lifrl) == -1) {
+ switch (errno) {
+ case ENOENT:
+ /*
+ * The group doesn't yet exist; create it and repeat.
+ */
+ af = afp->af_af;
+ if (create_ipmp(grname, af, NULL, _B_TRUE) == -1) {
+ if (errno == EEXIST)
+ continue;
+
+ Perror2(grname, "cannot create IPMP group");
+ goto fail;
+ }
+ continue;
+
+ case EALREADY:
+ /*
+ * The interface is already in another group; must
+ * remove existing membership first.
+ */
+ lifrl.lifr_groupname[0] = '\0';
+ if (ioctl(s, SIOCSLIFGROUPNAME, &lifrl) == -1) {
+ Perror2(name, "cannot remove existing "
+ "IPMP group membership");
+ goto fail;
+ }
+ (void) strlcpy(lifrl.lifr_groupname, grname,
+ LIFGRNAMSIZ);
+ continue;
+
+ case EAFNOSUPPORT:
+ /*
+ * The group exists, but it's not configured with the
+ * address families the interface needs. Since only
+ * two address families are currently supported, just
+ * configure the "other" address family. Note that we
+ * may race with group deletion or creation by another
+ * process (ENOENT or EEXIST); in such cases we repeat
+ * our original SIOCSLIFGROUPNAME.
+ */
+ (void) strlcpy(lifgr.gi_grname, grname, LIFGRNAMSIZ);
+ if (ioctl(s, SIOCGLIFGROUPINFO, &lifgr) == -1) {
+ if (errno == ENOENT)
+ continue;
+
+ Perror2(grname, "SIOCGLIFGROUPINFO");
+ goto fail;
+ }
+
+ af = lifgr.gi_v4 ? AF_INET6 : AF_INET;
+ if (create_ipmp(grname, af, lifgr.gi_grifname,
+ _B_TRUE) == -1) {
+ if (errno == EEXIST)
+ continue;
+
+ Perror2(grname, "cannot configure IPMP group");
+ goto fail;
+ }
+ continue;
+
+ case EADDRINUSE:
+ /*
+ * Some addresses are in-use (or under control of DAD).
+ * Bring them down and retry the group join operation.
+ * We will bring them back up after the interface has
+ * been placed in the group.
+ */
+ if (ifaddrlistx(lifrl.lifr_name, IFF_UP|IFF_DUPLICATE,
+ 0, &ifaddrs) == -1) {
+ /*
+ * The list was requested for the interface,
+ * so label the error with its name rather
+ * than the group name.
+ */
+ Perror2(lifrl.lifr_name,
+ "cannot get address list");
+ goto fail;
+ }
+
+ ifaddrp = ifaddrs;
+ for (; ifaddrp != NULL; ifaddrp = nextifaddrp) {
+ if (!ifaddr_down(ifaddrp)) {
+ /*
+ * Say which address could not be
+ * brought down instead of exiting
+ * silently.
+ */
+ Perror2(ifaddrp->ia_name,
+ "cannot bring down");
+ ifaddrs = ifaddrp;
+ goto fail;
+ }
+ nextifaddrp = ifaddrp->ia_next;
+ ifaddrp->ia_next = downaddrs;
+ downaddrs = ifaddrp;
+ }
+ ifaddrs = NULL;
+ continue;
+
+ case EADDRNOTAVAIL: {
+ /*
+ * Some data addresses are under application control.
+ * For some of these (e.g., ADDRCONF), the application
+ * should remove the address, in which case we retry a
+ * few times (since the application's action is not
+ * atomic with respect to us) before bailing out and
+ * informing the user.
+ */
+ int ntries, nappaddr = 0;
+ const if_appflags_t *iap = if_appflags_tbl;
+
+ for (; iap->ia_app != NULL; iap++) {
+ ntries = 0;
+again:
+ if (ifaddrlistx(lifrl.lifr_name, iap->ia_flag,
+ IFF_NOFAILOVER, &ifaddrs) == -1) {
+ (void) fprintf(stderr, "ifconfig: %s: "
+ "cannot get data addresses managed "
+ "by %s\n", lifrl.lifr_name,
+ iap->ia_app);
+ goto fail;
+ }
+
+ if (ifaddrs == NULL)
+ continue;
+
+ ifaddrlistx_free(ifaddrs);
+ ifaddrs = NULL;
+
+ if (++ntries < iap->ia_tries) {
+ (void) poll(NULL, 0, 100);
+ goto again;
+ }
+
+ (void) fprintf(stderr, "ifconfig: cannot join "
+ "IPMP group: %s has data addresses managed "
+ "by %s\n", lifrl.lifr_name, iap->ia_app);
+ nappaddr++;
+ }
+ if (nappaddr > 0)
+ goto fail;
+ continue;
+ }
+ default:
+ Perror2(name, "SIOCSLIFGROUPNAME");
+ goto fail;
+ }
}
/*
- * Try to connect to in.mpathd using IPv4. If we succeed,
- * we conclude that in.mpathd is running, and quit.
+ * If there were addresses that we had to bring down, it's time to
+ * bring them up again. As part of bringing them up, the kernel will
+ * automatically move them to the new IPMP interface.
*/
- if (connect_to_mpathd(AF_INET) == 0) {
- /* connect succeeded, mpathd is already running */
- return (0);
+ for (ifaddrp = downaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_up(ifaddrp) && errno != ENXIO) {
+ (void) fprintf(stderr, "ifconfig: cannot bring back up "
+ "%s: %s\n", ifaddrp->ia_name, strerror(errno));
+ }
}
+ ifaddrlistx_free(downaddrs);
+ return (0);
+fail:
/*
- * Try to connect to in.mpathd using IPv6. If we succeed,
- * we conclude that in.mpathd is running, and quit.
+ * Attempt to bring back up any interfaces that we downed.
*/
- if (connect_to_mpathd(AF_INET6) == 0) {
- /* connect succeeded, mpathd is already running */
- return (0);
+ for (ifaddrp = downaddrs; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (!ifaddr_up(ifaddrp) && errno != ENXIO) {
+ (void) fprintf(stderr, "ifconfig: cannot bring back up "
+ "%s: %s\n", ifaddrp->ia_name, strerror(errno));
+ }
}
+ ifaddrlistx_free(downaddrs);
+ ifaddrlistx_free(ifaddrs);
/*
- * in.mpathd may not be running. Start it now. If it is already
- * running, in.mpathd will take care of handling multiple incarnations
- * of itself. ifconfig only tries to optimize performance by not
- * starting another incarnation of in.mpathd.
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
*/
- switch (fork()) {
+ exit(1);
+ /* NOTREACHED */
+}
- case -1:
- Perror0_exit("setifgroupname: fork");
- /* NOTREACHED */
- case 0:
- (void) execl(MPATHD_PATH, MPATHD_PATH, NULL);
- _exit(1);
- /* NOTREACHED */
- default:
- return (0);
+/*
+ * Check whether module operations (see modlist() and modop()) may be
+ * performed on `ifname'.  The interface flags are fetched into the file-scope
+ * `lifr', whose lifr_name is set to `ifname' as a side effect.  Returns
+ * _B_FALSE, after printing a diagnostic, if the flags cannot be fetched or
+ * if the interface has IFF_IPMP or IFF_VIRTUAL set; returns _B_TRUE
+ * otherwise.
+ */
+static boolean_t
+modcheck(const char *ifname)
+{
+ (void) strlcpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
+
+ if (ioctl(s, SIOCGLIFFLAGS, &lifr) < 0) {
+ Perror0("SIOCGLIFFLAGS");
+ return (_B_FALSE);
}
-}
+ if (lifr.lifr_flags & IFF_IPMP) {
+ (void) fprintf(stderr, "ifconfig: %s: module operations not"
+ " supported on IPMP interfaces\n", ifname);
+ return (_B_FALSE);
+ }
+ if (lifr.lifr_flags & IFF_VIRTUAL) {
+ (void) fprintf(stderr, "ifconfig: %s: module operations not"
+ " supported on virtual IP interfaces\n", ifname);
+ return (_B_FALSE);
+ }
+ return (_B_TRUE);
+}
/*
* To list all the modules above a given network interface.
@@ -2250,7 +2287,13 @@ modlist(char *null, int64_t param)
struct str_list strlist;
int orig_arpid;
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ /*
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
+ */
+ if (!modcheck(name))
+ exit(1);
+
if (ip_domux2fd(&muxfd, &muxid_fd, &ipfd_lowstr, &arpfd_lowstr,
&orig_arpid) < 0) {
return (-1);
@@ -2354,8 +2397,8 @@ open_arp_on_udp(char *udp_dev_name)
* Return:
* -1 if operation fails, 0 otherwise.
*
- * Please see the big block comment above plumb_one_device()
- * for the logic of the PLINK/PUNLINK
+ * Please see the big block comment above ifplumb() for the logic of the
+ * PLINK/PUNLINK
*/
static int
ip_domux2fd(int *muxfd, int *muxid_fd, int *ipfd_lowstr, int *arpfd_lowstr,
@@ -2467,8 +2510,8 @@ ip_domux2fd(int *muxfd, int *muxid_fd, int *ipfd_lowstr, int *arpfd_lowstr,
* Return:
* -1 if operation fails, 0 otherwise.
*
- * Please see the big block comment above plumb_one_device()
- * for the logic of the PLINK/PUNLINK
+ * Please see the big block comment above ifplumb() for the logic of the
+ * PLINK/PUNLINK
*/
static int
ip_plink(int muxfd, int muxid_fd, int ipfd_lowstr, int arpfd_lowstr,
@@ -2530,7 +2573,12 @@ modop(char *arg, char op)
char *arg_str;
int orig_arpid;
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ /*
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
+ */
+ if (!modcheck(name))
+ exit(1);
/* Need to save the original string for -a option. */
if ((arg_str = malloc(strlen(arg) + 1)) == NULL) {
@@ -3067,13 +3115,14 @@ status(void)
static int
configinfo(char *null, int64_t param)
{
+ char *cp;
struct afswtch *p = afp;
uint64_t flags;
- char phydevname[LIFNAMSIZ];
+ char lifname[LIFNAMSIZ];
char if_usesrc_name[LIFNAMSIZ];
- char *cp;
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+
if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
Perror0_exit("status: SIOCGLIFFLAGS");
}
@@ -3084,13 +3133,13 @@ configinfo(char *null, int64_t param)
name, flags, p != NULL ? p->af_af : -1);
}
- /* remove LIF component */
- (void) strncpy(phydevname, name, sizeof (phydevname));
- cp = strchr(phydevname, ':');
- if (cp) {
- *cp = 0;
- }
- phydevname[sizeof (phydevname) - 1] = '\0';
+ /*
+ * Build the interface name to print (we can't directly use `name'
+ * because one cannot "plumb" ":0" interfaces).
+ */
+ (void) strlcpy(lifname, name, LIFNAMSIZ);
+ if ((cp = strchr(lifname, ':')) != NULL && atoi(cp + 1) == 0)
+ *cp = '\0';
/*
* if the interface is IPv4
@@ -3105,7 +3154,7 @@ configinfo(char *null, int64_t param)
if (v4compat)
flags &= ~IFF_IPV4;
- (void) printf("%s inet plumb", phydevname);
+ (void) printf("%s inet plumb", lifname);
} else if (flags & IFF_IPV6) {
/*
* else if the interface is IPv6
@@ -3117,7 +3166,7 @@ configinfo(char *null, int64_t param)
if (v4compat)
return (-1);
- (void) printf("%s inet6 plumb", phydevname);
+ (void) printf("%s inet6 plumb", lifname);
}
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
@@ -3131,8 +3180,8 @@ configinfo(char *null, int64_t param)
ioctl(s, SIOCGLIFMTU, (caddr_t)&lifr) >= 0)
(void) printf(" mtu %d", lifr.lifr_metric);
- /* don't print index when in compatibility mode */
- if (!v4compat) {
+ /* Index only applies to the zeroth interface */
+ if (lifnum(name) == 0) {
if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) >= 0)
(void) printf(" index %d", lifr.lifr_index);
}
@@ -3162,7 +3211,6 @@ configinfo(char *null, int64_t param)
}
(void) printf("\n");
-
return (0);
}
@@ -3398,15 +3446,11 @@ in_status(int force, uint64_t flags)
inet_ntoa(sin->sin_addr));
}
}
- /* If there is a groupname, print it for lun 0 alone */
+ /* If there is a groupname, print it for only the physical interface */
if (strchr(name, ':') == NULL) {
- (void) memset(lifr.lifr_groupname, 0,
- sizeof (lifr.lifr_groupname));
- if (ioctl(s, SIOCGLIFGROUPNAME, (caddr_t)&lifr) >= 0) {
- if (strlen(lifr.lifr_groupname) > 0) {
- (void) printf("\n\tgroupname %s",
- lifr.lifr_groupname);
- }
+ if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) >= 0 &&
+ lifr.lifr_groupname[0] != '\0') {
+ (void) printf("\n\tgroupname %s", lifr.lifr_groupname);
}
}
(void) putchar('\n');
@@ -3550,11 +3594,7 @@ in_configinfo(int force, uint64_t flags)
Perror0_exit("in_configinfo: SIOCGLIFADDR");
}
sin = (struct sockaddr_in *)&lifr.lifr_addr;
- if (strchr(name, ':') != NULL) {
- (void) printf(" addif %s ", inet_ntoa(sin->sin_addr));
- } else {
- (void) printf(" set %s ", inet_ntoa(sin->sin_addr));
- }
+ (void) printf(" set %s ", inet_ntoa(sin->sin_addr));
laddr = sin;
}
@@ -3614,8 +3654,8 @@ in_configinfo(int force, uint64_t flags)
}
}
- /* If there is a groupname, print it for only the physical interface */
- if (strchr(name, ':') == NULL) {
+ /* If there is a groupname, print it for only the zeroth interface */
+ if (lifnum(name) == 0) {
if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) >= 0 &&
lifr.lifr_groupname[0] != '\0') {
(void) printf(" group %s ", lifr.lifr_groupname);
@@ -3623,12 +3663,7 @@ in_configinfo(int force, uint64_t flags)
}
/* Print flags to configure */
- print_config_flags(flags);
-
- /* IFF_NOARP applies to AF_INET only */
- if (flags & IFF_NOARP) {
- (void) printf("-arp ");
- }
+ print_config_flags(AF_INET, flags);
}
static void
@@ -3657,17 +3692,9 @@ in6_configinfo(int force, uint64_t flags)
Perror0_exit("in6_configinfo: SIOCGLIFADDR");
}
sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
- if (strchr(name, ':') != NULL) {
- (void) printf(" addif %s/%d ",
- inet_ntop(AF_INET6, (void *)&sin6->sin6_addr,
- abuf, sizeof (abuf)),
- lifr.lifr_addrlen);
- } else {
- (void) printf(" set %s/%d ",
- inet_ntop(AF_INET6, (void *)&sin6->sin6_addr,
- abuf, sizeof (abuf)),
- lifr.lifr_addrlen);
- }
+ (void) printf(" set %s/%d ",
+ inet_ntop(AF_INET6, &sin6->sin6_addr, abuf, sizeof (abuf)),
+ lifr.lifr_addrlen);
laddr6 = sin6;
}
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
@@ -3720,8 +3747,8 @@ in6_configinfo(int force, uint64_t flags)
lifr.lifr_addrlen);
}
- /* If there is a groupname, print it for only the physical interface */
- if (strchr(name, ':') == NULL) {
+ /* If there is a groupname, print it for only the zeroth interface */
+ if (lifnum(name) == 0) {
if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) >= 0 &&
lifr.lifr_groupname[0] != '\0') {
(void) printf(" group %s ", lifr.lifr_groupname);
@@ -3729,12 +3756,7 @@ in6_configinfo(int force, uint64_t flags)
}
/* Print flags to configure */
- print_config_flags(flags);
-
- /* IFF_NONUD applies to AF_INET6 only */
- if (flags & IFF_NONUD) {
- (void) printf("-nud ");
- }
+ print_config_flags(AF_INET6, flags);
}
/*
@@ -3768,31 +3790,41 @@ in6_configinfo(int force, uint64_t flags)
* compatibility for other utilities like atmifconfig etc. In this case
* the utility must use SIOCSLIFMUXID.
*/
-static void
-plumb_one_device(int af)
+static int
+ifplumb(const char *linkname, const char *ifname, boolean_t genppa, int af)
{
int arp_muxid = -1, ip_muxid;
int mux_fd, ip_fd, arp_fd;
int retval;
- uint_t ppa;
char *udp_dev_name;
- char provider[DLPI_LINKNAME_MAX];
+ uint64_t flags;
+ uint_t dlpi_flags;
dlpi_handle_t dh_arp, dh_ip;
/*
- * We use DLPI_NOATTACH because the ip module will do the attach
- * itself for DLPI style-2 devices.
+ * Always dlpi_open() with DLPI_NOATTACH because the IP and ARP module
+ * will do the attach themselves for DLPI style-2 links.
*/
- retval = dlpi_open(name, &dh_ip, DLPI_NOATTACH);
- if (retval != DLPI_SUCCESS)
- Perrdlpi_exit("cannot open link", name, retval);
+ dlpi_flags = DLPI_NOATTACH;
- if ((retval = dlpi_parselink(name, provider, &ppa)) != DLPI_SUCCESS)
- Perrdlpi_exit("dlpi_parselink", name, retval);
+ /*
+ * If `linkname' is the special token IPMPSTUB, then this is a request
+ * to create an IPMP interface atop /dev/ipmpstub0. (We can't simply
+ * pass "ipmpstub0" as `linkname' since an admin *could* have a normal
+ * vanity-named link named "ipmpstub0" that they'd like to plumb.)
+ */
+ if (linkname == IPMPSTUB) {
+ linkname = "ipmpstub0";
+ dlpi_flags |= DLPI_DEVONLY;
+ }
+
+ retval = dlpi_open(linkname, &dh_ip, dlpi_flags);
+ if (retval != DLPI_SUCCESS)
+ Perrdlpi_exit("cannot open link", linkname, retval);
if (debug) {
- (void) printf("ifconfig: plumb_one_device: provider %s,"
- " ppa %u\n", provider, ppa);
+ (void) printf("ifconfig: ifplumb: link %s, ifname %s, "
+ "genppa %u\n", linkname, ifname, genppa);
}
ip_fd = dlpi_fd(dh_ip);
@@ -3812,29 +3844,106 @@ plumb_one_device(int af)
Perror2_exit("I_PUSH", ARP_MOD_NAME);
/*
- * Set IFF_IPV4/IFF_IPV6 flags.
- * At this point in time the kernel also allows an
- * override of the CANTCHANGE flags.
+ * Prepare to set IFF_IPV4/IFF_IPV6 flags as part of SIOCSLIFNAME.
+ * (At this point in time the kernel also allows an override of the
+ * IFF_CANTCHANGE flags.)
*/
lifr.lifr_name[0] = '\0';
if (ioctl(ip_fd, SIOCGLIFFLAGS, (char *)&lifr) == -1)
- Perror0_exit("plumb_one_device: SIOCGLIFFLAGS");
+ Perror0_exit("ifplumb: SIOCGLIFFLAGS");
- /* Set the name string and the IFF_IPV* flag */
if (af == AF_INET6) {
- lifr.lifr_flags |= IFF_IPV6;
- lifr.lifr_flags &= ~(IFF_BROADCAST | IFF_IPV4);
+ flags = lifr.lifr_flags | IFF_IPV6;
+ flags &= ~(IFF_BROADCAST | IFF_IPV4);
} else {
- lifr.lifr_flags |= IFF_IPV4;
- lifr.lifr_flags &= ~IFF_IPV6;
+ flags = lifr.lifr_flags | IFF_IPV4;
+ flags &= ~IFF_IPV6;
}
- /* record the device and module names as interface name */
- lifr.lifr_ppa = ppa;
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ /*
+ * Set the interface name. If we've been asked to generate the PPA,
+ * then find the lowest available PPA (only currently used for IPMP
+ * interfaces). Otherwise, use the interface name as-is.
+ */
+ if (genppa) {
+ int ppa;
+
+ /*
+ * We'd like to just set lifr_ppa to UINT_MAX and have the
+ * kernel pick a PPA. Unfortunately, that would mishandle
+ * two cases:
+ *
+ * 1. If the PPA is available but the groupname is taken
+ * (e.g., the "ipmp2" IP interface name is available
+ * but the "ipmp2" groupname is taken) then the
+ * auto-assignment by the kernel will fail.
+ *
+ * 2. If we're creating (e.g.) an IPv6-only IPMP
+ * interface, and there's already an IPv4-only IPMP
+ * interface, the kernel will allow us to accidentally
+ * reuse the IPv6 IPMP interface name (since
+ * SIOCSLIFNAME uniqueness is per-interface-type).
+ * This will cause administrative confusion.
+ *
+ * Thus, we instead take a brute-force approach of checking
+ * whether the IPv4 or IPv6 name is already in-use before
+ * attempting the SIOCSLIFNAME. As per (1) above, the
+ * SIOCSLIFNAME may still fail, in which case we just proceed
+ * to the next one. If this approach becomes too slow, we
+ * can add a new SIOC* to handle this case in the kernel.
+ */
+ for (ppa = 0; ppa < UINT_MAX; ppa++) {
+ (void) snprintf(lifr.lifr_name, LIFNAMSIZ, "%s%d",
+ ifname, ppa);
+
+ if (ioctl(s4, SIOCGLIFFLAGS, &lifr) != -1 ||
+ errno != ENXIO)
+ continue;
+
+ if (ioctl(s6, SIOCGLIFFLAGS, &lifr) != -1 ||
+ errno != ENXIO)
+ continue;
+
+ lifr.lifr_ppa = ppa;
+ lifr.lifr_flags = flags;
+ retval = ioctl(ip_fd, SIOCSLIFNAME, &lifr);
+ if (retval != -1 || errno != EEXIST)
+ break;
+ }
+ } else {
+ ifspec_t ifsp;
+
+ /*
+ * The interface name could have come from the command-line;
+ * check it.
+ */
+ if (!ifparse_ifspec(ifname, &ifsp) || ifsp.ifsp_lunvalid)
+ Perror2_exit("invalid IP interface name", ifname);
+
+ /*
+ * Before we call SIOCSLIFNAME, ensure that the IPMP group
+ * interface for this address family exists. Otherwise, the
+ * kernel will kick the interface out of the group when we do
+ * the SIOCSLIFNAME.
+ *
+ * Example: suppose bge0 is plumbed for IPv4 and in group "a".
+ * If we're now plumbing bge0 for IPv6, but the IPMP group
+ * interface for "a" is not plumbed for IPv6, the SIOCSLIFNAME
+ * will kick bge0 out of group "a", which is undesired.
+ */
+ if (create_ipmp_peer(af, ifname) == -1) {
+ (void) fprintf(stderr, "ifconfig: warning: cannot "
+ "create %s IPMP group; %s will be removed from "
+ "group\n", af == AF_INET ? "IPv4" : "IPv6", ifname);
+ }
- /* set the interface name */
- if (ioctl(ip_fd, SIOCSLIFNAME, (char *)&lifr) == -1) {
+ lifr.lifr_ppa = ifsp.ifsp_ppa;
+ lifr.lifr_flags = flags;
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
+ retval = ioctl(ip_fd, SIOCSLIFNAME, &lifr);
+ }
+
+ if (retval == -1) {
if (errno != EEXIST)
Perror0_exit("SIOCSLIFNAME for ip");
/*
@@ -3847,15 +3956,15 @@ plumb_one_device(int af)
* called for EEXIST.
*/
Perror0("SIOCSLIFNAME for ip");
- return;
+ return (-1);
}
/* Get the full set of existing flags for this stream */
if (ioctl(ip_fd, SIOCGLIFFLAGS, (char *)&lifr) == -1)
- Perror0_exit("plumb_one_device: SIOCFLIFFLAGS");
+ Perror0_exit("ifplumb: SIOCGLIFFLAGS");
if (debug) {
- (void) printf("ifconfig: plumb_one_device: %s got flags:\n",
+ (void) printf("ifconfig: ifplumb: %s got flags:\n",
lifr.lifr_name);
print_flags(lifr.lifr_flags);
(void) putchar('\n');
@@ -3890,7 +3999,7 @@ plumb_one_device(int af)
if ((ip_muxid = ioctl(mux_fd, I_PLINK, ip_fd)) == -1)
Perror0_exit("I_PLINK for ip");
(void) close(mux_fd);
- return;
+ return (lifr.lifr_ppa);
}
/*
@@ -3901,15 +4010,11 @@ plumb_one_device(int af)
* only on the interface stream, not on the ARP stream.
*/
if (debug)
- (void) printf("ifconfig: plumb_one_device: ifname: %s\n", name);
+ (void) printf("ifconfig: ifplumb: interface %s", ifname);
- /*
- * We use DLPI_NOATTACH because the arp module will do the attach
- * itself for DLPI style-2 devices.
- */
- retval = dlpi_open(name, &dh_arp, DLPI_NOATTACH);
+ retval = dlpi_open(linkname, &dh_arp, dlpi_flags);
if (retval != DLPI_SUCCESS)
- Perrdlpi_exit("cannot open link", name, retval);
+ Perrdlpi_exit("cannot open link", linkname, retval);
arp_fd = dlpi_fd(dh_arp);
if (ioctl(arp_fd, I_PUSH, ARP_MOD_NAME) == -1)
@@ -3919,16 +4024,13 @@ plumb_one_device(int af)
* Tell ARP the name and unit number for this interface.
* Note that arp has no support for transparent ioctls.
*/
- if (strioctl(arp_fd, SIOCSLIFNAME, (char *)&lifr,
- sizeof (lifr)) == -1) {
+ if (strioctl(arp_fd, SIOCSLIFNAME, &lifr, sizeof (lifr)) == -1) {
if (errno != EEXIST)
Perror0_exit("SIOCSLIFNAME for arp");
Perror0("SIOCSLIFNAME for arp");
- dlpi_close(dh_arp);
- dlpi_close(dh_ip);
- (void) close(mux_fd);
- return;
+ goto out;
}
+
/*
* PLINK the IP and ARP streams so that ifconfig can exit
* without tearing down the stream.
@@ -3942,12 +4044,13 @@ plumb_one_device(int af)
if (debug)
(void) printf("arp muxid = %d\n", arp_muxid);
+out:
dlpi_close(dh_ip);
dlpi_close(dh_arp);
(void) close(mux_fd);
+ return (lifr.lifr_ppa);
}
-
/*
* If this is a physical interface then remove it.
* If it is a logical interface name use SIOCLIFREMOVEIF to
@@ -3965,6 +4068,7 @@ inetunplumb(char *arg, int64_t param)
uint64_t flags;
boolean_t changed_arp_muxid = _B_FALSE;
int save_errno;
+ boolean_t v6 = (afp->af_af == AF_INET6);
strptr = strchr(name, ':');
if (strptr != NULL || strcmp(name, LOOPBACK_IF) == 0) {
@@ -3986,7 +4090,7 @@ inetunplumb(char *arg, int64_t param)
* We used /dev/udp or udp6 to set up the mux. So we have to use
* the same now for PUNLINK also.
*/
- if (afp->af_af == AF_INET6)
+ if (v6)
udp_dev_name = UDP6_DEV_NAME;
else
udp_dev_name = UDP_DEV_NAME;
@@ -4002,6 +4106,50 @@ inetunplumb(char *arg, int64_t param)
Perror0_exit("unplumb: SIOCGLIFFLAGS");
}
flags = lifr.lifr_flags;
+
+ if (flags & IFF_IPMP) {
+ lifgroupinfo_t lifgr;
+ ifaddrlistx_t *ifaddrs, *ifaddrp;
+
+ /*
+ * The kernel will fail the I_PUNLINK if the group still has
+ * members, but check now to provide a better error message.
+ */
+ if (ioctl(s, SIOCGLIFGROUPNAME, &lifr) == -1)
+ Perror0_exit("unplumb: SIOCGLIFGROUPNAME");
+
+ (void) strlcpy(lifgr.gi_grname, lifr.lifr_groupname,
+ LIFGRNAMSIZ);
+ if (ioctl(s, SIOCGLIFGROUPINFO, &lifgr) == -1)
+ Perror0_exit("unplumb: SIOCGLIFGROUPINFO");
+
+ if ((v6 && lifgr.gi_nv6 != 0) || (!v6 && lifgr.gi_nv4 != 0)) {
+ (void) fprintf(stderr, "ifconfig: %s: cannot unplumb:"
+ " IPMP group is not empty\n", name);
+ exit(1);
+ }
+
+ /*
+ * The kernel will fail the I_PUNLINK if the IPMP interface
+ * has administratively up addresses; bring 'em down.
+ */
+ if (ifaddrlistx(name, IFF_UP|IFF_DUPLICATE, 0, &ifaddrs) == -1)
+ Perror2_exit(name, "cannot get address list");
+
+ ifaddrp = ifaddrs;
+ for (; ifaddrp != NULL; ifaddrp = ifaddrp->ia_next) {
+ if (((ifaddrp->ia_flags & IFF_IPV6) && !v6) ||
+ (!(ifaddrp->ia_flags & IFF_IPV6) && v6))
+ continue;
+
+ if (!ifaddr_down(ifaddrp)) {
+ Perror2_exit(ifaddrp->ia_name,
+ "cannot bring down");
+ }
+ }
+ ifaddrlistx_free(ifaddrs);
+ }
+
if (ioctl(muxid_fd, SIOCGLIFMUXID, (caddr_t)&lifr) < 0) {
Perror0_exit("unplumb: SIOCGLIFMUXID");
}
@@ -4098,12 +4246,6 @@ inetplumb(char *arg, int64_t param)
Perror2_exit("plumb: SIOCLIFADDIF", name);
}
}
- /*
- * IP can create the new logical interface on a different
- * physical interface in the same IPMP group. Take the new
- * interface into account for further operations.
- */
- (void) strncpy(name, lifr.lifr_name, sizeof (name));
return (0);
}
@@ -4131,10 +4273,229 @@ inetplumb(char *arg, int64_t param)
if (debug)
(void) printf("inetplumb: %s af %d\n", name, afp->af_af);
- plumb_one_device(afp->af_af);
+ (void) ifplumb(name, name, _B_FALSE, afp->af_af);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+inetipmp(char *arg, int64_t param)
+{
+ int retval;
+
+ /*
+ * Treat e.g. "ifconfig ipmp0:2 ipmp" as "ifconfig ipmp0:2 plumb".
+ * Otherwise, try to create the requested IPMP interface.
+ */
+ if (strchr(name, ':') != NULL)
+ retval = inetplumb(arg, param);
+ else
+ retval = create_ipmp(name, afp->af_af, name, _B_FALSE);
+
+ /*
+ * We'd return -1, but foreachinterface() doesn't propagate the error
+ * into the exit status, so we're forced to explicitly exit().
+ */
+ if (retval == -1)
+ exit(1);
return (0);
}
+/*
+ * Create an IPMP group `grname' with address family `af'. If `ifname' is
+ * non-NULL, it specifies the interface name to use. Otherwise, use the name
+ * ipmpN, where N corresponds to the lowest available integer. If `implicit'
+ * is set, then the group is being created as a side-effect of placing an
+ * underlying interface in a group. Also start in.mpathd if necessary.
+ */
+static int
+create_ipmp(const char *grname, int af, const char *ifname, boolean_t implicit)
+{
+ int ppa;
+ static int ipmp_daemon_started;
+
+ if (debug) {
+ (void) printf("create_ipmp: ifname %s grname %s af %d\n",
+ ifname != NULL ? ifname : "NULL", grname, af);
+ }
+
+ if (ifname != NULL)
+ ppa = ifplumb(IPMPSTUB, ifname, _B_FALSE, af);
+ else
+ ppa = ifplumb(IPMPSTUB, "ipmp", _B_TRUE, af);
+
+ if (ppa == -1) {
+ Perror2(grname, "cannot create IPMP interface");
+ return (-1);
+ }
+
+ if (ifname != NULL)
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
+ else
+ (void) snprintf(lifr.lifr_name, LIFNAMSIZ, "ipmp%d", ppa);
+
+ /*
+ * To preserve backward-compatibility, always bring up the link-local
+ * address for implicitly-created IPv6 IPMP interfaces.
+ */
+ if (implicit && af == AF_INET6) {
+ if (ioctl(s6, SIOCGLIFFLAGS, &lifr) == 0) {
+ lifr.lifr_flags |= IFF_UP;
+ (void) ioctl(s6, SIOCSLIFFLAGS, &lifr);
+ }
+ }
+
+ /*
+ * If the caller requested a different group name, issue a
+ * SIOCSLIFGROUPNAME on the new IPMP interface.
+ */
+ if (strcmp(lifr.lifr_name, grname) != 0) {
+ (void) strlcpy(lifr.lifr_groupname, grname, LIFGRNAMSIZ);
+ if (ioctl(s, SIOCSLIFGROUPNAME, &lifr) == -1) {
+ Perror0("SIOCSLIFGROUPNAME");
+ return (-1);
+ }
+ }
+
+ /*
+ * If we haven't done so yet, ensure in.mpathd is started.
+ */
+ if (ipmp_daemon_started++ == 0)
+ start_ipmp_daemon();
+
+ return (0);
+}
+
+/*
+ * Check if `ifname' is plumbed and in an IPMP group on its "other" address
+ * family. If so, create a matching IPMP group for address family `af'.
+ */
+static int
+create_ipmp_peer(int af, const char *ifname)
+{
+ int fd;
+ lifgroupinfo_t lifgr;
+
+ assert(af == AF_INET || af == AF_INET6);
+
+ /*
+ * Get the socket for the "other" address family.
+ */
+ fd = (af == AF_INET) ? s6 : s4;
+
+ (void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPNAME, &lifr) != 0)
+ return (0);
+
+ (void) strlcpy(lifgr.gi_grname, lifr.lifr_groupname, LIFGRNAMSIZ);
+ if (ioctl(fd, SIOCGLIFGROUPINFO, &lifgr) != 0)
+ return (0);
+
+ /*
+ * If `ifname' *is* the IPMP group interface, or if the relevant
+ * address family is already configured, then there's nothing to do.
+ */
+ if (strcmp(lifgr.gi_grifname, ifname) == 0 ||
+ (af == AF_INET && lifgr.gi_v4) || (af == AF_INET6 && lifgr.gi_v6))
+ return (0);
+
+ return (create_ipmp(lifgr.gi_grname, af, lifgr.gi_grifname, _B_TRUE));
+}
+
+/*
+ * Start in.mpathd if it's not already running.
+ */
+static void
+start_ipmp_daemon(void)
+{
+ int retval;
+ ipmp_handle_t ipmp_handle;
+
+ /*
+ * Ping in.mpathd to see if it's running already.
+ */
+ if ((retval = ipmp_open(&ipmp_handle)) != IPMP_SUCCESS) {
+ (void) fprintf(stderr, "ifconfig: cannot create IPMP handle: "
+ "%s\n", ipmp_errmsg(retval));
+ return;
+ }
+
+ retval = ipmp_ping_daemon(ipmp_handle);
+ ipmp_close(ipmp_handle);
+
+ switch (retval) {
+ case IPMP_ENOMPATHD:
+ break;
+ case IPMP_SUCCESS:
+ return;
+ default:
+ (void) fprintf(stderr, "ifconfig: cannot ping in.mpathd: %s\n",
+ ipmp_errmsg(retval));
+ break;
+ }
+
+ /*
+ * Start in.mpathd. Note that in.mpathd will handle multiple
+ * incarnations (ipmp_ping_daemon() is just an optimization) so we
+ * don't need to worry about racing with another ifconfig process.
+ */
+ switch (fork()) {
+ case -1:
+ Perror0_exit("start_ipmp_daemon: fork");
+ /* NOTREACHED */
+ case 0:
+ (void) execl(MPATHD_PATH, MPATHD_PATH, NULL);
+ _exit(1);
+ /* NOTREACHED */
+ default:
+ break;
+ }
+}
+
+/*
+ * Bring the address named by `ifaddrp' up or down. Doesn't trust any mutable
+ * values in ia_flags since they may be stale.
+ */
+static boolean_t
+ifaddr_op(ifaddrlistx_t *ifaddrp, boolean_t up)
+{
+ struct lifreq lifrl; /* Local lifreq struct */
+ int fd = (ifaddrp->ia_flags & IFF_IPV4) ? s4 : s6;
+
+ (void) memset(&lifrl, 0, sizeof (lifrl));
+ (void) strlcpy(lifrl.lifr_name, ifaddrp->ia_name, LIFNAMSIZ);
+ if (ioctl(fd, SIOCGLIFFLAGS, &lifrl) == -1)
+ return (_B_FALSE);
+
+ if (up) {
+ lifrl.lifr_flags |= IFF_UP;
+ } else {
+ /*
+ * If we've been asked to bring down an IFF_DUPLICATE address,
+ * then get the address and set it. This will cause IP to
+ * clear IFF_DUPLICATE and stop the automatic recovery timer.
+ */
+ if (lifrl.lifr_flags & IFF_DUPLICATE) {
+ return (ioctl(fd, SIOCGLIFADDR, &lifrl) != -1 &&
+ ioctl(fd, SIOCSLIFADDR, &lifrl) != -1);
+ }
+ lifrl.lifr_flags &= ~IFF_UP;
+ }
+ return (ioctl(fd, SIOCSLIFFLAGS, &lifrl) == 0);
+}
+
+static boolean_t
+ifaddr_up(ifaddrlistx_t *ifaddrp)
+{
+ return (ifaddr_op(ifaddrp, _B_TRUE));
+}
+
+static boolean_t
+ifaddr_down(ifaddrlistx_t *ifaddrp)
+{
+ return (ifaddr_op(ifaddrp, _B_FALSE));
+}
+
void
Perror0(const char *cmd)
{
@@ -4404,14 +4765,14 @@ print_flags(uint64_t flags)
}
static void
-print_config_flags(uint64_t flags)
+print_config_flags(int af, uint64_t flags)
{
- int cnt, i;
+ if_config_cmd_t *cmdp;
- cnt = sizeof (if_config_cmd_tbl) / sizeof (if_config_cmd_t);
- for (i = 0; i < cnt; i++) {
- if (flags & if_config_cmd_tbl[i].iff_flag) {
- (void) printf("%s ", if_config_cmd_tbl[i].iff_name);
+ for (cmdp = if_config_cmd_tbl; cmdp->iff_flag != 0; cmdp++) {
+ if ((flags & cmdp->iff_flag) &&
+ (cmdp->iff_af == AF_UNSPEC || cmdp->iff_af == af)) {
+ (void) printf("%s ", cmdp->iff_name);
}
}
}
@@ -4454,7 +4815,18 @@ in_getmask(struct sockaddr_in *saddr, boolean_t addr_set)
}
static int
-strioctl(int s, int cmd, char *buf, int buflen)
+lifnum(const char *ifname)
+{
+ const char *cp;
+
+ if ((cp = strchr(ifname, ':')) == NULL)
+ return (0);
+ else
+ return (atoi(cp + 1));
+}
+
+static int
+strioctl(int s, int cmd, void *buf, int buflen)
{
struct strioctl ioc;
@@ -4681,6 +5053,7 @@ usage(void)
"\t[ modlist ]\n"
"\t[ modinsert <module_name@position> ]\n"
"\t[ modremove <module_name@position> ]\n"
+ "\t[ ipmp ]\n"
"\t[ group <groupname>] | [ group \"\"]\n"
"\t[ deprecated | -deprecated ]\n"
"\t[ standby | -standby ]\n"
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
index 0ac600001f..f11f4d0a94 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -11,8 +11,6 @@
#ifndef _IFCONFIG_H
#define _IFCONFIG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,7 +37,6 @@ extern void Perrdlpi_exit(const char *, const char *, int);
extern int doifrevarp(const char *, struct sockaddr_in *);
-extern int dlpi_set_address(const char *, uchar_t *, uint_t);
extern void dlpi_print_address(const char *);
#ifdef __cplusplus
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c
index 725c8b24c3..aba4794942 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/revarp.c
@@ -19,14 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "ifconfig.h"
#include <sys/types.h>
@@ -88,6 +86,7 @@ doifrevarp(const char *linkname, struct sockaddr_in *laddr)
/* don't try to revarp if we know it won't work */
if ((lifr.lifr_flags & IFF_LOOPBACK) ||
(lifr.lifr_flags & IFF_NOARP) ||
+ (lifr.lifr_flags & IFF_IPMP) ||
(lifr.lifr_flags & IFF_POINTOPOINT)) {
(void) close(s);
return (0);
@@ -326,28 +325,6 @@ rarp_recv(dlpi_handle_t dh, struct arphdr *ans, size_t msglen,
return (DLPI_ETIMEDOUT);
}
-int
-dlpi_set_address(const char *linkname, uchar_t *physaddr, uint_t physaddrlen)
-{
- int retval;
- dlpi_handle_t dh;
-
- if ((retval = dlpi_open(linkname, &dh, 0)) != DLPI_SUCCESS) {
- Perrdlpi("dlpi_open failed", linkname, retval);
- return (-1);
- }
-
- if ((retval = dlpi_set_physaddr(dh, DL_CURR_PHYS_ADDR, physaddr,
- physaddrlen)) != DLPI_SUCCESS) {
- Perrdlpi("dlpi_set_physaddr failed", linkname, retval);
- dlpi_close(dh);
- return (-1);
- }
-
- dlpi_close(dh);
- return (0);
-}
-
void
dlpi_print_address(const char *linkname)
{
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h
index 900b5841ed..5cca3ecb2e 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/defs.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983, 1988, 1993
@@ -414,16 +414,9 @@ struct interface {
(IS_REMOTE|IS_PASSIVE))
/*
- * Is an IP interface up? Because of the way IPMP uses deprecated
- * interfaces, we need to check more than the IFF_UP and IFF_RUNNING
- * interface flags here. Basically, we do not want to use IFF_DEPRECATED
- * interfaces unless they are also IFF_STANDBY and not IFF_INACTIVE.
+ * Is an IP interface up?
*/
-#define IFF_GOOD (IFF_UP|IFF_RUNNING)
-#define IS_IFF_UP(f) \
- ((((f) & (IFF_GOOD|IFF_DEPRECATED)) == IFF_GOOD) || \
- (((f) & (IFF_GOOD|IFF_INACTIVE|IFF_STANDBY)) == \
- (IFF_GOOD|IFF_STANDBY)))
+#define IS_IFF_UP(f) (((f) & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
/*
* This defines interfaces that we should not use for advertising or
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c
index 79ae02e703..a3a26ac2cb 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/in.routed/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983, 1988, 1993
@@ -36,8 +36,6 @@
* $FreeBSD: src/sbin/routed/trace.c,v 1.6 2000/08/11 08:24:38 sheldonh Exp $
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "defs.h"
#include "pathnames.h"
#include <signal.h>
@@ -566,6 +564,7 @@ static struct bits if_bits[] = {
{ IFF_TEMPORARY, 0, "TEMPORARY" },
{ IFF_FIXEDMTU, 0, "FIXEDMTU" },
{ IFF_VIRTUAL, 0, "VIRTUAL"},
+ { IFF_IPMP, 0, "IPMP"},
{ 0, 0, NULL}
};
@@ -898,8 +897,8 @@ trace_upslot(struct rt_entry *rt,
print_rts(rts, 0, 0,
rts->rts_gate != new->rts_gate,
rts->rts_tag != new->rts_tag,
- rts != rt->rt_spares || AGE_RT(rt->rt_state,
- rts->rts_origin, rt->rt_ifp));
+ rts != rt->rt_spares ||
+ AGE_RT(rt->rt_state, rts->rts_origin, rt->rt_ifp));
(void) fprintf(ftrace, "\n %19s%-16s ", "",
(new->rts_gate != rts->rts_gate ?
@@ -1173,10 +1172,9 @@ trace_rip(const char *dir1, const char *dir2,
if (NA->a_type == RIP_AUTH_PW &&
n == msg->rip_nets) {
(void) fprintf(ftrace, "\tPassword"
- " Authentication:"
- " \"%s\"\n",
+ " Authentication: \"%s\"\n",
qstring(NA->au.au_pw,
- RIP_AUTH_PW_LEN));
+ RIP_AUTH_PW_LEN));
continue;
}
@@ -1186,13 +1184,12 @@ trace_rip(const char *dir1, const char *dir2,
"\tMD5 Auth"
" pkt_len=%d KeyID=%u"
" auth_len=%d"
- " seqno=%#lx"
- " rsvd=%#x,%#x\n",
+ " seqno=%#x"
+ " rsvd=%#hx,%#hx\n",
ntohs(NA->au.a_md5.md5_pkt_len),
NA->au.a_md5.md5_keyid,
NA->au.a_md5.md5_auth_len,
- (unsigned long)ntohl(NA->au.a_md5.
- md5_seqno),
+ ntohl(NA->au.a_md5.md5_seqno),
ntohs(NA->au.a_md5.rsvd[0]),
ntohs(NA->au.a_md5.rsvd[1]));
continue;
@@ -1217,14 +1214,12 @@ trace_rip(const char *dir1, const char *dir2,
inet_ntoa(tmp_mask));
} else if (msg->rip_vers == RIPv1) {
(void) fprintf(ftrace, "\t%-18s ",
- addrname(n->n_dst,
- ntohl(n->n_mask),
- n->n_mask == 0 ? 2 : 1));
+ addrname(n->n_dst, ntohl(n->n_mask),
+ n->n_mask == 0 ? 2 : 1));
} else {
(void) fprintf(ftrace, "\t%-18s ",
- addrname(n->n_dst,
- ntohl(n->n_mask),
- n->n_mask == 0 ? 2 : 0));
+ addrname(n->n_dst, ntohl(n->n_mask),
+ n->n_mask == 0 ? 2 : 0));
}
(void) fprintf(ftrace, "metric=%-2lu ",
(unsigned long)ntohl(n->n_metric));
@@ -1242,8 +1237,8 @@ trace_rip(const char *dir1, const char *dir2,
break;
case RIPCMD_TRACEON:
- (void) fprintf(ftrace, "\tfile=\"%.*s\"\n", size-4,
- msg->rip_tracefile);
+ (void) fprintf(ftrace, "\tfile=\"%.*s\"\n", size - 4,
+ msg->rip_tracefile);
break;
case RIPCMD_TRACEOFF:
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile
new file mode 100644
index 0000000000..a256cf5f49
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG = ipmpstat
+ROOTFS_PROG = $(PROG)
+ROOTUSRSBINLINKS = $(PROG:%=$(ROOTUSRSBIN)/%)
+
+include $(SRC)/cmd/Makefile.cmd
+
+C99MODE = $(C99_ENABLE)
+LDLIBS += -lipmp -lsocket -lsysevent -lnvpair
+XGETFLAGS += -a -x $(PROG).xcl
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTSBINPROG) $(ROOTUSRSBINLINKS)
+
+clean:
+
+lint: lint_PROG
+
+$(ROOTUSRSBINLINKS):
+ -$(RM) $@; $(SYMLINK) ../../sbin/$(@F) $@
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c
new file mode 100644
index 0000000000..4620c34a24
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.c
@@ -0,0 +1,1498 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <alloca.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <ipmp_admin.h>
+#include <ipmp_query.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <libsysevent.h>
+#include <locale.h>
+#include <netdb.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/ipmp.h>
+#include <sys/sysmacros.h>
+#include <sys/termios.h>
+#include <sys/types.h>
+
+/*
+ * ipmpstat -- display IPMP subsystem status.
+ *
+ * This utility makes extensive use of libipmp and IPMP sysevents to gather
+ * and pretty-print the status of the IPMP subsystem. All output formats
+ * except for -p (probe) use libipmp to create a point-in-time snapshot of the
+ * IPMP subsystem (unless the test-special -L flag is used), and then output
+ * the contents of that snapshot in a user-specified manner. Because the
+ * output format and requested fields aren't known until run-time, three sets
+ * of function pointers and two core data structures are used. Specifically:
+ *
+ * * The ipmpstat_walker_t function pointers (walk_*) iterate through
+ * all instances of a given IPMP object (group, interface, or address).
+ * At most one ipmpstat_walker_t is used per ipmpstat invocation.
+ * Since target information is included with the interface information,
+ * both -i and -t use the interface walker (walk_if()).
+ *
+ * * The ipmpstat_sfunc_t function pointers (sfunc_*) obtain a given
+ * value for a given IPMP object. Each ipmpstat_sfunc_t is passed a
+ * buffer to write its result into, the buffer's size, and an
+ * ipmpstat_sfunc_arg_t state structure. The state structure consists
+ * of a pointer to the IPMP object to obtain information from
+ * (sa_data), and an open libipmp handle (sa_ih) which can be used to
+ * do additional libipmp queries, if necessary (e.g., because the
+ * object does not have all of the needed information).
+ *
+ * * The ipmpstat_field_t structure provides the list of supported fields
+ * for a given output format, along with output formatting information
+ * (e.g., field width), and a pointer to an ipmpstat_sfunc_t function
+ * that can obtain the value for a given IPMP object. For a given
+ * ipmpstat output format, there's a corresponding array of
+ * ipmpstat_field_t structures. Thus, one ipmpstat_field_t array is
+ * used per ipmpstat invocation.
+ *
+ * * The ipmpstat_ofmt_t provides an ordered list of the requested
+ * ipmpstat_field_t's (e.g., via -o) for a given ipmpstat invocation.
+ * It is built at runtime from the command-line arguments. This
+ * structure (and a given IPMP object) is used by ofmt_output() to
+ * output a single line of information about that IPMP object.
+ *
+ * * The ipmpstat_cbfunc_t function pointers (*_cbfunc) are called back
+ * by the walkers. They are used both internally to implement nested
+ * walks, and by the ipmpstat output logic to provide the glue between
+ * the IPMP object walkers and the ofmt_output() logic. Usually, a
+ * single line is output for each IPMP object, and thus ofmt_output()
+ * can be directly invoked (see info_output_cbfunc()). However, if
+ * multiple lines need to be output, then a more complex cbfunc is
+ * needed (see targinfo_output_cbfunc()). At most one cbfunc is used
+ * per ipmpstat invocation.
+ */
+
+/*
+ * Data type used by the sfunc callbacks to obtain the requested information
+ * from the agreed-upon object.
+ */
+typedef struct ipmpstat_sfunc_arg {
+ ipmp_handle_t sa_ih;
+ void *sa_data;
+} ipmpstat_sfunc_arg_t;
+
+typedef void ipmpstat_sfunc_t(ipmpstat_sfunc_arg_t *, char *, uint_t);
+
+/*
+ * Data type that describes how to output a field; used by ofmt_output*().
+ */
+typedef struct ipmpstat_field {
+ const char *f_name; /* field name */
+ uint_t f_width; /* output width */
+ ipmpstat_sfunc_t *f_sfunc; /* value->string function */
+} ipmpstat_field_t;
+
+/*
+ * Data type that specifies the output field order; used by ofmt_output*()
+ */
+typedef struct ipmpstat_ofmt {
+ const ipmpstat_field_t *o_field; /* current field info */
+ struct ipmpstat_ofmt *o_next; /* next field */
+} ipmpstat_ofmt_t;
+
+/*
+ * Function pointers used to iterate through IPMP objects.
+ */
+typedef void ipmpstat_cbfunc_t(ipmp_handle_t, void *, void *);
+typedef void ipmpstat_walker_t(ipmp_handle_t, ipmpstat_cbfunc_t *, void *);
+
+/*
+ * Data type used to implement nested walks.
+ */
+typedef struct ipmpstat_walkdata {
+ ipmpstat_cbfunc_t *iw_func; /* caller-specified callback */
+ void *iw_funcarg; /* caller-specified arg */
+} ipmpstat_walkdata_t;
+
+/*
+ * Data type used by enum2str() to map an enumerated value to a string.
+ */
+typedef struct ipmpstat_enum {
+ const char *e_name; /* string */
+ int e_val; /* value */
+} ipmpstat_enum_t;
+
+/*
+ * Data type used to pass state between probe_output() and probe_event().
+ */
+typedef struct ipmpstat_probe_state {
+ ipmp_handle_t ps_ih; /* open IPMP handle */
+ ipmpstat_ofmt_t *ps_ofmt; /* requested ofmt string */
+} ipmpstat_probe_state_t;
+
+/*
+ * Options that modify the output mode; more than one may be lit.
+ */
+typedef enum {
+ IPMPSTAT_OPT_NUMERIC = 0x1,
+ IPMPSTAT_OPT_PARSABLE = 0x2
+} ipmpstat_opt_t;
+
+/*
+ * Indices for the FLAGS field of the `-i' output format.
+ */
+enum {
+ IPMPSTAT_IFLAG_INDEX, IPMPSTAT_SFLAG_INDEX, IPMPSTAT_M4FLAG_INDEX,
+ IPMPSTAT_BFLAG_INDEX, IPMPSTAT_M6FLAG_INDEX, IPMPSTAT_DFLAG_INDEX,
+ IPMPSTAT_HFLAG_INDEX, IPMPSTAT_NUM_FLAGS
+};
+
+#define IPMPSTAT_NCOL 80
+#define NS2FLOATMS(ns) ((float)(ns) / (NANOSEC / MILLISEC))
+#define MS2FLOATSEC(ms) ((float)(ms) / 1000)
+
+static const char *progname;
+static hrtime_t probe_output_start;
+static struct winsize winsize;
+static ipmpstat_opt_t opt;
+static ipmpstat_enum_t addr_state[], group_state[], if_state[], if_link[];
+static ipmpstat_enum_t if_probe[], targ_mode[];
+static ipmpstat_field_t addr_fields[], group_fields[], if_fields[];
+static ipmpstat_field_t probe_fields[], targ_fields[];
+static ipmpstat_cbfunc_t walk_addr_cbfunc, walk_if_cbfunc;
+static ipmpstat_cbfunc_t info_output_cbfunc, targinfo_output_cbfunc;
+static ipmpstat_walker_t walk_addr, walk_if, walk_group;
+
+static int probe_event(sysevent_t *, void *);
+static void probe_output(ipmp_handle_t, ipmpstat_ofmt_t *);
+static ipmpstat_field_t *field_find(ipmpstat_field_t *, const char *);
+static ipmpstat_ofmt_t *ofmt_create(const char *, ipmpstat_field_t []);
+static void ofmt_output(const ipmpstat_ofmt_t *, ipmp_handle_t, void *);
+static void ofmt_destroy(ipmpstat_ofmt_t *);
+static void enum2str(const ipmpstat_enum_t *, int, char *, uint_t);
+static void sockaddr2str(const struct sockaddr_storage *, char *, uint_t);
+static void sighandler(int);
+static void usage(void);
+static void die(const char *, ...);
+static void die_ipmperr(int, const char *, ...);
+static void warn(const char *, ...);
+static void warn_ipmperr(int, const char *, ...);
+
+int
+main(int argc, char **argv)
+{
+ int c;
+ int err;
+ const char *ofields = NULL;
+ ipmp_handle_t ih;
+ ipmp_qcontext_t qcontext = IPMP_QCONTEXT_SNAP;
+ ipmpstat_ofmt_t *ofmt;
+ ipmpstat_field_t *fields = NULL;
+ ipmpstat_cbfunc_t *cbfunc;
+ ipmpstat_walker_t *walker;
+
+ if ((progname = strrchr(argv[0], '/')) == NULL)
+ progname = argv[0];
+ else
+ progname++;
+
+ (void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
+
+ while ((c = getopt(argc, argv, "nLPo:agipt")) != EOF) {
+ if (fields != NULL && strchr("agipt", c) != NULL)
+ die("only one output format may be specified\n");
+
+ switch (c) {
+ case 'n':
+ opt |= IPMPSTAT_OPT_NUMERIC;
+ break;
+ case 'L':
+ /* Undocumented option: for testing use ONLY */
+ qcontext = IPMP_QCONTEXT_LIVE;
+ break;
+ case 'P':
+ opt |= IPMPSTAT_OPT_PARSABLE;
+ break;
+ case 'o':
+ ofields = optarg;
+ break;
+ case 'a':
+ walker = walk_addr;
+ cbfunc = info_output_cbfunc;
+ fields = addr_fields;
+ break;
+ case 'g':
+ walker = walk_group;
+ cbfunc = info_output_cbfunc;
+ fields = group_fields;
+ break;
+ case 'i':
+ walker = walk_if;
+ cbfunc = info_output_cbfunc;
+ fields = if_fields;
+ break;
+ case 'p':
+ fields = probe_fields;
+ break;
+ case 't':
+ walker = walk_if;
+ cbfunc = targinfo_output_cbfunc;
+ fields = targ_fields;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ if (argc > optind || fields == NULL)
+ usage();
+
+ if (opt & IPMPSTAT_OPT_PARSABLE) {
+ if (ofields == NULL) {
+ die("output field list (-o) required in parsable "
+ "output mode\n");
+ } else if (strcasecmp(ofields, "all") == 0) {
+ die("\"all\" not allowed in parsable output mode\n");
+ }
+ }
+
+ /*
+ * Obtain the window size and monitor changes to the size. This data
+ * is used to redisplay the output headers when necessary.
+ */
+ (void) sigset(SIGWINCH, sighandler);
+ sighandler(SIGWINCH);
+
+ if ((err = ipmp_open(&ih)) != IPMP_SUCCESS)
+ die_ipmperr(err, "cannot create IPMP handle");
+
+ if (ipmp_ping_daemon(ih) != IPMP_SUCCESS)
+ die("cannot contact in.mpathd(1M) -- is IPMP in use?\n");
+
+ /*
+ * Create the ofmt linked list that will eventually be passed to
+ * ofmt_output() to output the fields.
+ */
+ ofmt = ofmt_create(ofields, fields);
+
+ /*
+ * If we've been asked to display probes, then call the probe output
+ * function. Otherwise, snapshot IPMP state (or use live state) and
+ * invoke the specified walker with the specified callback function.
+ */
+ if (fields == probe_fields) {
+ probe_output(ih, ofmt);
+ } else {
+ if ((err = ipmp_setqcontext(ih, qcontext)) != IPMP_SUCCESS) {
+ if (qcontext == IPMP_QCONTEXT_SNAP)
+ die_ipmperr(err, "cannot snapshot IPMP state");
+ else
+ die_ipmperr(err, "cannot use live IPMP state");
+ }
+ (*walker)(ih, cbfunc, ofmt);
+ }
+
+ ofmt_destroy(ofmt);
+ ipmp_close(ih);
+
+ return (EXIT_SUCCESS);
+}
+
+/*
+ * Walks all IPMP groups on the system and invokes `cbfunc' on each, passing
+ * it `ih', the ipmp_groupinfo_t pointer, and `arg'.
+ */
+static void
+walk_group(ipmp_handle_t ih, ipmpstat_cbfunc_t *cbfunc, void *arg)
+{
+ int err;
+ uint_t i;
+ ipmp_groupinfo_t *grinfop;
+ ipmp_grouplist_t *grlistp;
+
+ if ((err = ipmp_getgrouplist(ih, &grlistp)) != IPMP_SUCCESS)
+ die_ipmperr(err, "cannot get IPMP group list");
+
+ for (i = 0; i < grlistp->gl_ngroup; i++) {
+ err = ipmp_getgroupinfo(ih, grlistp->gl_groups[i], &grinfop);
+ if (err != IPMP_SUCCESS) {
+ warn_ipmperr(err, "cannot get info for group `%s'",
+ grlistp->gl_groups[i]);
+ continue;
+ }
+ (*cbfunc)(ih, grinfop, arg);
+ ipmp_freegroupinfo(grinfop);
+ }
+
+ ipmp_freegrouplist(grlistp);
+}
+
+/*
+ * Walks all IPMP interfaces on the system and invokes `cbfunc' on each,
+ * passing it `ih', the ipmp_ifinfo_t pointer, and `arg'.
+ */
+static void
+walk_if(ipmp_handle_t ih, ipmpstat_cbfunc_t *cbfunc, void *arg)
+{
+ ipmpstat_walkdata_t iw = { cbfunc, arg };
+
+ walk_group(ih, walk_if_cbfunc, &iw);
+}
+
+/*
+ * Walks all IPMP data addresses on the system and invokes `cbfunc' on each,
+ * passing it `ih', the ipmp_addrinfo_t pointer, and `arg'.
+ */
+static void
+walk_addr(ipmp_handle_t ih, ipmpstat_cbfunc_t *cbfunc, void *arg)
+{
+ ipmpstat_walkdata_t iw = { cbfunc, arg };
+
+ walk_group(ih, walk_addr_cbfunc, &iw);
+}
+
+/*
+ * Per-group callback used by walk_if().  `infop' is the group's
+ * ipmp_groupinfo_t and `arg' is the ipmpstat_walkdata_t built by walk_if().
+ * Each interface in the group's interface list is looked up and handed to
+ * the user's callback; interfaces that cannot be looked up are reported
+ * with a warning and skipped.
+ */
+static void
+walk_if_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	int			rc;
+	uint_t			n;
+	ipmp_groupinfo_t	*gip = infop;
+	ipmpstat_walkdata_t	*wdp = arg;
+	ipmp_iflist_t		*ilp = gip->gr_iflistp;
+	ipmp_ifinfo_t		*iip;
+
+	for (n = 0; n < ilp->il_nif; n++) {
+		rc = ipmp_getifinfo(ih, ilp->il_ifs[n], &iip);
+		if (rc == IPMP_SUCCESS) {
+			(*wdp->iw_func)(ih, iip, wdp->iw_funcarg);
+			ipmp_freeifinfo(iip);
+		} else {
+			warn_ipmperr(rc, "cannot get info for interface `%s'",
+			    ilp->il_ifs[n]);
+		}
+	}
+}
+
+/*
+ * Per-group callback used by walk_addr().  `infop' is the group's
+ * ipmp_groupinfo_t and `arg' is the ipmpstat_walkdata_t built by
+ * walk_addr().  Each address in the group's address list is looked up and
+ * handed to the user's callback; addresses that cannot be looked up are
+ * reported (in string form) with a warning and skipped.
+ */
+static void
+walk_addr_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	int			rc;
+	uint_t			n;
+	char			addrstr[INET6_ADDRSTRLEN];
+	ipmp_groupinfo_t	*gip = infop;
+	ipmpstat_walkdata_t	*wdp = arg;
+	ipmp_addrlist_t		*alp = gip->gr_adlistp;
+	ipmp_addrinfo_t		*aip;
+	struct sockaddr_storage	*ssp;
+
+	for (n = 0; n < alp->al_naddr; n++) {
+		ssp = &alp->al_addrs[n];
+		rc = ipmp_getaddrinfo(ih, gip->gr_name, ssp, &aip);
+		if (rc == IPMP_SUCCESS) {
+			(*wdp->iw_func)(ih, aip, wdp->iw_funcarg);
+			ipmp_freeaddrinfo(aip);
+			continue;
+		}
+
+		sockaddr2str(ssp, addrstr, sizeof (addrstr));
+		warn_ipmperr(rc, "cannot get info for `%s'", addrstr);
+	}
+}
+
+/*
+ * Shared failure path for the probe field functions: stores "?" in `buf'
+ * (at most `bufsize' bytes) and warns that `nvname' could not be retrieved.
+ */
+static void
+sfunc_nvwarn(const char *nvname, char *buf, uint_t bufsize)
+{
+	(void) strlcpy(buf, "?", bufsize);
+	warn("cannot retrieve %s\n", nvname);
+}
+
+/*
+ * ADDRESS field of an address row: stringifies the ad_addr member of the
+ * ipmp_addrinfo_t carried in `arg' into `buf'.
+ */
+static void
+sfunc_addr_address(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_addrinfo_t *aip = arg->sa_data;
+
+	sockaddr2str(&aip->ad_addr, buf, bufsize);
+}
+
+/*
+ * GROUP field of an address row: looks up the group named by the
+ * address's ad_group and stores that group's gr_ifname in `buf'.  If the
+ * lookup fails, a warning is issued and "?" is stored instead.
+ */
+static void
+sfunc_addr_group(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	int			rc;
+	ipmp_addrinfo_t		*aip = arg->sa_data;
+	ipmp_groupinfo_t	*gip;
+
+	rc = ipmp_getgroupinfo(arg->sa_ih, aip->ad_group, &gip);
+	if (rc == IPMP_SUCCESS) {
+		(void) strlcpy(buf, gip->gr_ifname, bufsize);
+		ipmp_freegroupinfo(gip);
+		return;
+	}
+
+	warn_ipmperr(rc, "cannot get info for group `%s'", aip->ad_group);
+	(void) strlcpy(buf, "?", bufsize);
+}
+
+/*
+ * STATE field of an address row: maps ad_state to its name using the
+ * addr_state table.
+ */
+static void
+sfunc_addr_state(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_addrinfo_t *aip = arg->sa_data;
+
+	enum2str(addr_state, aip->ad_state, buf, bufsize);
+}
+
+/*
+ * INBOUND field of an address row: copies the address's ad_binding
+ * string into `buf'.
+ */
+static void
+sfunc_addr_inbound(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_addrinfo_t *aip = arg->sa_data;
+
+	(void) strlcpy(buf, aip->ad_binding, bufsize);
+}
+
+/*
+ * OUTBOUND field of an address row: appends to `buf' the space-separated
+ * names of all interfaces in the address's group that have
+ * IPMP_IFFLAG_ACTIVE set.  Nothing is appended if the address is down or
+ * has no inbound binding.  If the group lookup fails, a warning is issued
+ * and "?" is stored; interfaces that cannot be looked up are warned about
+ * and skipped.  Because names are appended with strlcat(), `buf' must hold
+ * a valid string on entry (ofmt_output() passes an empty one).
+ */
+static void
+sfunc_addr_outbound(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	int			rc;
+	uint_t			n, nfound = 0;
+	ipmp_addrinfo_t		*aip = arg->sa_data;
+	ipmp_groupinfo_t	*gip;
+	ipmp_iflist_t		*ilp;
+	ipmp_ifinfo_t		*iip;
+
+	/*
+	 * A down address carries no traffic, and an address with no
+	 * inbound interface can't have any outbound traffic either.
+	 */
+	if (aip->ad_state == IPMP_ADDR_DOWN || aip->ad_binding[0] == '\0')
+		return;
+
+	/*
+	 * Any active interface in the group may be used for outbound
+	 * traffic, so collect all of them.
+	 */
+	rc = ipmp_getgroupinfo(arg->sa_ih, aip->ad_group, &gip);
+	if (rc != IPMP_SUCCESS) {
+		warn_ipmperr(rc, "cannot get info for group `%s'",
+		    aip->ad_group);
+		(void) strlcpy(buf, "?", bufsize);
+		return;
+	}
+
+	ilp = gip->gr_iflistp;
+	for (n = 0; n < ilp->il_nif; n++) {
+		rc = ipmp_getifinfo(arg->sa_ih, ilp->il_ifs[n], &iip);
+		if (rc != IPMP_SUCCESS) {
+			warn_ipmperr(rc, "cannot get info for interface `%s'",
+			    ilp->il_ifs[n]);
+			continue;
+		}
+
+		if (iip->if_flags & IPMP_IFFLAG_ACTIVE) {
+			if (nfound > 0)
+				(void) strlcat(buf, " ", bufsize);
+			(void) strlcat(buf, iip->if_name, bufsize);
+			nfound++;
+		}
+		ipmp_freeifinfo(iip);
+	}
+	ipmp_freegroupinfo(gip);
+}
+
+/*
+ * GROUPNAME field of a group row: copies the group's gr_name into `buf'.
+ */
+static void
+sfunc_group_name(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_groupinfo_t *gip = arg->sa_data;
+
+	(void) strlcpy(buf, gip->gr_name, bufsize);
+}
+
+/*
+ * GROUP field of a group row: copies the group's gr_ifname into `buf'.
+ */
+static void
+sfunc_group_ifname(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_groupinfo_t *gip = arg->sa_data;
+
+	(void) strlcpy(buf, gip->gr_ifname, bufsize);
+}
+
+/*
+ * STATE field of a group row: maps gr_state to its name using the
+ * group_state table.
+ */
+static void
+sfunc_group_state(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_groupinfo_t *gip = arg->sa_data;
+
+	enum2str(group_state, gip->gr_state, buf, bufsize);
+}
+
+/*
+ * FDT field of a group row: formats gr_fdt (converted by MS2FLOATSEC())
+ * as "%.2fs".  A gr_fdt of zero leaves `buf' untouched.
+ */
+static void
+sfunc_group_fdt(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_groupinfo_t *gip = arg->sa_data;
+
+	if (gip->gr_fdt != 0) {
+		(void) snprintf(buf, bufsize, "%.2fs",
+		    MS2FLOATSEC(gip->gr_fdt));
+	}
+}
+
+/*
+ * INTERFACES field of a group row.  Each interface in the group's list is
+ * looked up and sorted into one of three space-separated name lists:
+ * those with IPMP_IFFLAG_ACTIVE set, those with IPMP_IFFLAG_INACTIVE set,
+ * and all others ("unusable").  The result stored in `buf' is the active
+ * list, followed by the inactive list in parentheses (if non-empty),
+ * followed by the unusable list in square brackets (if non-empty).
+ * Interfaces that cannot be looked up are warned about and omitted.
+ */
+static void
+sfunc_group_interfaces(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+ int err;
+ uint_t i;
+ char *active, *inactive, *unusable;
+ uint_t nactive = 0, ninactive = 0, nunusable = 0;
+ ipmp_groupinfo_t *grinfop = arg->sa_data;
+ ipmp_iflist_t *iflistp = grinfop->gr_iflistp;
+ ipmp_ifinfo_t *ifinfop;
+
+ /* Three scratch strings, each as large as the output buffer. */
+ active = alloca(bufsize);
+ active[0] = '\0';
+ inactive = alloca(bufsize);
+ inactive[0] = '\0';
+ unusable = alloca(bufsize);
+ unusable[0] = '\0';
+
+ for (i = 0; i < iflistp->il_nif; i++) {
+ err = ipmp_getifinfo(arg->sa_ih, iflistp->il_ifs[i], &ifinfop);
+ if (err != IPMP_SUCCESS) {
+ warn_ipmperr(err, "cannot get info for interface `%s'",
+ iflistp->il_ifs[i]);
+ continue;
+ }
+
+ if (ifinfop->if_flags & IPMP_IFFLAG_ACTIVE) {
+ if (nactive++ != 0)
+ (void) strlcat(active, " ", bufsize);
+ (void) strlcat(active, ifinfop->if_name, bufsize);
+ } else if (ifinfop->if_flags & IPMP_IFFLAG_INACTIVE) {
+ if (ninactive++ != 0)
+ (void) strlcat(inactive, " ", bufsize);
+ (void) strlcat(inactive, ifinfop->if_name, bufsize);
+ } else {
+ if (nunusable++ != 0)
+ (void) strlcat(unusable, " ", bufsize);
+ (void) strlcat(unusable, ifinfop->if_name, bufsize);
+ }
+
+ ipmp_freeifinfo(ifinfop);
+ }
+
+ /* Assemble the final value: active (inactive) [unusable]. */
+ (void) strlcpy(buf, active, bufsize);
+
+ if (ninactive > 0) {
+ if (nactive != 0)
+ (void) strlcat(buf, " ", bufsize);
+
+ (void) strlcat(buf, "(", bufsize);
+ (void) strlcat(buf, inactive, bufsize);
+ (void) strlcat(buf, ")", bufsize);
+ }
+
+ if (nunusable > 0) {
+ if (nactive + ninactive != 0)
+ (void) strlcat(buf, " ", bufsize);
+
+ (void) strlcat(buf, "[", bufsize);
+ (void) strlcat(buf, unusable, bufsize);
+ (void) strlcat(buf, "]", bufsize);
+ }
+}
+
+/*
+ * INTERFACE field of an interface row: copies if_name into `buf'.
+ */
+static void
+sfunc_if_name(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t *iip = arg->sa_data;
+
+	(void) strlcpy(buf, iip->if_name, bufsize);
+}
+
+/*
+ * ACTIVE field of an interface row: "yes" if IPMP_IFFLAG_ACTIVE is set in
+ * if_flags, otherwise "no".
+ */
+static void
+sfunc_if_active(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t	*iip = arg->sa_data;
+	const char	*answer;
+
+	answer = (iip->if_flags & IPMP_IFFLAG_ACTIVE) ? "yes" : "no";
+	(void) strlcpy(buf, answer, bufsize);
+}
+
+/*
+ * GROUP field of an interface row: looks up the group named by if_group
+ * and stores that group's gr_ifname in `buf'.  If the lookup fails, a
+ * warning is issued and "?" is stored instead.
+ */
+static void
+sfunc_if_group(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	int			rc;
+	ipmp_ifinfo_t		*iip = arg->sa_data;
+	ipmp_groupinfo_t	*gip;
+
+	rc = ipmp_getgroupinfo(arg->sa_ih, iip->if_group, &gip);
+	if (rc == IPMP_SUCCESS) {
+		(void) strlcpy(buf, gip->gr_ifname, bufsize);
+		ipmp_freegroupinfo(gip);
+		return;
+	}
+
+	warn_ipmperr(rc, "cannot get info for group `%s'", iip->if_group);
+	(void) strlcpy(buf, "?", bufsize);
+}
+
+/*
+ * FLAGS field of an interface row.  `buf' is filled with
+ * IPMPSTAT_NUM_FLAGS '-' characters, and individual positions are then
+ * replaced by a flag letter:
+ *
+ *	's'	if_type is IPMP_IF_STANDBY
+ *	'i'	IPMP_IFFLAG_INACTIVE is set
+ *	'd'	IPMP_IFFLAG_DOWN is set
+ *	'h'	IPMP_IFFLAG_HWADDRDUP is set
+ *	'm'	interface name matches the group's gr_m4ifname
+ *	'M'	interface name matches the group's gr_m6ifname
+ *	'b'	interface name matches the group's gr_bcifname
+ *
+ * The last three need the group's information; if that cannot be
+ * retrieved, a warning is issued and those positions are left as '-'.
+ */
+static void
+sfunc_if_flags(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	int			rc;
+	ipmp_ifinfo_t		*iip = arg->sa_data;
+	ipmp_groupinfo_t	*gip;
+	const char		*ifname = iip->if_name;
+
+	assert(bufsize > IPMPSTAT_NUM_FLAGS);
+
+	/* Start with every flag position cleared. */
+	(void) memset(buf, '-', IPMPSTAT_NUM_FLAGS);
+	buf[IPMPSTAT_NUM_FLAGS] = '\0';
+
+	/* Flags derivable from the interface information alone. */
+	if (iip->if_type == IPMP_IF_STANDBY)
+		buf[IPMPSTAT_SFLAG_INDEX] = 's';
+	if (iip->if_flags & IPMP_IFFLAG_INACTIVE)
+		buf[IPMPSTAT_IFLAG_INDEX] = 'i';
+	if (iip->if_flags & IPMP_IFFLAG_DOWN)
+		buf[IPMPSTAT_DFLAG_INDEX] = 'd';
+	if (iip->if_flags & IPMP_IFFLAG_HWADDRDUP)
+		buf[IPMPSTAT_HFLAG_INDEX] = 'h';
+
+	/* Flags that depend on the owning group's nominations. */
+	rc = ipmp_getgroupinfo(arg->sa_ih, iip->if_group, &gip);
+	if (rc != IPMP_SUCCESS) {
+		warn_ipmperr(rc, "cannot get broadcast/multicast info for "
+		    "group `%s'", iip->if_group);
+		return;
+	}
+
+	if (strcmp(gip->gr_m4ifname, ifname) == 0)
+		buf[IPMPSTAT_M4FLAG_INDEX] = 'm';
+	if (strcmp(gip->gr_m6ifname, ifname) == 0)
+		buf[IPMPSTAT_M6FLAG_INDEX] = 'M';
+	if (strcmp(gip->gr_bcifname, ifname) == 0)
+		buf[IPMPSTAT_BFLAG_INDEX] = 'b';
+
+	ipmp_freegroupinfo(gip);
+}
+
+/*
+ * LINK field of an interface row: maps if_linkstate to its name using the
+ * if_link table.
+ */
+static void
+sfunc_if_link(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t *iip = arg->sa_data;
+
+	enum2str(if_link, iip->if_linkstate, buf, bufsize);
+}
+
+/*
+ * PROBE field of an interface row: maps if_probestate to its name using
+ * the if_probe table.
+ */
+static void
+sfunc_if_probe(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t *iip = arg->sa_data;
+
+	enum2str(if_probe, iip->if_probestate, buf, bufsize);
+}
+
+/*
+ * STATE field of an interface row: maps if_state to its name using the
+ * if_state table.
+ */
+static void
+sfunc_if_state(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_ifinfo_t *iip = arg->sa_data;
+
+	enum2str(if_state, iip->if_state, buf, bufsize);
+}
+
+/*
+ * PROBE field of a probe row: formats the IPMP_PROBE_ID value from the
+ * event's nvlist as an unsigned decimal.  If the value is missing,
+ * sfunc_nvwarn() warns and stores "?".
+ */
+static void
+sfunc_probe_id(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	uint32_t	id;
+	nvlist_t	*nvl = arg->sa_data;
+
+	if (nvlist_lookup_uint32(nvl, IPMP_PROBE_ID, &id) == 0)
+		(void) snprintf(buf, bufsize, "%u", id);
+	else
+		sfunc_nvwarn("IPMP_PROBE_ID", buf, bufsize);
+}
+
+/*
+ * INTERFACE field of a probe row: copies the IPMP_IF_NAME string from the
+ * event's nvlist into `buf'.  If the value is missing, sfunc_nvwarn()
+ * warns and stores "?".
+ */
+static void
+sfunc_probe_ifname(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	char		*name;
+	nvlist_t	*nvl = arg->sa_data;
+
+	if (nvlist_lookup_string(nvl, IPMP_IF_NAME, &name) == 0)
+		(void) strlcpy(buf, name, bufsize);
+	else
+		sfunc_nvwarn("IPMP_IF_NAME", buf, bufsize);
+}
+
+/*
+ * TIME field of a probe row: formats, in seconds with two decimals, the
+ * difference between the probe's IPMP_PROBE_START_TIME and the global
+ * probe_output_start timestamp.  If the start time is missing,
+ * sfunc_nvwarn() warns and stores "?".
+ */
+static void
+sfunc_probe_time(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	hrtime_t start;
+	nvlist_t *nvl = arg->sa_data;
+
+	if (nvlist_lookup_hrtime(nvl, IPMP_PROBE_START_TIME, &start) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_START_TIME", buf, bufsize);
+		return;
+	}
+
+	/*
+	 * Convert in double precision: the difference is a nanosecond
+	 * count, and a float's 24-bit mantissa can no longer represent
+	 * hundredths of a second once the count grows large enough.
+	 */
+	(void) snprintf(buf, bufsize, "%.2fs",
+	    (double)(start - probe_output_start) / NANOSEC);
+}
+
+/*
+ * TARGET field of a probe row: retrieves the IPMP_PROBE_TARGET byte array
+ * from the event's nvlist, treats it as a struct sockaddr_storage, and
+ * stringifies it into `buf'.  If the value is missing, sfunc_nvwarn()
+ * warns and stores "?".
+ * NOTE(review): the returned element count is not checked against the
+ * size of struct sockaddr_storage -- confirm the sender always supplies a
+ * full structure.
+ */
+static void
+sfunc_probe_target(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	uint_t			nbytes;
+	struct sockaddr_storage	*ssp;
+	nvlist_t		*nvl = arg->sa_data;
+
+	if (nvlist_lookup_byte_array(nvl, IPMP_PROBE_TARGET,
+	    (uchar_t **)&ssp, &nbytes) == 0) {
+		sockaddr2str(ssp, buf, bufsize);
+		return;
+	}
+
+	sfunc_nvwarn("IPMP_PROBE_TARGET", buf, bufsize);
+}
+
+/*
+ * RTT field of a probe row: for a probe whose IPMP_PROBE_STATE is
+ * IPMP_PROBE_ACKED, formats (IPMP_PROBE_ACKPROC_TIME -
+ * IPMP_PROBE_START_TIME) in milliseconds.  Probes in any other state
+ * leave `buf' untouched.  Any missing nvlist value is reported through
+ * sfunc_nvwarn(), which stores "?".
+ */
+static void
+sfunc_probe_rtt(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	uint32_t	pstate;
+	hrtime_t	t_start, t_ackproc;
+	nvlist_t	*nvl = arg->sa_data;
+
+	if (nvlist_lookup_uint32(nvl, IPMP_PROBE_STATE, &pstate) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_STATE", buf, bufsize);
+		return;
+	}
+
+	/* Only acknowledged probes have a round-trip time. */
+	if (pstate != IPMP_PROBE_ACKED)
+		return;
+
+	if (nvlist_lookup_hrtime(nvl, IPMP_PROBE_START_TIME, &t_start) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_START_TIME", buf, bufsize);
+		return;
+	}
+
+	if (nvlist_lookup_hrtime(nvl, IPMP_PROBE_ACKPROC_TIME,
+	    &t_ackproc) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_ACKPROC_TIME", buf, bufsize);
+		return;
+	}
+
+	(void) snprintf(buf, bufsize, "%.2fms",
+	    NS2FLOATMS(t_ackproc - t_start));
+}
+
+/*
+ * NETRTT field of a probe row: for a probe whose IPMP_PROBE_STATE is
+ * IPMP_PROBE_ACKED, formats (IPMP_PROBE_ACKRECV_TIME -
+ * IPMP_PROBE_SENT_TIME) in milliseconds.  Probes in any other state leave
+ * `buf' untouched.  Any missing nvlist value is reported through
+ * sfunc_nvwarn(), which stores "?".
+ */
+static void
+sfunc_probe_netrtt(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	uint32_t	pstate;
+	hrtime_t	t_sent, t_ackrecv;
+	nvlist_t	*nvl = arg->sa_data;
+
+	if (nvlist_lookup_uint32(nvl, IPMP_PROBE_STATE, &pstate) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_STATE", buf, bufsize);
+		return;
+	}
+
+	/* Only acknowledged probes have a round-trip time. */
+	if (pstate != IPMP_PROBE_ACKED)
+		return;
+
+	if (nvlist_lookup_hrtime(nvl, IPMP_PROBE_SENT_TIME, &t_sent) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_SENT_TIME", buf, bufsize);
+		return;
+	}
+
+	if (nvlist_lookup_hrtime(nvl, IPMP_PROBE_ACKRECV_TIME,
+	    &t_ackrecv) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_ACKRECV_TIME", buf, bufsize);
+		return;
+	}
+
+	(void) snprintf(buf, bufsize, "%.2fms",
+	    NS2FLOATMS(t_ackrecv - t_sent));
+}
+
+/*
+ * RTTAVG field of a probe row: formats the IPMP_PROBE_TARGET_RTTAVG value
+ * in milliseconds.  A value of zero leaves `buf' untouched; a missing
+ * value is reported through sfunc_nvwarn(), which stores "?".
+ */
+static void
+sfunc_probe_rttavg(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	int64_t		avg;
+	nvlist_t	*nvl = arg->sa_data;
+
+	if (nvlist_lookup_int64(nvl, IPMP_PROBE_TARGET_RTTAVG, &avg) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_TARGET_RTTAVG", buf, bufsize);
+		return;
+	}
+
+	if (avg == 0)
+		return;
+
+	(void) snprintf(buf, bufsize, "%.2fms", NS2FLOATMS(avg));
+}
+
+/*
+ * RTTDEV field of a probe row: formats the IPMP_PROBE_TARGET_RTTDEV value
+ * in milliseconds.  A value of zero leaves `buf' untouched; a missing
+ * value is reported through sfunc_nvwarn(), which stores "?".
+ */
+static void
+sfunc_probe_rttdev(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	int64_t		dev;
+	nvlist_t	*nvl = arg->sa_data;
+
+	if (nvlist_lookup_int64(nvl, IPMP_PROBE_TARGET_RTTDEV, &dev) != 0) {
+		sfunc_nvwarn("IPMP_PROBE_TARGET_RTTDEV", buf, bufsize);
+		return;
+	}
+
+	if (dev == 0)
+		return;
+
+	(void) snprintf(buf, bufsize, "%.2fms", NS2FLOATMS(dev));
+}
+
+/*
+ * walk_if() callback used by probe_output(): bumps the uint_t counter
+ * pointed to by `arg' for each interface whose if_probestate is anything
+ * other than IPMP_PROBE_DISABLED.  `ih' is unused.
+ */
+/* ARGSUSED */
+static void
+probe_enabled_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	ipmp_ifinfo_t	*iip = infop;
+	uint_t		*countp = arg;
+
+	if (iip->if_probestate == IPMP_PROBE_DISABLED)
+		return;
+
+	(*countp)++;
+}
+
+/*
+ * Implements probe display.  Dies unless at least one interface has
+ * probing enabled, records the starting timestamp in probe_output_start,
+ * binds to the IPMP sysevent channel, and subscribes probe_event() (with
+ * `ih' and `ofmt' bundled in an ipmpstat_probe_state_t) to class EC_IPMP.
+ * It then loops on pause() forever and therefore never returns.
+ */
+static void
+probe_output(ipmp_handle_t ih, ipmpstat_ofmt_t *ofmt)
+{
+ char sub[MAX_SUBID_LEN];
+ evchan_t *evch;
+ ipmpstat_probe_state_t ps = { ih, ofmt };
+ uint_t nenabled = 0;
+
+ /*
+ * Check if any interfaces are enabled for probe-based failure
+ * detection. If not, immediately fail.
+ */
+ walk_if(ih, probe_enabled_cbfunc, &nenabled);
+ if (nenabled == 0)
+ die("probe-based failure detection is disabled\n");
+
+ probe_output_start = gethrtime();
+
+ /*
+ * Unfortunately, until 4791900 is fixed, only privileged processes
+ * can bind and thus receive sysevents.
+ */
+ /* The result is stored in errno so die() can append its message. */
+ errno = sysevent_evc_bind(IPMP_EVENT_CHAN, &evch, EVCH_CREAT);
+ if (errno != 0) {
+ if (errno == EPERM)
+ die("insufficient privileges for -p\n");
+ die("sysevent_evc_bind to channel %s failed", IPMP_EVENT_CHAN);
+ }
+
+ /*
+ * The subscriber must be unique in order for sysevent_evc_subscribe()
+ * to succeed, so combine our name and pid.
+ */
+ (void) snprintf(sub, sizeof (sub), "%d-%s", getpid(), progname);
+
+ errno = sysevent_evc_subscribe(evch, sub, EC_IPMP, probe_event, &ps, 0);
+ if (errno != 0)
+ die("sysevent_evc_subscribe for class %s failed", EC_IPMP);
+
+ /* All further output is produced by the probe_event() callback. */
+ for (;;)
+ (void) pause();
+}
+
+/*
+ * Sysevent callback registered by probe_output().  `arg' points to the
+ * ipmpstat_probe_state_t holding the IPMP handle and output format.
+ * Events whose subclass is not ESC_IPMP_PROBE_STATE are ignored.  Events
+ * lacking an attribute list, a version, or a probe state -- or carrying a
+ * version other than IPMP_EVENT_CUR_VERSION -- are dropped with a warning.
+ * Otherwise, a row is output for probes whose state is IPMP_PROBE_ACKED
+ * or IPMP_PROBE_LOST.  Always returns 0.
+ */
+static int
+probe_event(sysevent_t *ev, void *arg)
+{
+	nvlist_t *nvl;
+	uint32_t state;
+	uint32_t version;
+	ipmpstat_probe_state_t *psp = arg;
+
+	if (strcmp(sysevent_get_subclass_name(ev), ESC_IPMP_PROBE_STATE) != 0)
+		return (0);
+
+	if (sysevent_get_attr_list(ev, &nvl) != 0) {
+		warn("sysevent_get_attr_list failed; dropping event");
+		return (0);
+	}
+
+	if (nvlist_lookup_uint32(nvl, IPMP_EVENT_VERSION, &version) != 0) {
+		warn("dropped event with no IPMP_EVENT_VERSION\n");
+		goto out;
+	}
+
+	if (version != IPMP_EVENT_CUR_VERSION) {
+		/* `version' is unsigned, so it must be printed with %u. */
+		warn("dropped event with unsupported IPMP_EVENT_VERSION %u\n",
+		    version);
+		goto out;
+	}
+
+	if (nvlist_lookup_uint32(nvl, IPMP_PROBE_STATE, &state) != 0) {
+		warn("dropped event with no IPMP_PROBE_STATE\n");
+		goto out;
+	}
+
+	if (state == IPMP_PROBE_ACKED || state == IPMP_PROBE_LOST)
+		ofmt_output(psp->ps_ofmt, psp->ps_ih, nvl);
+out:
+	nvlist_free(nvl);
+	return (0);
+}
+
+/*
+ * INTERFACE field of a target row: copies the target information's
+ * it_name into `buf'.
+ */
+static void
+sfunc_targ_ifname(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_targinfo_t *tip = arg->sa_data;
+
+	(void) strlcpy(buf, tip->it_name, bufsize);
+}
+
+/*
+ * MODE field of a target row: maps it_targmode to its name using the
+ * targ_mode table.
+ */
+static void
+sfunc_targ_mode(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_targinfo_t *tip = arg->sa_data;
+
+	enum2str(targ_mode, tip->it_targmode, buf, bufsize);
+}
+
+/*
+ * TESTADDR field of a target row: stringifies it_testaddr into `buf',
+ * unless it_targmode is IPMP_TARG_DISABLED, in which case `buf' is left
+ * untouched.
+ */
+static void
+sfunc_targ_testaddr(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	ipmp_targinfo_t *tip = arg->sa_data;
+
+	if (tip->it_targmode == IPMP_TARG_DISABLED)
+		return;
+
+	sockaddr2str(&tip->it_testaddr, buf, bufsize);
+}
+
+/*
+ * TARGETS field of a target row: appends to `buf' the stringified form of
+ * every address in it_targlistp, separated by single spaces.  Because the
+ * names are appended with strlcat(), `buf' must hold a valid string on
+ * entry (ofmt_output() passes an empty one).
+ */
+static void
+sfunc_targ_targets(ipmpstat_sfunc_arg_t *arg, char *buf, uint_t bufsize)
+{
+	uint_t		n;
+	char		*addrstr = alloca(bufsize);
+	ipmp_targinfo_t	*tip = arg->sa_data;
+	ipmp_addrlist_t	*alp = tip->it_targlistp;
+
+	for (n = 0; n < alp->al_naddr; n++) {
+		if (n > 0)
+			(void) strlcat(buf, " ", bufsize);
+		sockaddr2str(&alp->al_addrs[n], addrstr, bufsize);
+		(void) strlcat(buf, addrstr, bufsize);
+	}
+}
+
+/*
+ * Walker callback that outputs one row for `infop'; `arg' is the
+ * ipmpstat_ofmt_t field list to hand to ofmt_output().
+ */
+static void
+info_output_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	const ipmpstat_ofmt_t *ofmt = arg;
+
+	ofmt_output(ofmt, ih, infop);
+}
+
+/*
+ * Walker callback that outputs the probe-target rows for the interface
+ * described by `infop'; `arg' is the field list to hand to ofmt_output().
+ */
+static void
+targinfo_output_cbfunc(ipmp_handle_t ih, void *infop, void *arg)
+{
+	ipmp_ifinfo_t	*iip = infop;
+	int		v4on, v6on;
+
+	v4on = (iip->if_targinfo4.it_targmode != IPMP_TARG_DISABLED);
+	v6on = (iip->if_targinfo6.it_targmode != IPMP_TARG_DISABLED);
+
+	/*
+	 * Normally only one of IPv4 or IPv6 probing is enabled, though the
+	 * admin may enable both.  When just one is enabled, the other is
+	 * omitted so the admin isn't nudged toward enabling both.  When
+	 * neither is enabled, the IPv4 row is still printed so that a MODE
+	 * of "disabled" is visible.
+	 */
+	if (v4on || !v6on)
+		ofmt_output(arg, ih, &iip->if_targinfo4);
+	if (v6on)
+		ofmt_output(arg, ih, &iip->if_targinfo6);
+}
+
+/*
+ * Allocates a zeroed ipmpstat_ofmt_t referring to `fieldp' and appends it
+ * to the singly-linked list tracked by `*headp' and `*tailp'.  Dies if
+ * memory cannot be allocated.
+ */
+static void
+ofmt_append(ipmpstat_ofmt_t **headp, ipmpstat_ofmt_t **tailp,
+    ipmpstat_field_t *fieldp)
+{
+	ipmpstat_ofmt_t *ofmt;
+
+	if ((ofmt = calloc(1, sizeof (*ofmt))) == NULL)
+		die("cannot allocate output format list");
+
+	ofmt->o_field = fieldp;
+	if (*headp == NULL)
+		*headp = ofmt;
+	else
+		(*tailp)->o_next = ofmt;
+	*tailp = ofmt;
+}
+
+/*
+ * Creates an ipmpstat_ofmt_t field list from the comma-separated list of
+ * user-specified fields passed via `ofields'.  The table of known fields
+ * (and their attributes) is passed via `fields'.
+ *
+ * If `ofields' is NULL, the list holds the leading fields of `fields'
+ * whose cumulative width does not exceed IPMPSTAT_NCOL; if it is "all"
+ * (case-insensitive), the list holds every field.  Otherwise each named
+ * field is looked up with field_find(); an unknown name is fatal in
+ * parsable mode and otherwise just warned about.  Dies on allocation
+ * failure, or if an explicit list names no valid field.
+ */
+static ipmpstat_ofmt_t *
+ofmt_create(const char *ofields, ipmpstat_field_t fields[])
+{
+	char *token, *lasts, *ofields_dup;
+	const char *fieldname;
+	ipmpstat_ofmt_t *ofmt_head = NULL, *ofmt_tail = NULL;
+	ipmpstat_field_t *fieldp;
+	uint_t cols = 0;
+
+	/*
+	 * If "-o" was omitted or "-o all" was specified, build a list of
+	 * field names.  If "-o" was omitted, stop building the list when
+	 * we run out of columns.
+	 */
+	if (ofields == NULL || strcasecmp(ofields, "all") == 0) {
+		for (fieldp = fields; fieldp->f_name != NULL; fieldp++) {
+			cols += fieldp->f_width;
+			if (ofields == NULL && cols > IPMPSTAT_NCOL)
+				break;
+
+			ofmt_append(&ofmt_head, &ofmt_tail, fieldp);
+		}
+		return (ofmt_head);
+	}
+
+	/* strtok_r() modifies its input, so tokenize a private copy. */
+	if ((ofields_dup = strdup(ofields)) == NULL)
+		die("cannot allocate output format list");
+
+	token = ofields_dup;
+	while ((fieldname = strtok_r(token, ",", &lasts)) != NULL) {
+		token = NULL;
+
+		if ((fieldp = field_find(fields, fieldname)) == NULL) {
+			/*
+			 * Since machine parsers are unlikely to be able to
+			 * gracefully handle missing fields, die if we're in
+			 * parsable mode.  Otherwise, just print a warning.
+			 */
+			if (opt & IPMPSTAT_OPT_PARSABLE)
+				die("unknown output field `%s'\n", fieldname);
+
+			warn("ignoring unknown output field `%s'\n", fieldname);
+			continue;
+		}
+
+		ofmt_append(&ofmt_head, &ofmt_tail, fieldp);
+	}
+
+	free(ofields_dup);
+	if (ofmt_head == NULL)
+		die("no valid output fields specified\n");
+
+	return (ofmt_head);
+}
+
+/*
+ * Frees every node of the `ofmt' field list.  The field descriptors the
+ * nodes point to are not freed.
+ */
+static void
+ofmt_destroy(ipmpstat_ofmt_t *ofmt)
+{
+	while (ofmt != NULL) {
+		ipmpstat_ofmt_t *next = ofmt->o_next;
+
+		free(ofmt);
+		ofmt = next;
+	}
+}
+
+/*
+ * Prints the header line for the fields in `ofmt': each field name is
+ * left-justified and padded to the field's width, except for the last
+ * one, which is printed without padding.
+ */
+static void
+ofmt_output_header(const ipmpstat_ofmt_t *ofmt)
+{
+	const ipmpstat_ofmt_t *cur;
+
+	for (cur = ofmt; cur != NULL; cur = cur->o_next) {
+		const ipmpstat_field_t *fp = cur->o_field;
+
+		if (cur->o_next != NULL)
+			(void) printf("%-*s", fp->f_width, fp->f_name);
+		else
+			(void) printf("%s", fp->f_name);
+	}
+	(void) printf("\n");
+}
+
+/*
+ * Outputs one row of values for the fields named by `ofmt'. The values to
+ * output are obtained through the `ofmt' function pointers, which are
+ * indirectly passed the `ih' and `arg' structures for state; see the block
+ * comment at the start of this file for details.
+ *
+ * In parsable mode, values are separated by ':' and no header is printed;
+ * when more than one field is displayed, any ':' or '\' inside a value is
+ * preceded by a backslash. Otherwise, values are laid out in columns, an
+ * empty value is shown as "--", and a header is printed whenever the
+ * running row count (kept in a static across calls) reaches a multiple
+ * of the window height. stdout is flushed after every row.
+ */
+static void
+ofmt_output(const ipmpstat_ofmt_t *ofmt, ipmp_handle_t ih, void *arg)
+{
+ int i;
+ char buf[1024];
+ boolean_t escsep;
+ static int nrow;
+ const char *value;
+ uint_t width, valwidth;
+ uint_t compress, overflow = 0;
+ const ipmpstat_field_t *fieldp;
+ ipmpstat_sfunc_arg_t sfunc_arg;
+
+ /*
+ * For each screenful of data, display the header.
+ */
+ if ((nrow++ % winsize.ws_row) == 0 && !(opt & IPMPSTAT_OPT_PARSABLE)) {
+ ofmt_output_header(ofmt);
+ /* presumably accounts for the row taken by the header itself */
+ nrow++;
+ }
+
+ /*
+ * Check if we'll be displaying multiple fields per line, and thus
+ * need to escape the field separator.
+ */
+ escsep = (ofmt != NULL && ofmt->o_next != NULL);
+
+ for (; ofmt != NULL; ofmt = ofmt->o_next) {
+ fieldp = ofmt->o_field;
+
+ sfunc_arg.sa_ih = ih;
+ sfunc_arg.sa_data = arg;
+
+ /* Field functions may append to or leave buf alone; start empty. */
+ buf[0] = '\0';
+ (*fieldp->f_sfunc)(&sfunc_arg, buf, sizeof (buf));
+
+ if (opt & IPMPSTAT_OPT_PARSABLE) {
+ for (i = 0; buf[i] != '\0'; i++) {
+ if (escsep && (buf[i] == ':' || buf[i] == '\\'))
+ (void) putchar('\\');
+ (void) putchar(buf[i]);
+ }
+ if (ofmt->o_next != NULL)
+ (void) putchar(':');
+ } else {
+ value = (buf[0] == '\0') ? "--" : buf;
+
+ /*
+ * To avoid needless line-wraps, for the last field,
+ * don't include any trailing whitespace.
+ */
+ if (ofmt->o_next == NULL) {
+ (void) printf("%s", value);
+ continue;
+ }
+
+ /*
+ * For other fields, grow the width as necessary to
+ * ensure the value completely fits. However, if
+ * there's unused whitespace in subsequent fields,
+ * then "compress" that whitespace to attempt to get
+ * the columns to line up again.
+ */
+ width = fieldp->f_width;
+ valwidth = strlen(value);
+
+ /*
+ * The value (plus one separating space) doesn't fit in what is
+ * left of this column: print it in full and record by how many
+ * characters the row has run over.
+ */
+ if (valwidth + overflow >= width) {
+ overflow += valwidth - width + 1;
+ (void) printf("%s ", value);
+ continue;
+ }
+
+ /* Win back earlier overflow from this column's spare padding. */
+ if (overflow > 0) {
+ compress = MIN(overflow, width - valwidth);
+ overflow -= compress;
+ width -= compress;
+ }
+ (void) printf("%-*s", width, value);
+ }
+ }
+ (void) printf("\n");
+
+ /*
+ * In case stdout has been redirected to e.g. a pipe, flush stdout so
+ * that commands can act on our output immediately.
+ */
+ (void) fflush(stdout);
+}
+
+/*
+ * Looks up `fieldname' (case-insensitively) in the `fields' array, which
+ * is terminated by an entry whose f_name is NULL.  Returns a pointer to
+ * the matching entry, or NULL if there is none.
+ */
+static ipmpstat_field_t *
+field_find(ipmpstat_field_t *fields, const char *fieldname)
+{
+	uint_t n;
+
+	for (n = 0; fields[n].f_name != NULL; n++) {
+		if (strcasecmp(fields[n].f_name, fieldname) == 0)
+			return (&fields[n]);
+	}
+	return (NULL);
+}
+
+/*
+ * Translates `enumval' to its name using the `enums' table (terminated by
+ * an entry whose e_name is NULL) and stores at most `bufsize' bytes of
+ * that name into `buf'.  If the value is not in the table, it is stored
+ * numerically as "<value>".
+ */
+static void
+enum2str(const ipmpstat_enum_t *enums, int enumval, char *buf, uint_t bufsize)
+{
+	const ipmpstat_enum_t *ep = enums;
+
+	while (ep->e_name != NULL) {
+		if (ep->e_val == enumval) {
+			(void) strlcpy(buf, ep->e_name, bufsize);
+			return;
+		}
+		ep++;
+	}
+
+	(void) snprintf(buf, bufsize, "<%d>", enumval);
+}
+
+/*
+ * Stores the stringified value of the sockaddr_storage pointed to by `ssp'
+ * into at most `bufsize' bytes of `buf'.  Only AF_INET and AF_INET6
+ * addresses are supported; for any other family, or if getnameinfo()
+ * fails, "?" is stored instead.  If IPMPSTAT_OPT_NUMERIC is set in `opt',
+ * the address is always shown numerically.
+ */
+static void
+sockaddr2str(const struct sockaddr_storage *ssp, char *buf, uint_t bufsize)
+{
+	int flags = NI_NOFQDN;
+	socklen_t socklen;
+	struct sockaddr *sp = (struct sockaddr *)ssp;
+
+	/*
+	 * Sadly, getnameinfo() does not allow the socklen to be oversized for
+	 * a given family -- so we must determine the exact size to pass to it.
+	 */
+	switch (ssp->ss_family) {
+	case AF_INET:
+		socklen = sizeof (struct sockaddr_in);
+		break;
+	case AF_INET6:
+		socklen = sizeof (struct sockaddr_in6);
+		break;
+	default:
+		(void) strlcpy(buf, "?", bufsize);
+		return;
+	}
+
+	if (opt & IPMPSTAT_OPT_NUMERIC)
+		flags |= NI_NUMERICHOST;
+
+	/*
+	 * The contents of `buf' are not defined if getnameinfo() fails, so
+	 * fall back to the same placeholder used for unknown families.
+	 */
+	if (getnameinfo(sp, socklen, buf, bufsize, NULL, 0, flags) != 0)
+		(void) strlcpy(buf, "?", bufsize);
+}
+
+/*
+ * SIGWINCH handler: refreshes the global `winsize' from file descriptor 1.
+ * If the query fails or reports zero rows or columns, `winsize' is set to
+ * 80 columns by 24 rows.
+ */
+static void
+sighandler(int sig)
+{
+	assert(sig == SIGWINCH);
+
+	if (ioctl(1, TIOCGWINSZ, &winsize) != -1 &&
+	    winsize.ws_col != 0 && winsize.ws_row != 0)
+		return;
+
+	winsize.ws_col = 80;
+	winsize.ws_row = 24;
+}
+
+/*
+ * Prints the (localized) usage message to stderr and exits with
+ * EXIT_FAILURE.
+ */
+static void
+usage(void)
+{
+	const char *synopsis;
+
+	synopsis = gettext("[-n] [-o <field> [-P]] -a|-g|-i|-p|-t");
+	(void) fprintf(stderr, gettext("usage: %s %s\n"), progname, synopsis);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Prints "<progname>: warning: " followed by the localized, formatted
+ * message to stderr.  If the localized format contains no newline, the
+ * message is completed with ": " and the error string for the value errno
+ * had on entry.
+ */
+/* PRINTFLIKE1 */
+static void
+warn(const char *format, ...)
+{
+	int	saved_errno = errno;
+	va_list	ap;
+
+	format = gettext(format);
+	(void) fprintf(stderr, gettext("%s: warning: "), progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	if (strchr(format, '\n') == NULL)
+		(void) fprintf(stderr, ": %s\n", strerror(saved_errno));
+}
+
+/*
+ * Prints "<progname>: warning: " followed by the localized, formatted
+ * message, then ": " and the ipmp_errmsg() text for `ipmperr', to stderr.
+ */
+/* PRINTFLIKE2 */
+static void
+warn_ipmperr(int ipmperr, const char *format, ...)
+{
+	va_list ap;
+
+	format = gettext(format);
+	(void) fprintf(stderr, gettext("%s: warning: "), progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	(void) fprintf(stderr, ": %s\n", ipmp_errmsg(ipmperr));
+}
+
+/*
+ * Prints "<progname>: " followed by the localized, formatted message to
+ * stderr and exits with EXIT_FAILURE.  If the localized format contains
+ * no newline, the message is completed with ": " and the error string for
+ * the value errno had on entry.
+ */
+/* PRINTFLIKE1 */
+static void
+die(const char *format, ...)
+{
+	int	saved_errno = errno;
+	va_list	ap;
+
+	format = gettext(format);
+	(void) fprintf(stderr, "%s: ", progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	if (strchr(format, '\n') == NULL)
+		(void) fprintf(stderr, ": %s\n", strerror(saved_errno));
+
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Prints "<progname>: " followed by the localized, formatted message,
+ * then ": " and the ipmp_errmsg() text for `ipmperr', to stderr, and exits
+ * with EXIT_FAILURE.
+ */
+/* PRINTFLIKE2 */
+static void
+die_ipmperr(int ipmperr, const char *format, ...)
+{
+	va_list ap;
+
+	format = gettext(format);
+	(void) fprintf(stderr, "%s: ", progname);
+
+	va_start(ap, format);
+	(void) vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	(void) fprintf(stderr, ": %s\n", ipmp_errmsg(ipmperr));
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Output field tables, one per output mode. Each entry gives the field's
+ * name (used as the column header and matched case-insensitively against
+ * "-o" arguments), its column width, and the function that produces its
+ * value. A NULL name terminates each table. When "-o" is omitted, fields
+ * are taken in table order until their cumulative width would exceed
+ * IPMPSTAT_NCOL (see ofmt_create()), so entry order matters.
+ */
+
+/* Fields for address rows (values come from an ipmp_addrinfo_t). */
+static ipmpstat_field_t addr_fields[] = {
+ { "ADDRESS", 26, sfunc_addr_address },
+ { "STATE", 7, sfunc_addr_state },
+ { "GROUP", 12, sfunc_addr_group },
+ { "INBOUND", 12, sfunc_addr_inbound },
+ { "OUTBOUND", 23, sfunc_addr_outbound },
+ { NULL, 0, NULL }
+};
+
+/* Fields for group rows (values come from an ipmp_groupinfo_t). */
+static ipmpstat_field_t group_fields[] = {
+ { "GROUP", 12, sfunc_group_ifname },
+ { "GROUPNAME", 12, sfunc_group_name },
+ { "STATE", 10, sfunc_group_state },
+ { "FDT", 10, sfunc_group_fdt },
+ { "INTERFACES", 30, sfunc_group_interfaces },
+ { NULL, 0, NULL }
+};
+
+/* Fields for interface rows (values come from an ipmp_ifinfo_t). */
+static ipmpstat_field_t if_fields[] = {
+ { "INTERFACE", 12, sfunc_if_name },
+ { "ACTIVE", 8, sfunc_if_active },
+ { "GROUP", 12, sfunc_if_group },
+ { "FLAGS", 10, sfunc_if_flags },
+ { "LINK", 10, sfunc_if_link },
+ { "PROBE", 10, sfunc_if_probe },
+ { "STATE", 10, sfunc_if_state },
+ { NULL, 0, NULL }
+};
+
+/* Fields for probe rows (values come from a probe event's nvlist). */
+static ipmpstat_field_t probe_fields[] = {
+ { "TIME", 10, sfunc_probe_time },
+ { "INTERFACE", 12, sfunc_probe_ifname },
+ { "PROBE", 7, sfunc_probe_id },
+ { "NETRTT", 10, sfunc_probe_netrtt },
+ { "RTT", 10, sfunc_probe_rtt },
+ { "RTTAVG", 10, sfunc_probe_rttavg },
+ { "TARGET", 20, sfunc_probe_target },
+ { "RTTDEV", 10, sfunc_probe_rttdev },
+ { NULL, 0, NULL }
+};
+
+/* Fields for probe-target rows (values come from an ipmp_targinfo_t). */
+static ipmpstat_field_t targ_fields[] = {
+ { "INTERFACE", 12, sfunc_targ_ifname },
+ { "MODE", 10, sfunc_targ_mode },
+ { "TESTADDR", 20, sfunc_targ_testaddr },
+ { "TARGETS", 38, sfunc_targ_targets },
+ { NULL, 0, NULL }
+};
+
+/*
+ * Enumeration-to-name tables consumed by enum2str(). Each entry pairs a
+ * display name with an enumeration value; a NULL name terminates each
+ * table. Values not listed are displayed by enum2str() as "<value>".
+ */
+
+/* Address states (ad_state). */
+static ipmpstat_enum_t addr_state[] = {
+ { "up", IPMP_ADDR_UP },
+ { "down", IPMP_ADDR_DOWN },
+ { NULL, 0 }
+};
+
+/* Group states (gr_state). */
+static ipmpstat_enum_t group_state[] = {
+ { "ok", IPMP_GROUP_OK },
+ { "failed", IPMP_GROUP_FAILED },
+ { "degraded", IPMP_GROUP_DEGRADED },
+ { NULL, 0 }
+};
+
+/* Interface link states (if_linkstate). */
+static ipmpstat_enum_t if_link[] = {
+ { "up", IPMP_LINK_UP },
+ { "down", IPMP_LINK_DOWN },
+ { "unknown", IPMP_LINK_UNKNOWN },
+ { NULL, 0 }
+};
+
+/* Interface probe states (if_probestate). */
+static ipmpstat_enum_t if_probe[] = {
+ { "ok", IPMP_PROBE_OK },
+ { "failed", IPMP_PROBE_FAILED },
+ { "unknown", IPMP_PROBE_UNKNOWN },
+ { "disabled", IPMP_PROBE_DISABLED },
+ { NULL, 0 }
+};
+
+/* Overall interface states (if_state). */
+static ipmpstat_enum_t if_state[] = {
+ { "ok", IPMP_IF_OK },
+ { "failed", IPMP_IF_FAILED },
+ { "unknown", IPMP_IF_UNKNOWN },
+ { "offline", IPMP_IF_OFFLINE },
+ { NULL, 0 }
+};
+
+/* Probe target modes (it_targmode). */
+static ipmpstat_enum_t targ_mode[] = {
+ { "disabled", IPMP_TARG_DISABLED },
+ { "routes", IPMP_TARG_ROUTES },
+ { "multicast", IPMP_TARG_MULTICAST },
+ { NULL, 0 }
+};
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl
new file mode 100644
index 0000000000..e2398aaf64
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipmpstat/ipmpstat.xcl
@@ -0,0 +1,106 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+msgid " "
+msgid "%-*s"
+msgid "%.2fms"
+msgid "%.2fs"
+msgid "%d-%s"
+msgid "%s"
+msgid "%s "
+msgid "%s: "
+msgid "%u"
+msgid "("
+msgid ")"
+msgid ","
+msgid "--"
+msgid ": %s\n"
+msgid "?"
+msgid "["
+msgid "]"
+msgid "<%d>"
+msgid "\n"
+msgid "ACTIVE"
+msgid "ADDRESS"
+msgid "EC_ipmp"
+msgid "ESC_ipmp_probe_state"
+msgid "FDT"
+msgid "FLAGS"
+msgid "GROUP"
+msgid "GROUPNAME"
+msgid "INBOUND"
+msgid "INTERFACE"
+msgid "INTERFACES"
+msgid "IPMP_IF_NAME"
+msgid "IPMP_PROBE_ACKPROC_TIME"
+msgid "IPMP_PROBE_ACKRECV_TIME"
+msgid "IPMP_PROBE_ID"
+msgid "IPMP_PROBE_SENT_TIME"
+msgid "IPMP_PROBE_START_TIME"
+msgid "IPMP_PROBE_STATE"
+msgid "IPMP_PROBE_TARGET"
+msgid "IPMP_PROBE_TARGET_RTTAVG"
+msgid "IPMP_PROBE_TARGET_RTTDEV"
+msgid "LINK"
+msgid "MODE"
+msgid "NETRTT"
+msgid "OUTBOUND"
+msgid "PROBE"
+msgid "RTT"
+msgid "RTTAVG"
+msgid "RTTDEV"
+msgid "STATE"
+msgid "TARGET"
+msgid "TARGETS"
+msgid "TESTADDR"
+msgid "TIME"
+msgid "agipt"
+msgid "all"
+msgid "bufsize > IPMPSTAT_NUM_FLAGS"
+msgid "com.sun:ipmp:events"
+msgid "degraded"
+msgid "disabled"
+msgid "down"
+msgid "failed"
+msgid "ipmp_event_version"
+msgid "ipmp_if_name"
+msgid "ipmp_probe_ackproc_time"
+msgid "ipmp_probe_ackrecv_time"
+msgid "ipmp_probe_id"
+msgid "ipmp_probe_sent_time"
+msgid "ipmp_probe_start_time"
+msgid "ipmp_probe_state"
+msgid "ipmp_probe_target"
+msgid "ipmp_probe_target_rttavg"
+msgid "ipmp_probe_target_rttdev"
+msgid "ipmpstat.c"
+msgid "multicast"
+msgid "nLPo:agipt"
+msgid "no"
+msgid "offline"
+msgid "ok"
+msgid "routes"
+msgid "sig == SIGWINCH"
+msgid "unknown"
+msgid "up"
+msgid "yes"
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types b/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types
index bb15199492..e42bc626d8 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipqosconf/ipgpc.types
@@ -1,13 +1,12 @@
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -23,15 +22,12 @@
# CDDL HEADER END
#
-#pragma ident "%Z%%M% %I% %E% SMI"
-
fmt_version 1.0
mod_version 1.0
#PERM_CLASS default
filter name string
-filter if_groupname string
filter user user
filter projid int32
filter if_name ifname
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c b/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c
index 17891ffc78..2a4ff60d57 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ping/ping.c
@@ -18,7 +18,7 @@
*
* CDDL HEADER END
*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,8 +37,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <strings.h>
#include <errno.h>
@@ -243,7 +241,7 @@ main(int argc, char *argv[])
ushort_t udp_src_port6; /* used to identify replies */
uint_t flowinfo = 0;
uint_t class = 0;
- char tmp_buf[INET6_ADDRSTRLEN];
+ char abuf[INET6_ADDRSTRLEN];
int c;
int i;
boolean_t has_sys_ip_config;
@@ -671,24 +669,18 @@ main(int argc, char *argv[])
Printf("PING %s: %d data bytes\n", targethost, datalen);
} else {
if (ai_dst->ai_family == AF_INET) {
- Printf("PING %s (%s): %d data bytes\n",
- targethost,
- inet_ntop(AF_INET,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in *)
- ai_dst->ai_addr)->sin_addr,
- tmp_buf, sizeof (tmp_buf)),
- datalen);
+ (void) inet_ntop(AF_INET,
+ &((struct sockaddr_in *)(void *)
+ ai_dst->ai_addr)->sin_addr,
+ abuf, sizeof (abuf));
} else {
- Printf("PING %s (%s): %d data bytes\n",
- targethost,
- inet_ntop(AF_INET6,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in6 *)
- ai_dst->ai_addr)->sin6_addr,
- tmp_buf, sizeof (tmp_buf)),
- datalen);
+ (void) inet_ntop(AF_INET6,
+ &((struct sockaddr_in6 *)(void *)
+ ai_dst->ai_addr)->sin6_addr,
+ abuf, sizeof (abuf));
}
+ Printf("PING %s (%s): %d data bytes\n",
+ targethost, abuf, datalen);
}
}
@@ -1074,12 +1066,12 @@ select_all_src_addrs(union any_in_addr **src_addr_list, struct addrinfo *ai,
int num_dst = 1;
int i;
- if (probe_all)
- for (aip = ai; aip->ai_next != NULL;
- aip = aip->ai_next, num_dst++);
+ if (probe_all) {
+ for (aip = ai; aip->ai_next != NULL; aip = aip->ai_next)
+ num_dst++;
+ }
- list = (union any_in_addr *)
- calloc((size_t)num_dst, sizeof (union any_in_addr));
+ list = calloc((size_t)num_dst, sizeof (union any_in_addr));
if (list == NULL) {
Fprintf(stderr, "%s: calloc: %s\n", progname, strerror(errno));
exit(EXIT_FAILURE);
@@ -1472,7 +1464,7 @@ setup_socket(int family, int *send_sockp, int *recv_sockp, int *if_index,
int i;
/* pull out the interface list */
- num_ifs = ifaddrlist(&al, family, errbuf);
+ num_ifs = ifaddrlist(&al, family, LIFC_UNDER_IPMP, errbuf);
if (num_ifs == -1) {
Fprintf(stderr, "%s: %s\n", progname, errbuf);
exit(EXIT_FAILURE);
@@ -1699,8 +1691,8 @@ send_scheduled_probe()
} else {
Printf("no answer from %s(%s)\n", targethost,
inet_ntop(current_targetaddr->family,
- &current_targetaddr->dst_addr,
- tmp_buf, sizeof (tmp_buf)));
+ &current_targetaddr->dst_addr,
+ tmp_buf, sizeof (tmp_buf)));
}
}
/*
@@ -1736,9 +1728,8 @@ send_scheduled_probe()
* Each time we move to a new targetaddr, which has
* a different target IP address, we update this field.
*/
- current_targetaddr->starting_seq_num =
- use_udp ? dest_port :
- (ntransmitted % (MAX_ICMP_SEQ + 1));
+ current_targetaddr->starting_seq_num = use_udp ?
+ dest_port : (ntransmitted % (MAX_ICMP_SEQ + 1));
}
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
index f062247997..e5b23fa126 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -139,7 +139,7 @@ check_device(dlpi_handle_t *dhp, char **devicep)
if (ioctl(s, SIOCGIFFLAGS, (char *)ifr) < 0)
pr_err("ioctl SIOCGIFFLAGS");
if ((ifr->ifr_flags &
- (IFF_VIRTUAL|IFF_LOOPBACK|IFF_UP|
+ (IFF_VIRTUAL|IFF_IPMP|IFF_UP|
IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
break;
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c b/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c
index adc6a932b0..cae75df60d 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/traceroute/traceroute.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -27,8 +27,6 @@
* @(#)$Header: traceroute.c,v 1.49 97/06/13 02:30:23 leres Exp $ (LBL)
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/file.h>
#include <sys/ioctl.h>
@@ -707,7 +705,7 @@ get_hostinfo(char *host, int family, struct addrinfo **aipp)
struct addrinfo hints, *ai;
struct in6_addr addr6;
struct in_addr addr;
- char temp_buf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
+ char abuf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
int rc;
/*
@@ -720,11 +718,10 @@ get_hostinfo(char *host, int family, struct addrinfo **aipp)
IN6_V4MAPPED_TO_INADDR(&addr6, &addr);
/* convert it back to a string */
- (void) inet_ntop(AF_INET, (void *)&addr, temp_buf,
- sizeof (temp_buf));
+ (void) inet_ntop(AF_INET, &addr, abuf, sizeof (abuf));
/* now the host is an IPv4 address */
- (void) strcpy(host, temp_buf);
+ (void) strcpy(host, abuf);
/*
* If it's a mapped address, we convert it into IPv4
@@ -826,15 +823,19 @@ set_src_addr(struct pr_set *pr, struct ifaddrlist **alp)
struct sockaddr_in6 *sin6_from = (struct sockaddr_in6 *)pr->from;
struct addrinfo *aip;
char errbuf[ERRBUFSIZE];
- char temp_buf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
+ char abuf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
int num_ifs; /* all the interfaces */
int num_src_ifs; /* exclude loopback and down */
int i;
+ uint_t ifaddrflags = 0;
source = source_input;
+ if (device != NULL)
+ ifaddrflags |= LIFC_UNDER_IPMP;
+
/* get the interface address list */
- num_ifs = ifaddrlist(&al, pr->family, errbuf);
+ num_ifs = ifaddrlist(&al, pr->family, ifaddrflags, errbuf);
if (num_ifs < 0) {
Fprintf(stderr, "%s: ifaddrlist: %s\n", prog, errbuf);
exit(EXIT_FAILURE);
@@ -881,26 +882,20 @@ set_src_addr(struct pr_set *pr, struct ifaddrlist **alp)
if (pr->family == AF_INET)
ap = (union any_in_addr *)
/* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in *)
- aip->ai_addr)->sin_addr;
+ &((struct sockaddr_in *)aip->ai_addr)->sin_addr;
else
ap = (union any_in_addr *)
/* LINTED E_BAD_PTR_CAST_ALIGN */
- &((struct sockaddr_in6 *)
- aip->ai_addr)->sin6_addr;
+ &((struct sockaddr_in6 *)aip->ai_addr)->sin6_addr;
/*
* LBNL bug fixed: used to accept any src address
*/
tmp2_al = find_ifaddr(al, num_ifs, ap, pr->family);
-
if (tmp2_al == NULL) {
- Fprintf(stderr,
- "%s: %s is not a local %s address\n",
- prog, inet_ntop(pr->family, ap,
- temp_buf, sizeof (temp_buf)),
- pr->name);
-
+ (void) inet_ntop(pr->family, ap, abuf, sizeof (abuf));
+ Fprintf(stderr, "%s: %s is not a local %s address\n",
+ prog, abuf, pr->name);
free(al);
freeaddrinfo(aip);
return (0);
@@ -928,13 +923,11 @@ set_src_addr(struct pr_set *pr, struct ifaddrlist **alp)
set_sin(pr->from, ap, pr->family);
if (aip->ai_next != NULL) {
- Fprintf(stderr,
- "%s: Warning: %s has multiple "
- "addresses; using %s\n",
- prog, source,
- inet_ntop(pr->family,
- (const void *)pr->from_sin_addr,
- temp_buf, sizeof (temp_buf)));
+ (void) inet_ntop(pr->family, pr->from_sin_addr,
+ abuf, sizeof (abuf));
+ Fprintf(stderr, "%s: Warning: %s has multiple "
+ "addresses; using %s\n", prog, source,
+ abuf);
}
} else { /* -i and -s used */
/*
@@ -1484,7 +1477,7 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
uchar_t code; /* icmp code */
int reply;
int seq = 0;
- char temp_buf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
+ char abuf[INET6_ADDRSTRLEN]; /* use for inet_ntop() */
int longjmp_return; /* return value from longjump */
struct ip *ip = (struct ip *)packet;
boolean_t got_there = _B_FALSE; /* we hit the destination */
@@ -1535,13 +1528,11 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
if (dev_name == NULL)
dev_name = "?";
+ (void) inet_ntop(pr->family, pr->from_sin_addr, abuf,
+ sizeof (abuf));
Fprintf(stderr,
"%s: Warning: Multiple interfaces found;"
- " using %s @ %s\n",
- prog, inet_ntop(pr->family,
- (const void *)pr->from_sin_addr,
- temp_buf, sizeof (temp_buf)),
- dev_name);
+ " using %s @ %s\n", prog, abuf, dev_name);
}
}
@@ -1558,8 +1549,7 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
Fprintf(stderr, "%s to %s", prog, hostname);
} else {
Fprintf(stderr, "%s to %s (%s)", prog, hostname,
- inet_ntop(pr->family, (const void *)ip_addr, temp_buf,
- sizeof (temp_buf)));
+ inet_ntop(pr->family, ip_addr, abuf, sizeof (abuf)));
}
if (source)
@@ -1700,9 +1690,8 @@ traceroute(union any_in_addr *ip_addr, struct msghdr *msg6, struct pr_set *pr,
}
if (pr->family == AF_INET6) {
- intp =
- (int *)find_ancillary_data(&in_msg,
- IPPROTO_IPV6, IPV6_HOPLIMIT);
+ intp = find_ancillary_data(&in_msg,
+ IPPROTO_IPV6, IPV6_HOPLIMIT);
if (intp == NULL) {
Fprintf(stderr,
"%s: can't find "
@@ -2188,10 +2177,11 @@ static void
usage(void)
{
Fprintf(stderr, "Usage: %s [-adFIlnSvx] [-A address_family] "
-"[-c traffic_class] \n"
-"\t[-f first_hop] [-g gateway [-g gateway ...]| -r] [-i iface]\n"
-"\t[-L flow_label] [-m max_hop] [-P pause_sec] [-p port] [-Q max_timeout]\n"
-"\t[-q nqueries] [-s src_addr] [-t tos] [-w wait_time] host [packetlen]\n",
- prog);
+ "[-c traffic_class]\n"
+ "\t[-f first_hop] [-g gateway [-g gateway ...]| -r] [-i iface]\n"
+ "\t[-L flow_label] [-m max_hop] [-P pause_sec] [-p port] "
+ "[-Q max_timeout]\n"
+ "\t[-q nqueries] [-s src_addr] [-t tos] [-w wait_time] host "
+ "[packetlen]\n", prog);
exit(EXIT_FAILURE);
}
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index c72be6be37..44756c3e98 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -104,7 +104,7 @@ static devfsadm_create_t misc_cbt[] = {
"(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|"
"(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|"
"(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|"
- "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)",
+ "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)",
TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
},
{ "pseudo", "ddi_pseudo",
diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c b/usr/src/cmd/mdb/common/modules/ip/ip.c
index f2dadd5261..f064b58d83 100644
--- a/usr/src/cmd/mdb/common/modules/ip/ip.c
+++ b/usr/src/cmd/mdb/common/modules/ip/ip.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/stropts.h>
#include <sys/stream.h>
@@ -524,8 +522,7 @@ ire_format(uintptr_t addr, const void *ire_arg, void *ire_cb_arg)
static const mdb_bitmask_t mmasks[] = {
{ "CONDEMNED", IRE_MARK_CONDEMNED, IRE_MARK_CONDEMNED },
- { "NORECV", IRE_MARK_NORECV, IRE_MARK_NORECV },
- { "HIDDEN", IRE_MARK_HIDDEN, IRE_MARK_HIDDEN },
+ { "TESTHIDDEN", IRE_MARK_TESTHIDDEN, IRE_MARK_TESTHIDDEN },
{ "NOADD", IRE_MARK_NOADD, IRE_MARK_NOADD },
{ "TEMPORARY", IRE_MARK_TEMPORARY, IRE_MARK_TEMPORARY },
{ "USESRC", IRE_MARK_USESRC_CHECK, IRE_MARK_USESRC_CHECK },
diff --git a/usr/src/cmd/rcm_daemon/Makefile.com b/usr/src/cmd/rcm_daemon/Makefile.com
index 365371c45c..dbe3c1f1d1 100644
--- a/usr/src/cmd/rcm_daemon/Makefile.com
+++ b/usr/src/cmd/rcm_daemon/Makefile.com
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -124,7 +124,7 @@ SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_aggr_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
-SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil -ldladm
+SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil -ldladm -lipmp
SUNW_ip_anon_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil
LDLIBS += -lgen -lelf -lrcm -lnvpair -ldevinfo -lnsl -lsocket
diff --git a/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c
index be9a31f952..6e1fe1bf39 100644
--- a/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c
+++ b/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* RCM module to prevent plumbed IP addresses from being removed.
*/
@@ -177,7 +175,7 @@ ip_anon_register(rcm_handle_t *hdl)
if (_cladm(CL_INITIALIZE, CL_GET_BOOTFLAG, &bootflags) != 0) {
rcm_log_message(RCM_ERROR,
- gettext("unable to check cluster status\n"));
+ gettext("unable to check cluster status\n"));
(void) mutex_unlock(&ip_list_lock);
return (RCM_FAILURE);
}
@@ -199,7 +197,7 @@ ip_anon_register(rcm_handle_t *hdl)
else {
if ((exclude_addrs.cladm_netaddrs_array =
malloc(sizeof (cladm_netaddr_entry_t) *
- (num_exclude_addrs))) == NULL) {
+ (num_exclude_addrs))) == NULL) {
rcm_log_message(RCM_ERROR,
gettext("out of memory\n"));
(void) mutex_unlock(&ip_list_lock);
@@ -274,7 +272,7 @@ ip_anon_register(rcm_handle_t *hdl)
rcm_log_message(RCM_DEBUG,
"ip_anon: obtaining list of IPv4 addresses.\n");
- num_ifs = ifaddrlist(&al, AF_INET, errbuf);
+ num_ifs = ifaddrlist(&al, AF_INET, LIFC_UNDER_IPMP, errbuf);
if (num_ifs == -1) {
rcm_log_message(RCM_ERROR,
gettext("cannot get IPv4 address list errno=%d (%s)\n"),
@@ -286,7 +284,7 @@ ip_anon_register(rcm_handle_t *hdl)
rcm_log_message(RCM_DEBUG,
"ip_anon: obtaining list of IPv6 addresses.\n");
- num_ifs6 = ifaddrlist(&al6, AF_INET6, errbuf);
+ num_ifs6 = ifaddrlist(&al6, AF_INET6, LIFC_UNDER_IPMP, errbuf);
if (num_ifs6 == -1) {
rcm_log_message(RCM_ERROR,
gettext("cannot get IPv6 address list errno=%d (%s)\n"),
@@ -392,7 +390,7 @@ ip_anon_register(rcm_handle_t *hdl)
* currently know about it.
*/
if (!(tentry->flags & IP_FLAG_CL) &&
- !(tentry->flags & IP_FLAG_REG)) {
+ !(tentry->flags & IP_FLAG_REG)) {
tentry->flags |= IP_FLAG_REG;
rcm_log_message(RCM_DEBUG,
"ip_anon: registering interest in %s\n",
diff --git a/usr/src/cmd/rcm_daemon/common/ip_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_rcm.c
index f62b3dfc19..24be0cafeb 100644
--- a/usr/src/cmd/rcm_daemon/common/ip_rcm.c
+++ b/usr/src/cmd/rcm_daemon/common/ip_rcm.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,23 +38,22 @@
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <netinet/in.h>
-#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <stropts.h>
#include <strings.h>
-#include <libdevinfo.h>
-#include <sys/systeminfo.h>
-#include <netdb.h>
+#include <sys/sysmacros.h>
#include <inet/ip.h>
#include <libinetutil.h>
#include <libdllink.h>
+#include <libgen.h>
+#include <ipmp_admin.h>
-#include <ipmp_mpathd.h>
#include "rcm_module.h"
/*
@@ -75,42 +74,19 @@
#define RCM_LINK_RESOURCE_MAX (13 + LINKID_STR_WIDTH)
#define RCM_STR_SUNW_IP "SUNW_ip/" /* IP address export prefix */
-#define RCM_SIZE_SUNW_IP 9 /* strlen("SUNW_ip/") + 1 */
-/* ifconfig(1M) */
-#define USR_SBIN_IFCONFIG "/usr/sbin/ifconfig" /* ifconfig command */
-#define CFGFILE_FMT_IPV4 "/etc/hostname." /* IPV4 config file */
-#define CFGFILE_FMT_IPV6 "/etc/hostname6." /* IPV6 config file */
+#define SBIN_IFCONFIG "/sbin/ifconfig" /* ifconfig command */
+#define SBIN_IFPARSE "/sbin/ifparse" /* ifparse command */
+#define DHCPFILE_FMT "/etc/dhcp.%s" /* DHCP config file */
+#define CFGFILE_FMT_IPV4 "/etc/hostname.%s" /* IPV4 config file */
+#define CFGFILE_FMT_IPV6 "/etc/hostname6.%s" /* IPV6 config file */
#define CFG_CMDS_STD " netmask + broadcast + up" /* Normal config string */
-#define CONFIG_AF_INET 0x1 /* Post-configure IPv4 */
-#define CONFIG_AF_INET6 0x2 /* Post-configure IPv6 */
-#define MAXLINE 1024 /* Max. line length */
-#define MAXARGS 512 /* Max. args in ifconfig cmd */
-
-/* Physical interface flags mask */
-#define RCM_PIF_FLAGS (IFF_OFFLINE | IFF_INACTIVE | IFF_FAILED | \
- IFF_STANDBY)
+#define CFG_DHCP_CMD "dhcp wait 0" /* command to start DHCP */
/* Some useful macros */
-#ifndef MAX
-#define MAX(a, b) (((a) > (b))?(a):(b))
-#endif /* MAX */
-
-#ifndef ISSPACE
#define ISSPACE(c) ((c) == ' ' || (c) == '\t')
-#endif
-
-#ifndef ISEOL
#define ISEOL(c) ((c) == '\n' || (c) == '\r' || (c) == '\0')
-#endif
-
-#ifndef STREQ
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
-#endif
-
-#ifndef ADDSPACE
-#define ADDSPACE(a) ((void) strcat((a), " "))
-#endif
/* Interface Cache state flags */
#define CACHE_IF_STALE 0x1 /* stale cached data */
@@ -125,48 +101,20 @@
/* RCM IPMP Module specific property definitions */
#define RCM_IPMP_MIN_REDUNDANCY 1 /* default min. redundancy */
-/* in.mpathd(1M) specifics */
-#define MPATHD_MAX_RETRIES 5 /* Max. offline retries */
-
/* Stream module operations */
#define MOD_INSERT 0 /* Insert a mid-stream module */
#define MOD_REMOVE 1 /* Remove a mid-stream module */
#define MOD_CHECK 2 /* Check mid-stream module safety */
/*
- * in.mpathd(1M) message passing formats
- */
-typedef struct mpathd_cmd {
- uint32_t cmd_command; /* message command */
- char cmd_ifname[LIFNAMSIZ]; /* this interface name */
- char cmd_movetoif[LIFNAMSIZ]; /* move to interface */
- uint32_t cmd_min_red; /* min. redundancy */
-/* Message passing values for MI_SETOINDEX */
-#define from_lifname cmd_ifname /* current logical interface */
-#define to_pifname cmd_movetoif /* new physical interface */
-#define addr_family cmd_min_red /* address family */
-} mpathd_cmd_t;
-
-/* This is needed since mpathd checks message size for offline */
-typedef struct mpathd_unoffline {
- uint32_t cmd_command; /* offline / undo offline */
- char cmd_ifname[LIFNAMSIZ]; /* this interface name */
-} mpathd_unoffline_t;
-
-typedef struct mpathd_response {
- uint32_t resp_sys_errno; /* system errno */
- uint32_t resp_mpathd_err; /* mpathd error information */
-} mpathd_response_t;
-
-/*
* IP module data types
*/
/* Physical interface representation */
typedef struct ip_pif {
- char pi_ifname[LIFNAMSIZ+1]; /* interface name */
- char pi_grpname[LIFNAMSIZ+1]; /* IPMP group name */
- struct ip_lif *pi_lifs; /* ptr to logical interfaces */
+ char pi_ifname[LIFNAMSIZ]; /* interface name */
+ char pi_grname[LIFGRNAMSIZ]; /* IPMP group name */
+ struct ip_lif *pi_lifs; /* ptr to logical interfaces */
} ip_pif_t;
/* Logical interface representation */
@@ -239,7 +187,7 @@ static void free_node(ip_cache_t *);
static void cache_insert(ip_cache_t *);
static char *ip_usage(ip_cache_t *);
static int update_pif(rcm_handle_t *, int, int, struct lifreq *);
-static int ip_ipmp_offline(ip_cache_t *, ip_cache_t *);
+static int ip_ipmp_offline(ip_cache_t *);
static int ip_ipmp_undo_offline(ip_cache_t *);
static int if_cfginfo(ip_cache_t *, uint_t);
static int if_unplumb(ip_cache_t *);
@@ -247,9 +195,6 @@ static int if_replumb(ip_cache_t *);
static void ip_log_err(ip_cache_t *, char **, char *);
static char *get_link_resource(const char *);
static void clr_cfg_state(ip_pif_t *);
-static uint64_t if_get_flags(ip_pif_t *);
-static int mpathd_send_cmd(mpathd_cmd_t *);
-static int connect_to_mpathd(int);
static int modop(char *, char *, int, char);
static int get_modlist(char *, ip_lif_t *);
static int ip_domux2fd(int *, int *, int *, struct lifreq *);
@@ -262,15 +207,13 @@ static char **ip_get_addrlist(ip_cache_t *);
static void ip_free_addrlist(char **);
static void ip_consumer_notify(rcm_handle_t *, datalink_id_t, char **,
uint_t, rcm_info_t **);
+static boolean_t ip_addrstr(ip_lif_t *, char *, size_t);
static int if_configure(datalink_id_t);
-static int isgrouped(char *);
-static int if_ipmp_config(char *, int, int);
-static int if_mpathd_configure(char *, char *, int, int);
-static char *get_mpathd_dest(char *, int);
-static int if_getcount(int);
-static void tokenize(char *, char **, char *, int *);
-
+static boolean_t isgrouped(const char *);
+static int if_config_inst(const char *, FILE *, int, boolean_t);
+static uint_t ntok(const char *cp);
+static boolean_t ifconfig(const char *, const char *, const char *, boolean_t);
/* Module-Private data */
static struct rcm_mod_ops ip_ops =
@@ -429,9 +372,9 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
{
ip_cache_t *node;
ip_pif_t *pif;
- int detachable = 0;
- int nofailover = 0;
- int ipmp = 0;
+ boolean_t detachable = B_FALSE;
+ boolean_t ipmp;
+ int retval;
rcm_log_message(RCM_TRACE1, "IP: offline(%s)\n", rsrc);
@@ -455,25 +398,17 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
pif = node->ip_pif;
/* Establish default detachability criteria */
- if (flags & RCM_FORCE) {
- detachable++;
- }
+ if (flags & RCM_FORCE)
+ detachable = B_TRUE;
- /* Check if the interface is an IPMP grouped interface */
- if (strcmp(pif->pi_grpname, "")) {
- ipmp++;
- }
-
- if (if_get_flags(pif) & IFF_NOFAILOVER) {
- nofailover++;
- }
+ /* Check if the interface is under IPMP */
+ ipmp = (pif->pi_grname[0] != '\0');
/*
- * Even if the interface is not in an IPMP group, it's possible that
- * it's still okay to offline it as long as there are higher-level
- * failover mechanisms for the addresses it owns (e.g., clustering).
- * In this case, ip_offlinelist() will return RCM_SUCCESS, and we
- * charge on.
+ * Even if the interface is not under IPMP, it's possible that it's
+ * still okay to offline it as long as there are higher-level failover
+ * mechanisms for the addresses it owns (e.g., clustering). In this
+ * case, ip_offlinelist() will return RCM_SUCCESS, and we charge on.
*/
if (!ipmp && !detachable) {
/* Inform consumers of IP addresses being offlined */
@@ -489,17 +424,6 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
}
}
- /*
- * Cannot remove an IPMP interface if IFF_NOFAILOVER is set.
- */
- if (ipmp && nofailover) {
- /* Interface is part of an IPMP group, and cannot failover */
- ip_log_err(node, errorp, "Failover disabled");
- errno = EBUSY;
- (void) mutex_unlock(&cache_lock);
- return (RCM_FAILURE);
- }
-
/* Check if it's a query */
if (flags & RCM_QUERY) {
rcm_log_message(RCM_TRACE1, "IP: offline query success(%s)\n",
@@ -534,38 +458,32 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
}
/*
- * This an IPMP interface that can be failed over.
- * Request in.mpathd(1M) to failover the physical interface.
+ * This is an IPMP interface that can be offlined.
+ * Request in.mpathd(1M) to offline the physical interface.
*/
+ if ((retval = ip_ipmp_offline(node)) != IPMP_SUCCESS)
+ ip_log_err(node, errorp, "in.mpathd offline failed");
- /* Failover to "any", let mpathd determine best failover candidate */
- if (ip_ipmp_offline(node, NULL) < 0) {
- ip_log_err(node, errorp, "in.mpathd failover failed");
+ if (retval == IPMP_EMINRED && !detachable) {
/*
- * Odds are that in.mpathd(1M) could not offline the device
- * because it was the last interface in the group. However,
- * it's possible that it's still okay to offline it as long as
- * there are higher-level failover mechanisms for the
- * addresses it owns (e.g., clustering). In this case,
- * ip_offlinelist() will return RCM_SUCCESS, and we charge on.
- *
- * TODO: change ip_ipmp_offline() to return the actual failure
- * from in.mpathd so that we can verify that it did indeed
- * fail with IPMP_EMINRED.
+ * in.mpathd(1M) could not offline the device because it was
+ * the last interface in the group. However, it's possible
+ * that it's still okay to offline it as long as there are
+ * higher-level failover mechanisms for the addresses it owns
+ * (e.g., clustering). In this case, ip_offlinelist() will
+ * return RCM_SUCCESS, and we charge on.
*/
- if (!detachable) {
- /* Inform consumers of IP addresses being offlined */
- if (ip_offlinelist(hd, node, errorp, flags,
- depend_info) == RCM_SUCCESS) {
- rcm_log_message(RCM_DEBUG,
- "IP: consumers agree on detach");
- } else {
- ip_log_err(node, errorp,
- "Device consumers prohibit offline");
- (void) mutex_unlock(&cache_lock);
- errno = EBUSY;
- return (RCM_FAILURE);
- }
+ /* Inform consumers of IP addresses being offlined */
+ if (ip_offlinelist(hd, node, errorp, flags,
+ depend_info) == RCM_SUCCESS) {
+ rcm_log_message(RCM_DEBUG,
+ "IP: consumers agree on detach");
+ } else {
+ ip_log_err(node, errorp,
+ "Device consumers prohibit offline");
+ (void) mutex_unlock(&cache_lock);
+ errno = EBUSY;
+ return (RCM_FAILURE);
}
}
@@ -574,8 +492,8 @@ ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
_("IP: Unplumb failed (%s)\n"),
pif->pi_ifname);
- /* Request mpathd to undo the offline */
- if (ip_ipmp_undo_offline(node) < 0) {
+ /* Request in.mpathd to undo the offline */
+ if (ip_ipmp_undo_offline(node) != IPMP_SUCCESS) {
ip_log_err(node, errorp, "Undo offline failed");
(void) mutex_unlock(&cache_lock);
return (RCM_FAILURE);
@@ -862,18 +780,16 @@ static char *
ip_usage(ip_cache_t *node)
{
ip_lif_t *lif;
- int numifs;
- char *buf;
- char *linkidstr;
+ uint_t numup;
+ char *sep, *buf, *linkidstr;
datalink_id_t linkid;
- const char *fmt;
- char *sep;
+ const char *msg;
char link[MAXLINKNAMELEN];
char addrstr[INET6_ADDRSTRLEN];
char errmsg[DLADM_STRSIZE];
dladm_status_t status;
- int offline = 0;
- size_t bufsz;
+ boolean_t offline, ipmp;
+ size_t bufsz = 0;
rcm_log_message(RCM_TRACE2, "IP: usage(%s)\n", node->ip_resource);
@@ -904,76 +820,53 @@ ip_usage(ip_cache_t *node)
/* TRANSLATION_NOTE: separator used between IP addresses */
sep = _(", ");
- numifs = 0;
- for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) {
- if (lif->li_ifflags & IFF_UP) {
- numifs++;
- }
- }
+ numup = 0;
+ for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next)
+ if (lif->li_ifflags & IFF_UP)
+ numup++;
- if (node->ip_cachestate & CACHE_IF_OFFLINED) {
- offline++;
- }
+ ipmp = (node->ip_pif->pi_grname[0] != '\0');
+ offline = ((node->ip_cachestate & CACHE_IF_OFFLINED) != 0);
- if (!offline && numifs) {
- fmt = _("%1$s hosts IP addresses: ");
- } else if (offline) {
- fmt = _("%1$s offlined");
+ if (offline) {
+ msg = _("offlined");
+ } else if (numup == 0) {
+ msg = _("plumbed but down");
} else {
- fmt = _("%1$s plumbed but down");
+ if (ipmp) {
+ msg = _("providing connectivity for IPMP group ");
+ bufsz += LIFGRNAMSIZ;
+ } else {
+ msg = _("hosts IP addresses: ");
+ bufsz += (numup * (INET6_ADDRSTRLEN + strlen(sep)));
+ }
}
- /* space for addresses and separators, plus message */
- bufsz = ((numifs * (INET6_ADDRSTRLEN + strlen(sep))) +
- strlen(fmt) + strlen(link) + 1);
+ bufsz += strlen(link) + strlen(msg) + 1;
if ((buf = malloc(bufsz)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: usage(%s) malloc failure(%s)\n"),
node->ip_resource, strerror(errno));
return (NULL);
}
- bzero(buf, bufsz);
- (void) sprintf(buf, fmt, link);
-
- if (offline || (numifs == 0)) { /* Nothing else to do */
- rcm_log_message(RCM_TRACE2, "IP: usage (%s) info = %s\n",
- node->ip_resource, buf);
-
- return (buf);
- }
-
- for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) {
+ (void) snprintf(buf, bufsz, "%s: %s", link, msg);
- void *addr;
- int af;
-
- if (!(lif->li_ifflags & IFF_UP)) {
- /* ignore interfaces not up */
- continue;
- }
- af = lif->li_addr.family;
- if (af == AF_INET6) {
- addr = &lif->li_addr.ip6.sin6_addr;
- } else if (af == AF_INET) {
- addr = &lif->li_addr.ip4.sin_addr;
+ if (!offline && numup > 0) {
+ if (ipmp) {
+ (void) strlcat(buf, node->ip_pif->pi_grname, bufsz);
} else {
- rcm_log_message(RCM_DEBUG,
- "IP: unknown addr family %d, assuming AF_INET\n",
- af);
- af = AF_INET;
- addr = &lif->li_addr.ip4.sin_addr;
- }
- if (inet_ntop(af, addr, addrstr, INET6_ADDRSTRLEN) == NULL) {
- rcm_log_message(RCM_ERROR,
- _("IP: inet_ntop: %s\n"), strerror(errno));
- continue;
- }
- rcm_log_message(RCM_DEBUG, "IP addr := %s\n", addrstr);
+ lif = node->ip_pif->pi_lifs;
+ for (; lif != NULL; lif = lif->li_next) {
+ if (!(lif->li_ifflags & IFF_UP))
+ continue;
+
+ if (!ip_addrstr(lif, addrstr, sizeof (addrstr)))
+ continue;
- (void) strcat(buf, addrstr);
- numifs--;
- if (numifs > 0) {
- (void) strcat(buf, ", ");
+ (void) strlcat(buf, addrstr, bufsz);
+ if (--numup > 0)
+ (void) strlcat(buf, sep, bufsz);
+ }
}
}
@@ -983,6 +876,32 @@ ip_usage(ip_cache_t *node)
return (buf);
}
+static boolean_t
+ip_addrstr(ip_lif_t *lif, char *addrstr, size_t addrsize)
+{
+ int af = lif->li_addr.family;
+ void *addr;
+
+ if (af == AF_INET6) {
+ addr = &lif->li_addr.ip6.sin6_addr;
+ } else if (af == AF_INET) {
+ addr = &lif->li_addr.ip4.sin_addr;
+ } else {
+ rcm_log_message(RCM_DEBUG,
+ "IP: unknown addr family %d, assuming AF_INET\n", af);
+ af = AF_INET;
+ addr = &lif->li_addr.ip4.sin_addr;
+ }
+ if (inet_ntop(af, addr, addrstr, addrsize) == NULL) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: inet_ntop: %s\n"), strerror(errno));
+ return (B_FALSE);
+ }
+
+ rcm_log_message(RCM_DEBUG, "IP addr := %s\n", addrstr);
+ return (B_TRUE);
+}
+
/*
* Cache management routines, all cache management functions should be
* be called with cache_lock held.
@@ -1121,11 +1040,13 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
ifnumber = ifspec.ifsp_lun;
/* Get the interface flags */
- (void) strcpy(lifreq.lifr_name, lifr->lifr_name);
+ (void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
if (ioctl(sock, SIOCGLIFFLAGS, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFFLAGS(%s): %s\n"),
- pif.pi_ifname, strerror(errno));
+ if (errno != ENXIO) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: SIOCGLIFFLAGS(%s): %s\n"),
+ lifreq.lifr_name, strerror(errno));
+ }
return (-1);
}
(void) memcpy(&ifflags, &lifreq.lifr_flags, sizeof (ifflags));
@@ -1135,12 +1056,13 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
* - IFF_VIRTUAL: e.g., loopback and vni
* - IFF_POINTOPOINT: e.g., sppp and ip.tun
* - !IFF_MULTICAST: e.g., ip.6to4tun
+ * - IFF_IPMP: IPMP meta-interfaces
*
* Note: The !IFF_MULTICAST check can be removed once iptun is
* implemented as a datalink.
*/
if (!(ifflags & IFF_MULTICAST) ||
- (ifflags & (IFF_POINTOPOINT | IFF_VIRTUAL))) {
+ (ifflags & (IFF_POINTOPOINT | IFF_VIRTUAL | IFF_IPMP))) {
rcm_log_message(RCM_TRACE3, "IP: if ignored (%s)\n",
pif.pi_ifname);
return (0);
@@ -1148,23 +1070,26 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
/* Get the interface group name for this interface */
if (ioctl(sock, SIOCGLIFGROUPNAME, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFGROUPNAME(%s): %s\n"),
- lifreq.lifr_name, strerror(errno));
+ if (errno != ENXIO) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: SIOCGLIFGROUPNAME(%s): %s\n"),
+ lifreq.lifr_name, strerror(errno));
+ }
return (-1);
}
/* copy the group name */
- (void) memcpy(&pif.pi_grpname, &lifreq.lifr_groupname,
- sizeof (pif.pi_grpname));
- pif.pi_grpname[sizeof (pif.pi_grpname) - 1] = '\0';
+ (void) strlcpy(pif.pi_grname, lifreq.lifr_groupname,
+ sizeof (pif.pi_grname));
/* Get the interface address for this interface */
if (ioctl(sock, SIOCGLIFADDR, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFADDR(%s): %s\n"),
- lifreq.lifr_name, strerror(errno));
- return (-1);
+ if (errno != ENXIO) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: SIOCGLIFADDR(%s): %s\n"),
+ lifreq.lifr_name, strerror(errno));
+ return (-1);
+ }
}
(void) memcpy(&ifaddr, &lifreq.lifr_addr, sizeof (ifaddr));
@@ -1241,9 +1166,9 @@ update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr)
sizeof (pif.pi_ifname));
}
- /* save pif properties */
- (void) memcpy(&probepif->pi_grpname, &pif.pi_grpname,
- sizeof (pif.pi_grpname));
+ /* save the group name */
+ (void) strlcpy(probepif->pi_grname, pif.pi_grname,
+ sizeof (pif.pi_grname));
/* add lif, if this is a lif and it is not in cache */
if (!lif_listed) {
@@ -1304,7 +1229,7 @@ update_ipifs(rcm_handle_t *hd, int af)
}
lifn.lifn_family = af;
- lifn.lifn_flags = 0;
+ lifn.lifn_flags = LIFC_UNDER_IPMP;
if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) {
rcm_log_message(RCM_ERROR,
_("IP: SIOCLGIFNUM failed: %s\n"),
@@ -1321,7 +1246,7 @@ update_ipifs(rcm_handle_t *hd, int af)
}
lifc.lifc_family = af;
- lifc.lifc_flags = 0;
+ lifc.lifc_flags = LIFC_UNDER_IPMP;
lifc.lifc_len = sizeof (struct lifreq) * lifn.lifn_count;
lifc.lifc_buf = buf;
@@ -1480,39 +1405,33 @@ static void
ip_log_err(ip_cache_t *node, char **errorp, char *errmsg)
{
char *ifname = NULL;
- int len;
+ int size;
const char *errfmt;
- char *error;
+ char *error = NULL;
if ((node != NULL) && (node->ip_pif != NULL) &&
(node->ip_pif->pi_ifname != NULL)) {
ifname = node->ip_pif->pi_ifname;
}
- if (errorp != NULL)
- *errorp = NULL;
-
if (ifname == NULL) {
rcm_log_message(RCM_ERROR, _("IP: %s\n"), errmsg);
errfmt = _("IP: %s");
- len = strlen(errfmt) + strlen(errmsg) + 1;
- if (error = (char *)calloc(1, len)) {
- (void) sprintf(error, errfmt, errmsg);
- }
+ size = strlen(errfmt) + strlen(errmsg) + 1;
+ if (errorp != NULL && (error = malloc(size)) != NULL)
+ (void) snprintf(error, size, errfmt, errmsg);
} else {
rcm_log_message(RCM_ERROR, _("IP: %s(%s)\n"), errmsg, ifname);
errfmt = _("IP: %s(%s)");
- len = strlen(errfmt) + strlen(errmsg) + strlen(ifname) + 1;
- if (error = (char *)calloc(1, len)) {
- (void) sprintf(error, errfmt, errmsg, ifname);
- }
+ size = strlen(errfmt) + strlen(errmsg) + strlen(ifname) + 1;
+ if (errorp != NULL && (error = malloc(size)) != NULL)
+ (void) snprintf(error, size, errfmt, errmsg, ifname);
}
if (errorp != NULL)
*errorp = error;
}
-
/*
* if_cfginfo() - Save off the config info for all interfaces
*/
@@ -1538,7 +1457,7 @@ if_cfginfo(ip_cache_t *node, uint_t force)
rcm_log_message(RCM_ERROR,
_("IP: get modlist error (%s) %s\n"),
pif->pi_ifname, strerror(errno));
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
@@ -1551,7 +1470,7 @@ if_cfginfo(ip_cache_t *node, uint_t force)
rcm_log_message(RCM_ERROR,
_("IP: module %s@%d\n"),
lif->li_modules[i], i);
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
}
@@ -1595,11 +1514,11 @@ if_cfginfo(ip_cache_t *node, uint_t force)
/* Save reconfiguration information */
if (lif->li_ifflags & IFF_IPV4) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s:%d configinfo\n", USR_SBIN_IFCONFIG,
+ "%s %s:%d configinfo\n", SBIN_IFCONFIG,
pif->pi_ifname, lif->li_ifnum);
} else if (lif->li_ifflags & IFF_IPV6) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s:%d inet6 configinfo\n", USR_SBIN_IFCONFIG,
+ "%s %s:%d inet6 configinfo\n", SBIN_IFCONFIG,
pif->pi_ifname, lif->li_ifnum);
}
rcm_log_message(RCM_TRACE2, "IP: %s\n", syscmd);
@@ -1609,7 +1528,7 @@ if_cfginfo(ip_cache_t *node, uint_t force)
rcm_log_message(RCM_ERROR,
_("IP: ifconfig configinfo error (%s:%d) %s\n"),
pif->pi_ifname, lif->li_ifnum, strerror(errno));
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
bzero(buf, MAX_RECONFIG_SIZE);
@@ -1619,20 +1538,18 @@ if_cfginfo(ip_cache_t *node, uint_t force)
_("IP: ifconfig configinfo error (%s:%d) %s\n"),
pif->pi_ifname, lif->li_ifnum, strerror(errno));
(void) pclose(fp);
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
(void) pclose(fp);
- lif->li_reconfig = malloc(strlen(buf)+1);
- if (lif->li_reconfig == NULL) {
+ if ((lif->li_reconfig = strdup(buf)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: malloc error (%s) %s\n"),
pif->pi_ifname, strerror(errno));
- (void) clr_cfg_state(pif);
+ clr_cfg_state(pif);
return (-1);
}
- (void) strcpy(lif->li_reconfig, buf);
rcm_log_message(RCM_DEBUG,
"IP: if_cfginfo: reconfig string(%s:%d) = %s\n",
pif->pi_ifname, lif->li_ifnum, lif->li_reconfig);
@@ -1654,57 +1571,37 @@ static int
if_unplumb(ip_cache_t *node)
{
ip_lif_t *lif;
- ip_pif_t *pif;
- int ipv4 = 0, ipv6 = 0;
- char syscmd[MAX_RECONFIG_SIZE + LIFNAMSIZ];
+ ip_pif_t *pif = node->ip_pif;
+ boolean_t ipv4 = B_FALSE;
+ boolean_t ipv6 = B_FALSE;
rcm_log_message(RCM_TRACE2, "IP: if_unplumb(%s)\n", node->ip_resource);
- pif = node->ip_pif;
- lif = pif->pi_lifs;
-
- while (lif != NULL) {
+ for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
if (lif->li_ifflags & IFF_IPV4) {
- ipv4++;
+ ipv4 = B_TRUE;
} else if (lif->li_ifflags & IFF_IPV6) {
- ipv6++;
+ ipv6 = B_TRUE;
} else {
/* Unlikely case */
rcm_log_message(RCM_DEBUG,
"IP: Unplumb ignored (%s:%d)\n",
pif->pi_ifname, lif->li_ifnum);
- lif = lif->li_next;
- continue;
}
- lif = lif->li_next;
}
- /* Unplumb the physical interface */
- if (ipv4) {
- rcm_log_message(RCM_TRACE2,
- "IP: if_unplumb: ifconfig %s unplumb\n", pif->pi_ifname);
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s unplumb\n",
- USR_SBIN_IFCONFIG, pif->pi_ifname);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot unplumb (%s) %s\n"),
- pif->pi_ifname, strerror(errno));
- return (-1);
- }
+ if (ipv4 && !ifconfig(pif->pi_ifname, "inet", "unplumb", B_FALSE)) {
+ rcm_log_message(RCM_ERROR, _("IP: Cannot unplumb (%s) %s\n"),
+ pif->pi_ifname, strerror(errno));
+ return (-1);
}
- if (ipv6) {
- rcm_log_message(RCM_TRACE2,
- "IP: if_unplumb: ifconfig %s inet6 unplumb\n",
- pif->pi_ifname);
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s inet6 unplumb\n", USR_SBIN_IFCONFIG, pif->pi_ifname);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot unplumb (%s) %s\n"),
- pif->pi_ifname, strerror(errno));
- return (-1);
- }
+
+ if (ipv6 && !ifconfig(pif->pi_ifname, "inet6", "unplumb", B_FALSE)) {
+ rcm_log_message(RCM_ERROR, _("IP: Cannot unplumb (%s) %s\n"),
+ pif->pi_ifname, strerror(errno));
+ return (-1);
}
+
rcm_log_message(RCM_TRACE2, "IP: if_unplumb(%s) success\n",
node->ip_resource);
@@ -1723,8 +1620,11 @@ if_replumb(ip_cache_t *node)
ip_lif_t *lif;
ip_pif_t *pif;
int i;
- char syscmd[LIFNAMSIZ+MAXPATHLEN]; /* must be big enough */
- int max_ipv4 = 0, max_ipv6 = 0;
+ boolean_t success, ipmp;
+ const char *fstr;
+ char lifname[LIFNAMSIZ];
+ char buf[MAX_RECONFIG_SIZE];
+ int max_lifnum = 0;
rcm_log_message(RCM_TRACE2, "IP: if_replumb(%s)\n", node->ip_resource);
@@ -1738,100 +1638,103 @@ if_replumb(ip_cache_t *node)
*/
pif = node->ip_pif;
- lif = pif->pi_lifs;
+ ipmp = (node->ip_pif->pi_grname[0] != '\0');
/*
* Make a first pass to plumb in physical interfaces and get a count
* of the max logical interfaces
*/
- while (lif != NULL) {
+ for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
+ max_lifnum = MAX(lif->li_ifnum, max_lifnum);
if (lif->li_ifflags & IFF_IPV4) {
- if (lif->li_ifnum > max_ipv4) {
- max_ipv4 = lif->li_ifnum;
- }
+ fstr = "inet";
} else if (lif->li_ifflags & IFF_IPV6) {
- if (lif->li_ifnum > max_ipv6) {
- max_ipv6 = lif->li_ifnum;
- }
+ fstr = "inet6";
} else {
/* Unlikely case */
rcm_log_message(RCM_DEBUG,
"IP: Re-plumb ignored (%s:%d)\n",
pif->pi_ifname, lif->li_ifnum);
- lif = lif->li_next;
continue;
}
- if (lif->li_ifnum == 0) { /* physical interface instance */
- if ((lif->li_ifflags & IFF_NOFAILOVER) ||
- (strcmp(pif->pi_grpname, "") == 0)) {
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s\n", USR_SBIN_IFCONFIG,
- lif->li_reconfig);
- } else if (lif->li_ifflags & IFF_IPV4) {
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s inet plumb group %s\n",
- USR_SBIN_IFCONFIG,
- pif->pi_ifname, pif->pi_grpname);
- } else if (lif->li_ifflags & IFF_IPV6) {
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s inet6 plumb group %s\n",
- USR_SBIN_IFCONFIG,
- pif->pi_ifname, pif->pi_grpname);
- }
+ /* ignore logical interface instances */
+ if (lif->li_ifnum != 0)
+ continue;
+
+ if ((lif->li_ifflags & IFF_NOFAILOVER) || !ipmp) {
+ success = ifconfig("", "", lif->li_reconfig, B_FALSE);
+ } else {
+ (void) snprintf(buf, sizeof (buf), "plumb group %s",
+ pif->pi_grname);
+ success = ifconfig(pif->pi_ifname, fstr, buf, B_FALSE);
+ }
+
+ if (!success) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot plumb (%s) %s\n"), pif->pi_ifname,
+ strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * Restart DHCP if necessary.
+ */
+ if ((lif->li_ifflags & IFF_DHCPRUNNING) &&
+ !ifconfig(pif->pi_ifname, fstr, CFG_DHCP_CMD, B_FALSE)) {
+ rcm_log_message(RCM_ERROR, _("IP: Cannot start DHCP "
+ "(%s) %s\n"), pif->pi_ifname, strerror(errno));
+ return (-1);
+ }
+ rcm_log_message(RCM_TRACE2,
+ "IP: if_replumb: Modcnt = %d\n", lif->li_modcnt);
+ /* modinsert modules in order, ignore driver(last) */
+ for (i = 0; i < (lif->li_modcnt - 1); i++) {
rcm_log_message(RCM_TRACE2,
- "IP: if_replumb: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
+ "IP: modinsert: Pos = %d Mod = %s\n",
+ i, lif->li_modules[i]);
+ if (modop(pif->pi_ifname, lif->li_modules[i], i,
+ MOD_INSERT) == -1) {
rcm_log_message(RCM_ERROR,
- _("IP: Cannot plumb (%s) %s\n"),
- pif->pi_ifname, strerror(errno));
+ _("IP: modinsert error(%s)\n"),
+ pif->pi_ifname);
return (-1);
}
-
- rcm_log_message(RCM_TRACE2,
- "IP: if_replumb: Modcnt = %d\n", lif->li_modcnt);
- /* modinsert modules in order, ignore driver(last) */
- for (i = 0; i < (lif->li_modcnt - 1); i++) {
- rcm_log_message(RCM_TRACE2,
- "IP: modinsert: Pos = %d Mod = %s\n",
- i, lif->li_modules[i]);
- if (modop(pif->pi_ifname, lif->li_modules[i], i,
- MOD_INSERT) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: modinsert error(%s)\n"),
- pif->pi_ifname);
- return (-1);
- }
- }
}
-
- lif = lif->li_next;
}
/* Now, add all the logical interfaces in the correct order */
- for (i = 1; i <= MAX(max_ipv6, max_ipv4); i++) {
+ for (i = 1; i <= max_lifnum; i++) {
+ (void) snprintf(lifname, LIFNAMSIZ, "%s:%d", pif->pi_ifname, i);
+
/* reset lif through every iteration */
- lif = pif->pi_lifs;
- while (lif != NULL) {
- if (((lif->li_ifflags & IFF_NOFAILOVER) ||
- (strcmp(pif->pi_grpname, "") == 0)) &&
- (lif->li_ifnum == i)) {
- /* Plumb in the logical interface */
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s\n", USR_SBIN_IFCONFIG,
- lif->li_reconfig);
- rcm_log_message(RCM_TRACE2,
- "IP: if_replumb: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot addif (%s:%d) "
- "%s\n"),
- pif->pi_ifname, i, strerror(errno));
- return (-1);
- }
+ for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
+ /*
+ * Process entries in order. If the interface is
+ * using IPMP, only process test addresses.
+ */
+ if (lif->li_ifnum != i ||
+ (ipmp && !(lif->li_ifflags & IFF_NOFAILOVER)))
+ continue;
+
+ if (!ifconfig("", "", lif->li_reconfig, B_FALSE)) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot addif (%s) %s\n"), lifname,
+ strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * Restart DHCP if necessary.
+ */
+ if ((lif->li_ifflags & IFF_DHCPRUNNING) &&
+ !ifconfig(lifname, fstr, CFG_DHCP_CMD, B_FALSE)) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot start DHCP (%s) %s\n"),
+ lifname, strerror(errno));
+ return (-1);
}
- lif = lif->li_next;
}
}
@@ -1865,71 +1768,64 @@ clr_cfg_state(ip_pif_t *pif)
}
/*
- * ip_ipmp_offline() - Failover from if_from to if_to using a
- * minimum redudancy of min_red. This uses IPMPs
- * "offline" mechanism to achieve the failover.
+ * Attempt to offline ip_cache_t `node'; returns an IPMP error code.
*/
static int
-ip_ipmp_offline(ip_cache_t *if_from, ip_cache_t *if_to)
+ip_ipmp_offline(ip_cache_t *node)
{
- mpathd_cmd_t mpdcmd;
-
- if ((if_from == NULL) || (if_from->ip_pif == NULL) ||
- (if_from->ip_pif->pi_ifname == NULL)) {
- return (-1);
- }
+ int retval;
+ ipmp_handle_t handle;
rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline\n");
- mpdcmd.cmd_command = MI_OFFLINE;
- (void) strcpy(mpdcmd.cmd_ifname, if_from->ip_pif->pi_ifname);
-
- if ((if_to != NULL) && (if_to->ip_pif != NULL) &&
- (if_to->ip_pif->pi_ifname != NULL)) {
- rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline (%s)->(%s)\n",
- if_from->ip_pif->pi_ifname, if_to->ip_pif->pi_ifname);
- (void) strncpy(mpdcmd.cmd_movetoif, if_to->ip_pif->pi_ifname,
- sizeof (mpdcmd.cmd_movetoif));
- mpdcmd.cmd_movetoif[sizeof (mpdcmd.cmd_movetoif) - 1] = '\0';
- } else {
- rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline (%s)->(any)\n",
- if_from->ip_pif->pi_ifname);
- (void) strcpy(mpdcmd.cmd_movetoif, ""); /* signifies any */
+ if ((retval = ipmp_open(&handle)) != IPMP_SUCCESS) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: cannot create ipmp handle: %s\n"),
+ ipmp_errmsg(retval));
+ return (retval);
}
- mpdcmd.cmd_min_red = if_from->ip_ifred;
- if (mpathd_send_cmd(&mpdcmd) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd offline error: %s\n"),
- strerror(errno));
- return (-1);
+ retval = ipmp_offline(handle, node->ip_pif->pi_ifname, node->ip_ifred);
+ if (retval != IPMP_SUCCESS) {
+ rcm_log_message(RCM_ERROR, _("IP: ipmp_offline error: %s\n"),
+ ipmp_errmsg(retval));
+ } else {
+ rcm_log_message(RCM_TRACE1, "IP: ipmp_offline success\n");
}
- rcm_log_message(RCM_TRACE1, "IP: ipmp offline success\n");
- return (0);
+ ipmp_close(handle);
+ return (retval);
}
/*
- * ip_ipmp_undo_offline() - Undo prior offline of the interface.
- * This uses IPMPs "undo offline" feature.
+ * Attempt to undo the offline ip_cache_t `node'; returns an IPMP error code.
*/
static int
ip_ipmp_undo_offline(ip_cache_t *node)
{
- mpathd_cmd_t mpdcmd;
+ int retval;
+ ipmp_handle_t handle;
- mpdcmd.cmd_command = MI_UNDO_OFFLINE;
- (void) strcpy(mpdcmd.cmd_ifname, node->ip_pif->pi_ifname);
+ rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_undo_offline\n");
- if (mpathd_send_cmd(&mpdcmd) < 0) {
+ if ((retval = ipmp_open(&handle)) != IPMP_SUCCESS) {
rcm_log_message(RCM_ERROR,
- _("IP: mpathd error: %s\n"),
- strerror(errno));
- return (-1);
+ _("IP: cannot create ipmp handle: %s\n"),
+ ipmp_errmsg(retval));
+ return (retval);
}
- rcm_log_message(RCM_TRACE1, "IP: ipmp undo offline success\n");
- return (0);
+ retval = ipmp_undo_offline(handle, node->ip_pif->pi_ifname);
+ if (retval != IPMP_SUCCESS) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: ipmp_undo_offline error: %s\n"),
+ ipmp_errmsg(retval));
+ } else {
+ rcm_log_message(RCM_TRACE1, "IP: ipmp_undo_offline success\n");
+ }
+
+ ipmp_close(handle);
+ return (retval);
}
/*
@@ -1946,10 +1842,9 @@ get_link_resource(const char *link)
char *resource;
dladm_status_t status;
- if ((status = dladm_name2info(dld_handle, link, &linkid, &flags, NULL,
- NULL)) != DLADM_STATUS_OK) {
+ status = dladm_name2info(dld_handle, link, &linkid, &flags, NULL, NULL);
+ if (status != DLADM_STATUS_OK)
goto fail;
- }
if (!(flags & DLADM_OPT_ACTIVE)) {
status = DLADM_STATUS_FAILED;
@@ -1976,243 +1871,6 @@ fail:
}
/*
- * if_get_flags() - Return the cached physical interface flags
- * Call with cache_lock held
- */
-static uint64_t
-if_get_flags(ip_pif_t *pif)
-{
- ip_lif_t *lif;
-
- for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) {
- if (lif->li_ifnum == 0) {
- return (lif->li_ifflags & RCM_PIF_FLAGS);
- }
- }
- return (0);
-}
-
-/*
- * mpathd_send_cmd() - Sends the command to in.mpathd.
- */
-static int
-mpathd_send_cmd(mpathd_cmd_t *mpd)
-{
- mpathd_unoffline_t mpc;
- struct mpathd_response mpr;
- int i;
- int s;
-
- rcm_log_message(RCM_TRACE1, "IP: mpathd_send_cmd \n");
-
- for (i = 0; i < MPATHD_MAX_RETRIES; i++) {
- s = connect_to_mpathd(AF_INET);
- if (s == -1) {
- s = connect_to_mpathd(AF_INET6);
- if (s == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot talk to mpathd\n"));
- return (-1);
- }
- }
- switch (mpd->cmd_command) {
- case MI_OFFLINE :
- rcm_log_message(RCM_TRACE1, "IP: MI_OFFLINE: "
- "(%s)->(%s) redundancy = %d\n", mpd->cmd_ifname,
- mpd->cmd_movetoif, mpd->cmd_min_red);
-
- if (write(s, mpd, sizeof (mpathd_cmd_t)) !=
- sizeof (mpathd_cmd_t)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd write: %s\n"),
- strerror(errno));
- (void) close(s);
- return (-1);
- }
- break;
-
- case MI_SETOINDEX :
- rcm_log_message(RCM_TRACE1, "IP: MI_SETOINDEX: "
- "(%s)->(%s) family = %d\n", mpd->from_lifname,
- mpd->to_pifname, mpd->addr_family);
-
- if (write(s, mpd, sizeof (mpathd_cmd_t)) !=
- sizeof (mpathd_cmd_t)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd write: %s\n"),
- strerror(errno));
- (void) close(s);
- return (-1);
- }
- break;
-
- case MI_UNDO_OFFLINE:
- /* mpathd checks for exact size of the message */
- mpc.cmd_command = mpd->cmd_command;
- (void) strcpy(mpc.cmd_ifname, mpd->cmd_ifname);
-
- rcm_log_message(RCM_TRACE1, "IP: MI_UNDO_OFFLINE: "
- "(%s)\n", mpd->cmd_ifname);
-
- if (write(s, &mpc, sizeof (mpathd_unoffline_t)) !=
- sizeof (mpathd_unoffline_t)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd write: %s\n"),
- strerror(errno));
- (void) close(s);
- return (-1);
- }
- break;
- default :
- rcm_log_message(RCM_ERROR,
- _("IP: unsupported mpathd command\n"));
- (void) close(s);
- return (-1);
- }
-
- bzero(&mpr, sizeof (struct mpathd_response));
- /* Read the result from mpathd */
- if (read(s, &mpr, sizeof (struct mpathd_response)) !=
- sizeof (struct mpathd_response)) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd read : %s\n"), strerror(errno));
- (void) close(s);
- return (-1);
- }
-
- (void) close(s);
- if (mpr.resp_mpathd_err == 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: mpathd_send_cmd success\n");
- return (0); /* Successful */
- }
-
- if (mpr.resp_mpathd_err == MPATHD_SYS_ERROR) {
- if (mpr.resp_sys_errno == EAGAIN) {
- (void) sleep(1);
- rcm_log_message(RCM_DEBUG,
- "IP: mpathd retrying\n");
- continue; /* Retry */
- }
- errno = mpr.resp_sys_errno;
- rcm_log_message(RCM_WARNING,
- _("IP: mpathd_send_cmd error: %s\n"),
- strerror(errno));
- } else if (mpr.resp_mpathd_err == MPATHD_MIN_RED_ERROR) {
- errno = EIO;
- rcm_log_message(RCM_ERROR, _("IP: in.mpathd(1M): "
- "Minimum redundancy not met\n"));
- } else {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd_send_cmd error\n"));
- }
- /* retry */
- }
-
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd_send_cmd failed %d retries\n"), MPATHD_MAX_RETRIES);
- return (-1);
-}
-
-/*
- * Returns -1 on failure. Returns the socket file descriptor on
- * success.
- */
-static int
-connect_to_mpathd(int family)
-{
- int s;
- struct sockaddr_storage ss;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
- struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
- int addrlen;
- int ret;
- int on;
-
- rcm_log_message(RCM_TRACE1, "IP: connect_to_mpathd\n");
-
- s = socket(family, SOCK_STREAM, 0);
- if (s < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd socket: %s\n"), strerror(errno));
- return (-1);
- }
- bzero((char *)&ss, sizeof (ss));
- ss.ss_family = family;
- /*
- * Need to bind to a privelged port. For non-root, this
- * will fail. in.mpathd verifies that only commands coming
- * from priveleged ports succeed so that the ordinary user
- * can't issue offline commands.
- */
- on = 1;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd setsockopt: TCP_ANONPRIVBIND: %s\n"),
- strerror(errno));
- return (-1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = 0;
- sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addrlen = sizeof (struct sockaddr_in);
- break;
- case AF_INET6:
- sin6->sin6_port = 0;
- sin6->sin6_addr = loopback_addr;
- addrlen = sizeof (struct sockaddr_in6);
- break;
- }
- ret = bind(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd bind: %s\n"), strerror(errno));
- return (-1);
- }
- switch (family) {
- case AF_INET:
- sin->sin_port = htons(MPATHD_PORT);
- break;
- case AF_INET6:
- sin6->sin6_port = htons(MPATHD_PORT);
- break;
- }
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- if (ret != 0) {
- if (errno == ECONNREFUSED) {
- /* in.mpathd is not running, start it */
- if (rcm_exec_cmd(MPATHD_PATH) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd exec: %s\n"),
- strerror(errno));
- return (-1);
- }
- ret = connect(s, (struct sockaddr *)&ss, addrlen);
- }
- if (ret != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd connect: %s\n"), strerror(errno));
- return (-1);
- }
- }
- on = 0;
- if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on,
- sizeof (on)) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: mpathd setsockopt TCP_ANONPRIVBIND: %s\n"),
- strerror(errno));
- return (-1);
- }
-
- rcm_log_message(RCM_TRACE1, "IP: connect_to_mpathd success\n");
-
- return (s);
-}
-
-/*
* modop() - Remove/insert a module
*/
static int
@@ -2239,12 +1897,10 @@ modop(char *name, char *arg, int pos, char op)
if (op == MOD_REMOVE) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s modremove %s@%d\n", USR_SBIN_IFCONFIG, name, arg,
- pos);
+ "%s %s modremove %s@%d\n", SBIN_IFCONFIG, name, arg, pos);
} else if (op == MOD_INSERT) {
(void) snprintf(syscmd, sizeof (syscmd),
- "%s %s modinsert %s@%d\n", USR_SBIN_IFCONFIG, name, arg,
- pos);
+ "%s %s modinsert %s@%d\n", SBIN_IFCONFIG, name, arg, pos);
} else {
rcm_log_message(RCM_ERROR,
_("IP: modop(%s): unknown operation\n"), name);
@@ -2277,11 +1933,11 @@ get_modlist(char *name, ip_lif_t *lif)
int i;
int num_mods;
struct lifreq lifr;
- struct str_list strlist;
+ struct str_list strlist = { 0 };
rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s)\n", name);
- (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
+ (void) strlcpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
lifr.lifr_flags = lif->li_ifflags;
if (ip_domux2fd(&mux_fd, &muxid_fd, &fd, &lifr) < 0) {
rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd(%s)\n"), name);
@@ -2292,39 +1948,34 @@ get_modlist(char *name, ip_lif_t *lif)
rcm_log_message(RCM_ERROR,
_("IP: get_modlist(%s): I_LIST(%s) \n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ goto fail;
}
strlist.sl_nmods = num_mods;
strlist.sl_modlist = malloc(sizeof (struct str_mlist) * num_mods);
-
if (strlist.sl_modlist == NULL) {
rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): %s\n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ goto fail;
}
if (ioctl(fd, I_LIST, (caddr_t)&strlist) < 0) {
rcm_log_message(RCM_ERROR,
_("IP: get_modlist(%s): I_LIST error: %s\n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ goto fail;
}
for (i = 0; i < strlist.sl_nmods; i++) {
- lif->li_modules[i] =
- malloc(strlen(strlist.sl_modlist[i].l_name)+1);
+ lif->li_modules[i] = strdup(strlist.sl_modlist[i].l_name);
if (lif->li_modules[i] == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: get_modlist(%s): %s\n"),
name, strerror(errno));
- (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
- return (-1);
+ while (i > 0)
+ free(lif->li_modules[--i]);
+ goto fail;
}
- (void) strcpy(lif->li_modules[i], strlist.sl_modlist[i].l_name);
}
lif->li_modcnt = strlist.sl_nmods;
@@ -2332,6 +1983,10 @@ get_modlist(char *name, ip_lif_t *lif)
rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s) success\n", name);
return (ip_plink(mux_fd, muxid_fd, fd, &lifr));
+fail:
+ free(strlist.sl_modlist);
+ (void) ip_plink(mux_fd, muxid_fd, fd, &lifr);
+ return (-1);
}
/*
@@ -2436,6 +2091,7 @@ ip_plink(int mux_fd, int muxid_fd, int fd, struct lifreq *lifr)
*
* Notify online to IP address consumers.
*/
+/*ARGSUSED*/
static int
ip_onlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
rcm_info_t **depend_info)
@@ -2464,6 +2120,7 @@ ip_onlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
*
* Offline IP address consumers.
*/
+/*ARGSUSED*/
static int
ip_offlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
rcm_info_t **depend_info)
@@ -2494,9 +2151,9 @@ ip_offlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags,
}
/*
- * ip_get_addrlist() - Compile list of IP addresses hosted on this NIC (node)
- * This routine malloc() required memeory for the list
- * Returns list on success, NULL if failed
+ * ip_get_addrlist() - Get the list of IP addresses on this interface (node);
+ * This routine malloc()s required memory for the list.
+ * Returns the list on success, NULL on failure.
* Call with cache_lock held.
*/
static char **
@@ -2504,11 +2161,9 @@ ip_get_addrlist(ip_cache_t *node)
{
ip_lif_t *lif;
char **addrlist = NULL;
- int numifs;
+ int i, numifs;
+ size_t addrlistsize;
char addrstr[INET6_ADDRSTRLEN];
- void *addr;
- int af;
- int i;
rcm_log_message(RCM_TRACE2, "IP: ip_get_addrlist(%s)\n",
node->ip_resource);
@@ -2532,35 +2187,21 @@ ip_get_addrlist(ip_cache_t *node)
for (lif = node->ip_pif->pi_lifs, i = 0; lif != NULL;
lif = lif->li_next, i++) {
- af = lif->li_addr.family;
- if (af == AF_INET6) {
- addr = &lif->li_addr.ip6.sin6_addr;
- } else if (af == AF_INET) {
- addr = &lif->li_addr.ip4.sin_addr;
- } else {
- rcm_log_message(RCM_DEBUG,
- "IP: unknown addr family %d, assuming AF_INET\n",
- af);
- af = AF_INET;
- addr = &lif->li_addr.ip4.sin_addr;
- }
- if (inet_ntop(af, addr, addrstr, INET6_ADDRSTRLEN) == NULL) {
- rcm_log_message(RCM_ERROR,
- _("IP: inet_ntop: %s\n"), strerror(errno));
+ if (!ip_addrstr(lif, addrstr, sizeof (addrstr))) {
ip_free_addrlist(addrlist);
return (NULL);
}
- if ((addrlist[i] = malloc(strlen(addrstr) + RCM_SIZE_SUNW_IP))
- == NULL) {
+ addrlistsize = strlen(addrstr) + sizeof (RCM_STR_SUNW_IP);
+ if ((addrlist[i] = malloc(addrlistsize)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: ip_get_addrlist(%s) malloc failure(%s)\n"),
node->ip_resource, strerror(errno));
ip_free_addrlist(addrlist);
return (NULL);
}
- (void) strcpy(addrlist[i], RCM_STR_SUNW_IP); /* SUNW_ip/ */
- (void) strcat(addrlist[i], addrstr); /* SUNW_ip/<address> */
+ (void) snprintf(addrlist[i], addrlistsize, "%s%s",
+ RCM_STR_SUNW_IP, addrstr);
rcm_log_message(RCM_DEBUG, "Anon Address: %s\n", addrlist[i]);
}
@@ -2611,16 +2252,13 @@ ip_consumer_notify(rcm_handle_t *hd, datalink_id_t linkid, char **errorp,
return;
}
/*
- * Inform anonymous consumers about IP addresses being
- * onlined
+ * Inform anonymous consumers about IP addresses being onlined.
*/
(void) ip_onlinelist(hd, node, errorp, flags, depend_info);
(void) mutex_unlock(&cache_lock);
rcm_log_message(RCM_TRACE2, "IP: ip_consumer_notify success\n");
- return;
-
}
/*
@@ -2632,20 +2270,18 @@ if_configure(datalink_id_t linkid)
char ifinst[MAXLINKNAMELEN];
char cfgfile[MAXPATHLEN];
char cached_name[RCM_LINK_RESOURCE_MAX];
- struct stat statbuf;
+ FILE *hostfp, *host6fp;
ip_cache_t *node;
- int af = 0;
- int ipmp = 0;
+ boolean_t ipmp = B_FALSE;
assert(linkid != DATALINK_INVALID_LINKID);
-
rcm_log_message(RCM_TRACE1, _("IP: if_configure(%u)\n"), linkid);
/* Check for the interface in the cache */
(void) snprintf(cached_name, sizeof (cached_name), "%s/%u",
RCM_LINK_PREFIX, linkid);
- /* Check if the interface is new or was previously offlined */
+ /* Check if the interface is new or was not previously offlined */
(void) mutex_lock(&cache_lock);
if (((node = cache_lookup(NULL, cached_name, CACHE_REFRESH)) != NULL) &&
(!(node->ip_cachestate & CACHE_IF_OFFLINED))) {
@@ -2663,76 +2299,69 @@ if_configure(datalink_id_t linkid)
return (-1);
}
- /* Scan IPv4 configuration first */
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV4, ifinst);
- cfgfile[MAXPATHLEN - 1] = '\0';
-
+ /*
+ * Scan the IPv4 and IPv6 hostname files to see if (a) they exist
+ * and (b) if either one places the interface into an IPMP group.
+ */
+ (void) snprintf(cfgfile, MAXPATHLEN, CFGFILE_FMT_IPV4, ifinst);
rcm_log_message(RCM_TRACE1, "IP: Scanning %s\n", cfgfile);
- if (stat(cfgfile, &statbuf) == 0) {
- af |= CONFIG_AF_INET;
- if (isgrouped(cfgfile)) {
- ipmp++;
- }
+ if ((hostfp = fopen(cfgfile, "r")) != NULL) {
+ if (isgrouped(cfgfile))
+ ipmp = B_TRUE;
}
- /* Scan IPv6 configuration details */
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV6, ifinst);
- cfgfile[MAXPATHLEN - 1] = '\0';
+ (void) snprintf(cfgfile, MAXPATHLEN, CFGFILE_FMT_IPV6, ifinst);
rcm_log_message(RCM_TRACE1, "IP: Scanning %s\n", cfgfile);
- if (stat(cfgfile, &statbuf) == 0) {
- af |= CONFIG_AF_INET6;
- if ((ipmp == 0) && isgrouped(cfgfile)) {
- ipmp++;
- }
+ if ((host6fp = fopen(cfgfile, "r")) != NULL) {
+ if (!ipmp && isgrouped(cfgfile))
+ ipmp = B_TRUE;
}
- if (af & CONFIG_AF_INET) {
- if (if_ipmp_config(ifinst, CONFIG_AF_INET, ipmp) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: IPv4 Post-attach failed (%s)\n"), ifinst);
- return (-1);
- }
+ /*
+ * Configure the interface according to its hostname files.
+ */
+ if (hostfp != NULL &&
+ if_config_inst(ifinst, hostfp, AF_INET, ipmp) == -1) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: IPv4 Post-attach failed (%s)\n"), ifinst);
+ goto fail;
}
- if (af & CONFIG_AF_INET6) {
- if (if_ipmp_config(ifinst, CONFIG_AF_INET6, ipmp) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: IPv6 Post-attach failed(%s)\n"), ifinst);
- return (-1);
- }
+ if (host6fp != NULL &&
+ if_config_inst(ifinst, host6fp, AF_INET6, ipmp) == -1) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: IPv6 Post-attach failed (%s)\n"), ifinst);
+ goto fail;
}
+ (void) fclose(hostfp);
+ (void) fclose(host6fp);
rcm_log_message(RCM_TRACE1, "IP: if_configure(%s) success\n", ifinst);
-
return (0);
-
+fail:
+ (void) fclose(hostfp);
+ (void) fclose(host6fp);
+ return (-1);
}
/*
- * isgrouped() - Scans the given config file to see if this is a grouped
- * interface
- * Returns non-zero if true; 0 if false
+ * isgrouped() - Scans the given config file to see if this interface is
+ * using IPMP. Returns B_TRUE or B_FALSE.
*/
-static int
-isgrouped(char *cfgfile)
+static boolean_t
+isgrouped(const char *cfgfile)
{
FILE *fp;
struct stat statb;
- char *buf = NULL;
- char *tokens[MAXARGS]; /* token pointers */
- char tspace[MAXLINE]; /* token space */
- int ntok;
- int group = 0;
-
- if (cfgfile == NULL)
- return (0);
+ char *nlp, *line, *token, *lasts, *buf;
+ boolean_t grouped = B_FALSE;
rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s)\n", cfgfile);
if (stat(cfgfile, &statb) != 0) {
rcm_log_message(RCM_TRACE1,
_("IP: No config file(%s)\n"), cfgfile);
- return (0);
+ return (B_FALSE);
}
/*
@@ -2744,609 +2373,284 @@ isgrouped(char *cfgfile)
if (statb.st_size <= 1) {
rcm_log_message(RCM_TRACE1,
_("IP: Empty config file(%s)\n"), cfgfile);
- return (0);
+ return (B_FALSE);
}
if ((fp = fopen(cfgfile, "r")) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: Cannot open configuration file(%s): %s\n"), cfgfile,
strerror(errno));
- return (0);
+ return (B_FALSE);
}
- if ((buf = calloc(1, statb.st_size)) == NULL) {
+ if ((buf = malloc(statb.st_size)) == NULL) {
rcm_log_message(RCM_ERROR,
- _("IP: calloc failure(%s): %s\n"), cfgfile,
+ _("IP: malloc failure(%s): %s\n"), cfgfile,
strerror(errno));
- (void) fclose(fp);
- return (0);
+ goto out;
}
while (fgets(buf, statb.st_size, fp) != NULL) {
- if (*buf == '\0')
- continue;
-
- tokenize(buf, tokens, tspace, &ntok);
- while (ntok) {
- if (STREQ("group", tokens[ntok - 1])) {
- if (tokens[ntok] != NULL) {
- group++;
- }
+ if ((nlp = strrchr(buf, '\n')) != NULL)
+ *nlp = '\0';
+
+ line = buf;
+ while ((token = strtok_r(line, " \t", &lasts)) != NULL) {
+ line = NULL;
+ if (STREQ("group", token) &&
+ strtok_r(NULL, " \t", &lasts) != NULL) {
+ grouped = B_TRUE;
+ goto out;
}
- ntok--;
}
}
-
+out:
free(buf);
-
(void) fclose(fp);
- if (group <= 0) {
- rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s) non-grouped\n",
- cfgfile);
- return (0);
- } else {
- rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s) grouped\n",
- cfgfile);
- return (1);
- }
-}
+ rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s): %d\n", cfgfile,
+ grouped);
+ return (grouped);
+}
/*
- * if_ipmp_config() - Configure an interface instance as specified by the
+ * if_config_inst() - Configure an interface instance as specified by the
* address family af and if it is grouped (ipmp).
*/
static int
-if_ipmp_config(char *ifinst, int af, int ipmp)
+if_config_inst(const char *ifinst, FILE *hfp, int af, boolean_t ipmp)
{
- char cfgfile[MAXPATHLEN]; /* configuration file */
- FILE *fp;
+ FILE *ifparsefp;
struct stat statb;
- char *buf;
- char *tokens[MAXARGS]; /* list of config attributes */
- char tspace[MAXLINE]; /* token space */
- char syscmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1];
- char grpcmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1];
- char fstr[8]; /* address family string inet or inet6 */
- int nofailover = 0;
- int newattach = 0;
- int cmdvalid = 0;
- int ntok;
- int n;
- int stdif = 0;
-
- if (ifinst == NULL)
- return (0);
+ char *buf = NULL;
+ char *ifparsebuf = NULL;
+ uint_t ifparsebufsize;
+ const char *fstr; /* address family string */
+ boolean_t stdif = B_FALSE;
- rcm_log_message(RCM_TRACE1, "IP: if_ipmp_config(%s) ipmp = %d\n",
+ rcm_log_message(RCM_TRACE1, "IP: if_config_inst(%s) ipmp = %d\n",
ifinst, ipmp);
- if (af & CONFIG_AF_INET) {
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV4,
- ifinst);
- (void) strcpy(fstr, "inet");
- } else if (af & CONFIG_AF_INET6) {
- (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV6,
- ifinst);
- (void) strcpy(fstr, "inet6");
- } else {
- return (0); /* nothing to do */
- }
-
- cfgfile[MAXPATHLEN - 1] = '\0';
- grpcmd[0] = '\0';
-
- if (stat(cfgfile, &statb) != 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: No config file(%s)\n", ifinst);
- return (0);
+ if (fstat(fileno(hfp), &statb) != 0) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot fstat file(%s)\n"), ifinst);
+ goto fail;
}
- /* Config file exists, plumb in the physical interface */
- if (af & CONFIG_AF_INET6) {
- if (if_getcount(AF_INET6) == 0) {
- /*
- * Configure software loopback driver if this is the
- * first IPv6 interface plumbed
- */
- newattach++;
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s lo0 %s plumb ::1 up", USR_SBIN_IFCONFIG, fstr);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot plumb (%s) %s\n"),
- ifinst, strerror(errno));
- return (-1);
- }
- }
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s plumb up",
- USR_SBIN_IFCONFIG, ifinst, fstr);
- } else {
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s plumb ",
- USR_SBIN_IFCONFIG, ifinst, fstr);
- if (if_getcount(AF_INET) == 0) {
- newattach++;
- }
+ switch (af) {
+ case AF_INET:
+ fstr = "inet";
+ break;
+ case AF_INET6:
+ fstr = "inet6";
+ break;
+ default:
+ assert(0);
}
- rcm_log_message(RCM_TRACE1, "IP: Exec: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Cannot plumb (%s) %s\n"), ifinst, strerror(errno));
- return (-1);
- }
+ /*
+ * The hostname file exists; plumb the physical interface.
+ */
+ if (!ifconfig(ifinst, fstr, "plumb", B_FALSE))
+ goto fail;
- /* Check if config file is empty, if so, nothing else to do */
- if (statb.st_size == 0) {
+ /* Skip static configuration if the hostname file is empty */
+ if (statb.st_size <= 1) {
rcm_log_message(RCM_TRACE1,
- "IP: Zero size config file(%s)\n", ifinst);
- return (0);
+ _("IP: Zero size hostname file(%s)\n"), ifinst);
+ goto configured;
}
- if ((fp = fopen(cfgfile, "r")) == NULL) {
+ if (fseek(hfp, 0, SEEK_SET) == -1) {
rcm_log_message(RCM_ERROR,
- _("IP: Open error(%s): %s\n"), cfgfile, strerror(errno));
- return (-1);
+ _("IP: Cannot rewind hostname file(%s): %s\n"), ifinst,
+ strerror(errno));
+ goto fail;
}
+ /*
+ * Allocate the worst-case single-line buffer sizes. A bit skanky,
+ * but since hostname files are small, this should suffice.
+ */
if ((buf = calloc(1, statb.st_size)) == NULL) {
rcm_log_message(RCM_ERROR,
_("IP: calloc(%s): %s\n"), ifinst, strerror(errno));
- (void) fclose(fp);
- return (-1);
+ goto fail;
}
- /* a single line with one token implies a classical if */
- if (fgets(buf, statb.st_size, fp) != NULL) {
- tokenize(buf, tokens, tspace, &ntok);
- if (ntok == 1) {
- rcm_log_message(RCM_TRACE1, "IP: Standard interface\n");
- stdif++;
- }
- }
- if (fseek(fp, 0L, SEEK_SET) == -1) {
- rcm_log_message(RCM_ERROR, _("IP: fseek: %s\n"),
- strerror(errno));
- return (-1);
+ ifparsebufsize = statb.st_size + sizeof (SBIN_IFPARSE " -s inet6 ");
+ if ((ifparsebuf = calloc(1, ifparsebufsize)) == NULL) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: calloc(%s): %s\n"), ifinst, strerror(errno));
+ goto fail;
}
/*
- * Process the config command
- * This loop also handles multiple logical interfaces that may
- * be configured on a single line
+ * For IPv4, determine whether the hostname file consists of a single
+ * line. We need to handle these specially since they should
+ * automatically be suffixed with "netmask + broadcast + up".
*/
- while (fgets(buf, statb.st_size, fp) != NULL) {
- nofailover = 0;
- cmdvalid = 0;
+ if (af == AF_INET &&
+ fgets(buf, statb.st_size, hfp) != NULL &&
+ fgets(buf, statb.st_size, hfp) == NULL) {
+ rcm_log_message(RCM_TRACE1, "IP: one-line hostname file\n");
+ stdif = B_TRUE;
+ }
- if (*buf == '\0')
- continue;
+ if (fseek(hfp, 0L, SEEK_SET) == -1) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: Cannot rewind hostname file(%s): %s\n"), ifinst,
+ strerror(errno));
+ goto fail;
+ }
- tokenize(buf, tokens, tspace, &ntok);
- if (ntok <= 0)
+ /*
+ * Loop through the file one line at a time and feed it to ifconfig.
+ * If the interface is using IPMP, then we use /sbin/ifparse -s to
+ * weed out all of the data addresses, since those are already on the
+ * IPMP meta-interface.
+ */
+ while (fgets(buf, statb.st_size, hfp) != NULL) {
+ if (ntok(buf) == 0)
continue;
- /* Reset the config command */
- (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s ",
- USR_SBIN_IFCONFIG, ifinst, fstr);
-
- /* No parsing if this is first interface of its kind */
- if (newattach) {
- (void) strcat(syscmd, buf);
- /* Classic if */
- if ((af & CONFIG_AF_INET) && (stdif == 1)) {
- (void) strcat(syscmd, CFG_CMDS_STD);
- }
- rcm_log_message(RCM_TRACE1, "IP: New: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: Error: %s (%s): %s\n"),
- syscmd, ifinst, strerror(errno));
- }
+ if (!ipmp) {
+ (void) ifconfig(ifinst, fstr, buf, stdif);
continue;
}
- /* Parse the tokens to determine nature of the interface */
- for (n = 0; n < ntok; n++) {
- /* Handle pathological failover cases */
- if (STREQ("-failover", tokens[n]))
- nofailover++;
- if (STREQ("failover", tokens[n]))
- nofailover--;
-
- /* group attribute requires special processing */
- if (STREQ("group", tokens[n])) {
- if (tokens[n + 1] != NULL) {
- (void) snprintf(grpcmd, sizeof (grpcmd),
- "%s %s %s %s %s", USR_SBIN_IFCONFIG,
- ifinst, fstr,
- tokens[n], tokens[n + 1]);
- n++; /* skip next token */
- continue;
- }
- }
-
- /* Execute buffered command ? */
- if (STREQ("set", tokens[n]) ||
- STREQ("addif", tokens[n]) ||
- STREQ("removeif", tokens[n]) ||
- (n == (ntok -1))) {
-
- /* config command complete ? */
- if (n == (ntok -1)) {
- ADDSPACE(syscmd);
- (void) strcat(syscmd, tokens[n]);
- cmdvalid++;
- }
-
- if (!cmdvalid) {
- ADDSPACE(syscmd);
- (void) strcat(syscmd, tokens[n]);
- cmdvalid++;
- continue;
- }
- /* Classic if ? */
- if ((af & CONFIG_AF_INET) && (stdif == 1)) {
- (void) strcat(syscmd, CFG_CMDS_STD);
- }
-
- if (nofailover > 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: Interim exec: %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: %s fail(%s): %s\n"),
- syscmd, ifinst,
- strerror(errno));
- }
- } else {
- /* Have mpathd configure the address */
- if (if_mpathd_configure(syscmd, ifinst,
- af, ipmp) != 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: %s fail(%s): %s\n"),
- syscmd, ifinst,
- strerror(errno));
- }
- }
-
- /* Reset config command */
- (void) snprintf(syscmd, sizeof (syscmd),
- "%s %s %s ", USR_SBIN_IFCONFIG, ifinst,
- fstr);
- nofailover = 0;
- cmdvalid = 0;
- }
- /*
- * Note: No explicit command validation is required
- * since ifconfig to does it for us
- */
- ADDSPACE(syscmd);
- (void) strcat(syscmd, tokens[n]);
- cmdvalid++;
- }
- }
-
- free(buf);
- (void) fclose(fp);
-
- /*
- * The group name needs to be set after all the test/nofailover
- * addresses have been configured. Otherwise, if IPMP detects that the
- * interface is failed, the addresses will be moved to a working
- * interface before the '-failover' flag can be set.
- */
- if (grpcmd[0] != '\0') {
- rcm_log_message(RCM_TRACE1, "IP: set group name: %s\n", grpcmd);
- if (rcm_exec_cmd(grpcmd) != 0) {
- rcm_log_message(RCM_ERROR, _("IP: %s fail(%s): %s\n"),
- grpcmd, ifinst, strerror(errno));
+ (void) snprintf(ifparsebuf, ifparsebufsize, SBIN_IFPARSE
+ " -s %s %s", fstr, buf);
+ if ((ifparsefp = popen(ifparsebuf, "r")) == NULL) {
+ rcm_log_message(RCM_ERROR,
+ _("IP: cannot configure %s: popen \"%s\" "
+ "failed: %s\n"), ifinst, buf, strerror(errno));
+ goto fail;
}
- }
- rcm_log_message(RCM_TRACE1, "IP: if_ipmp_config(%s) success\n", ifinst);
-
- return (0);
-}
-
-/*
- * if_mpathd_configure() - Determine configuration disposition of the interface
- */
-static int
-if_mpathd_configure(char *syscmd, char *ifinst, int af, int ipmp)
-{
- char *tokens[MAXARGS];
- char tspace[MAXLINE];
- int ntok;
- char *addr;
- char *from_lifname;
- mpathd_cmd_t mpdcmd;
- int n;
-
- rcm_log_message(RCM_TRACE1, "IP: if_mpathd_configure(%s): %s\n",
- ifinst, syscmd);
-
- tokenize(syscmd, tokens, tspace, &ntok);
- if (ntok <= 0)
- return (0);
-
- addr = tokens[3]; /* by default, third token is valid address */
- for (n = 0; n < ntok; n++) {
- if (STREQ("set", tokens[n]) ||
- STREQ("addif", tokens[n])) {
- addr = tokens[n+1];
- if (addr == NULL) { /* invalid format */
- return (-1);
- } else
- break;
+ while (fgets(buf, statb.st_size, ifparsefp) != NULL) {
+ if (ntok(buf) > 0)
+ (void) ifconfig(ifinst, fstr, buf, stdif);
}
- }
- /* Check std. commands or no failed over address */
- if (STREQ("removeif", addr) || STREQ("group", addr) ||
- ((from_lifname = get_mpathd_dest(addr, af)) == NULL)) {
- rcm_log_message(RCM_TRACE1,
- "IP: No failed-over host, exec %s\n", syscmd);
- if (rcm_exec_cmd(syscmd) != 0) {
+ if (pclose(ifparsefp) == -1) {
rcm_log_message(RCM_ERROR,
- _("IP: %s failed(%s): %s\n"),
- syscmd, ifinst, strerror(errno));
- return (-1);
+ _("IP: cannot configure %s: pclose \"%s\" "
+ "failed: %s\n"), ifinst, buf, strerror(errno));
+ goto fail;
}
- return (0);
- }
-
- /* Check for non-IPMP failover scenarios */
- if ((ipmp <= 0) && (from_lifname != NULL)) {
- /* Address already hosted on another NIC, return */
- rcm_log_message(RCM_TRACE1,
- "IP: Non-IPMP failed-over host(%s): %s\n",
- ifinst, addr);
- return (0);
}
+configured:
/*
- * Valid failed-over host; have mpathd set the original index
+ * Bring up the interface (it may already be up)
+ *
+ * Technically, since the boot scripts only unconditionally bring up
+ * IPv6 interfaces, we should only unconditionally bring up IPv6 here.
+ * However, if we don't bring up IPv4, and a legacy IPMP configuration
+ * without test addresses is being used, we will never bring the
+ * interface up even though we would've at boot. One fix is to check
+ * if the IPv4 hostname file contains data addresses that we would've
+ * brought up, but there's no simple way to do that. Given that it's
+ * rare to have persistent IP configuration for an interface that
+ * leaves it down, we cheap out and always bring it up for IPMP.
*/
- mpdcmd.cmd_command = MI_SETOINDEX;
- (void) strcpy(mpdcmd.from_lifname, from_lifname);
- (void) strcpy(mpdcmd.to_pifname, ifinst);
- if (af & CONFIG_AF_INET6) {
- mpdcmd.addr_family = AF_INET6;
- } else {
- mpdcmd.addr_family = AF_INET;
- }
-
- /* Send command to in.mpathd(1M) */
- rcm_log_message(RCM_TRACE1,
- "IP: Attempting setoindex from (%s) to (%s) ....\n",
- from_lifname, ifinst);
-
- if (mpathd_send_cmd(&mpdcmd) < 0) {
- rcm_log_message(RCM_TRACE1,
- "IP: mpathd set original index unsuccessful: %s\n",
- strerror(errno));
- return (-1);
- }
-
- rcm_log_message(RCM_TRACE1,
- "IP: setoindex success (%s) to (%s)\n",
- from_lifname, ifinst);
-
- return (0);
-}
-
-/*
- * get_mpathd_dest() - Return current destination for lif; caller is
- * responsible to free memory allocated for address
- */
-static char *
-get_mpathd_dest(char *addr, int family)
-{
- int sock;
- char *buf;
- struct lifnum lifn;
- struct lifconf lifc;
- struct lifreq *lifrp;
- sa_family_t af = AF_INET; /* IPv4 by default */
- int i;
- struct lifreq lifreq;
- struct sockaddr_in *sin;
- struct sockaddr_in6 *sin6;
- struct hostent *hp;
- char *ifname = NULL;
- char *prefix = NULL;
- char addrstr[INET6_ADDRSTRLEN];
- char ifaddr[INET6_ADDRSTRLEN];
- int err;
-
- if (addr == NULL) {
- return (NULL);
- }
-
- rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s)\n", addr);
-
- if (family & CONFIG_AF_INET6) {
- af = AF_INET6;
- } else {
- af = AF_INET;
- }
-
- if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: failure opening %s socket: %s\n"),
- af == AF_INET6 ? "IPv6" : "IPv4", strerror(errno));
- return (NULL);
- }
-
- lifn.lifn_family = af;
- lifn.lifn_flags = 0;
- if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCLGIFNUM failed: %s\n"),
- strerror(errno));
- (void) close(sock);
- return (NULL);
- }
-
- if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
- rcm_log_message(RCM_ERROR, _("IP: calloc: %s\n"),
- strerror(errno));
- (void) close(sock);
- return (NULL);
- }
-
- lifc.lifc_family = af;
- lifc.lifc_flags = 0;
- lifc.lifc_len = sizeof (struct lifreq) * lifn.lifn_count;
- lifc.lifc_buf = buf;
-
- if (ioctl(sock, SIOCGLIFCONF, (char *)&lifc) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFCONF failed: %s\n"),
- strerror(errno));
- free(buf);
- (void) close(sock);
- return (NULL);
- }
+ if ((af == AF_INET6 || ipmp) && !ifconfig(ifinst, fstr, "up", B_FALSE))
+ goto fail;
- /* Filter out prefix address from netmask */
- (void) strcpy(ifaddr, addr);
- if ((prefix = strchr(ifaddr, '/')) != NULL) {
- *prefix = '\0'; /* We care about the address part only */
- }
+ /*
+ * For IPv4, if a DHCP configuration file exists, have DHCP configure
+ * the interface. As with the boot scripts, this is done after the
+ * hostname files are processed so that configuration in those files
+ * (such as IPMP group names) will be applied first.
+ */
+ if (af == AF_INET) {
+ char dhcpfile[MAXPATHLEN];
+ char *dhcpbuf;
+ off_t i, dhcpsize;
- /* Check for aliases */
- hp = getipnodebyname(ifaddr, af, AI_DEFAULT, &err);
- if (hp) {
- if (inet_ntop(af, (void *)hp->h_addr_list[0],
- ifaddr, sizeof (ifaddr)) == NULL) {
- /* Restore original address and use it */
- (void) strcpy(ifaddr, addr);
- if ((prefix = strchr(ifaddr, '/')) != NULL) {
- *prefix = '\0';
- }
- }
- freehostent(hp);
- }
- rcm_log_message(RCM_TRACE2, "IP: ifaddr(%s) = %s\n", addr, ifaddr);
+ (void) snprintf(dhcpfile, MAXPATHLEN, DHCPFILE_FMT, ifinst);
+ if (stat(dhcpfile, &statb) == -1)
+ goto out;
- /* now search the interfaces */
- lifrp = lifc.lifc_req;
- for (i = 0; i < lifn.lifn_count; i++, lifrp++) {
- (void) strcpy(lifreq.lifr_name, lifrp->lifr_name);
- /* Get the interface address for this interface */
- if (ioctl(sock, SIOCGLIFADDR, (char *)&lifreq) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCGLIFADDR: %s\n"), strerror(errno));
- free(buf);
- (void) close(sock);
- return (NULL);
- }
-
- if (af == AF_INET6) {
- sin6 = (struct sockaddr_in6 *)&lifreq.lifr_addr;
- if (inet_ntop(AF_INET6, (void *)&sin6->sin6_addr,
- addrstr, sizeof (addrstr)) == NULL) {
- continue;
- }
- } else {
- sin = (struct sockaddr_in *)&lifreq.lifr_addr;
- if (inet_ntop(AF_INET, (void *)&sin->sin_addr,
- addrstr, sizeof (addrstr)) == NULL) {
- continue;
- }
+ if ((dhcpbuf = copylist(dhcpfile, &dhcpsize)) == NULL) {
+ rcm_log_message(RCM_ERROR, _("IP: cannot read "
+ "(%s): %s\n"), dhcpfile, strerror(errno));
+ goto fail;
}
- if (STREQ(addrstr, ifaddr)) {
- /* Allocate memory to hold interface name */
- if ((ifname = (char *)malloc(LIFNAMSIZ)) == NULL) {
- rcm_log_message(RCM_ERROR,
- _("IP: malloc: %s\n"), strerror(errno));
- free(buf);
- (void) close(sock);
- return (NULL);
- }
-
- /* Copy the interface name */
- /*
- * (void) memcpy(ifname, lifrp->lifr_name,
- * sizeof (ifname));
- * ifname[sizeof (ifname) - 1] = '\0';
- */
- (void) strcpy(ifname, lifrp->lifr_name);
- break;
+ /*
+ * The copylist() API converts \n's to \0's, but we want them
+ * to be spaces.
+ */
+ if (dhcpsize > 0) {
+ for (i = 0; i < dhcpsize; i++)
+ if (dhcpbuf[i] == '\0')
+ dhcpbuf[i] = ' ';
+ dhcpbuf[dhcpsize - 1] = '\0';
}
+ (void) ifconfig(ifinst, CFG_DHCP_CMD, dhcpbuf, B_FALSE);
+ free(dhcpbuf);
}
-
- (void) close(sock);
+out:
+ free(ifparsebuf);
free(buf);
-
- if (ifname == NULL)
- rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s): none\n",
- addr);
- else
- rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s): %s\n",
- addr, ifname);
-
- return (ifname);
-}
-
-static int
-if_getcount(int af)
-{
- int sock;
- struct lifnum lifn;
-
- rcm_log_message(RCM_TRACE1, "IP: if_getcount\n");
-
- if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) {
- rcm_log_message(RCM_ERROR,
- _("IP: failure opening %s socket: %s\n"),
- af == AF_INET6 ? "IPv6" : "IPv4", strerror(errno));
- return (-1);
- }
-
- lifn.lifn_family = af;
- lifn.lifn_flags = 0;
- if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) {
- rcm_log_message(RCM_ERROR,
- _("IP: SIOCLGIFNUM failed: %s\n"),
- strerror(errno));
- (void) close(sock);
- return (-1);
- }
- (void) close(sock);
-
- rcm_log_message(RCM_TRACE1, "IP: if_getcount success: %d\n",
- lifn.lifn_count);
-
- return (lifn.lifn_count);
+ rcm_log_message(RCM_TRACE1, "IP: if_config_inst(%s) success\n", ifinst);
+ return (0);
+fail:
+ free(ifparsebuf);
+ free(buf);
+ rcm_log_message(RCM_ERROR, "IP: if_config_inst(%s) failure\n", ifinst);
+ return (-1);
}
/*
- * tokenize() - turn a command line into tokens; caller is responsible to
- * provide enough memory to hold all tokens
+ * ntok() - count the number of tokens in the provided buffer.
*/
-static void
-tokenize(char *line, char **tokens, char *tspace, int *ntok)
+static uint_t
+ntok(const char *cp)
{
- char *cp;
- char *sp;
+ uint_t ntok = 0;
- sp = tspace;
- cp = line;
- for (*ntok = 0; *ntok < MAXARGS; (*ntok)++) {
- tokens[*ntok] = sp;
+ for (;;) {
while (ISSPACE(*cp))
cp++;
+
if (ISEOL(*cp))
break;
+
do {
- *sp++ = *cp++;
+ cp++;
} while (!ISSPACE(*cp) && !ISEOL(*cp));
- *sp++ = '\0';
+ ntok++;
+ }
+ return (ntok);
+}
+
+static boolean_t
+ifconfig(const char *ifinst, const char *fstr, const char *buf, boolean_t stdif)
+{
+ char syscmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1];
+ int status;
+
+ (void) snprintf(syscmd, sizeof (syscmd), SBIN_IFCONFIG " %s %s %s",
+ ifinst, fstr, buf);
+
+ if (stdif)
+ (void) strlcat(syscmd, CFG_CMDS_STD, sizeof (syscmd));
+
+ rcm_log_message(RCM_TRACE1, "IP: Exec: %s\n", syscmd);
+ if ((status = rcm_exec_cmd(syscmd)) != 0) {
+ if (WIFEXITED(status)) {
+ rcm_log_message(RCM_ERROR, _("IP: \"%s\" failed with "
+ "exit status %d\n"), syscmd, WEXITSTATUS(status));
+ } else {
+ rcm_log_message(RCM_ERROR, _("IP: Error: %s: %s\n"),
+ syscmd, strerror(errno));
+ }
+ return (B_FALSE);
}
+ return (B_TRUE);
}
diff --git a/usr/src/cmd/svc/milestone/net-init b/usr/src/cmd/svc/milestone/net-init
index 26b295dce9..7f0804af67 100644
--- a/usr/src/cmd/svc/milestone/net-init
+++ b/usr/src/cmd/svc/milestone/net-init
@@ -20,11 +20,9 @@
# CDDL HEADER END
#
#
-# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This is the second phase of TCP/IP configuration. The first part is
# run by the svc:/network/physical service and includes configuring the
# interfaces and setting the machine's hostname. The svc:/network/initial
@@ -52,10 +50,11 @@ if [ -f /etc/inet/ipaddrsel.conf ]; then
fi
#
-# Now that /usr is mounted, see if in.mpathd needs to be started by firing it
-# up in "adopt" mode; if there are no interfaces it needs to manage, it will
-# automatically exit. Note that it may already be running if we're not
-# executing as part of system boot.
+# If explicit IPMP groups are being used, in.mpathd will already be started.
+# However, if TRACK_INTERFACES_ONLY_WITH_GROUPS=no and no explicit IPMP
+# groups have been configured, then it still needs to be started. So, fire
+# it up in "adopt" mode; if there are no interfaces it needs to manage, it
+# will automatically exit.
#
/usr/bin/pgrep -x -u 0 -z `smf_zonename` in.mpathd >/dev/null 2>&1 || \
/usr/lib/inet/in.mpathd -a
diff --git a/usr/src/cmd/svc/milestone/net-loopback b/usr/src/cmd/svc/milestone/net-loopback
index 3bd5a0f525..d07afd4ada 100644
--- a/usr/src/cmd/svc/milestone/net-loopback
+++ b/usr/src/cmd/svc/milestone/net-loopback
@@ -20,10 +20,9 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
. /lib/svc/share/smf_include.sh
@@ -36,14 +35,6 @@
smf_configure_ip || exit $SMF_EXIT_OK
#
-# Cause ifconfig to not automatically start in.mpathd when IPMP groups are
-# configured. This is not strictly necessary but makes it so that in.mpathd
-# will always be started explicitly from /lib/svc/method/net-init (the
-# svc:/network/initial service), when we're sure that /usr is mounted.
-#
-SUNW_NO_MPATHD=; export SUNW_NO_MPATHD
-
-#
# Before any interfaces are configured, we need to set the system
# default IP forwarding behavior. This will be the setting for
# interfaces that don't modify the per-interface setting with the
diff --git a/usr/src/cmd/svc/milestone/net-physical b/usr/src/cmd/svc/milestone/net-physical
index 8530806768..bc74c2a206 100644
--- a/usr/src/cmd/svc/milestone/net-physical
+++ b/usr/src/cmd/svc/milestone/net-physical
@@ -20,7 +20,7 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T.
@@ -38,22 +38,9 @@
#
smf_configure_ip || exit $SMF_EXIT_OK
-# Print warnings to console
-warn_failed_ifs() {
- echo "Failed to $1 interface(s): $2" >/dev/msglog
-}
-
# Make sure that the libraries essential to this stage of booting can be found.
LD_LIBRARY_PATH=/lib; export LD_LIBRARY_PATH
-#
-# Cause ifconfig to not automatically start in.mpathd when IPMP groups are
-# configured. This is not strictly necessary but makes it so that in.mpathd
-# will always be started explicitly from /etc/init.d/inetinit, when we're
-# sure that /usr is mounted.
-#
-SUNW_NO_MPATHD=; export SUNW_NO_MPATHD
-
smf_netstrategy
if smf_is_globalzone; then
@@ -127,13 +114,18 @@ if [ "$interface_names" != "/etc/hostname.*[0-9]" ]; then
IFS="$ORIGIFS"
while [ $# -ge 2 ]; do
shift
- if [ $# -gt 1 -a "$2" != "/etc/hostname" ]; then
- while [ $# -gt 1 -a "$1" != "/etc/hostname" ]; do
- shift
- done
- else
- inet_list="$inet_list $1"
+ intf_name=$1
+ while [ $# -gt 1 -a "$2" != "/etc/hostname" ]; do
+ intf_name="$intf_name.$2"
shift
+ done
+ shift
+
+ read one rest < /etc/hostname.$intf_name
+ if [ "$one" = ipmp ]; then
+ ipmp_list="$ipmp_list $intf_name"
+ else
+ inet_list="$inet_list $intf_name"
fi
done
fi
@@ -151,17 +143,38 @@ if [ "$interface_names" != "/etc/hostname6.*[0-9]" ]; then
IFS="$ORIGIFS"
while [ $# -ge 2 ]; do
shift
- if [ $# -gt 1 -a "$2" != "/etc/hostname6" ]; then
- while [ $# -gt 1 -a "$1" != "/etc/hostname6" ]; do
- shift
- done
- else
- inet6_list="$inet6_list $1"
+ intf_name=$1
+ while [ $# -gt 1 -a "$2" != "/etc/hostname6" ]; do
+ intf_name="$intf_name.$2"
shift
+ done
+ shift
+
+ read one rest < /etc/hostname6.$intf_name
+ if [ "$one" = ipmp ]; then
+ ipmp6_list="$ipmp6_list $intf_name"
+ else
+ inet6_list="$inet6_list $intf_name"
fi
done
fi
+#
+# Create all of the IPv4 IPMP interfaces.
+#
+if [ -n "$ipmp_list" ]; then
+ set -- $ipmp_list
+ while [ $# -gt 0 ]; do
+ if /sbin/ifconfig $1 ipmp; then
+ ipmp_created="$ipmp_created $1"
+ else
+ ipmp_failed="$ipmp_failed $1"
+ fi
+ shift
+ done
+ [ -n "$ipmp_failed" ] && warn_failed_ifs "create IPv4 IPMP" \
+ "$ipmp_failed"
+fi
#
# Step through the IPv4 interface list and try to plumb every interface.
@@ -178,7 +191,7 @@ if [ -n "$inet_list" ]; then
fi
shift
done
- [ -n "$inet_failed" ] && warn_failed_ifs "plumb IPv4" $inet_failed
+ [ -n "$inet_failed" ] && warn_failed_ifs "plumb IPv4" "$inet_failed"
fi
# Run autoconf to connect to a WLAN if the interface is a wireless one
@@ -209,7 +222,24 @@ if [ -n "$inet6_list" ]; then
fi
shift
done
- [ -n "$inet6_failed" ] && warn_failed_ifs "plumb IPv6" $inet6_failed
+ [ -n "$inet6_failed" ] && warn_failed_ifs "plumb IPv6" "$inet6_failed"
+fi
+
+#
+# Create all of the IPv6 IPMP interfaces.
+#
+if [ -n "$ipmp6_list" ]; then
+ set -- $ipmp6_list
+ while [ $# -gt 0 ]; do
+ if /sbin/ifconfig $1 inet6 ipmp; then
+ ipmp6_created="$ipmp6_created $1"
+ else
+ ipmp6_failed="$ipmp6_failed $1"
+ fi
+ shift
+ done
+ [ -n "$ipmp6_failed" ] && warn_failed_ifs "create IPv6 IPMP" \
+ "$ipmp6_failed"
fi
if smf_is_globalzone; then
@@ -224,49 +254,24 @@ if smf_is_globalzone; then
fi
#
-# Process the /etc/hostname.* files of plumbed IPv4 interfaces. If an
-# /etc/hostname file is not present or is empty, the ifconfig auto-dhcp
-# / auto-revarp command will attempt to set the address, later.
+# Process the /etc/hostname[6].* files for IPMP interfaces. Processing these
+# before non-IPMP interfaces avoids accidental implicit IPMP group creation.
+#
+[ -n "$ipmp_created" ] && if_configure inet "IPMP" $ipmp_created
+[ -n "$ipmp6_created" ] && if_configure inet6 "IPMP" $ipmp6_created
+
#
-# If /etc/hostname.lo0 exists the loop below will do additional
-# configuration of lo0.
+# Process the /etc/hostname[6].* files for non-IPMP interfaces.
#
-if [ -n "$inet_plumbed" ]; then
- i4s_fail=
- echo "configuring IPv4 interfaces:\c"
- set -- $inet_plumbed
- while [ $# -gt 0 ]; do
- inet_process_hostname /sbin/ifconfig $1 inet \
- </etc/hostname.$1 >/dev/null
- [ $? != 0 ] && i4s_fail="$i4s_fail $1"
- echo " $1\c"
- shift
- done
- echo "."
- [ -n "$i4s_fail" ] && warn_failed_ifs "configure IPv4" $i4s_fail
-fi
+[ -n "$inet_plumbed" ] && if_configure inet "" $inet_plumbed
+[ -n "$inet6_plumbed" ] && if_configure inet6 "" $inet6_plumbed
#
-# Process the /etc/hostname6.* files of plumbed IPv6 interfaces. After
-# processing the hostname6 file, bring the interface up. If
-# /etc/hostname6.lo0 exists the loop below will do additional
-# configuration of lo0.
+# For the IPv4 and IPv6 interfaces that failed to plumb, find (or create)
+# IPMP meta-interfaces to host their data addresses.
#
-if [ -n "$inet6_plumbed" ]; then
- i6_fail=
- echo "configuring IPv6 interfaces:\c"
- set -- $inet6_plumbed
- while [ $# -gt 0 ]; do
- inet6_process_hostname /sbin/ifconfig $1 inet6 \
- </etc/hostname6.$1 >/dev/null &&
- /sbin/ifconfig $1 inet6 up
- [ $? != 0 ] && i6_fail="$i6_fail $1"
- echo " $1\c"
- shift
- done
- echo "."
- [ -n "$i6_fail" ] && warn_failed_ifs "configure IPv6" $i6_fail
-fi
+[ -n "$inet_failed" ] && move_addresses inet
+[ -n "$inet6_failed" ] && move_addresses inet6
# Run DHCP if requested. Skip boot-configured interface.
interface_names="`echo /etc/dhcp.*[0-9] 2>/dev/null`"
@@ -326,7 +331,7 @@ if [ "$interface_names" != '/etc/dhcp.*[0-9]' ]; then
done
IFS="$ORIGIFS"
unset ORIGIFS
- [ -n "$i4d_fail" ] && warn_failed_ifs "configure IPv4 DHCP" $i4d_fail
+ [ -n "$i4d_fail" ] && warn_failed_ifs "configure IPv4 DHCP" "$i4d_fail"
fi
# In order to avoid bringing up the interfaces that have
@@ -338,14 +343,6 @@ if [ "$_INIT_NET_STRATEGY" = "rarp" -o -z "$hostname" ]; then
fi
#
-# Process IPv4 and IPv6 interfaces that failed to plumb. Find an
-# alternative interface to host the addresses.
-#
-[ -n "$inet_failed" ] && move_addresses inet
-
-[ -n "$inet6_failed" ] && move_addresses inet6
-
-#
# If the /etc/defaultrouter file exists, process it now so that the next
# stage of booting will have access to NFS.
#
diff --git a/usr/src/cmd/svc/shell/net_include.sh b/usr/src/cmd/svc/shell/net_include.sh
index 51c87a40a8..71dc6a8256 100644
--- a/usr/src/cmd/svc/shell/net_include.sh
+++ b/usr/src/cmd/svc/shell/net_include.sh
@@ -20,13 +20,18 @@
# CDDL HEADER END
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T.
# All rights reserved.
#
+# Print warnings to console
+warn_failed_ifs() {
+ echo "Failed to $1 interface(s):$2" >/dev/msglog
+}
+
#
# shcat file
# Simulates cat in sh so it doesn't need to be on the root filesystem.
@@ -41,20 +46,28 @@ shcat() {
}
#
-# Inet_list, list of IPv4 interfaces.
-# Inet_plumbed, list of plumbed IPv4 interfaces.
-# Inet_failed, list of IPv4 interfaces that failed to plumb.
-# Inet6_list, list of IPv6 interfaces.
-# Inet6_plumbed, list of plumbed IPv6 interfaces.
-# Inet6_failed, list of IPv6 interfaces that failed to plumb.
+# inet_list list of IPv4 interfaces.
+# inet6_list list of IPv6 interfaces.
+# ipmp_list list of IPMP IPv4 interfaces.
+# ipmp6_list list of IPMP IPv6 interfaces.
+# inet_plumbed list of plumbed IPv4 interfaces.
+# inet6_plumbed list of plumbed IPv6 interfaces.
+# ipmp_created list of created IPMP IPv4 interfaces.
+# ipmp6_created list of created IPMP IPv6 interfaces.
+# inet_failed list of IPv4 interfaces that failed to plumb.
+# inet6_failed list of IPv6 interfaces that failed to plumb.
+# ipmp_failed list of IPMP IPv4 interfaces that failed to be created.
+# ipmp6_failed list of IPMP IPv6 interfaces that failed to be created.
#
unset inet_list inet_plumbed inet_failed \
- inet6_list inet6_plumbed inet6_failed
+ inet6_list inet6_plumbed inet6_failed \
+ ipmp_list ipmp_created ipmp_failed \
+ ipmp6_list ipmp6_created ipmp6_failed
+
#
# get_physical interface
#
-# Return physical interface corresponding to the given logical
-# interface.
+# Return physical interface corresponding to the given interface.
#
get_physical()
{
@@ -70,7 +83,7 @@ get_physical()
# get_logical interface
#
# Return logical interface number. Zero will be returned
-# if there is no explicit logical device number.
+# if there is no explicit logical number.
#
get_logical()
{
@@ -89,19 +102,18 @@ get_logical()
#
# if_comp if1 if2
#
-# Compare Interfaces. Do the physical interface names and logical interface
+# Compare interfaces. Do the physical interface names and logical interface
# numbers match?
#
if_comp()
{
- [ "`get_physical $1`" = "`get_physical $2`" ] && \
- [ `get_logical $1` -eq `get_logical $2` ]
+ physical_comp $1 $2 && [ `get_logical $1` -eq `get_logical $2` ]
}
-
+
#
# physical_comp if1 if2
#
-# Do the two devices share a physical interface?
+# Do the two interfaces share a physical interface?
#
physical_comp()
{
@@ -129,19 +141,110 @@ in_list()
}
#
-# get_group_from_hostname interface type
+# get_inactive_ifname groupname
+#
+# Return the name of an inactive interface in `groupname', if one exists.
+#
+get_inactive_ifname()
+{
+ ORIGIFS="$IFS"
+ /sbin/ipmpstat -gP -o groupname,interfaces |
+ while IFS=: read groupname ifnames; do
+ #
+ # Skip other IPMP groups.
+ #
+ [ "$groupname" != "$1" ] && continue
+
+ #
+ # Standby interfaces are always enclosed in ()'s, so look
+ # for the first interface name starting with a "(", and
+ # strip the parentheses off.
+ #
+ IFS=" "
+ for ifname in $ifnames; do
+ case "$ifname" in
+ '('*) IFS="()"
+ echo $ifname
+ IFS="$ORIGIFS"
+ return
+ ;;
+ *) ;;
+ esac
+ done
+ done
+ IFS="$ORIGIFS"
+}
+
+#
+# get_groupifname groupname
+#
+# Return the IPMP meta-interface name for the group, if it exists.
+#
+get_groupifname()
+{
+ /sbin/ipmpstat -gP -o groupname,group | while IFS=: read name ifname; do
+ if [ "$name" = "$1" ]; then
+ echo "$ifname"
+ return
+ fi
+ done
+}
+
+#
+# create_ipmp ifname groupname type
+#
+# Helper function for create_groupifname() that returns zero if it's able
+# to create an IPMP interface of the specified type and place it in the
+# specified group, or non-zero otherwise.
+#
+create_ipmp()
+{
+ /sbin/ifconfig $1 >/dev/null 2>&1 && return 1
+ /sbin/ifconfig $1 inet6 >/dev/null 2>&1 && return 1
+ /sbin/ifconfig $1 $3 ipmp group $2 2>/dev/null
+}
+
+#
+# create_groupifname groupname type
+#
+# Create an IPMP meta-interface name for the group. We only use this
+# function if all of the interfaces in the group failed at boot and there
+# were no /etc/hostname[6].<if> files for the IPMP meta-interface.
+#
+create_groupifname()
+{
+ #
+ # This is a horrible way to count from 0 to 999, but in sh and
+ # without necessarily having /usr mounted, what else can we do?
+ #
+ for a in "" 1 2 3 4 5 6 7 8 9; do
+ for b in 0 1 2 3 4 5 6 7 8 9; do
+ for c in 0 1 2 3 4 5 6 7 8 9; do
+ # strip leading zeroes
+ [ "$a" = "" ] && [ "$b" = 0 ] && b=""
+ if create_ipmp ipmp$a$b$c $1 $2; then
+ echo ipmp$a$b$c
+ return
+ fi
+ done
+ done
+ done
+}
+
+#
+# get_hostname_ipmpinfo interface type
#
-# Return all group settings from hostname file for a given interface.
+# Return all requested IPMP keywords from hostname file for a given interface.
#
# Example:
-# get_group_from_hostname hme0 inet
+# get_hostname_ipmpinfo hme0 inet keyword [ keyword ... ]
#
-get_group_from_hostname()
+get_hostname_ipmpinfo()
{
case "$2" in
- inet) file=/etc/hostname.$1
+ inet) file=/etc/hostname.$1
;;
- inet6) file=/etc/hostname6.$1
+ inet6) file=/etc/hostname6.$1
;;
*)
return
@@ -150,16 +253,21 @@ get_group_from_hostname()
[ -r "$file" ] || return
+ type=$2
+ shift 2
+
#
- # Read through the hostname file looking for group settings
- # There may be several group settings in the file. It is up
- # to the caller to pick the right one (i.e. the last one).
+ # Read through the hostname file looking for the specified
+ # keywords. Since there may be several keywords that cancel
+ # each other out, the caller must post-process as appropriate.
#
while read line; do
[ -z "$line" ] && continue
- /sbin/ifparse -s "$2" $line
- done < "$file" | while read one two three; do
- [ "$one" = "group" ] && echo "$two"
+ /sbin/ifparse -s "$type" $line
+ done < "$file" | while read one two; do
+ for keyword in "$@"; do
+ [ "$one" = "$keyword" ] && echo "$one $two"
+ done
done
}
@@ -174,7 +282,6 @@ get_group_from_hostname()
get_group_for_type()
{
physical=`get_physical $1`
-
type=$2
group=""
@@ -183,184 +290,77 @@ get_group_for_type()
# the reason for the second while loop.
#
shift 2
- while [ $# -gt 0 ]; do
- if if_comp "$physical" $1; then
- get_group_from_hostname $1 $type
+ for ifname in "$@"; do
+ if if_comp "$physical" $ifname; then
+ get_hostname_ipmpinfo $ifname $type group
fi
- shift
done | while :; do
- read next || {
+ read keyword grname || {
echo "$group"
break
}
- group="$next"
+ group="$grname"
done
}
#
-# get_group interface [ configured | failed ]
-#
-# If there is both an inet and inet6 version of an interface, the group
-# could be set in either set of hostname files.
-#
-# Inet6 is configured after inet, so if the group is set in both
-# sets of hostname files, the inet6 file wins.
-#
-# The "configured" argument should be used to get the group for
-# an interface that has been plumbed into the stack and configured. Use
-# the "failed" argument to get the group for an interface that failed to
-# plumb.
-#
-get_group()
-{
- group=""
-
- case "$2" in
- configured)
- group=`get_group_for_type $1 inet6 $inet6_plumbed`
- ;;
- failed)
- group=`get_group_for_type $1 inet6 $inet6_list`
- ;;
- *)
- return
- ;;
- esac
-
- if [ -z "$group" ]; then
- if [ "$2" = configured ]; then
- group=`get_group_for_type $1 inet $inet_plumbed`
- else
- group=`get_group_for_type $1 inet $inet_list`
- fi
- fi
-
- echo $group
-}
-
-#
-# get_standby_from_hostname interface type
-#
-# Return any "standby" or "-standby" flags in the hostname file.
-#
-# Example:
-# get_standby_from_hostname hme0 inet6
-#
-#
-get_standby_from_hostname()
-{
- case "$2" in
- inet) file=/etc/hostname.$1
- ;;
- inet6) file=/etc/hostname6.$1
- ;;
- *)
- return
- ;;
- esac
-
- [ -r "$file" ] || return
-
- #
- # There may be several instances of the "standby" and
- # "-standby" flags in the hostname file. It is up to
- # the caller to pick the correct one.
- #
- while read line; do
- [ -z "$line" ] && continue
- /sbin/ifparse -s "$2" $line
- done < "$file" | while read one two; do
- [ "$one" = "standby" ] || [ "$one" = "-standby" ] \
- && echo "$one"
- done
-}
-
-#
-# get_standby_for_type interface type plumbed_list
+# get_standby_for_type interface type list
#
# Look through the set of hostname files associated with the same physical
-# interface as "interface", and determine whether they would configure
-# the interface as a standby interface.
+# interface as "interface", and print the standby value ("standby",
+# "-standby", or nothing). Only hostname files associated with the
+# physical interface or logical interface zero can set this flag.
#
get_standby_for_type()
{
-
physical=`get_physical $1`
type=$2
- final=""
-
#
- # The last "standby" or "-standby" flag is the one that counts,
- # which is the reason for the second while loop.
+ # The last setting of "standby" or "-standby" is the one that
+ # counts, which is the reason for the second while loop.
#
shift 2
- while [ $# -gt 0 ]; do
- if [ "`get_physical $1`" = "$physical" ]; then
- get_standby_from_hostname $1 $type
+ for ifname in "$@"; do
+ if if_comp "$physical" $ifname; then
+ get_hostname_ipmpinfo $ifname $type standby -standby
fi
- shift
done | while :; do
- read next || {
- echo "$final"
+ read keyword || {
+ echo "$iftype"
break
}
- final="$next"
+ iftype="$keyword"
done
}
#
-# is_standby interface
+# get_group interface
#
-# Determine whether a configured interface is a standby interface.
-#
-# Both the inet and inet6 hostname file sets must be checked.
-# If "standby" or "-standby" is set in the inet6 hostname file set,
-# don't bother looking at the inet set.
+# If there is both an inet and inet6 version of an interface, the group
+# could be set in either set of hostname files. Since inet6 is configured
+# after inet, if there's a setting in both files, inet6 wins.
#
-is_standby()
+get_group()
{
- standby=`get_standby_for_type $1 inet6 $inet6_plumbed`
-
- if [ -z "$standby" ]; then
- standby=`get_standby_for_type $1 inet $inet_plumbed`
- fi
-
- # The return value is the value of the following test.
- [ "$standby" = "standby" ]
+ group=`get_group_for_type $1 inet6 $inet6_list`
+ [ -z "$group" ] && group=`get_group_for_type $1 inet $inet_list`
+ echo $group
}
#
-# get_alternate interface plumbed_list
-#
-# Look for a plumbed interface in the same group as "interface".
-# A standby interface is preferred over a non-standby interface.
+# is_standby interface
#
-# Example:
-# get_alternate hme0 $inet_plumbed
+# If there is both an inet and inet6 version of an interface, the
+# "standby" or "-standby" flag could be set in either set of hostname
+# files. Since inet6 is configured after inet, if there's a setting in
+# both files, inet6 wins.
#
-get_alternate()
+is_standby()
{
- mygroup=`get_group $1 failed`
- [ -z "$mygroup" ] && return
-
- maybe=""
-
- shift
- while [ $# -gt 0 ]; do
- group=`get_group $1 configured`
- if [ "$group" = "$mygroup" ]; then
- if is_standby $1; then
- get_physical $1
- return
- else
- [ -z "$maybe" ] && maybe=$1
- fi
- fi
- shift
- done
-
- get_physical $maybe
+ standby=`get_standby_for_type $1 inet6 $inet6_list`
+ [ -z "$standby" ] && standby=`get_standby_for_type $1 inet $inet_list`
+ [ "$standby" = "standby" ]
}
#
@@ -394,7 +394,7 @@ doDHCPhostname()
#
# If there is only line in an hostname file we assume it contains
# the old style address which results in the interface being brought up
-# and the netmask and broadcast address being set.
+# and the netmask and broadcast address being set ($inet_oneline_epilogue).
#
# If there are multiple lines we assume the file contains a list of
# commands to the processor with neither the implied bringing up of the
@@ -403,6 +403,8 @@ doDHCPhostname()
# Return non-zero if any command fails so that the caller may alert
# users to errors in the configuration.
#
+inet_oneline_epilogue="netmask + broadcast + up"
+
inet_process_hostname()
{
if doDHCPhostname $2; then
@@ -418,7 +420,7 @@ inet_process_hostname()
ifcmds=""
retval=0
- while read line; do
+ while read one rest; do
if [ -n "$ifcmds" ]; then
#
# This handles the first N-1
@@ -427,7 +429,14 @@ inet_process_hostname()
$* $ifcmds || retval=$?
multiple_lines=true
fi
- ifcmds="$line"
+
+ #
+ # Strip out the "ipmp" keyword if it's the
+ # first token, since it's used to control
+ # interface creation, not configuration.
+ #
+ [ "$one" = ipmp ] && one=
+ ifcmds="$one $rest"
done
#
@@ -437,8 +446,8 @@ inet_process_hostname()
#
[ -z "$ifcmds" ] && return $retval
if [ $multiple_lines = false ]; then
- # The traditional single-line hostname file.
- ifcmds="$ifcmds netmask + broadcast + up"
+ # The traditional one-line hostname file.
+ ifcmds="$ifcmds $inet_oneline_epilogue"
fi
#
@@ -470,7 +479,13 @@ inet_process_hostname()
inet6_process_hostname()
{
retval=0
- while read ifcmds; do
+ while read one rest; do
+ #
+ # See comment in inet_process_hostname for details.
+ #
+ [ "$one" = ipmp ] && one=
+ ifcmds="$one $rest"
+
if [ -n "$ifcmds" ]; then
$* $ifcmds || retval=$?
fi
@@ -479,10 +494,9 @@ inet6_process_hostname()
}
#
-# Process interfaces that failed to plumb. Find an alternative
-# interface to host the addresses. For IPv6, only static addresses
-# defined in hostname6 files are moved, autoconfigured addresses are
-# not moved.
+# Process interfaces that failed to plumb. Find the IPMP meta-interface
+# that should host the addresses. For IPv6, only static addresses defined
+# in hostname6 files are moved, autoconfigured addresses are not moved.
#
# Example:
# move_addresses inet6
@@ -491,35 +505,43 @@ move_addresses()
{
type="$1"
eval "failed=\"\$${type}_failed\""
- eval "plumbed=\"\$${type}_plumbed\""
eval "list=\"\$${type}_list\""
- process_hostname="${type}_process_hostname"
+ process_func="${type}_process_hostname"
processed=""
if [ "$type" = inet ]; then
- echo "moving addresses from failed IPv4 interfaces:\c"
+ typedesc="IPv4"
zaddr="0.0.0.0"
hostpfx="/etc/hostname"
else
- echo "moving addresses from failed IPv6 interfaces:\c"
+ typedesc="IPv6"
zaddr="::"
hostpfx="/etc/hostname6"
fi
- set -- $failed
- while [ $# -gt 0 ]; do
- in_list if_comp $1 $processed && { shift; continue; }
-
- alternate="`get_alternate $1 $plumbed`"
- if [ -z "$alternate" ]; then
- in_list physical_comp $1 $processed || {
- echo " $1 (couldn't move, no" \
- "alternative interface)\c"
- processed="$processed $1"
+ echo "Moving addresses from missing ${typedesc} interface(s):\c" \
+ >/dev/msglog
+
+ for ifname in $failed; do
+ in_list if_comp $ifname $processed && continue
+
+ group=`get_group $ifname`
+ if [ -z "$group" ]; then
+ in_list physical_comp $ifname $processed || {
+ echo " $ifname (not moved -- not" \
+ "in an IPMP group)\c" >/dev/msglog
+ processed="$processed $ifname"
}
- shift
continue
fi
+
+ #
+ # Look up the IPMP meta-interface name. If one doesn't exist,
+ # create it.
+ #
+ grifname=`get_groupifname $group`
+ [ -z "$grifname" ] && grifname=`create_groupifname $group $type`
+
#
# The hostname files are processed twice. In the first
# pass, we are looking for all commands that apply
@@ -528,7 +550,7 @@ move_addresses()
# whether the address represents a failover address
# or not until we've read all the files associated with the
# interface.
-
+ #
# In the first pass through the hostname files, all
# additional logical interface commands are removed.
# The remaining commands are concatenated together and
@@ -541,19 +563,18 @@ move_addresses()
# the embedded "set" command set the address later.
#
/sbin/ifparse -f $type `
- for item in $list; do
- if_comp $1 $item && \
- $process_hostname /sbin/ifparse \
- $type < $hostpfx.$item
- done | while read three four; do
- [ "$three" != addif ] && \
- echo "$three $four \c"
- done` | while read one two; do
- [ -z "$one" ] && continue
- line="addif $zaddr $one $two"
- /sbin/ifconfig $alternate $type \
- -standby $line >/dev/null
- done
+ for item in $list; do
+ if_comp $ifname $item && $process_func \
+ /sbin/ifparse $type < $hostpfx.$item
+ done | while read three four; do
+ [ "$three" != addif ] && echo "$three $four \c"
+ done` | while read one two; do
+ [ -z "$one" ] && continue
+ [ "$one $two" = "$inet_oneline_epilogue" ] && \
+ continue
+ line="addif $zaddr $one $two"
+ /sbin/ifconfig $grifname $type $line >/dev/null
+ done
#
# In the second pass, look for the the "addif" commands
@@ -561,22 +582,75 @@ move_addresses()
# commands are not valid in logical interface hostname
# files.
#
- if [ "$1" = "`get_physical $1`" ]; then
- $process_hostname /sbin/ifparse -f $type \
- <$hostpfx.$1 | while read one two; do
- [ "$one" = addif ] && \
- /sbin/ifconfig $alternate $type -standby \
- addif $two >/dev/null
+ if [ "$ifname" = "`get_physical $ifname`" ]; then
+ $process_func /sbin/ifparse -f $type < $hostpfx.$ifname \
+ | while read one two; do
+ [ "$one" = addif ] && \
+ /sbin/ifconfig $grifname $type \
+ addif $two >/dev/null
done
fi
- in_list physical_comp $1 $processed || {
- echo " $1 (moved to $alternate)\c"
- processed="$processed $1"
+ #
+ # Check if this was an active interface in the group. If so,
+ # activate another IP interface (if possible).
+ #
+ is_standby $ifname || inactive=`get_inactive_ifname $group`
+ [ -n "$inactive" ] && /sbin/ifconfig $inactive $type -standby
+
+ in_list physical_comp $ifname $processed || {
+ processed="$processed $ifname"
+ echo " $ifname (moved to $grifname\c" > /dev/msglog
+ if [ -n "$inactive" ]; then
+ echo " and cleared 'standby' on\c" > /dev/msglog
+ echo " $inactive to compensate\c" > /dev/msglog
+ fi
+ echo ")\c" > /dev/msglog
}
+ inactive=""
+ done
+ echo "." >/dev/msglog
+}
+
+#
+# if_configure type class interface_list
+#
+# Configure all of the interfaces of type `type' (e.g., "inet6") in
+# `interface_list' according to their /etc/hostname[6].* files. `class'
+# describes the class of interface (e.g., "IPMP"), as a diagnostic aid.
+# For inet6 interfaces, the interface is also brought up.
+#
+if_configure()
+{
+ fail=
+ type=$1
+ class=$2
+ process_func=${type}_process_hostname
+ shift 2
+
+ if [ "$type" = inet ]; then
+ desc="IPv4"
+ hostpfx="/etc/hostname"
+ else
+ desc="IPv6"
+ hostpfx="/etc/hostname6"
+ fi
+ [ -n "$class" ] && desc="$class $desc"
+
+ echo "configuring $desc interfaces:\c"
+ while [ $# -gt 0 ]; do
+ $process_func /sbin/ifconfig $1 $type < $hostpfx.$1 >/dev/null
+ if [ $? != 0 ]; then
+ fail="$fail $1"
+ elif [ "$type" = inet6 ]; then
+ /sbin/ifconfig $1 inet6 up || fail="$fail $1"
+ fi
+ echo " $1\c"
shift
done
echo "."
+
+ [ -n "$fail" ] && warn_failed_ifs "configure $desc" "$fail"
}
#
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 46b2b5a958..dc90957dfa 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -615,13 +615,10 @@ const struct ioc {
{ (uint_t)SIOCSIPSECONFIG, "SIOCSIPSECONFIG", NULL },
{ (uint_t)SIOCDIPSECONFIG, "SIOCDIPSECONFIG", NULL },
{ (uint_t)SIOCLIPSECONFIG, "SIOCLIPSECONFIG", NULL },
- { (uint_t)SIOCLIFFAILOVER, "SIOCLIFFAILOVER", "lifreq" },
- { (uint_t)SIOCLIFFAILBACK, "SIOCLIFFAILBACK", "lifreq" },
- { (uint_t)SIOCSIPMPFAILBACK, "SIOCSIPMPFAILBACK", NULL },
+ { (uint_t)SIOCGLIFBINDING, "SIOCGLIFBINDING", "lifreq" },
{ (uint_t)SIOCSLIFGROUPNAME, "SIOCSLIFGROUPNAME", "lifreq" },
{ (uint_t)SIOCGLIFGROUPNAME, "SIOCGLIFGROUPNAME", "lifreq" },
- { (uint_t)SIOCGLIFOINDEX, "SIOCGLIFOINDEX", "lifreq" },
- { (uint_t)SIOCSLIFOINDEX, "SIOCSLIFOINDEX", "lifreq" },
+ { (uint_t)SIOCGLIFGROUPINFO, "SIOCGLIFGROUPINFO", "lifgroupinfo" },
{ (uint_t)SIOCGDSTINFO, "SIOCGDSTINFO", NULL },
{ (uint_t)SIOCGIP6ADDRPOLICY, "SIOCGIP6ADDRPOLICY", NULL },
{ (uint_t)SIOCSIP6ADDRPOLICY, "SIOCSIP6ADDRPOLICY", NULL },
diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c
index edc610559d..8165f64f99 100644
--- a/usr/src/cmd/truss/print.c
+++ b/usr/src/cmd/truss/print.c
@@ -19,16 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#define _SYSCALL32 /* make 32-bit compat headers visible */
#include <stdio.h>
@@ -73,6 +70,7 @@
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/sctp.h>
+#include <net/route.h>
#include <sys/utrap.h>
#include <sys/lgrp_user.h>
#include <sys/door.h>
@@ -1749,6 +1747,8 @@ prt_sol(private_t *pri, int raw, long val)
{
if (val == SOL_SOCKET) {
outstring(pri, "SOL_SOCKET");
+ } else if (val == SOL_ROUTE) {
+ outstring(pri, "SOL_ROUTE");
} else {
const struct protoent *p;
struct protoent res;
@@ -1826,6 +1826,18 @@ sol_optname(private_t *pri, long val)
#undef CBSIZE
}
+const char *
+route_optname(private_t *pri, long val)
+{
+ switch (val) {
+ case RT_AWARE:
+ return ("RT_AWARE");
+ default:
+ (void) snprintf(pri->code_buf, sizeof (pri->code_buf),
+ "0x%lx", val);
+ return (pri->code_buf);
+ }
+}
const char *
tcp_optname(private_t *pri, long val)
@@ -1918,6 +1930,8 @@ prt_son(private_t *pri, int raw, long val)
switch (pri->sys_args[1]) {
case SOL_SOCKET: outstring(pri, sol_optname(pri, val));
break;
+ case SOL_ROUTE: outstring(pri, route_optname(pri, val));
+ break;
case IPPROTO_TCP: outstring(pri, tcp_optname(pri, val));
break;
case IPPROTO_UDP: outstring(pri, udp_optname(pri, val));
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index 72b6ce5c76..fb8f540cb5 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -2397,6 +2397,7 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
*/
char buffer[INET6_ADDRSTRLEN];
void *addr;
+ const char *nomatch = "no matching subnet found in netmasks(4)";
if (af == AF_INET)
addr = &((struct sockaddr_in *)
@@ -2405,14 +2406,23 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
addr = &((struct sockaddr_in6 *)
(&lifr.lifr_addr))->sin6_addr;
- /* Find out what netmask interface is going to be using */
+ /*
+ * Find out what netmask the interface is going to be using.
+ * If we just brought up an IPMP data address on an underlying
+ * interface above, the address will have already migrated, so
+ * the SIOCGLIFNETMASK won't be able to find it (but we need
+ * to bring the address up to get the actual netmask). Just
+ * omit printing the actual netmask in this corner-case.
+ */
if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
- inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
- goto bad;
- zerror(zlogp, B_FALSE,
- "WARNING: %s: no matching subnet found in netmasks(4) for "
- "%s; using default of %s.",
- lifr.lifr_name, addrstr4, buffer);
+ inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) {
+ zerror(zlogp, B_FALSE, "WARNING: %s; using default.",
+ nomatch);
+ } else {
+ zerror(zlogp, B_FALSE,
+ "WARNING: %s: %s: %s; using default of %s.",
+ lifr.lifr_name, nomatch, addrstr4, buffer);
+ }
}
/*