summary refs log tree commit diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/Makefile.lint 1
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile 9
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c 185
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h 3
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c 235
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h 54
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c 120
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c 24
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h 13
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl 32
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c 50
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h 3
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c 13
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c 19
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h 10
-rw-r--r-- usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c 13
-rw-r--r-- usr/src/cmd/cmd-inet/usr.bin/Makefile 12
-rw-r--r-- usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c 29
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile 22
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h 1
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c 870
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c 51
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c 26
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c 10
-rw-r--r-- usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h 1
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/arp.c 54
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile 9
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c 911
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c 103
-rw-r--r-- usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h 9
-rw-r--r-- usr/src/cmd/mdb/Makefile.common 1
-rw-r--r-- usr/src/cmd/mdb/Makefile.kmdb 2
-rw-r--r-- usr/src/cmd/mdb/Makefile.kmdb.files 8
-rw-r--r-- usr/src/cmd/mdb/Makefile.kmdb.targ 7
-rw-r--r-- usr/src/cmd/mdb/Makefile.mdb 13
-rw-r--r-- usr/src/cmd/mdb/common/mdb/mdb_ks.h 8
-rw-r--r-- usr/src/cmd/mdb/common/mdb/mdb_modapi.h 2
-rw-r--r-- usr/src/cmd/mdb/common/modules/arp/arp.c 700
-rw-r--r-- usr/src/cmd/mdb/common/modules/ip/ip.c 11
-rw-r--r-- usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c 103
-rw-r--r-- usr/src/cmd/mdb/intel/amd64/arp/Makefile 35
-rw-r--r-- usr/src/cmd/mdb/intel/ia32/arp/Makefile 34
-rw-r--r-- usr/src/cmd/mdb/sparc/v9/arp/Makefile 35
-rw-r--r-- usr/src/common/util/ffs.c (renamed from usr/src/lib/libc/port/gen/ffs.c) 40
-rw-r--r-- usr/src/lib/libc/amd64/Makefile 2
-rw-r--r-- usr/src/lib/libc/i386/Makefile.com 2
-rw-r--r-- usr/src/lib/libc/sparc/Makefile 2
-rw-r--r-- usr/src/lib/libc/sparcv9/Makefile 2
-rw-r--r-- usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h 8
-rw-r--r-- usr/src/lib/libdhcpagent/common/dhcpagent_util.c 8
-rw-r--r-- usr/src/lib/libinetcfg/Makefile.com 9
-rw-r--r-- usr/src/lib/libinetcfg/common/inetcfg.c 159
-rw-r--r-- usr/src/lib/libinetcfg/common/inetcfg_dad.c 624
-rw-r--r-- usr/src/lib/libinetcfg/common/inetcfg_dad.h 46
-rw-r--r-- usr/src/pkgdefs/SUNWmdb/prototype_i386 2
-rw-r--r-- usr/src/pkgdefs/SUNWmdb/prototype_sparc 1
-rw-r--r-- usr/src/pkgdefs/SUNWmdbr/prototype_i386 2
-rw-r--r-- usr/src/pkgdefs/SUNWmdbr/prototype_sparc 1
-rw-r--r-- usr/src/uts/common/Makefile.files 1
-rw-r--r-- usr/src/uts/common/inet/arp.h 72
-rw-r--r-- usr/src/uts/common/inet/arp/arp.c 2112
-rw-r--r-- usr/src/uts/common/inet/arp_impl.h 39
-rw-r--r-- usr/src/uts/common/inet/ip.h 54
-rw-r--r-- usr/src/uts/common/inet/ip/ip.c 632
-rw-r--r-- usr/src/uts/common/inet/ip/ip6.c 162
-rw-r--r-- usr/src/uts/common/inet/ip/ip6_if.c 31
-rw-r--r-- usr/src/uts/common/inet/ip/ip_if.c 495
-rw-r--r-- usr/src/uts/common/inet/ip/ip_ndp.c 897
-rw-r--r-- usr/src/uts/common/inet/ip/ip_squeue.c 4
-rw-r--r-- usr/src/uts/common/inet/ip6.h 2
-rw-r--r-- usr/src/uts/common/inet/ip_if.h 17
-rw-r--r-- usr/src/uts/common/inet/ip_ndp.h 26
-rw-r--r-- usr/src/uts/common/net/if.h 3
-rw-r--r-- usr/src/uts/common/net/if_arp.h 14
-rw-r--r-- usr/src/uts/common/netinet/arp.h 13
-rw-r--r-- usr/src/uts/common/os/subr.c 26
-rw-r--r-- usr/src/uts/common/os/sunddi.c 3
-rw-r--r-- usr/src/uts/common/sys/systm.h 2
78 files changed, 4750 insertions, 4614 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 7d7073b2aa..857b0e0638 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -72,6 +72,7 @@ COMMON_SUBDIRS = \
cmd/cmd-inet/usr.lib/dsvclockd \
cmd/cmd-inet/usr.lib/in.dhcpd \
cmd/cmd-inet/usr.lib/in.mpathd \
+ cmd/cmd-inet/usr.lib/in.ndpd \
cmd/cmd-inet/usr.lib/inetd \
cmd/cmd-inet/usr.lib/mipagent \
cmd/cmd-inet/usr.lib/pppoe \
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile b/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile
index 8acf78f5a0..af6d01ccc7 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -30,7 +29,7 @@ PROG = dhcpagent
ROOTFS_PROG = $(PROG)
DEFAULTFILES = dhcpagent.dfl
-LOCOBJS = adopt.o agent.o arp_check.o async.o bound.o class_id.o defaults.o \
+LOCOBJS = adopt.o agent.o async.o bound.o class_id.o defaults.o \
dlpi_io.o dlprims.o inform.o init_reboot.o interface.o ipc_action.o \
packet.o release.o renew.o request.o script_handler.o select.o util.o
COMDIR = $(SRC)/common/net/dhcp
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
index ffd4d14f59..28b5d33004 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c
@@ -27,7 +27,6 @@
#include <sys/types.h>
#include <stdlib.h>
-#include <assert.h>
#include <errno.h>
#include <locale.h>
#include <string.h>
@@ -36,10 +35,11 @@
#include <stdio.h>
#include <stdio_ext.h>
#include <dhcp_hostconf.h>
-#include <dhcp_symbol.h>
#include <dhcpagent_ipc.h>
#include <dhcpmsg.h>
#include <netinet/dhcp.h>
+#include <net/route.h>
+#include <sys/sockio.h>
#include "async.h"
#include "agent.h"
@@ -59,11 +59,12 @@ char *class_id;
iu_eh_t *eh;
iu_tq_t *tq;
pid_t grandparent;
+int rtsock_fd;
static boolean_t shutdown_started = B_FALSE;
static boolean_t do_adopt = B_FALSE;
static unsigned int debug_level = 0;
-static iu_eh_callback_t accept_event, ipc_event;
+static iu_eh_callback_t accept_event, ipc_event, rtsock_event;
/*
* The ipc_cmd_allowed[] table indicates which IPC commands are allowed in
@@ -89,6 +90,7 @@ static int ipc_cmd_allowed[DHCP_NSTATES][DHCP_NIPC] = {
/* INIT */ { 1, 0, 1, 0, 1, 1, 1, 0 },
/* SELECTING */ { 1, 0, 1, 0, 1, 1, 0, 0 },
/* REQUESTING */ { 1, 0, 1, 0, 1, 1, 0, 0 },
+ /* PRE_BOUND */ { 1, 1, 1, 1, 0, 1, 0, 1 },
/* BOUND */ { 1, 1, 1, 1, 0, 1, 0, 1 },
/* RENEWING */ { 1, 1, 1, 1, 0, 1, 0, 1 },
/* REBINDING */ { 1, 1, 1, 1, 0, 1, 0, 1 },
@@ -245,6 +247,22 @@ main(int argc, char **argv)
}
/*
+ * Create the global routing socket. This is used for monitoring
+ * interface transitions, so that we learn about the kernel's Duplicate
+ * Address Detection status, and for inserting and removing default
+ * routes as learned from DHCP servers.
+ */
+ rtsock_fd = socket(PF_ROUTE, SOCK_RAW, AF_INET);
+ if (rtsock_fd == -1) {
+ dhcpmsg(MSG_ERR, "cannot open routing socket");
+ return (EXIT_FAILURE);
+ }
+ if (iu_register_event(eh, rtsock_fd, POLLIN, rtsock_event, 0) == -1) {
+ dhcpmsg(MSG_ERR, "cannot register routing socket for messages");
+ return (EXIT_FAILURE);
+ }
+
+ /*
* if the -a (adopt) option was specified, try to adopt the
* kernel-managed interface before we start. Our grandparent
* will be waiting for us to finish this, so signal him when
@@ -844,3 +862,164 @@ load_option:
return;
}
}
+
+/*
+ * check_rtm_addr(): determine if routing socket message matches interface
+ * address
+ *
+ * input: struct ifa_msghdr *, int: routing socket message and its length
+ * struct in_addr: IP address
+ * output: boolean_t: B_TRUE if the message's RTA_IFA address matches
+ */
+static boolean_t
+check_rtm_addr(struct ifa_msghdr *ifam, int msglen, struct in_addr addr)
+{
+ char *cp, *lim;
+ uint_t flag;
+ struct sockaddr *sa;
+ struct sockaddr_in *sinp;
+
+ if (!(ifam->ifam_addrs & RTA_IFA))
+ return (B_FALSE);
+
+ cp = (char *)(ifam + 1);
+ lim = (char *)ifam + msglen;
+ for (flag = 1; flag < RTA_IFA; flag <<= 1) {
+ if (ifam->ifam_addrs & flag) {
+ /* LINTED: alignment */
+ sa = (struct sockaddr *)cp;
+ if ((char *)(sa + 1) > lim)
+ return (B_FALSE);
+ switch (sa->sa_family) {
+ case AF_UNIX:
+ cp += sizeof (struct sockaddr_un);
+ break;
+ case AF_INET:
+ cp += sizeof (struct sockaddr_in);
+ break;
+ case AF_LINK:
+ cp += sizeof (struct sockaddr_dl);
+ break;
+ case AF_INET6:
+ cp += sizeof (struct sockaddr_in6);
+ break;
+ default:
+ cp += sizeof (struct sockaddr);
+ break;
+ }
+ }
+ }
+ /* LINTED: alignment */
+ sinp = (struct sockaddr_in *)cp;
+ if ((char *)(sinp + 1) > lim)
+ return (B_FALSE);
+ return (sinp->sin_addr.s_addr == addr.s_addr);
+}
+
+/*
+ * rtsock_event(): fetches routing socket messages and updates internal
+ * interface state based on those messages.
+ *
+ * input: iu_eh_t *: unused
+ * int: the routing socket file descriptor
+ * (other arguments unused)
+ * output: void
+ */
+
+/* ARGSUSED */
+static void
+rtsock_event(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg)
+{
+ struct ifslist *ifs;
+ union {
+ struct ifa_msghdr ifam;
+ char buf[1024];
+ } msg;
+ uint16_t ifindex;
+ struct lifreq lifr;
+ char *fail;
+ int msglen;
+
+ if ((msglen = read(fd, &msg, sizeof (msg))) <= 0)
+ return;
+
+ /*
+ * These are the messages that can identify a particular logical
+ * interface by local IP address.
+ */
+ if (msg.ifam.ifam_type != RTM_DELADDR &&
+ msg.ifam.ifam_type != RTM_NEWADDR)
+ return;
+
+ /* Note that ifam_index is just 16 bits */
+ ifindex = msg.ifam.ifam_index;
+
+ for (ifs = lookup_ifs_by_uindex(ifindex, NULL);
+ ifs != NULL;
+ ifs = lookup_ifs_by_uindex(ifindex, ifs)) {
+
+ /*
+ * The if_sock_ip_fd is set to a non-negative integer by
+ * configure_bound(). If it's negative, then DHCP doesn't
+ * think we're bound.
+ *
+ * For pre-bound interfaces, we want to check to see if the
+ * IFF_UP bit has been reported. This means that DAD is
+ * complete.
+ */
+ if (ifs->if_sock_ip_fd == -1 && ifs->if_state != PRE_BOUND)
+ continue;
+
+ /*
+ * Since we cannot trust the flags reported by the routing
+ * socket (they're just 32 bits -- and thus never include
+ * IFF_DUPLICATE), and we can't trust the ifindex (it's only 16
+ * bits and also doesn't reflect the alias in use), we get
+ * flags on all matching interfaces, and go by that.
+ */
+ (void) strlcpy(lifr.lifr_name, ifs->if_name,
+ sizeof (lifr.lifr_name));
+ if (ioctl(ifs->if_sock_fd, SIOCGLIFFLAGS, &lifr) == -1) {
+ fail = "unable to retrieve interface flags";
+ } else if (!check_rtm_addr(&msg.ifam, msglen, ifs->if_addr)) {
+ /*
+ * If the message is not about this logical interface,
+ * then just ignore it.
+ */
+ continue;
+ } else if (lifr.lifr_flags & IFF_DUPLICATE) {
+ fail = "interface has duplicate address";
+ } else {
+ /*
+ * If we're now up and we were waiting for that, then
+ * kick off this interface. DAD is done.
+ */
+ if ((lifr.lifr_flags & IFF_UP) &&
+ ifs->if_state == PRE_BOUND)
+ dhcp_bound_complete(ifs);
+
+ continue;
+ }
+
+ if (ifs->if_sock_ip_fd != -1) {
+ (void) close(ifs->if_sock_ip_fd);
+ ifs->if_sock_ip_fd = -1;
+ }
+ dhcpmsg(MSG_ERROR, fail);
+
+ /*
+ * The binding has evidently failed, so it's as though it never
+ * happened. We need to switch back to PRE_BOUND state so
+ * that send_pkt_internal() uses DLPI instead of sockets. Our
+ * logical interface has already been torn down by the kernel,
+ * and thus we can't send DHCPDECLINE by way of regular IP.
+ */
+ ifs->if_state = PRE_BOUND;
+
+ if (ifs->if_ack->opts[CD_DHCP_TYPE] != NULL)
+ send_decline(ifs, fail, &ifs->if_addr);
+
+ ifs->if_bad_offers++;
+ dhcp_restart(ifs);
+ }
+}
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h
index 281f376c96..b9bafe59bb 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h
@@ -50,7 +50,7 @@ extern "C" {
* timer id of the global inactivity timer, which shuts down the agent
* if there are no interfaces to manage for DHCP_INACTIVITY_WAIT
* seconds. `grandparent' is the pid of the original process when in
- * adopt mode.
+ * adopt mode. `rtsock_fd' is the global routing socket file descriptor.
*/
extern iu_tq_t *tq;
@@ -59,6 +59,7 @@ extern char *class_id;
extern int class_id_len;
extern iu_timer_id_t inactivity_id;
extern pid_t grandparent;
+extern int rtsock_fd;
boolean_t drain_script(iu_eh_t *, void *);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c
deleted file mode 100644
index f4925468d8..0000000000
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1999 by Sun Microsystems, Inc.
- * All rights reserved.
- */
-
-#pragma ident "%W% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <net/if.h>
-#include <poll.h>
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
-#include <net/if_arp.h>
-#include <sys/dlpi.h>
-#include <stddef.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/pfmod.h>
-#include <dhcpmsg.h>
-#include <stddef.h>
-
-#include "defaults.h"
-#include "util.h"
-#include "interface.h"
-#include "dlpi_io.h"
-#include "arp_check.h"
-
-/*
- * the struct arp_info is used by arp_reply_filter() to build a filter
- * that only receives replies from the ARPed IP address.
- */
-
-struct arp_info {
-
- uchar_t send_addr_offset; /* from start of ARP frame */
- in_addr_t send_addr; /* arped IP address */
-};
-
-/*
- * arp_reply_filter(): builds a filter that permits ARP replies to our request
- *
- * input: ushort_t *: a place to store the packet filter code
- * void *: a struct arp_info containing the requested IP address
- * output: ushort_t *: two bytes past the last byte of the filter
- */
-
-static ushort_t *
-arp_reply_filter(ushort_t *pfp, void *arg)
-{
- struct arp_info *ai = (struct arp_info *)arg;
-
- *pfp++ = ENF_PUSHWORD + (offsetof(struct arphdr, ar_op) / 2);
- *pfp++ = ENF_PUSHLIT | ENF_EQ;
- *pfp++ = htons(ARPOP_REPLY);
-
- /*
- * make sure this ARP reply is from the target IP address,
- * which will be the "sender" IP address in the reply (even in
- * the case of proxy ARP). the position of sender IP address
- * depends on the link layer; so we can be link-layer
- * independent, these values are calculated in arp_check().
- *
- * the byteorder issues here are *really* subtle. suppose
- * that the network address is 0x11223344 (as stored in the
- * packet read off the wire) by an intel machine. then notice
- * that since the packet filter operates 16 bits at a time
- * that the high-order word will load as 0x2211 and the
- * low-order word will load as 0x4433. so send_addr has the
- * register value 0x44332211 on intel since that will store to
- * the network address 0x11223344 in memory. thus, to compare
- * the low-order word, we must first ntohl() send_addr, which
- * changes its register-value to 0x11223344, and then mask
- * off the high-order bits, getting 0x3344, and then convert
- * that to network order, getting 0x4433, which is what we
- * want. the same logic applies to the high-order word. you
- * are not expected to understand this.
- */
-
- *pfp++ = ENF_PUSHWORD + (ai->send_addr_offset / 2) + 1;
- *pfp++ = ENF_PUSHLIT | ENF_EQ;
- *pfp++ = htons(ntohl(ai->send_addr) & 0xffff);
- *pfp++ = ENF_AND;
-
- *pfp++ = ENF_PUSHWORD + (ai->send_addr_offset / 2);
- *pfp++ = ENF_PUSHLIT | ENF_EQ;
- *pfp++ = htons(ntohl(ai->send_addr) >> 16);
- *pfp++ = ENF_AND;
-
- return (pfp);
-}
-
-/*
- * arp_check(): checks to see if a given IP address is already in use
- *
- * input: struct ifslist *: the interface to send the ARP request on
- * in_addr_t: the IP address to send from, network order
- * in_addr_t: the IP address to check on, network order
- * uchar_t *: a scratch buffer that holds the hardware address
- * of the machine that replied to our ARP request,
- * if there was one.
- * uint32_t: the length of the buffer
- * uint32_t: how long to wait for an ARP reply, in milliseconds
- * output: int: 1 if the IP address is in use, 0 if not in use.
- */
-
-int
-arp_check(struct ifslist *ifsp, in_addr_t send_addr, in_addr_t target_addr,
- uchar_t *target_hwaddr, uint32_t target_hwlen, uint32_t timeout_msec)
-{
- uint32_t buf[DLPI_BUF_MAX / sizeof (uint32_t)];
- dl_info_ack_t *dlia = (dl_info_ack_t *)buf;
- int fd;
- struct arphdr *arp_pkt = NULL;
- uchar_t *arp_daddr = NULL;
- caddr_t arp_payload;
- uchar_t arp_dlen;
- size_t offset;
- struct pollfd pollfd;
- int retval;
- struct arp_info ai;
- unsigned int arp_pkt_len;
-
- fd = dlpi_open(ifsp->if_name, dlia, sizeof (buf), ETHERTYPE_ARP);
- if (fd == -1)
- goto failure;
-
- /*
- * the packet consists of an ARP header, two IP addresses
- * and two hardware addresses (each ifsp->if_hwlen bytes long).
- */
-
- arp_pkt_len = sizeof (struct arphdr) + (sizeof (ipaddr_t) * 2) +
- (ifsp->if_hwlen * 2);
-
- arp_pkt = malloc(arp_pkt_len);
- arp_daddr = build_broadcast_dest(dlia, &arp_dlen);
- if (arp_pkt == NULL || arp_daddr == NULL)
- goto failure;
-
- (void) memset(arp_pkt, 0xff, arp_pkt_len);
-
- arp_pkt->ar_hrd = htons(ifsp->if_hwtype);
- arp_pkt->ar_pro = htons(ETHERTYPE_IP);
- arp_pkt->ar_hln = ifsp->if_hwlen;
- arp_pkt->ar_pln = sizeof (ipaddr_t);
- arp_pkt->ar_op = htons(ARPOP_REQUEST);
-
- arp_payload = (caddr_t)&arp_pkt[1];
- (void) memcpy(arp_payload, ifsp->if_hwaddr, ifsp->if_hwlen);
- offset = ifsp->if_hwlen;
-
- /*
- * while we're at the appropriate offset for sender IP address,
- * store it for use by the packet filter.
- */
-
- ai.send_addr = target_addr;
- ai.send_addr_offset = offset + sizeof (struct arphdr);
-
- (void) memcpy(&arp_payload[offset], &send_addr, sizeof (ipaddr_t));
- offset += ifsp->if_hwlen + sizeof (ipaddr_t);
- (void) memcpy(&arp_payload[offset], &target_addr, sizeof (ipaddr_t));
-
- /*
- * install the packet filter, send our ARP request, and wait
- * for a reply. waiting usually isn't a good idea since the
- * design of the agent is nonblocking. however, we can
- * tolerate short waits (< 5 seconds).
- */
-
- set_packet_filter(fd, arp_reply_filter, &ai, "ARP reply");
-
- if (dlpi_send_link(fd, arp_pkt, arp_pkt_len, arp_daddr, arp_dlen) == -1)
- goto failure;
-
- pollfd.fd = fd;
- pollfd.events = POLLIN;
-
- retval = poll(&pollfd, 1, timeout_msec);
- if (retval > 0 && target_hwaddr != NULL) {
-
- /*
- * try to grab the hardware address. if we fail, we'll
- * just end up with some misleading diagnostics. the
- * hardware address is at the start of the payload.
- */
-
- if (dlpi_recv_link(fd, arp_pkt, arp_pkt_len, DLPI_RECV_SHORT) ==
- arp_pkt_len)
- (void) memcpy(target_hwaddr, arp_payload, target_hwlen);
- }
-
- free(arp_daddr);
- free(arp_pkt);
- (void) close(fd);
- return ((retval == 0) ? 0 : 1);
-
-failure:
- free(arp_daddr);
- free(arp_pkt);
- (void) close(fd);
-
- if (df_get_bool(ifsp->if_name, DF_IGNORE_FAILED_ARP)) {
- dhcpmsg(MSG_WARNING, "arp_check: cannot send ARP request: "
- "assuming address is available");
- return (0);
- }
-
- dhcpmsg(MSG_WARNING, "arp_check: cannot send ARP request: "
- "assuming address is unavailable");
- return (1);
-}
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h
deleted file mode 100644
index c3fff1ba0c..0000000000
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 1999 by Sun Microsystems, Inc.
- * All rights reserved.
- */
-
-#ifndef ARP_CHECK_H
-#define ARP_CHECK_H
-
-#pragma ident "%W% %E% SMI"
-
-#include <sys/types.h>
-#include <netinet/in.h>
-
-#include "interface.h"
-
-/*
- * arp_check.[ch] provide an interface for checking whether a given IP
- * address is currently in use. see arp_check.c for documentation on
- * how to use the exported function.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int arp_check(struct ifslist *, in_addr_t, in_addr_t, uchar_t *,
- uint32_t, uint32_t);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ARP_CHECK_H */
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
index 9033a96f14..0853456b5e 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c
@@ -39,10 +39,7 @@
#include <sys/sysmacros.h>
#include <dhcp_hostconf.h>
#include <dhcpmsg.h>
-#include <stdio.h> /* snprintf */
-#include "defaults.h"
-#include "arp_check.h"
#include "states.h"
#include "packet.h"
#include "util.h"
@@ -53,6 +50,7 @@
#define IS_DHCP(plp) ((plp)->opts[CD_DHCP_TYPE] != NULL)
static int configure_if(struct ifslist *);
+static int configure_bound(struct ifslist *);
static int configure_timers(struct ifslist *);
/*
@@ -122,15 +120,26 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack)
(void) memcpy(ifsp->if_ack->opts[CD_LEASE_TIME]->value,
&new_lease, sizeof (lease_t));
+ if (configure_bound(ifsp) == 0)
+ return (0);
+
/*
* we have no idea when the REQUEST that generated
* this ACK was sent, but for diagnostic purposes
* we'll assume its close to the current time.
*/
-
ifsp->if_newstart_monosec = monosec();
- /* FALLTHRU into REQUESTING/INIT_REBOOT */
+ if (configure_timers(ifsp) == 0)
+ return (0);
+
+ /*
+ * if the state is ADOPTING, event loop has not been started
+ * at this time; so don't run the EVENT_BOUND script.
+ */
+ ifsp->if_curstart_monosec = ifsp->if_newstart_monosec;
+ ifsp->if_state = BOUND;
+ break;
case REQUESTING:
case INIT_REBOOT:
@@ -142,21 +151,19 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack)
return (0);
/*
- * if the state is ADOPTING, event loop has not been started
- * at this time, so don't run the script.
+ * We will continue configuring this interface via
+ * dhcp_bound_complete, once kernel DAD completes.
*/
-
- if (ifsp->if_state != ADOPTING) {
- (void) script_start(ifsp, EVENT_BOUND, bound_event_cb,
- NULL, NULL);
- }
-
+ ifsp->if_state = PRE_BOUND;
break;
+ case PRE_BOUND:
+ /* This is just a duplicate ack; silently ignore it */
+ return (1);
+
case RENEWING:
case REBINDING:
case BOUND:
-
cur_lease = ifsp->if_lease;
if (configure_timers(ifsp) == 0)
return (0);
@@ -192,6 +199,8 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack)
(void) script_start(ifsp, EVENT_EXTEND, bound_event_cb,
NULL, NULL);
+ ifsp->if_state = BOUND;
+ ifsp->if_curstart_monosec = ifsp->if_newstart_monosec;
break;
case INFORM_SENT:
@@ -206,11 +215,6 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack)
return (0);
}
- if (ifsp->if_state != INFORMATION) {
- ifsp->if_state = BOUND;
- ifsp->if_curstart_monosec = ifsp->if_newstart_monosec;
- }
-
/*
* remove any stale hostconf file that might be lying around for
* this interface. (in general, it's harmless, since we'll write a
@@ -222,13 +226,32 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack)
}
/*
+ * dhcp_bound_complete(): complete interface configuration after DAD
+ *
+ * input: struct ifslist *: the interface to configure
+ * output: none
+ */
+
+void
+dhcp_bound_complete(struct ifslist *ifsp)
+{
+ if (configure_bound(ifsp) == 0)
+ return;
+
+ (void) script_start(ifsp, EVENT_BOUND, bound_event_cb, NULL, NULL);
+
+ ifsp->if_state = BOUND;
+ ifsp->if_curstart_monosec = ifsp->if_newstart_monosec;
+}
+
+/*
* configure_timers(): configures the lease timers on an interface
*
* input: struct ifslist *: the interface to configure (with a valid if_ack)
* output: int: 1 on success, 0 on failure
*/
-int
+static int
configure_timers(struct ifslist *ifsp)
{
lease_t lease, t1, t2;
@@ -313,10 +336,6 @@ configure_if(struct ifslist *ifsp)
struct ifreq ifr;
struct sockaddr_in *sin;
PKT_LIST *ack = ifsp->if_ack;
- DHCP_OPT *router_list;
- uchar_t *target_hwaddr;
- int i;
- char in_use[256] = "IP address already in use by";
/*
* if we're using DHCP, then we'll have a valid CD_SERVER_ID
@@ -331,27 +350,6 @@ configure_if(struct ifslist *ifsp)
(void) memcpy(&ifsp->if_server.s_addr,
ack->opts[CD_SERVER_ID]->value, sizeof (ipaddr_t));
- /* no big deal if this fails; we'll just have less diagnostics */
- target_hwaddr = malloc(ifsp->if_hwlen);
-
- if (arp_check(ifsp, 0, ack->pkt->yiaddr.s_addr, target_hwaddr,
- ifsp->if_hwlen, df_get_int(ifsp->if_name, DF_ARP_WAIT)) == 1) {
-
- for (i = 0; i < ifsp->if_hwlen; i++)
- (void) snprintf(in_use, sizeof (in_use), "%s %02x",
- in_use, target_hwaddr[i]);
-
- dhcpmsg(MSG_ERROR, in_use);
-
- if (ifsp->if_ack->opts[CD_DHCP_TYPE] != NULL)
- send_decline(ifsp, in_use, &ack->pkt->yiaddr);
-
- ifsp->if_bad_offers++;
- free(target_hwaddr);
- return (0);
- }
- free(target_hwaddr);
-
ifsp->if_addr.s_addr = ack->pkt->yiaddr.s_addr;
if (ifsp->if_addr.s_addr == htonl(INADDR_ANY)) {
dhcpmsg(MSG_ERROR, "configure_if: got invalid IP address");
@@ -498,6 +496,24 @@ configure_if(struct ifslist *ifsp)
ifsp->if_broadcast = sin->sin_addr;
dhcpmsg(MSG_INFO, "using broadcast address %s on %s",
inet_ntoa(ifsp->if_broadcast), ifsp->if_name);
+ return (1);
+}
+
+/*
+ * configure_bound(): configures routing with DHCP parameters from an ACK,
+ * and sets up the if_sock_ip_fd socket used for lease
+ * renewal.
+ *
+ * input: struct ifslist *: the interface to configure (with a valid if_ack)
+ * output: int: 1 on success, 0 on failure
+ */
+
+static int
+configure_bound(struct ifslist *ifsp)
+{
+ PKT_LIST *ack = ifsp->if_ack;
+ DHCP_OPT *router_list;
+ int i;
/*
* add each provided router; we'll clean them up when the
@@ -510,7 +526,7 @@ configure_if(struct ifslist *ifsp)
ifsp->if_nrouters = router_list->len / sizeof (ipaddr_t);
ifsp->if_routers = malloc(router_list->len);
if (ifsp->if_routers == NULL) {
- dhcpmsg(MSG_ERR, "configure_if: cannot allocate "
+ dhcpmsg(MSG_ERR, "configure_bound: cannot allocate "
"default router list, ignoring default routers");
ifsp->if_nrouters = 0;
}
@@ -523,7 +539,7 @@ configure_if(struct ifslist *ifsp)
if (add_default_route(ifsp->if_name,
&ifsp->if_routers[i]) == 0) {
- dhcpmsg(MSG_ERR, "configure_if: cannot add "
+ dhcpmsg(MSG_ERR, "configure_bound: cannot add "
"default router %s on %s", inet_ntoa(
ifsp->if_routers[i]), ifsp->if_name);
ifsp->if_routers[i].s_addr = htonl(INADDR_ANY);
@@ -537,14 +553,14 @@ configure_if(struct ifslist *ifsp)
ifsp->if_sock_ip_fd = socket(AF_INET, SOCK_DGRAM, 0);
if (ifsp->if_sock_ip_fd == -1) {
- dhcpmsg(MSG_ERR, "configure_if: cannot create socket on %s",
+ dhcpmsg(MSG_ERR, "configure_bound: cannot create socket on %s",
ifsp->if_name);
return (0);
}
if (bind_sock(ifsp->if_sock_ip_fd, IPPORT_BOOTPC,
ntohl(ifsp->if_addr.s_addr)) == 0) {
- dhcpmsg(MSG_ERR, "configure_if: cannot bind socket on %s",
+ dhcpmsg(MSG_ERR, "configure_bound: cannot bind socket on %s",
ifsp->if_name);
return (0);
}
@@ -558,8 +574,8 @@ configure_if(struct ifslist *ifsp)
*/
if (bind_sock(ifsp->if_sock_fd, IPPORT_BOOTPC, INADDR_BROADCAST) == 0) {
- dhcpmsg(MSG_ERR, "configure_if: cannot bind broadcast socket "
- "on %s", ifsp->if_name);
+ dhcpmsg(MSG_ERR, "configure_bound: cannot bind broadcast "
+ "socket on %s", ifsp->if_name);
return (0);
}
@@ -573,6 +589,6 @@ configure_if(struct ifslist *ifsp)
if (ack->opts[CD_DHCP_TYPE] == NULL)
ifsp->if_dflags |= DHCP_IF_BOOTP;
- dhcpmsg(MSG_DEBUG, "configure_if: bound ifsp->if_sock_ip_fd");
+ dhcpmsg(MSG_DEBUG, "configure_bound: bound ifsp->if_sock_ip_fd");
return (1);
}
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c
index 6edf5d6da5..7800163061 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -55,9 +54,9 @@ struct dhcp_default {
static struct dhcp_default defaults[] = {
{ "RELEASE_ON_SIGTERM", "0", 0, 0 },
- { "IGNORE_FAILED_ARP", "1", 0, 0 },
+ { "IGNORE_FAILED_ARP", "1", 0, -1 },
{ "OFFER_WAIT", "3", 1, 20 },
- { "ARP_WAIT", "1000", 100, 4000 },
+ { "ARP_WAIT", "1000", 0, -1 },
{ "CLIENT_ID", NULL, 0, 0 },
{ "PARAM_REQUEST_LIST", NULL, 0, 0 },
{ "REQUEST_HOSTNAME", "1", 0, 0 }
@@ -78,6 +77,7 @@ df_build_cache(void)
char *param, *value, *end;
FILE *fp;
nvlist_t *nvlist;
+ struct dhcp_default *defp;
if ((fp = fopen(DHCP_AGENT_DEFAULTS, "r")) == NULL)
return (NULL);
@@ -112,6 +112,18 @@ df_build_cache(void)
else
param++;
+ for (defp = defaults;
+ (char *)defp < (char *)defaults + sizeof (defaults);
+ defp++) {
+ if (strcasecmp(param, defp->df_name) == 0) {
+ if (defp->df_max == -1) {
+ dhcpmsg(MSG_WARNING, "parameter %s is "
+ "obsolete; ignored", defp->df_name);
+ }
+ break;
+ }
+ }
+
for (; *param != '\0'; param++)
*param = toupper(*param);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h
index 4d58c2072d..95e14cfa34 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,8 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 1999-2001 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
#ifndef DEFAULTS_H
@@ -48,9 +47,9 @@ extern "C" {
enum {
DF_RELEASE_ON_SIGTERM, /* send RELEASE on each if upon SIGTERM */
- DF_IGNORE_FAILED_ARP, /* what to do if agent can't ARP */
+ _UNUSED_DF_IGNORE_FAILED_ARP,
DF_OFFER_WAIT, /* how long to wait to collect offers */
- DF_ARP_WAIT, /* how long to wait for an ARP reply */
+ _UNUSED_DF_ARP_WAIT,
DF_CLIENT_ID, /* our client id */
DF_PARAM_REQUEST_LIST, /* our parameter request list */
DF_REQUEST_HOSTNAME /* request hostname associated with interface */
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl b/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl
index 4299f09136..406882c77f 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl
@@ -1,14 +1,9 @@
-#ident "%Z%%M% %I% %E% SMI"
-#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -25,6 +20,13 @@
#
#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+#
# This file contains tunable parameters for dhcpagent(1M).
#
@@ -41,15 +43,6 @@
#
# RELEASE_ON_SIGTERM=yes
-# When the DHCP agent gets an ACK from the server, it sends an ARP
-# request to verify that a given IP address is not already in use. If
-# an ARP reply is received, the DHCP agent declines the server's
-# offer. However, if the DHCP agent is unable to send the ARP request
-# packet for whatever reason, it assumes the address is available. To
-# be more cautious, uncomment the following parameter-value pair.
-#
-# IGNORE_FAILED_ARP=no
-
# By default, the DHCP agent waits 3 seconds to collect OFFER
# responses to a DISCOVER. If it receives no OFFERs in this time, it
# then waits for another 3 seconds, and so forth. To change this
@@ -60,13 +53,6 @@
#
# OFFER_WAIT=
-# By default, the DHCP agent waits 1000 milliseconds to collect ARP
-# replies to an ARP request when verifying that an IP address is not
-# in use. To change this behavior, set and uncomment the following
-# parameter-value pair. This parameter is specified in milliseconds.
-#
-# ARP_WAIT=
-
# By default, the DHCP agent does not send out a client identifier
# (and hence, the chaddr field is used by the DHCP server as the
# client identifier.) To make the DHCP agent send a client
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
index bdad88a741..d88f52f38b 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c
@@ -191,6 +191,16 @@ insert_ifs(const char *if_name, boolean_t is_adopting, int *error)
/* step 6 */
(void) strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
+ if (ioctl(ifsp->if_sock_fd, SIOCGIFINDEX, &ifr) == -1) {
+ if (errno == ENXIO)
+ *error = DHCP_IPC_E_INVIF;
+ else
+ *error = DHCP_IPC_E_INT;
+ dhcpmsg(MSG_ERR, "insert_ifs: SIOCGIFINDEX for %s", if_name);
+ goto failure;
+ }
+ ifsp->if_index = ifr.ifr_index;
+
if (ioctl(ifsp->if_sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
if (errno == ENXIO)
*error = DHCP_IPC_E_INVIF;
@@ -564,6 +574,36 @@ lookup_ifs_by_xid(uint32_t xid)
}
/*
+ * lookup_ifs_by_uindex(): Looks up ifs entries given truncated index and
+ * previous ifs pointer (or NULL for list start).
+ * Caller is expected to iterate through all
+ * potential matches to find interface of interest.
+ *
+ * input: uint16_t: the truncated (16-bit) interface index
+ * struct ifslist *: the previous ifs, or NULL for list start
+ * output: struct ifslist *: the next matching ifs, or NULL if not found
+ * note: This operates using the 'truncated' (16-bit) ifindex as seen by
+ * routing socket clients. The value stored in if_index is the
+ * 32-bit ifindex from the ioctl interface.
+ */
+
+struct ifslist *
+lookup_ifs_by_uindex(uint16_t ifindex, struct ifslist *ifs)
+{
+ if (ifs == NULL)
+ ifs = ifsheadp;
+ else
+ ifs = ifs->next;
+
+ for (; ifs != NULL; ifs = ifs->next) {
+ if ((ifs->if_index & 0xffff) == ifindex)
+ break;
+ }
+
+ return (ifs);
+}
+
+/*
* remove_ifs(): removes a given ifs from the ifslist. marks the ifs
* for being freed (but may not actually free it).
*
@@ -772,6 +812,16 @@ verify_ifs(struct ifslist *ifsp)
goto abandon;
break;
}
+ switch (ioctl(ifsp->if_sock_fd, SIOCGIFINDEX, &ifr)) {
+ case 0:
+ if (ifr.ifr_index != ifsp->if_index)
+ goto abandon;
+ break;
+ case -1:
+ if (errno == ENXIO)
+ goto abandon;
+ break;
+ }
/* FALLTHRU */
case INIT_REBOOT:
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
index b2fad40fb1..24b4598e31 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h
@@ -112,6 +112,8 @@ struct ifslist {
char if_name[IFNAMSIZ];
+ uint32_t if_index; /* interface index */
+
uint16_t if_max; /* largest DHCP packet on this if */
uint16_t if_min; /* minimum mtu size on this if */
uint16_t if_opt; /* amount of space for options in PKT */
@@ -367,6 +369,7 @@ void hold_ifs(struct ifslist *);
struct ifslist *insert_ifs(const char *, boolean_t, int *);
struct ifslist *lookup_ifs(const char *);
struct ifslist *lookup_ifs_by_xid(uint32_t);
+struct ifslist *lookup_ifs_by_uindex(uint16_t, struct ifslist *);
void nuke_ifslist(boolean_t);
void refresh_ifslist(iu_eh_t *, int, void *);
int release_ifs(struct ifslist *);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c
index 8613a41245..751e6be5ac 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -204,7 +203,7 @@ dhcp_rebind(iu_tq_t *tqp, void *arg)
}
/*
- * dhcp_restart(): callback function to script_start
+ * dhcp_restart_lease(): callback function to script_start
*
* input: struct ifslist *: the interface to be restarted
* const char *: unused
@@ -213,7 +212,7 @@ dhcp_rebind(iu_tq_t *tqp, void *arg)
/* ARGSUSED */
static int
-dhcp_restart(struct ifslist *ifsp, const char *msg)
+dhcp_restart_lease(struct ifslist *ifsp, const char *msg)
{
dhcpmsg(MSG_INFO, "lease expired on %s -- restarting DHCP",
ifsp->if_name);
@@ -292,7 +291,7 @@ dhcp_expire(iu_tq_t *tqp, void *arg)
dhcpmsg(MSG_WARNING, "dhcp_expire: cannot start asynchronous "
"transaction on %s, continuing...", ifsp->if_name);
- (void) script_start(ifsp, EVENT_EXPIRE, dhcp_restart, NULL, NULL);
+ (void) script_start(ifsp, EVENT_EXPIRE, dhcp_restart_lease, NULL, NULL);
}
/*
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
index 3e8e66572c..a4f418059a 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
@@ -37,7 +37,6 @@
#include <dhcp_hostconf.h>
#include <arpa/inet.h>
#include <string.h>
-#include <stdlib.h>
#include <unistd.h>
#include <dhcpmsg.h>
@@ -46,10 +45,8 @@
#include "packet.h"
#include "interface.h"
#include "agent.h"
-#include "defaults.h"
static PKT_LIST *select_best(PKT_LIST **);
-static void restart_dhcp(struct ifslist *);
static stop_func_t stop_requesting;
/*
@@ -139,7 +136,7 @@ dhcp_requesting(iu_tq_t *tqp, void *arg)
if (dhcp_bound(ifsp, offer) == 0) {
dhcpmsg(MSG_WARNING, "dhcp_requesting: dhcp_bound "
"failed for %s", ifsp->if_name);
- restart_dhcp(ifsp);
+ dhcp_restart(ifsp);
return;
}
@@ -408,7 +405,7 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg)
ifsp->if_name);
ifsp->if_bad_offers++;
free_pkt_list(&plp);
- restart_dhcp(ifsp);
+ dhcp_restart(ifsp);
/*
* remove any bogus cached configuration we might have
@@ -426,7 +423,7 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg)
"restarting DHCP on %s", ifsp->if_name);
ifsp->if_bad_offers++;
free_pkt_list(&plp);
- restart_dhcp(ifsp);
+ dhcp_restart(ifsp);
return;
}
@@ -436,7 +433,7 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg)
if (dhcp_bound(ifsp, plp) == 0) {
dhcpmsg(MSG_WARNING, "dhcp_acknak: dhcp_bound failed "
"for %s", ifsp->if_name);
- restart_dhcp(ifsp);
+ dhcp_restart(ifsp);
return;
}
@@ -444,14 +441,14 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg)
}
/*
- * restart_dhcp(): restarts DHCP (from INIT) on a given interface
+ * dhcp_restart(): restarts DHCP (from INIT) on a given interface
*
* input: struct ifslist *: the interface to restart DHCP on
* output: void
*/
-static void
-restart_dhcp(struct ifslist *ifsp)
+void
+dhcp_restart(struct ifslist *ifsp)
{
if (iu_schedule_timer(tq, DHCP_RESTART_WAIT, dhcp_start, ifsp) == -1) {
@@ -461,7 +458,7 @@ restart_dhcp(struct ifslist *ifsp)
ipc_action_finish(ifsp, DHCP_IPC_E_MEMORY);
async_finish(ifsp);
- dhcpmsg(MSG_ERROR, "restart_dhcp: cannot schedule dhcp_start, "
+ dhcpmsg(MSG_ERROR, "dhcp_restart: cannot schedule dhcp_start, "
"reverting to INIT state on %s", ifsp->if_name);
} else
hold_ifs(ifsp);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h
index 74190cdd13..32f880762a 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -49,6 +48,7 @@ extern "C" {
void dhcp_acknak(iu_eh_t *, int, short, iu_event_id_t, void *);
int dhcp_adopt(void);
int dhcp_bound(struct ifslist *, PKT_LIST *);
+void dhcp_bound_complete(struct ifslist *);
int dhcp_drop(struct ifslist *, const char *);
void dhcp_expire(iu_tq_t *, void *);
int dhcp_extending(struct ifslist *);
@@ -58,11 +58,11 @@ void dhcp_rebind(iu_tq_t *, void *);
int dhcp_release(struct ifslist *, const char *);
void dhcp_renew(iu_tq_t *, void *);
void dhcp_requesting(iu_tq_t *, void *);
+void dhcp_restart(struct ifslist *);
void dhcp_selecting(struct ifslist *);
void dhcp_start(iu_tq_t *, void *);
void send_decline(struct ifslist *, char *, struct in_addr *);
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c
index 481d867bad..ff2d574053 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c
@@ -33,9 +33,7 @@
#include <netinet/dhcp.h>
#include <signal.h>
#include <sys/dlpi.h>
-#include <sys/sockio.h>
#include <sys/socket.h>
-#include <errno.h>
#include <net/route.h>
#include <net/if_arp.h>
#include <string.h>
@@ -50,7 +48,6 @@
#include "interface.h"
#include "util.h"
#include "packet.h"
-#include "defaults.h"
/*
* this file contains utility functions that have no real better home
@@ -395,7 +392,6 @@ static int
update_default_route(const char *ifname, int type, struct in_addr *gateway_nbo,
int flags)
{
- static int rtsock_fd = -1;
struct {
struct rt_msghdr rm_mh;
struct sockaddr_in rm_dst;
@@ -404,15 +400,6 @@ update_default_route(const char *ifname, int type, struct in_addr *gateway_nbo,
struct sockaddr_dl rm_ifp;
} rtmsg;
- if (rtsock_fd == -1) {
- rtsock_fd = socket(PF_ROUTE, SOCK_RAW, 0);
- if (rtsock_fd == -1) {
- dhcpmsg(MSG_ERR, "update_default_route: "
- "cannot create routing socket");
- return (0);
- }
- }
-
(void) memset(&rtmsg, 0, sizeof (rtmsg));
rtmsg.rm_mh.rtm_version = RTM_VERSION;
rtmsg.rm_mh.rtm_msglen = sizeof (rtmsg);
diff --git a/usr/src/cmd/cmd-inet/usr.bin/Makefile b/usr/src/cmd/cmd-inet/usr.bin/Makefile
index a982e6a334..aa203a2216 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.bin/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,7 @@
#
# CDDL HEADER END
#
-#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -38,13 +36,13 @@ MSGSUBDIRS= nca talk
# As programs get lint-clean, add them here. Eventually.
# This hack should go away, and all in PROG should be lint-clean.
-LINTCLEAN= rlogin.c rsh.c rcp.c rdate.c
+LINTCLEAN= rlogin.c rsh.c rcp.c rdate.c rwho.c
# Likewise, as subdirs get lint-clean, add them here. Once
# they're all clean, replace the dependency of the lint target
# with SUBDIRS. Also (sigh) deal with the commented-out build lines
# for the lint rule.
-LINTSUBDIRS= nca netstat pppd tftp
+LINTSUBDIRS= nca netstat pppd pppstats tftp
include ../../Makefile.cmd
include ../Makefile.cmd-inet
diff --git a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
index 8218eac44d..0f15eb05f5 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
+++ b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c
@@ -73,7 +73,6 @@
#include <net/if.h>
#include <net/route.h>
-#include <inet/common.h>
#include <inet/mib2.h>
#include <inet/ip.h>
#include <inet/arp.h>
@@ -3588,17 +3587,21 @@ arp_report(mib_item_t *item)
(void) puts(v4compat ?
"Net to Media Table" :
"Net to Media Table: IPv4");
- (void) fputs("Device "
- "IP Address Mask ",
- stdout);
- (void) puts("Flags Phys Addr ");
- (void) puts("------ -------------------- "
- "--------------- ----- ---------------");
+ (void) puts("Device "
+ " IP Address Mask "
+ "Flags Phys Addr");
+ (void) puts("------ "
+ "-------------------- --------------- "
+ "-------- ---------------");
first = B_FALSE;
}
flbuf[0] = '\0';
flags = np->ipNetToMediaInfo.ntm_flags;
+ /*
+ * Note that not all flags are possible at the same
+ * time. Patterns: SPLAy DUo
+ */
if (flags & ACE_F_PERMANENT)
(void) strcat(flbuf, "S");
if (flags & ACE_F_PUBLISH)
@@ -3609,7 +3612,17 @@ arp_report(mib_item_t *item)
(void) strcat(flbuf, "U");
if (flags & ACE_F_MAPPING)
(void) strcat(flbuf, "M");
- (void) printf("%-6s %-20s %-15s %-5s %s\n",
+ if (flags & ACE_F_MYADDR)
+ (void) strcat(flbuf, "L");
+ if (flags & ACE_F_UNVERIFIED)
+ (void) strcat(flbuf, "d");
+ if (flags & ACE_F_AUTHORITY)
+ (void) strcat(flbuf, "A");
+ if (flags & ACE_F_OLD)
+ (void) strcat(flbuf, "o");
+ if (flags & ACE_F_DELAYED)
+ (void) strcat(flbuf, "y");
+ (void) printf("%-6s %-20s %-15s %-8s %s\n",
octetstr(&np->ipNetToMediaIfIndex, 'a',
ifname, sizeof (ifname)),
pr_addr(np->ipNetToMediaNetAddress,
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile
index b760270924..c7225e3e61 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -19,17 +18,16 @@
#
# CDDL HEADER END
#
-#
-#pragma ident "%Z%%M% %I% %E% SMI"
-#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# ident "%Z%%M% %I% %E% SMI"
+#
# cmd/cmd-inet/usr.lib/in.ndpd/Makefile
#
PROG= in.ndpd
-OBJS= config.o main.o ndp.o tables.o trace.o dupl_addr.o
+OBJS= config.o main.o ndp.o tables.o trace.o
SRCS= $(OBJS:%.o=%.c)
include ../../../Makefile.cmd
@@ -48,6 +46,14 @@ $(OBJS) := CPPFLAGS += $(_D_UNIX98_EXTN)
LINTFLAGS += $(_D_UNIX98_EXTN)
+# This shouldn't be necessary, but linking with libxnet requires it.
+LINTFLAGS += -erroff=E_INCONS_ARG_DECL2 -erroff=E_INCONS_VAL_TYPE_DECL2
+
+# This is needed to avoid littering the code with useless LINTED statements
+# for each place where pointers are cast. It's not perfect, but being
+# perfect would require a bigger rewrite.
+LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
.KEEP_STATE:
.PARALLEL: $(OBJS)
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h
index 26531e4d96..5e16cc1fa2 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h
@@ -98,7 +98,6 @@ extern struct sockaddr_dl *rta_ifp;
#define D_PKTBAD 0x0400 /* Malformed packet */
#define D_PKTOUT 0x0800 /* Sent packet */
#define D_TMP 0x1000 /* RFC3041 mechanism */
-#define D_DAD 0x2000 /* Duplciate Address Detection */
#define IF_SEPARATOR ':'
#define IPV6_MAX_HOPS 255
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c
deleted file mode 100644
index 8c81c13901..0000000000
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Perform IPv6 duplicate address detection for a given interface
- * and IPv6 address.
- *
- * This file is copied from usr/src/cmd/cmd-inet/usr.sbin/ifconfig.
- * Only the modifications necessary to integrate into the message
- * scheme of in.ndpd have been made. This is intended to be a
- * temporary fix to allow Duplicate Address Detection to be performed
- * by in.ndpd for temporary (rfc 3041) addresses; the long-term
- * solution will be to use libinetcfg.
- */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "defs.h"
-#include <netinet/icmp6.h>
-#include <netinet/in_systm.h> /* For IP_MAXPACKET */
-#include <netinet/ip.h> /* For IP_MAXPACKET */
-
-int DupAddrDetectTransmits = 1;
-int RetransTimer = ND_RETRANS_TIMER; /* Milliseconds. */
-
-#define IPV6_MAX_HOPS 255
-
-extern struct in6_addr all_nodes_mcast;
-
-static void in6_solmulti_addr(struct in6_addr *addr,
- struct in6_addr *multi);
-static int run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc, int ifindex);
-static int send_dad_probe(int s, char *phyname,
- struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc);
-static int recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr,
- int ifindex);
-static boolean_t verify_opts(struct nd_opt_hdr *opt, int optlen,
- struct sockaddr_in6 *from, boolean_t reject_dad_slla);
-static void dad_failed(char *phyname, struct sockaddr_in6 *testaddr,
- int code);
-static void print_na(char *str, char *phyname,
- struct nd_neighbor_advert *na, int len,
- struct sockaddr_in6 *addr);
-static void print_ns(char *str, char *phyname,
- struct nd_neighbor_solicit *ns, int len,
- struct sockaddr_in6 *addr);
-static void print_opt(struct nd_opt_hdr *opt, int len);
-
-
-/*
- * Performing duplicate address detection.
- *
- * Returns 0 if the address is ok, 1 if there is a duplicate,
- * and -1 (with errno set) if there is some internal error.
- * As a side effect this does a syslog printf identifying any
- * duplicate.
- * Note that the state of the interface name is unchanged.
- */
-int
-do_dad(char *ifname, struct sockaddr_in6 *testaddr)
-{
- int s;
- struct lifreq lifr;
- char *cp;
- char phyname[LIFNAMSIZ];
- int ifindex;
- int64_t saved_flags;
- int ret = -1; /* Assume error by default */
- struct sockaddr_in6 solicited_mc;
-
- /*
- * Truncate name at ':'. Needed for SIOCGLIFLNKINFO
- * Keep untruncated ifname for other use.
- */
- (void) strncpy(phyname, ifname, sizeof (phyname));
- cp = strchr(phyname, ':');
- if (cp != NULL)
- *cp = '\0';
-
- /*
- * Get a socket to use to send and receive neighbor solicitations
- * for DAD. Also used for ioctls below.
- */
- if ((s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0) {
- logperror("do_dad: socket");
- return (-1);
- }
-
- /*
- * Determine interface index (for IPV6_BOUND_PIF) and
- * save the flag values so they can be restored on return.
- */
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) < 0) {
- logperror("do_dad: SIOCGLIFINDEX");
- goto done;
- }
- ifindex = lifr.lifr_index;
- if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
- logperror("do_dad: SIOCGLIFFLAGS");
- goto done;
- }
- saved_flags = lifr.lifr_flags;
- if (!(saved_flags & IFF_MULTICAST)) {
- /* Not possible to do DAD. Pretend it is ok */
- ret = 0;
- goto done;
- }
- (void) strncpy(lifr.lifr_name, phyname, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCGLIFLNKINFO, (caddr_t)&lifr) < 0) {
- logperror("do_dad: SIOCGLIFLNKINFO");
- goto done;
- }
- if (lifr.lifr_ifinfo.lir_reachretrans != 0) {
- RetransTimer = lifr.lifr_ifinfo.lir_reachretrans;
- }
-
- /*
- * Set NOLOCAL and UP flags.
- * This prevents the use of the interface except when the user binds
- * to unspecified IPv6 address, and sends to a link local multicast
- * address.
- */
- lifr.lifr_flags = saved_flags | IFF_NOLOCAL | IFF_UP;
-
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
- logperror("do_dad: SIOCSLIFFLAGS");
- goto restore;
- }
-
- /*
- * IPV6_BOUND_PIF prevents load spreading to happen. If we
- * just do IPV6_BOUND_IF, the packet can go out on a different
- * interface other than "ifindex", if interface is part of
- * a group. In that case, we will get back the copy of NS that
- * we sent and think it is a duplicate(Switch loops back the
- * copy on all interfaces other than the one we sent the packet on).
- */
- if (setsockopt(s, IPPROTO_IPV6, IPV6_BOUND_PIF, (char *)&ifindex,
- sizeof (ifindex)) < 0) {
- logperror("do_dad: IPV6_BOUND_PIF");
- goto restore;
- }
-
- {
- int hops = IPV6_MAX_HOPS;
- int on = 1;
- int off = 0;
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
- (char *)&hops, sizeof (hops)) < 0) {
- logperror("do_dad: IPV6_MULTICAST_HOPS");
- goto restore;
- }
- if (setsockopt(s, IPPROTO_IPV6, IPV6_UNSPEC_SRC,
- (char *)&on, sizeof (on)) < 0) {
- logperror("do_dad: IPV6_UNSPEC_SRC");
- goto restore;
- }
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
- (char *)&off, sizeof (off)) < 0) {
- logperror("do_dad: IPV6_MULTICAST_LOOP");
- goto restore;
- }
-
- /* Enable receipt of ancillary data */
- if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
- (char *)&on, sizeof (on)) < 0) {
- logperror("do_dad: IPV6_RECVHOPLIMIT");
- goto restore;
- }
- if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
- (char *)&on, sizeof (on)) < 0) {
- logperror("do_dad: IPV6_RECVPKTINFO");
- goto restore;
- }
- if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVRTHDR,
- (char *)&on, sizeof (on)) < 0) {
- logperror("do_dad: IPV6_RECVRTHDR");
- goto restore;
- }
- }
-
- /*
- * Extract the address and determine the solicited node multicast
- * address to use.
- */
- (void) memset(&solicited_mc, 0, sizeof (solicited_mc));
- solicited_mc.sin6_family = AF_INET6;
- in6_solmulti_addr(&testaddr->sin6_addr, &solicited_mc.sin6_addr);
-
- /* Join the solicited node multicast address and all-nodes. */
- {
- struct ipv6_mreq v6mcastr;
-
- v6mcastr.ipv6mr_multiaddr = solicited_mc.sin6_addr;
- v6mcastr.ipv6mr_interface = ifindex;
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP,
- (char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- logperror("do_dad: IPV6_JOIN_GROUP");
- goto restore;
- }
-
- v6mcastr.ipv6mr_multiaddr = all_nodes_mcast;
- v6mcastr.ipv6mr_interface = ifindex;
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP,
- (char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- logperror("do_dad: IPV6_JOIN_GROUP");
- goto restore;
- }
- }
-
- ret = run_dad(s, phyname, testaddr, &solicited_mc, ifindex);
-
-restore:
- /* Restore flags */
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- lifr.lifr_flags = saved_flags;
- if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
- logperror("do_dad: SIOCSLIFFLAGS");
- ret = -1;
- goto done;
- }
-done:
- (void) close(s);
- return (ret);
-}
-
-
-/*
- * Determine the solicited node multicast address for a given address.
- */
-static void
-in6_solmulti_addr(struct in6_addr *addr, struct in6_addr *multi)
-{
- struct in6_addr solicited_prefix = {
- { 0xff, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x1, 0xFF, 0x0, 0x0, 0x0 } };
- int i;
-
- *multi = solicited_prefix;
- for (i = 13; i < 16; i++)
- multi->s6_addr[i] = addr->s6_addr[i];
-}
-
-static int
-run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc, int ifindex)
-{
- int time_left; /* In milliseconds */
- struct timeval starttime;
- struct timeval curtime;
- struct pollfd fds;
- int i;
- int ret;
-
- if (debug & D_DAD)
- logmsg(LOG_DEBUG, "run_dad(%s)\n", phyname);
-
- /*
- * Perform duplicate address detection sequence
- * 1. Send a neighbor solicitation with an unspecified source
- * address to the solicited node MC address with the testaddr
- * being the target.
- * 2. Wait for up to RetransTimer milliseconds for either a
- * neighbor advertisement (sent to all-nodes) or a DAD neighbor
- * solicitation for the testaddr.
- * 3. Perform step 1 and 2 DupAddrDetectTransmits times.
- */
-
- /* XXX perform a random delay: 0 - MAX_RTR_SOLICITATION_DELAY */
- /* XXX use poll+recv logic for the random delay */
-
- for (i = 0; i < DupAddrDetectTransmits; i++) {
- if (send_dad_probe(s, phyname, testaddr, solicited_mc) < 0)
- return (-1);
-
- /*
- * Track time to make sure total wait is RetransTimer
- * even though random packet will awake poll.
- */
- (void) gettimeofday(&starttime, NULL);
- /* CONSTCOND */
- while (1) {
- (void) gettimeofday(&curtime, NULL);
- time_left = RetransTimer -
- (curtime.tv_sec - starttime.tv_sec) * 1000 -
- (curtime.tv_usec - starttime.tv_usec) / 1000;
-
-
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "run_dad: time_left %d ms\n",
- time_left);
- }
- if (time_left <= 0) {
- if (debug & D_DAD)
- logmsg(LOG_DEBUG, "run_dad: timeout\n");
- break;
- }
- fds.fd = s;
- fds.events = POLLIN;
-
- switch (poll(&fds, 1, time_left)) {
- case -1:
- logperror("run_dad: poll");
- return (-1);
- case 0:
- /* Need loop will break */
- break;
- default:
- /* Huh? */
- logmsg(LOG_ERR, "poll returns > 1!\n");
- return (-1);
- case 1:
- if (fds.revents & POLLIN) {
- ret = recv_dad(s, phyname, testaddr,
- ifindex);
- if (ret < 0)
- return (-1);
- if (ret > 0) {
- dad_failed(phyname, testaddr,
- ret);
- return (1);
- }
- }
- break;
- }
- }
- }
- return (0);
-}
-
-/*
- * Send a DAD NS packet. Assumes an IPV6_UNSPEC_SRC and an IPV6_BOUND_IF
- * have been done by the caller.
- */
-static int
-send_dad_probe(int s, char *phyname, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc)
-{
- static uint64_t outpack[(IP_MAXPACKET + 1)/8];
- struct nd_neighbor_solicit *ns = (struct nd_neighbor_solicit *)outpack;
- int packetlen = 0;
- int cc;
-
- ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
- ns->nd_ns_code = 0;
- ns->nd_ns_cksum = 0;
- ns->nd_ns_reserved = 0;
- ns->nd_ns_target = testaddr->sin6_addr;
- packetlen += sizeof (struct nd_neighbor_solicit);
- cc = sendto(s, (char *)outpack, packetlen, 0,
- (struct sockaddr *)solicited_mc, sizeof (*solicited_mc));
- if (cc < 0 || cc != packetlen) {
- char abuf[INET6_ADDRSTRLEN];
-
- if (cc < 0) {
- logperror("DAD sendto");
- return (-1);
- }
- if (debug & D_DAD) {
- (void) inet_ntop(solicited_mc->sin6_family,
- (void *)&solicited_mc->sin6_addr, abuf,
- sizeof (abuf));
-
- logmsg(LOG_DEBUG, "wrote %s %d chars, ret=%d\n",
- abuf, packetlen, cc);
- }
- return (-1);
- }
- if (debug & D_DAD)
- print_ns("Sent NS", phyname, ns, packetlen, solicited_mc);
-
- return (0);
-}
-
-/*
- * Return a pointer to the specified option buffer.
- * If not found return NULL.
- */
-static void *
-find_ancillary(struct msghdr *msg, int cmsg_type)
-{
- struct cmsghdr *cmsg;
-
- for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(msg, cmsg)) {
- if (cmsg->cmsg_level == IPPROTO_IPV6 &&
- cmsg->cmsg_type == cmsg_type) {
- return (CMSG_DATA(cmsg));
- }
- }
- return (NULL);
-}
-
-/*
- * Receive an ICMP packet. If the packet signals a duplicate address for
- * testaddr then return a positive non-zero number. Otherwise return zero.
- * Internal errors cause a return of -1.
- */
-static int
-recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, int ifindex)
-{
- struct sockaddr_in6 from;
- struct icmp6_hdr *icmp;
- struct nd_neighbor_solicit *ns;
- struct nd_neighbor_advert *na;
- static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
- static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
- int len;
- char abuf[INET6_ADDRSTRLEN];
- struct msghdr msg;
- struct iovec iov;
- uchar_t *opt;
- uint_t hoplimit;
- struct in6_addr dst;
- int rcv_ifindex;
-
- iov.iov_base = (char *)in_packet;
- iov.iov_len = sizeof (in_packet);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (struct sockaddr *)&from;
- msg.msg_namelen = sizeof (from);
- msg.msg_control = ancillary_data;
- msg.msg_controllen = sizeof (ancillary_data);
-
- if ((len = recvmsg(s, &msg, 0)) < 0) {
- logperror("DAD recvmsg");
- return (-1);
- }
- if (len == 0)
- return (0);
-
- if (debug & D_DAD) {
- (void) inet_ntop(AF_INET6, (void *)&from.sin6_addr,
- abuf, sizeof (abuf));
- }
- /* Ignore packets > 64k or control buffers that don't fit */
- if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "Truncated message: msg_flags "
- "0x%x from %s\n", msg.msg_flags, abuf);
- }
- return (0);
- }
-
- icmp = (struct icmp6_hdr *)in_packet;
-
- if (len < ICMP6_MINLEN) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "Too short ICMP packet: %d bytes "
- "from %s\n", len, abuf);
- }
- return (0);
- }
-
- opt = find_ancillary(&msg, IPV6_HOPLIMIT);
- if (opt == NULL) {
- /* Unknown hoplimit - must drop */
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "Unknown hop limit from %s\n", abuf);
- }
- return (0);
- }
- hoplimit = *(uint_t *)opt;
- opt = find_ancillary(&msg, IPV6_PKTINFO);
- if (opt == NULL) {
- /* Unknown destination address - must drop */
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "Unknown destination from %s\n",
- abuf);
- }
- return (0);
- }
- dst = ((struct in6_pktinfo *)opt)->ipi6_addr;
- rcv_ifindex = ((struct in6_pktinfo *)opt)->ipi6_ifindex;
- opt = find_ancillary(&msg, IPV6_RTHDR);
- if (opt != NULL) {
- /* Can't allow routing headers in ND messages */
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG,
- "ND message with routing header from %s\n", abuf);
- }
- return (0);
- }
-
- switch (icmp->icmp6_type) {
- case ND_NEIGHBOR_SOLICIT:
- /*
- * Assumes that the kernel has verified the AH (if present)
- * and the ICMP checksum.
- */
- if (hoplimit != IPV6_MAX_HOPS) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NS hop limit: %d from %s\n",
- hoplimit, abuf);
- }
- return (0);
- }
-
- if (icmp->icmp6_code != 0) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NS code: %d from %s\n",
- icmp->icmp6_code, abuf);
- }
- return (0);
- }
-
- if (len < sizeof (struct nd_neighbor_solicit)) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NS too short: %d bytes "
- "from %s\n", len, abuf);
- }
- return (0);
- }
- ns = (struct nd_neighbor_solicit *)icmp;
- if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target)) {
- if (debug & D_DAD) {
- char abuf2[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&ns->nd_ns_target,
- abuf2, sizeof (abuf2));
- logmsg(LOG_DEBUG, "NS with multicast target: "
- "%s from %s\n", abuf2, abuf);
- }
- return (0);
- }
-
- if (len > sizeof (struct nd_neighbor_solicit)) {
- /*
- * For DAD neighbor solicitation type message,
- * we need to further verify if SLLA option is present
- * in received options,
- * so we pass TRUE to reject_dad_slla argument.
- */
- if (!verify_opts((struct nd_opt_hdr *)&ns[1],
- len - sizeof (struct nd_neighbor_solicit),
- &from, _B_TRUE))
- return (0);
- }
-
- if (debug & D_DAD)
- print_ns("Received valid NS", phyname, ns, len, &from);
- if (!IN6_IS_ADDR_UNSPECIFIED(&from.sin6_addr) ||
- !IN6_IS_ADDR_MC_SOLICITEDNODE(&dst)) {
- /* Sender is doing address resolution */
- return (0);
- }
- if (rcv_ifindex != ifindex) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "Received Neighbor "
- "solicitation on ifindex %d, "
- "expecting on %d\n", rcv_ifindex, ifindex);
- }
- return (0);
- }
- if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr,
- &ns->nd_ns_target)) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NS - duplicate from %s\n",
- abuf);
- }
- return (1);
- }
- return (0);
-
- case ND_NEIGHBOR_ADVERT:
- /*
- * Assumes that the kernel has verified the AH (if present)
- * and the ICMP checksum.
- */
- if (hoplimit != IPV6_MAX_HOPS) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NA hop limit: %d from %s\n",
- hoplimit, abuf);
- }
- return (0);
- }
-
- if (icmp->icmp6_code != 0) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NA code: %d from %s\n",
- icmp->icmp6_code, abuf);
- }
- return (0);
- }
-
- if (len < sizeof (struct nd_neighbor_advert)) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NA too short: %d bytes "
- "from %s\n", len, abuf);
- }
- return (0);
- }
- na = (struct nd_neighbor_advert *)icmp;
- if (IN6_IS_ADDR_MULTICAST(&na->nd_na_target)) {
- if (debug & D_DAD) {
- char abuf2[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&na->nd_na_target,
- abuf2, sizeof (abuf2));
- logmsg(LOG_DEBUG, "NA with multicast target: "
- "%s from %s\n", abuf2, abuf);
- }
- return (0);
- }
-
- if (IN6_IS_ADDR_MULTICAST(&dst) &&
- (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
- if (debug & D_DAD) {
- char abuf2[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&na->nd_na_target,
- abuf2, sizeof (abuf2));
- logmsg(LOG_DEBUG, "NA solicited w/ mc target: "
- "%s from %s\n", abuf2, abuf);
- }
- return (0);
- }
-
- if (len > sizeof (struct nd_neighbor_advert)) {
- /*
- * Since this is a Neighbor advertisement
- * we unset the reject_dad_slla flag, thus
- * there is no need to verify the SLLA options.
- */
- if (!verify_opts((struct nd_opt_hdr *)&na[1],
- len - sizeof (struct nd_neighbor_advert),
- &from, _B_FALSE))
- return (0);
- }
-
- if (debug & D_DAD)
- print_na("Received valid NA", phyname, na, len, &from);
-
- if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr,
- &na->nd_na_target)) {
- if (debug & D_DAD) {
- logmsg(LOG_DEBUG, "NA - duplicate from %s\n",
- abuf);
- }
- return (1);
- }
- return (0);
- default:
- return (0);
- }
-}
-
-/*
- * Verify that all options have a non-zero length and that
- * the options fit within the total length of the packet (optlen).
- * If reject_dad_slla is set, we also verify that no SLLA option is present
- * as mandated by section 7.1.1 of RFC 2461.
- */
-static boolean_t
-verify_opts(struct nd_opt_hdr *opt, int optlen, struct sockaddr_in6 *from,
- boolean_t reject_dad_slla)
-{
- while (optlen > 0) {
- if (opt->nd_opt_len == 0) {
- if (debug & D_DAD) {
- char abuf[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&from->sin6_addr,
- abuf, sizeof (abuf));
-
- logmsg(LOG_DEBUG, "Zero length option type "
- "0x%x from %s\n", opt->nd_opt_type, abuf);
- }
- return (_B_FALSE);
- }
- optlen -= 8 * opt->nd_opt_len;
- if (optlen < 0) {
- if (debug & D_DAD) {
- char abuf[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&from->sin6_addr,
- abuf, sizeof (abuf));
-
- logmsg(LOG_DEBUG, "Too large option: type "
- "0x%x len %u from %s\n",
- opt->nd_opt_type, opt->nd_opt_len, abuf);
- }
- return (_B_FALSE);
- }
- if (reject_dad_slla &&
- opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) {
- return (_B_FALSE);
- }
- opt = (struct nd_opt_hdr *)((char *)opt +
- 8 * opt->nd_opt_len);
- }
- return (_B_TRUE);
-}
-
-
-static void
-dad_failed(char *phyname, struct sockaddr_in6 *testaddr, int code)
-{
- char abuf[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(testaddr->sin6_family,
- (void *)&testaddr->sin6_addr,
- abuf, sizeof (abuf));
- logmsg(LOG_CRIT, "Duplicate address detected on link %s for address "
- "%s. Code %d\n", phyname, abuf, code);
-}
-
-/* Printing functions */
-
-static void
-print_ns(char *str, char *phyname,
- struct nd_neighbor_solicit *ns, int len, struct sockaddr_in6 *addr)
-{
- struct nd_opt_hdr *opt;
- char abuf[INET6_ADDRSTRLEN];
-
- logmsg(LOG_DEBUG, "%s %s (%d bytes) on %s\n", str,
- inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr,
- abuf, sizeof (abuf)),
- len, phyname);
- logmsg(LOG_DEBUG, "\ttarget %s\n",
- inet_ntop(addr->sin6_family, (void *)&ns->nd_ns_target,
- abuf, sizeof (abuf)));
- len -= sizeof (*ns);
- opt = (struct nd_opt_hdr *)&ns[1];
- print_opt(opt, len);
-}
-
-static void
-print_na(char *str, char *phyname,
- struct nd_neighbor_advert *na, int len, struct sockaddr_in6 *addr)
-{
- struct nd_opt_hdr *opt;
- char abuf[INET6_ADDRSTRLEN];
-
- logmsg(LOG_DEBUG, "%s %s (%d bytes) on %s\n", str,
- inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr,
- abuf, sizeof (abuf)),
- len, phyname);
- logmsg(LOG_DEBUG, "\ttarget %s\n",
- inet_ntop(addr->sin6_family, (void *)&na->nd_na_target,
- abuf, sizeof (abuf)));
- logmsg(LOG_DEBUG, "\tRouter: %s\n",
- (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) ?
- "Set" : "Not set");
- logmsg(LOG_DEBUG, "\tSolicited: %s\n",
- (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED) ?
- "Set" : "Not set");
- logmsg(LOG_DEBUG, "\tOverride: %s\n",
- (na->nd_na_flags_reserved & ND_NA_FLAG_OVERRIDE) ?
- "Set" : "Not set");
-
- len -= sizeof (*na);
- opt = (struct nd_opt_hdr *)&na[1];
- print_opt(opt, len);
-}
-
-static void
-print_opt(struct nd_opt_hdr *opt, int len)
-{
- struct nd_opt_prefix_info *po;
- struct nd_opt_mtu *mo;
- struct nd_opt_lla *lo;
- int optlen;
- char abuf[INET6_ADDRSTRLEN];
- char llabuf[BUFSIZ];
-
- while (len >= sizeof (struct nd_opt_hdr)) {
- optlen = opt->nd_opt_len * 8;
- if (optlen == 0) {
- logmsg(LOG_DEBUG, "Zero length option!\n");
- break;
- }
- switch (opt->nd_opt_type) {
- case ND_OPT_PREFIX_INFORMATION:
- po = (struct nd_opt_prefix_info *)opt;
- if (optlen != sizeof (*po) ||
- optlen > len)
- break;
-
- logmsg(LOG_DEBUG, "\tOn link flag:%s\n",
- (po->nd_opt_pi_flags_reserved &
- ND_OPT_PI_FLAG_ONLINK) ? "Set" : "Not set");
- logmsg(LOG_DEBUG, "\tAuto addrconf flag:%s\n",
- (po->nd_opt_pi_flags_reserved &
- ND_OPT_PI_FLAG_AUTO) ? "Set" : "Not set");
- logmsg(LOG_DEBUG, "\tValid time: %u\n",
- ntohl(po->nd_opt_pi_valid_time));
- logmsg(LOG_DEBUG, "\tPreferred time: %u\n",
- ntohl(po->nd_opt_pi_preferred_time));
- logmsg(LOG_DEBUG, "\tPrefix: %s/%u\n",
- inet_ntop(AF_INET6, (void *)&po->nd_opt_pi_prefix,
- abuf, sizeof (abuf)),
- po->nd_opt_pi_prefix_len);
- break;
- case ND_OPT_MTU:
- mo = (struct nd_opt_mtu *)opt;
- if (optlen != sizeof (*mo) ||
- optlen > len)
- break;
- logmsg(LOG_DEBUG, "\tMTU: %d\n",
- ntohl(mo->nd_opt_mtu_mtu));
- break;
- case ND_OPT_SOURCE_LINKADDR:
- lo = (struct nd_opt_lla *)opt;
- if (optlen < 8 ||
- optlen > len)
- break;
- (void) fmt_lla(llabuf, sizeof (llabuf),
- (uchar_t *)lo->nd_opt_lla_hdw_addr, optlen - 2);
- logmsg(LOG_DEBUG, "\tSource LLA: len %d <%s>\n",
- optlen-2, llabuf);
- break;
- case ND_OPT_TARGET_LINKADDR:
- lo = (struct nd_opt_lla *)opt;
- if (optlen < 8||
- optlen > len)
- break;
- (void) fmt_lla(llabuf, sizeof (llabuf),
- (uchar_t *)lo->nd_opt_lla_hdw_addr, optlen - 2);
- logmsg(LOG_DEBUG, "\tTarget LLA: len %d <%s>\n",
- optlen-2, llabuf);
- break;
- case ND_OPT_REDIRECTED_HEADER:
- logmsg(LOG_DEBUG, "\tRedirected header option!\n");
- break;
- default:
- logmsg(LOG_DEBUG, "Unkown option %d (0x%x)\n",
- opt->nd_opt_type, opt->nd_opt_type);
- break;
- }
- opt = (struct nd_opt_hdr *)((char *)opt + optlen);
- len -= optlen;
- }
-}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
index 2ee75f5751..1cfbafa911 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c
@@ -533,6 +533,53 @@ if_process(int s, char *ifname, boolean_t first)
/* Detect prefixes which are removed */
if (pr->pr_kernel_state != 0)
pr->pr_in_use = _B_TRUE;
+
+ if ((lifr.lifr_flags & IFF_DUPLICATE) &&
+ (pr->pr_flags & IFF_TEMPORARY)) {
+ in6_addr_t *token;
+ int i;
+ char abuf[INET6_ADDRSTRLEN];
+
+ if (++pr->pr_attempts >= MAX_DAD_FAILURES) {
+ logmsg(LOG_ERR, "%s: token %s is duplicate after %d "
+ "attempts; disabling temporary addresses on %s",
+ pr->pr_name, inet_ntop(AF_INET6,
+ (void *)&pi->pi_tmp_token, abuf, sizeof (abuf)),
+ pr->pr_attempts, pi->pi_name);
+ pi->pi_TmpAddrsEnabled = 0;
+ tmptoken_delete(pi);
+ prefix_delete(pr);
+ return;
+ }
+ logmsg(LOG_WARNING, "%s: token %s is duplicate; trying again",
+ pr->pr_name, inet_ntop(AF_INET6, (void *)&pi->pi_tmp_token,
+ abuf, sizeof (abuf)));
+ if (!tmptoken_create(pi)) {
+ prefix_delete(pr);
+ return;
+ }
+ token = &pi->pi_tmp_token;
+ for (i = 0; i < 16; i++) {
+ /*
+ * prefix_create ensures that pr_prefix has all-zero
+ * bits after prefixlen.
+ */
+ pr->pr_address.s6_addr[i] = pr->pr_prefix.s6_addr[i] |
+ token->s6_addr[i];
+ }
+ if (prefix_lookup_addr_match(pr) != NULL) {
+ prefix_delete(pr);
+ return;
+ }
+ pr->pr_CreateTime = getcurrenttime() / MILLISEC;
+ /*
+ * We've got a new token. Clearing PR_AUTO causes
+ * prefix_update_k to bring the interface up and set the
+ * address.
+ */
+ pr->pr_kernel_state &= ~PR_AUTO;
+ prefix_update_k(pr);
+ }
}
static int ifsock = -1;
@@ -1464,7 +1511,7 @@ setup_rtsock(void)
/*
* Retrieve one routing socket message. If RTM_IFINFO indicates
* new phyint do a full scan of the interfaces. If RTM_IFINFO
- * indicates an existing phyint only scan that phyint and asociated
+ * indicates an existing phyint, only scan that phyint and associated
* prefixes.
*/
static void
@@ -1963,7 +2010,7 @@ fprintdate(FILE *file)
(void) fprintf(file, "%s ", buf);
}
-/* PRINTFLIKE1 */
+/* PRINTFLIKE2 */
void
logmsg(int level, const char *fmt, ...)
{
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
index 9146f8e97e..e02c12ff8c 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c
@@ -702,7 +702,7 @@ incoming_prefix_addrconf_process(struct phyint *pi, struct prefix *pr,
char pbuf[INET6_ADDRSTRLEN];
uint32_t validtime, preftime; /* In seconds */
uint32_t recorded_validtime; /* In seconds */
- int plen, dadfails = 0;
+ int plen;
struct prefix *other_pr;
validtime = ntohl(po->nd_opt_pi_valid_time);
@@ -825,7 +825,6 @@ incoming_prefix_addrconf_process(struct phyint *pi, struct prefix *pr,
* Form a new local address if the lengths match.
*/
if (pr->pr_flags && IFF_TEMPORARY) {
-RETRY_TOKEN:
if (IN6_IS_ADDR_UNSPECIFIED(&pi->pi_tmp_token)) {
if (!tmptoken_create(pi)) {
prefix_delete(pr);
@@ -885,29 +884,6 @@ RETRY_TOKEN:
validtime = preftime = 0;
}
if ((pr->pr_flags & IFF_TEMPORARY) && new_prefix) {
- struct sockaddr_in6 sin6;
- sin6.sin6_family = AF_INET6;
- sin6.sin6_addr = pr->pr_address;
- if (do_dad(pi->pi_name, &sin6) != 0) {
- /* DAD failed, need a new token */
- dadfails++;
- logmsg(LOG_WARNING,
- "incoming_prefix_addrconf_process: "
- "deprecating temporary token %s\n",
- inet_ntop(AF_INET6,
- (void *)&pi->pi_tmp_token, abuf,
- sizeof (abuf)));
- tmptoken_delete(pi);
- if (dadfails == MAX_DAD_FAILURES) {
- logmsg(LOG_ERR, "Too many DAD "
- "failures; disabling temporary "
- "addresses on %s\n", pi->pi_name);
- pi->pi_TmpAddrsEnabled = 0;
- prefix_delete(pr);
- return (_B_TRUE);
- }
- goto RETRY_TOKEN;
- }
pr->pr_CreateTime = getcurrenttime() / MILLISEC;
if (debug & D_TMP)
logmsg(LOG_DEBUG,
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
index b37ddade42..b77bd8cafd 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c
@@ -29,7 +29,6 @@
#include "tables.h"
#include <time.h>
-#include <inet/ip6.h>
struct phyint *phyints = NULL;
@@ -230,12 +229,9 @@ start_over:
pi->pi_flags = lifr.lifr_flags;
/*
- * If the link local interface is not up yet or it's IFF_UP
- * and the flag is set to IFF_NOLOCAL as Duplicate Address
- * Detection is in progress.
- * IFF_NOLOCAL is "normal" on other prefixes.
+ * If the link local interface is not up yet or it's IFF_UP and the
+ * IFF_NOLOCAL flag is set, then ignore the interface.
*/
-
if (!(pi->pi_flags & IFF_UP) || (pi->pi_flags & IFF_NOLOCAL)) {
if (newsock) {
(void) close(pi->pi_sock);
@@ -243,7 +239,7 @@ start_over:
}
if (debug & D_PHYINT) {
logmsg(LOG_DEBUG, "phyint_init_from_k(%s): "
- "not IFF_UP\n", pi->pi_name);
+ "IFF_NOLOCAL or not IFF_UP\n", pi->pi_name);
}
return (0);
}
diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
index 20caf691c6..6bdfee9f80 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h
@@ -203,6 +203,7 @@ struct prefix {
uint_t pr_CreateTime; /* tmpaddr creation time */
/* in SECONDS */
+ uint_t pr_attempts; /* attempts to configure */
};
/*
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/arp.c b/usr/src/cmd/cmd-inet/usr.sbin/arp.c
index c028eaf77d..3ddc48405d 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/arp.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/arp.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -55,7 +55,6 @@
#include <netdb.h>
#include <net/if.h>
#include <net/if_arp.h>
-#include <netinet/if_ether.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
@@ -151,14 +150,15 @@ main(int argc, char *argv[])
/*
* Process a file to set standard arp entries
*/
-static int file(char *name)
+static int
+file(char *name)
{
/*
* A line of input can be:
- * <hostname> <macaddr> ["temp"] ["pub"] ["trail"]
+ * <hostname> <macaddr> ["temp"] ["pub"] ["trail"] ["permanent"]
*/
#define MAX_LINE_LEN (MAXHOSTNAMELEN + \
- sizeof (" xx:xx:xx:xx:xx:xx temp pub trail\n"))
+ sizeof (" xx:xx:xx:xx:xx:xx temp pub trail permanent\n"))
#define MIN_ARGS 2
#define MAX_ARGS 5
@@ -214,7 +214,8 @@ static int file(char *name)
/*
* Set an individual arp entry
*/
-static int set(int argc, char *argv[])
+static int
+set(int argc, char *argv[])
{
struct xarpreq ar;
struct hostent *hp;
@@ -255,15 +256,28 @@ static int set(int argc, char *argv[])
ar.xarp_ha.sdl_family = AF_LINK;
ar.xarp_flags = ATF_PERM;
while (argc-- > 0) {
- if (strncmp(argv[0], "temp", 4) == 0)
+ if (strncmp(argv[0], "temp", 4) == 0) {
ar.xarp_flags &= ~ATF_PERM;
- if (strncmp(argv[0], "pub", 3) == 0)
+ } else if (strncmp(argv[0], "pub", 3) == 0) {
ar.xarp_flags |= ATF_PUBL;
- if (strncmp(argv[0], "trail", 5) == 0)
+ } else if (strncmp(argv[0], "trail", 5) == 0) {
ar.xarp_flags |= ATF_USETRAILERS;
+ } else if (strcmp(argv[0], "permanent") == 0) {
+ ar.xarp_flags |= ATF_AUTHORITY;
+ } else {
+ (void) fprintf(stderr,
+ "arp: unknown keyword '%s'\n", argv[0]);
+ return (1);
+ }
argv++;
}
+ if ((ar.xarp_flags & (ATF_PERM|ATF_AUTHORITY)) == ATF_AUTHORITY) {
+ (void) fprintf(stderr, "arp: 'temp' and 'permanent' flags are "
+ "not usable together.\n");
+ return (1);
+ }
+
s = socket(AF_INET, SOCK_DGRAM, 0);
if (s < 0) {
perror("arp: socket");
@@ -277,11 +291,11 @@ static int set(int argc, char *argv[])
return (0);
}
-
/*
* Display an individual arp entry
*/
-static void get(char *host)
+static void
+get(char *host)
{
struct xarpreq ar;
struct hostent *hp;
@@ -334,19 +348,22 @@ static void get(char *host)
(void) printf("%s (%s) at (incomplete)", host,
inet_ntoa(sin->sin_addr));
}
- if (ar.xarp_flags & ATF_PERM)
- (void) printf(" permanent");
+ if (!(ar.xarp_flags & ATF_PERM))
+ (void) printf(" temp");
if (ar.xarp_flags & ATF_PUBL)
- (void) printf(" published");
+ (void) printf(" pub");
if (ar.xarp_flags & ATF_USETRAILERS)
- (void) printf(" trailers");
+ (void) printf(" trail");
+ if (ar.xarp_flags & ATF_AUTHORITY)
+ (void) printf(" permanent");
(void) printf("\n");
}
/*
* Delete an arp entry
*/
-static void delete(char *host)
+static void
+delete(char *host)
{
struct xarpreq ar;
struct hostent *hp;
@@ -385,12 +402,13 @@ static void delete(char *host)
(void) printf("%s (%s) deleted\n", host, inet_ntoa(sin->sin_addr));
}
-static void usage(void)
+static void
+usage(void)
{
(void) printf("Usage: arp hostname\n");
(void) printf(" arp -a [-n]\n");
(void) printf(" arp -d hostname\n");
(void) printf(" arp -s hostname ether_addr "
- "[temp] [pub] [trail]\n");
+ "[temp] [pub] [trail] [permanent]\n");
(void) printf(" arp -f filename\n");
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
index df8ddd5938..a008c55ac1 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -28,7 +27,7 @@
PROG = ifconfig
ROOTFS_PROG = $(PROG)
-LOCALOBJS= dupl_addr.o ifconfig.o revarp.o
+LOCALOBJS= ifconfig.o revarp.o
COMMONOBJS= compat.o
OBJS= $(LOCALOBJS) $(COMMONOBJS)
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c
deleted file mode 100644
index 3fc32c67a7..0000000000
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c
+++ /dev/null
@@ -1,911 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Perform IPv6 duplicate address detection for a given interface
- * and IPv6 address.
- */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "defs.h"
-#include "ifconfig.h"
-#include <netinet/icmp6.h>
-#include <netinet/in_systm.h> /* For IP_MAXPACKET */
-#include <netinet/ip.h> /* For IP_MAXPACKET */
-
-/* XXX extract DupAddrDetectTransmits from LNKINFO? */
-int DupAddrDetectTransmits = 1; /* XXX Make configurable? */
-int RetransTimer = ND_RETRANS_TIMER; /* Milliseconds. */
-
-#define IPV6_MAX_HOPS 255
-
-struct in6_addr all_nodes_mcast = { { 0xff, 0x2, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x1 } };
-
-static void in6_solmulti_addr(struct in6_addr *addr,
- struct in6_addr *multi);
-static int run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc, int ifindex);
-static int send_dad_probe(int s, char *phyname,
- struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc);
-static int recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr,
- int ifindex);
-static boolean_t verify_opts(struct nd_opt_hdr *opt, int optlen,
- struct sockaddr_in6 *from, boolean_t reject_dad_slla);
-static void dad_failed(char *phyname, struct sockaddr_in6 *testaddr,
- int code);
-static void print_na(char *str, char *phyname,
- struct nd_neighbor_advert *na, int len,
- struct sockaddr_in6 *addr);
-static void print_ns(char *str, char *phyname,
- struct nd_neighbor_solicit *ns, int len,
- struct sockaddr_in6 *addr);
-static void print_opt(struct nd_opt_hdr *opt, int len);
-static char *fmt_lla(char *llabuf, int bufsize, char *lla, int llalen);
-
-
-/*
- * Performing duplicate address detection.
- *
- * Returns 0 if the address is ok, 1 if there is a duplicate,
- * and -1 (with errno set) if there is some internal error.
- * As a side effect this does a syslog and a stderr printf
- * identifying any duplicate.
- * Note that the state of the interface name is unchanged.
- */
-int
-do_dad(char *ifname, struct sockaddr_in6 *testaddr)
-{
- int s;
- struct lifreq lifr;
- char *cp;
- char phyname[LIFNAMSIZ];
- int ifindex;
- int64_t saved_flags;
- int ret = -1; /* Assume error by default */
- struct sockaddr_in6 solicited_mc;
-
- /*
- * Truncate name at ':'. Needed for SIOCGLIFLNKINFO
- * Keep untruncated ifname for other use.
- */
- (void) strncpy(phyname, ifname, sizeof (phyname));
- cp = strchr(phyname, ':');
- if (cp != NULL)
- *cp = '\0';
-
- /*
- * Get a socket to use to send and receive neighbor solicitations
- * for DAD. Also used for ioctls below.
- */
- if ((s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0) {
- Perror0("socket");
- return (-1);
- }
-
- /*
- * Determine interface index (for IPV6_BOUND_PIF) and
- * save the flag values so they can be restored on return.
- */
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) < 0) {
- Perror0("do_dad: SIOCGLIFINDEX");
- goto done;
- }
- ifindex = lifr.lifr_index;
- if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
- Perror0("do_dad: SIOCGLIFFLAGS");
- goto done;
- }
- saved_flags = lifr.lifr_flags;
- if (!(saved_flags & IFF_MULTICAST)) {
- /* Not possible to do DAD. Pretend it is ok */
- ret = 0;
- goto done;
- }
- (void) strncpy(lifr.lifr_name, phyname, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCGLIFLNKINFO, (caddr_t)&lifr) < 0) {
- Perror0("do_dad: SIOCGLIFLNKINFO");
- goto done;
- }
- if (lifr.lifr_ifinfo.lir_reachretrans != 0) {
- RetransTimer = lifr.lifr_ifinfo.lir_reachretrans;
- }
-
- /*
- * Set NOLOCAL and UP flags.
- * This prevents the use of the interface except when the user binds
- * to unspecified IPv6 address, and sends to a link local multicast
- * address.
- */
- lifr.lifr_flags = saved_flags | IFF_NOLOCAL | IFF_UP;
-
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
- Perror0("do_dad: SIOCSLIFFLAGS");
- goto restore;
- }
-
- /*
- * IPV6_BOUND_PIF prevents load spreading to happen. If we
- * just do IPV6_BOUND_IF, the packet can go out on a different
- * interface other than "ifindex", if interface is part of
- * a group. In that case, we will get back the copy of NS that
- * we sent and think it is a duplicate(Switch loops back the
- * copy on all interfaces other than the one we sent the packet on).
- */
- if (setsockopt(s, IPPROTO_IPV6, IPV6_BOUND_PIF, (char *)&ifindex,
- sizeof (ifindex)) < 0) {
- Perror0("IPV6_BOUND_PIF");
- goto restore;
- }
-
- {
- int hops = IPV6_MAX_HOPS;
- int on = 1;
- int off = 0;
-
- if (debug > 1)
- off = 1; /* Force duplicate */
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
- (char *)&hops, sizeof (hops)) < 0) {
- Perror0("IPV6_MULTICAST_HOPS");
- goto restore;
- }
- if (setsockopt(s, IPPROTO_IPV6, IPV6_UNSPEC_SRC,
- (char *)&on, sizeof (on)) < 0) {
- Perror0("IPV6_UNSPEC_SRC");
- goto restore;
- }
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
- (char *)&off, sizeof (off)) < 0) {
- Perror0("IPV6_MULTICAST_LOOP");
- goto restore;
- }
-
- /* Enable receipt of ancillary data */
- if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
- (char *)&on, sizeof (on)) < 0) {
- Perror0("IPV6_RECVHOPLIMIT");
- goto restore;
- }
- if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
- (char *)&on, sizeof (on)) < 0) {
- Perror0("IPV6_RECVPKTINFO");
- goto restore;
- }
- if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVRTHDR,
- (char *)&on, sizeof (on)) < 0) {
- Perror0("IPV6_RECVRTHDR");
- goto restore;
- }
- }
-
- /*
- * Extract the address and determine the solicited node multicast
- * address to use.
- */
- (void) memset(&solicited_mc, 0, sizeof (solicited_mc));
- solicited_mc.sin6_family = AF_INET6;
- in6_solmulti_addr(&testaddr->sin6_addr, &solicited_mc.sin6_addr);
-
- /* Join the solicited node multicast address and all-nodes. */
- {
- struct ipv6_mreq v6mcastr;
-
- v6mcastr.ipv6mr_multiaddr = solicited_mc.sin6_addr;
- v6mcastr.ipv6mr_interface = ifindex;
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP,
- (char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- Perror0("IPV6_JOIN_GROUP");
- goto restore;
- }
-
- v6mcastr.ipv6mr_multiaddr = all_nodes_mcast;
- v6mcastr.ipv6mr_interface = ifindex;
-
- if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP,
- (char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- Perror0("IPV6_JOIN_GROUP");
- goto restore;
- }
- }
-
- ret = run_dad(s, phyname, testaddr, &solicited_mc, ifindex);
-
-restore:
- /* Restore flags */
- (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name));
- lifr.lifr_flags = saved_flags;
- if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
- Perror0("do_dad: SIOCSLIFFLAGS");
- ret = -1;
- goto done;
- }
-done:
- (void) close(s);
- return (ret);
-}
-
-
-/*
- * Determine the solicited node multicast address for a given address.
- */
-static void
-in6_solmulti_addr(struct in6_addr *addr, struct in6_addr *multi)
-{
- struct in6_addr solicited_prefix = {
- { 0xff, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x1, 0xFF, 0x0, 0x0, 0x0 } };
- int i;
-
- *multi = solicited_prefix;
- for (i = 13; i < 16; i++)
- multi->s6_addr[i] = addr->s6_addr[i];
-}
-
-static int
-run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc, int ifindex)
-{
- int time_left; /* In milliseconds */
- struct timeval starttime;
- struct timeval curtime;
- struct pollfd fds;
- int i;
- int ret;
-
- if (debug)
- (void) printf("run_dad(%s)\n", phyname);
-
- /*
- * Perform duplicate address detection sequence
- * 1. Send a neighbor solicitation with an unspecified source
- * address to the solicited node MC address with the testaddr
- * being the target.
- * 2. Wait for up to RetransTimer milliseconds for either a
- * neighbor advertisement (sent to all-nodes) or a DAD neighbor
- * solicitation for the testaddr.
- * 3. Perform step 1 and 2 DupAddrDetectTransmits times.
- */
-
- /* XXX perform a random delay: 0 - MAX_RTR_SOLICITATION_DELAY */
- /* XXX use poll+recv logic for the random delay */
-
- for (i = 0; i < DupAddrDetectTransmits; i++) {
- if (send_dad_probe(s, phyname, testaddr, solicited_mc) < 0)
- return (-1);
-
- /*
- * Track time to make sure total wait is RetransTimer
- * even though random packet will awake poll.
- */
- (void) gettimeofday(&starttime, NULL);
- /* CONSTCOND */
- while (1) {
- (void) gettimeofday(&curtime, NULL);
- time_left = RetransTimer -
- (curtime.tv_sec - starttime.tv_sec) * 1000 -
- (curtime.tv_usec - starttime.tv_usec) / 1000;
-
- if (debug) {
- (void) printf("run_dad: time_left %d ms\n",
- time_left);
- }
- if (time_left <= 0) {
- if (debug)
- (void) printf("run_dad: timeout\n");
- break;
- }
- fds.fd = s;
- fds.events = POLLIN;
-
- switch (poll(&fds, 1, time_left)) {
- case -1:
- Perror0("poll");
- return (-1);
- case 0:
- /* Need loop will break */
- break;
- default:
- /* Huh? */
- (void) fprintf(stderr, "poll returns > 1!\n");
- return (-1);
- case 1:
- if (fds.revents & POLLIN) {
- ret = recv_dad(s, phyname, testaddr,
- ifindex);
- if (ret < 0)
- return (-1);
- if (ret > 0) {
- dad_failed(phyname, testaddr,
- ret);
- return (1);
- }
- }
- break;
- }
- }
- }
- return (0);
-}
-
-/*
- * Send a DAD NS packet. Assumes an IPV6_UNSPEC_SRC and an IPV6_BOUND_IF
- * have been done by the caller.
- */
-static int
-send_dad_probe(int s, char *phyname, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc)
-{
- static uint64_t outpack[(IP_MAXPACKET + 1)/8];
- struct nd_neighbor_solicit *ns = (struct nd_neighbor_solicit *)outpack;
- int packetlen = 0;
- int cc;
-
- ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
- ns->nd_ns_code = 0;
- ns->nd_ns_cksum = 0;
- ns->nd_ns_reserved = 0;
- ns->nd_ns_target = testaddr->sin6_addr;
- packetlen += sizeof (struct nd_neighbor_solicit);
- cc = sendto(s, (char *)outpack, packetlen, 0,
- (struct sockaddr *)solicited_mc, sizeof (*solicited_mc));
- if (cc < 0 || cc != packetlen) {
- char abuf[INET6_ADDRSTRLEN];
-
- if (cc < 0) {
- Perror0("DAD, sendto");
- return (-1);
- }
- (void) inet_ntop(solicited_mc->sin6_family,
- (void *)&solicited_mc->sin6_addr, abuf, sizeof (abuf));
-
- (void) fprintf(stderr, "wrote %s %d chars, ret=%d\n",
- abuf, packetlen, cc);
- return (-1);
- }
- if (debug)
- print_ns("Sent NS", phyname, ns, packetlen, solicited_mc);
-
- return (0);
-}
-
-/*
- * Return a pointer to the specified option buffer.
- * If not found return NULL.
- */
-static void *
-find_ancillary(struct msghdr *msg, int cmsg_type)
-{
- struct cmsghdr *cmsg;
-
- for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(msg, cmsg)) {
- if (cmsg->cmsg_level == IPPROTO_IPV6 &&
- cmsg->cmsg_type == cmsg_type) {
- return (CMSG_DATA(cmsg));
- }
- }
- return (NULL);
-}
-
-/*
- * Receive an ICMP packet. If the packet signals a duplicate address for
- * testaddr then return a positive non-zero number. Otherwise return zero.
- * Internal errors cause a return of -1.
- */
-static int
-recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, int ifindex)
-{
- struct sockaddr_in6 from;
- struct icmp6_hdr *icmp;
- struct nd_neighbor_solicit *ns;
- struct nd_neighbor_advert *na;
- static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
- static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
- int len;
- char abuf[INET6_ADDRSTRLEN];
- struct msghdr msg;
- struct iovec iov;
- uchar_t *opt;
- uint_t hoplimit;
- struct in6_addr dst;
- int rcv_ifindex;
-
- iov.iov_base = (char *)in_packet;
- iov.iov_len = sizeof (in_packet);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (struct sockaddr *)&from;
- msg.msg_namelen = sizeof (from);
- msg.msg_control = ancillary_data;
- msg.msg_controllen = sizeof (ancillary_data);
-
- if ((len = recvmsg(s, &msg, 0)) < 0) {
- Perror0("DAD recvmsg");
- return (-1);
- }
- if (len == 0)
- return (0);
-
- if (debug) {
- (void) inet_ntop(AF_INET6, (void *)&from.sin6_addr,
- abuf, sizeof (abuf));
- }
- /* Ignore packets > 64k or control buffers that don't fit */
- if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
- if (debug) {
- (void) fprintf(stderr,
- "Truncated message: msg_flags 0x%x from %s\n",
- msg.msg_flags, abuf);
- }
- return (0);
- }
-
- icmp = (struct icmp6_hdr *)in_packet;
-
- if (len < ICMP6_MINLEN) {
- if (debug) {
- (void) fprintf(stderr,
- "Too short ICMP packet: %d bytes from %s\n",
- len, abuf);
- }
- return (0);
- }
-
- opt = find_ancillary(&msg, IPV6_HOPLIMIT);
- if (opt == NULL) {
- /* Unknown hoplimit - must drop */
- if (debug) {
- (void) fprintf(stderr,
- "Unknown hop limit from %s\n", abuf);
- }
- return (0);
- }
- hoplimit = *(uint_t *)opt;
- opt = find_ancillary(&msg, IPV6_PKTINFO);
- if (opt == NULL) {
- /* Unknown destination address - must drop */
- if (debug) {
- (void) fprintf(stderr,
- "Unknown destination from %s\n", abuf);
- }
- return (0);
- }
- dst = ((struct in6_pktinfo *)opt)->ipi6_addr;
- rcv_ifindex = ((struct in6_pktinfo *)opt)->ipi6_ifindex;
- opt = find_ancillary(&msg, IPV6_RTHDR);
- if (opt != NULL) {
- /* Can't allow routing headers in ND messages */
- if (debug) {
- (void) fprintf(stderr,
- "ND message with routing header from %s\n", abuf);
- }
- return (0);
- }
-
- switch (icmp->icmp6_type) {
- case ND_NEIGHBOR_SOLICIT:
- /*
- * Assumes that the kernel has verified the AH (if present)
- * and the ICMP checksum.
- */
- if (hoplimit != IPV6_MAX_HOPS) {
- if (debug) {
- (void) fprintf(stderr,
- "NS hop limit: %d from %s\n",
- hoplimit, abuf);
- }
- return (0);
- }
-
- if (icmp->icmp6_code != 0) {
- if (debug) {
- (void) fprintf(stderr, "NS code: %d from %s\n",
- icmp->icmp6_code, abuf);
- }
- return (0);
- }
-
- if (len < sizeof (struct nd_neighbor_solicit)) {
- if (debug) {
- (void) fprintf(stderr,
- "NS too short: %d bytes from %s\n",
- len, abuf);
- }
- return (0);
- }
- ns = (struct nd_neighbor_solicit *)icmp;
- if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target)) {
- if (debug) {
- char abuf2[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&ns->nd_ns_target,
- abuf2, sizeof (abuf2));
- (void) fprintf(stderr,
- "NS with multicast target: %s from %s\n",
- abuf2, abuf);
- }
- return (0);
- }
-
- if (len > sizeof (struct nd_neighbor_solicit)) {
- /*
- * For DAD type neighbor solicitation message,
- * we need to further verify if SLLA option is present
- * in received options,
- * so we pass TRUE to reject_dad_slla argument.
- */
- if (!verify_opts((struct nd_opt_hdr *)&ns[1],
- len - sizeof (struct nd_neighbor_solicit),
- &from, _B_TRUE))
- return (0);
- }
-
- if (debug)
- print_ns("Received valid NS", phyname, ns, len, &from);
- if (!IN6_IS_ADDR_UNSPECIFIED(&from.sin6_addr) ||
- !IN6_IS_ADDR_MC_SOLICITEDNODE(&dst)) {
- /* Sender is doing address resolution */
- return (0);
- }
- if (rcv_ifindex != ifindex) {
- if (debug) {
- (void) fprintf(stderr,
- "Received Neighbor solicitation on"
- " ifindex %d, expecting on %d\n",
- rcv_ifindex, ifindex);
- }
- return (0);
- }
- if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr,
- &ns->nd_ns_target)) {
- if (debug) {
- (void) fprintf(stderr,
- "NS - duplicate from %s\n",
- abuf);
- }
- return (1);
- }
- return (0);
-
- case ND_NEIGHBOR_ADVERT:
- /*
- * Assumes that the kernel has verified the AH (if present)
- * and the ICMP checksum.
- */
- if (hoplimit != IPV6_MAX_HOPS) {
- if (debug) {
- (void) fprintf(stderr,
- "NA hop limit: %d from %s\n",
- hoplimit, abuf);
- }
- return (0);
- }
-
- if (icmp->icmp6_code != 0) {
- if (debug) {
- (void) fprintf(stderr, "NA code: %d from %s\n",
- icmp->icmp6_code, abuf);
- }
- return (0);
- }
-
- if (len < sizeof (struct nd_neighbor_advert)) {
- if (debug) {
- (void) fprintf(stderr,
- "NA too short: %d bytes from %s\n",
- len, abuf);
- }
- return (0);
- }
- na = (struct nd_neighbor_advert *)icmp;
- if (IN6_IS_ADDR_MULTICAST(&na->nd_na_target)) {
- if (debug) {
- char abuf2[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&na->nd_na_target,
- abuf2, sizeof (abuf2));
- (void) fprintf(stderr,
- "NA with multicast target: %s from %s\n",
- abuf2, abuf);
- }
- return (0);
- }
-
- if (IN6_IS_ADDR_MULTICAST(&dst) &&
- (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
- if (debug) {
- char abuf2[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&na->nd_na_target,
- abuf2, sizeof (abuf2));
- (void) fprintf(stderr,
- "NA solicited w/ mc target: %s from %s\n",
- abuf2, abuf);
- }
- return (0);
- }
-
- if (len > sizeof (struct nd_neighbor_advert)) {
- /*
- * Since this is a Neighbor advertisement
- * we unset the reject_dad_slla flag, thus
- * there is no need to verify the SLLA options.
- */
- if (!verify_opts((struct nd_opt_hdr *)&na[1],
- len - sizeof (struct nd_neighbor_advert),
- &from, _B_FALSE))
- return (0);
- }
-
- if (debug)
- print_na("Received valid NA", phyname, na, len, &from);
-
- if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr,
- &na->nd_na_target)) {
- if (debug) {
- (void) fprintf(stderr,
- "NA - duplicate from %s\n",
- abuf);
- }
- return (1);
- }
- return (0);
- default:
- return (0);
- }
-}
-
-/*
- * Verify that all options have a non-zero length and that
- * the options fit within the total length of the packet (optlen).
- * If reject_dad_slla is set, then we also verify that no SLLA option is
- * present as mandated by section 7.1.1 of RFC 2461.
- */
-static boolean_t
-verify_opts(struct nd_opt_hdr *opt, int optlen, struct sockaddr_in6 *from,
- boolean_t reject_dad_slla)
-{
- while (optlen > 0) {
- if (opt->nd_opt_len == 0) {
- if (debug) {
- char abuf[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&from->sin6_addr,
- abuf, sizeof (abuf));
-
- (void) fprintf(stderr,
- "Zero length option type 0x%x from %s\n",
- opt->nd_opt_type, abuf);
- }
- return (_B_FALSE);
- }
- optlen -= 8 * opt->nd_opt_len;
- if (optlen < 0) {
- if (debug) {
- char abuf[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(AF_INET6,
- (void *)&from->sin6_addr,
- abuf, sizeof (abuf));
-
- (void) fprintf(stderr,
- "Too large option: type 0x%x len %u "
- "from %s\n",
- opt->nd_opt_type, opt->nd_opt_len,
- abuf);
- }
- return (_B_FALSE);
- }
- if (reject_dad_slla &&
- opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) {
- return (_B_FALSE);
- }
- opt = (struct nd_opt_hdr *)((char *)opt +
- 8 * opt->nd_opt_len);
- }
- return (_B_TRUE);
-}
-
-
-static void
-dad_failed(char *phyname, struct sockaddr_in6 *testaddr, int code)
-{
- char abuf[INET6_ADDRSTRLEN];
-
- (void) inet_ntop(testaddr->sin6_family,
- (void *)&testaddr->sin6_addr,
- abuf, sizeof (abuf));
- (void) fprintf(stderr,
- "ifconfig: "
- "Duplicate address detected on link %s for address %s. Code %d\n",
- phyname, abuf, code);
-
- openlog("ifconfig", LOG_CONS, LOG_DAEMON);
- syslog(LOG_CRIT,
- "Duplicate address detected on link %s for address %s. Code %d\n",
- phyname, abuf, code);
- closelog();
-}
-
-/* Printing functions */
-
-static void
-print_ns(char *str, char *phyname,
- struct nd_neighbor_solicit *ns, int len, struct sockaddr_in6 *addr)
-{
- struct nd_opt_hdr *opt;
- char abuf[INET6_ADDRSTRLEN];
-
- (void) printf("%s %s (%d bytes) on %s\n", str,
- inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr,
- abuf, sizeof (abuf)),
- len, phyname);
- (void) printf("\ttarget %s\n",
- inet_ntop(addr->sin6_family, (void *)&ns->nd_ns_target,
- abuf, sizeof (abuf)));
- len -= sizeof (*ns);
- opt = (struct nd_opt_hdr *)&ns[1];
- print_opt(opt, len);
-}
-
-static void
-print_na(char *str, char *phyname,
- struct nd_neighbor_advert *na, int len, struct sockaddr_in6 *addr)
-{
- struct nd_opt_hdr *opt;
- char abuf[INET6_ADDRSTRLEN];
-
- (void) printf("%s %s (%d bytes) on %s\n", str,
- inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr,
- abuf, sizeof (abuf)),
- len, phyname);
- (void) printf("\ttarget %s\n",
- inet_ntop(addr->sin6_family, (void *)&na->nd_na_target,
- abuf, sizeof (abuf)));
- (void) printf("\tRouter: %s\n",
- (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) ?
- "Set" : "Not set");
- (void) printf("\tSolicited: %s\n",
- (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED) ?
- "Set" : "Not set");
- (void) printf("\tOverride: %s\n",
- (na->nd_na_flags_reserved & ND_NA_FLAG_OVERRIDE) ?
- "Set" : "Not set");
-
- len -= sizeof (*na);
- opt = (struct nd_opt_hdr *)&na[1];
- print_opt(opt, len);
-}
-
-static void
-print_opt(struct nd_opt_hdr *opt, int len)
-{
- struct nd_opt_prefix_info *po;
- struct nd_opt_mtu *mo;
- struct nd_opt_lla *lo;
- int optlen;
- char abuf[INET6_ADDRSTRLEN];
- char llabuf[BUFSIZ];
-
- while (len >= sizeof (struct nd_opt_hdr)) {
- optlen = opt->nd_opt_len * 8;
- if (optlen == 0) {
- if (debug)
- (void) printf("Zero length option!\n");
- break;
- }
- switch (opt->nd_opt_type) {
- case ND_OPT_PREFIX_INFORMATION:
- po = (struct nd_opt_prefix_info *)opt;
- if (optlen != sizeof (*po) ||
- optlen > len)
- break;
-
- (void) printf("\tOn link flag:%s\n",
- (po->nd_opt_pi_flags_reserved &
- ND_OPT_PI_FLAG_ONLINK) ?
- "Set" : "Not set");
- (void) printf("\tAuto addrconf flag:%s\n",
- (po->nd_opt_pi_flags_reserved &
- ND_OPT_PI_FLAG_AUTO) ?
- "Set" : "Not set");
- (void) printf("\tValid time: %u\n",
- ntohl(po->nd_opt_pi_valid_time));
- (void) printf("\tPreferred time: %u\n",
- ntohl(po->nd_opt_pi_preferred_time));
- (void) printf("\tPrefix: %s/%u\n",
- inet_ntop(AF_INET6, (void *)&po->nd_opt_pi_prefix,
- abuf, sizeof (abuf)),
- po->nd_opt_pi_prefix_len);
- break;
- case ND_OPT_MTU:
- mo = (struct nd_opt_mtu *)opt;
- if (optlen != sizeof (*mo) ||
- optlen > len)
- break;
- (void) printf("\tMTU: %d\n",
- ntohl(mo->nd_opt_mtu_mtu));
- break;
- case ND_OPT_SOURCE_LINKADDR:
- lo = (struct nd_opt_lla *)opt;
- if (optlen < 8 ||
- optlen > len)
- break;
- (void) fmt_lla(llabuf, sizeof (llabuf),
- (char *)lo->nd_opt_lla_hdw_addr, optlen - 2);
- (void) printf("\tSource LLA: len %d <%s>\n",
- optlen-2, llabuf);
- break;
- case ND_OPT_TARGET_LINKADDR:
- lo = (struct nd_opt_lla *)opt;
- if (optlen < 8||
- optlen > len)
- break;
- (void) fmt_lla(llabuf, sizeof (llabuf),
- (char *)lo->nd_opt_lla_hdw_addr, optlen - 2);
- (void) printf("\tTarget LLA: len %d <%s>\n",
- optlen-2, llabuf);
- break;
- case ND_OPT_REDIRECTED_HEADER:
- (void) printf("\tRedirected header option!\n");
- break;
- default:
- (void) printf("Unkown option %d (0x%x)\n",
- opt->nd_opt_type, opt->nd_opt_type);
- break;
- }
- opt = (struct nd_opt_hdr *)((char *)opt + optlen);
- len -= optlen;
- }
-}
-
-static char *
-fmt_lla(char *llabuf, int bufsize, char *lla, int llalen)
-{
- int i;
- char *cp = llabuf;
-
- for (i = 0; i < llalen; i++) {
- if (i == llalen - 1)
- (void) snprintf(cp, bufsize, "%02x", lla[i] & 0xFF);
- else
- (void) snprintf(cp, bufsize, "%02x:", lla[i] & 0xFF);
- bufsize -= strlen(cp);
- cp += strlen(cp);
- }
- return (llabuf);
-}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
index 69d25bbdc3..f71d1439f1 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c
@@ -70,7 +70,8 @@ static if_flags_t if_flags_tbl[] = {
{ IFF_PREFERRED, "PREFERRED" },
{ IFF_TEMPORARY, "TEMPORARY" },
{ IFF_FIXEDMTU, "FIXEDMTU" },
- { IFF_VIRTUAL, "VIRTUAL"}
+ { IFF_VIRTUAL, "VIRTUAL" },
+ { IFF_DUPLICATE, "DUPLICATE" }
};
static struct lifreq lifr;
@@ -172,7 +173,6 @@ static void foreachinterface(void (*func)(), int argc, char *argv[],
int af, int64_t onflags, int64_t offflags,
int64_t lifc_flags);
static void ifconfig(int argc, char *argv[], int af, struct lifreq *lifrp);
-static int ifdad(char *ifname, struct sockaddr_in6 *laddr);
static boolean_t in_getmask(struct sockaddr_in *saddr,
boolean_t addr_set);
static int in_getprefixlen(char *addr, boolean_t slash, int plen);
@@ -1006,25 +1006,6 @@ setifaddr(char *addr, int64_t param)
sav_netmask = lifr.lifr_addr;
/*
- * Catch set of address for AF_INET6 to perform
- * duplicate address detection. Check that the interface is
- * up.
- */
- if (afp->af_af == AF_INET6) {
- if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
- Perror0_exit("ifsetaddr: SIOCGLIFFLAGS");
- }
- if (lifr.lifr_flags & IFF_UP) {
- if (debug)
- (void) printf(
- "setifaddr: Calling ifdad flags %llx\n",
- lifr.lifr_flags);
- if (ifdad(name, (struct sockaddr_in6 *)&laddr) == -1)
- exit(3);
- }
- }
-
- /*
* If setting the address and not the mask, clear any existing mask
* and the kernel will then assign the default (netmask has been set
* to 0 in this case). If setting both (either by using a prefix or
@@ -1486,25 +1467,25 @@ setifflags(char *val, int64_t value)
}
}
- /*
- * Catch "up" transition for AF_INET6 to perform duplicate address
- * detection. ifdad checks if an address has been set.
- */
- if (afp->af_af == AF_INET6 && !(lifr.lifr_flags & IFF_UP) &&
- value == IFF_UP) {
- if (debug)
- (void) printf(
- "setifaddr:Calling ifdad flags %llx value 0x%llx\n",
- lifr.lifr_flags, value);
- if (ifdad(name, NULL) == -1)
- exit(1);
- }
-
if (value < 0) {
value = -value;
lifr.lifr_flags &= ~value;
- } else
+ if ((value & IFF_UP) && (lifr.lifr_flags & IFF_DUPLICATE)) {
+ /*
+ * If the user is trying to mark an interface with a
+ * duplicate address as "down," then fetch the address
+ * and set it. This will cause IP to clear the
+ * IFF_DUPLICATE flag and stop the automatic recovery
+ * timer.
+ */
+ value = lifr.lifr_flags;
+ if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) >= 0)
+ (void) ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr);
+ lifr.lifr_flags = value;
+ }
+ } else {
lifr.lifr_flags |= value;
+ }
(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
Perror0_exit("setifflags: SIOCSLIFFLAGS");
@@ -1958,54 +1939,6 @@ removeif(char *str, int64_t param)
}
/*
- * If laddr is non-NULL it is used - otherwise we use the address on
- * the interface.
- */
-/* ARGSUSED */
-static int
-ifdad(char *ifname, struct sockaddr_in6 *laddr)
-{
- struct sockaddr_in6 testaddr;
- struct lifreq lifr2; /* Avoid overriting lifr */
-
- if (debug)
- (void) printf("ifdad(%s)\n", ifname);
-
- assert(afp->af_af == AF_INET6);
-
- /*
- * Check the address assigned to the interface.
- * Skip the check if IFF_NOLOCAL, IFF_NONUD, IFF_ANYCAST, or
- * IFF_LOOPBACK.
- * Note that IFF_NONUD turns of both NUD and DAD.
- */
- (void) strncpy(lifr2.lifr_name, ifname,
- sizeof (lifr2.lifr_name));
- if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr2) < 0) {
- Perror0_exit("ifdad: SIOCGLIFFLAGS");
- }
- if (lifr2.lifr_flags & (IFF_NOLOCAL|IFF_LOOPBACK|IFF_NONUD|IFF_ANYCAST))
- return (0);
-
- if (laddr != NULL) {
- testaddr = *laddr;
- } else {
- if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr2) < 0) {
- Perror0_exit("ifdad: SIOCGLIFADDR");
- }
- testaddr = *(struct sockaddr_in6 *)&lifr2.lifr_addr;
- }
-
- if (IN6_IS_ADDR_UNSPECIFIED(&testaddr.sin6_addr))
- return (0);
-
- if (do_dad(name, &testaddr) != 0)
- return (-1);
- else
- return (0);
-}
-
-/*
* Set the address token for IPv6.
*/
/* ARGSUSED */
@@ -4134,6 +4067,7 @@ Perror2_exit(char *cmd, char *str)
static void
in_getaddr(char *s, struct sockaddr *saddr, int *plenp)
{
+ /* LINTED: alignment */
struct sockaddr_in *sin = (struct sockaddr_in *)saddr;
struct hostent *hp;
struct netent *np;
@@ -4202,6 +4136,7 @@ in_getaddr(char *s, struct sockaddr *saddr, int *plenp)
static void
in6_getaddr(char *s, struct sockaddr *saddr, int *plenp)
{
+ /* LINTED: alignment */
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)saddr;
struct hostent *hp;
char str[BUFSIZ];
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
index 9b96dde475..be5990ce13 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
@@ -27,8 +27,10 @@ extern "C" {
#define MAX_MODS 9 /* max modules that can be pushed on intr */
+/* No suitable header file defines this, though it's in libsocket */
+extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
+
extern int debug;
-extern uid_t euid;
extern void Perror0(char *);
extern void Perror0_exit(char *);
@@ -36,13 +38,10 @@ extern void Perror2(char *, char *);
extern void Perror2_exit(char *, char *);
extern int doifrevarp(char *, struct sockaddr_in *);
-extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
extern int dlpi_set_address(char *, uchar_t *, int);
extern void dlpi_print_address(char *);
-extern int do_dad(char *, struct sockaddr_in6 *);
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common
index e91e250f6b..93bd2dc6c5 100644
--- a/usr/src/cmd/mdb/Makefile.common
+++ b/usr/src/cmd/mdb/Makefile.common
@@ -50,6 +50,7 @@ COMMON_MODULES_PROC_32BIT = \
# MDB modules used for debugging kernels.
#
COMMON_MODULES_KVM = \
+ arp \
audiosup \
cpc \
crypto \
diff --git a/usr/src/cmd/mdb/Makefile.kmdb b/usr/src/cmd/mdb/Makefile.kmdb
index 7e086edefb..4b835e1047 100644
--- a/usr/src/cmd/mdb/Makefile.kmdb
+++ b/usr/src/cmd/mdb/Makefile.kmdb
@@ -117,6 +117,8 @@ $(VERSTGTS) := CPPFLAGS += -DKMDB_VERSION='$(KMDB_VERSION)'
$(KCTLTGTS) := CPPFLAGS += -D_KERNEL
$(KCTLTGTS) := ASFLAGS += -D_KERNEL
+ffs.o ffs.ln := CPPFLAGS += -Dffs=mdb_ffs
+
INS.dir.root.sys= $(INS) -s -d -m $(DIRMODE) $@
$(CH)INS.dir.root.sys= $(INS) -s -d -m $(DIRMODE) -u root -g sys $@
diff --git a/usr/src/cmd/mdb/Makefile.kmdb.files b/usr/src/cmd/mdb/Makefile.kmdb.files
index 4f78efd06c..cd8ff1a89f 100644
--- a/usr/src/cmd/mdb/Makefile.kmdb.files
+++ b/usr/src/cmd/mdb/Makefile.kmdb.files
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,13 +19,14 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
#
KMDBSRCS += \
+ ffs.c \
kaif_start.c \
mdb.c \
mdb_addrvec.c \
diff --git a/usr/src/cmd/mdb/Makefile.kmdb.targ b/usr/src/cmd/mdb/Makefile.kmdb.targ
index b5181517af..ba95f41bdc 100644
--- a/usr/src/cmd/mdb/Makefile.kmdb.targ
+++ b/usr/src/cmd/mdb/Makefile.kmdb.targ
@@ -134,6 +134,10 @@ kmdb_modlinktest.c: $(MAPFILE_INTERMEDIATE)
$(COMPILE.c) $<
$(CTFCONVERT_O)
+%.o: $(SRC)/common/util/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
#
# Lint
#
@@ -176,3 +180,6 @@ kmdb_modlinktest.c: $(MAPFILE_INTERMEDIATE)
%.ln: $(SRC)/common/net/util/%.c
$(LINT.c) -c $<
+
+%.ln: $(SRC)/common/util/%.c
+ $(LINT.c) -c $<
diff --git a/usr/src/cmd/mdb/Makefile.mdb b/usr/src/cmd/mdb/Makefile.mdb
index 6757821ff2..81cb2dd9b5 100644
--- a/usr/src/cmd/mdb/Makefile.mdb
+++ b/usr/src/cmd/mdb/Makefile.mdb
@@ -28,7 +28,9 @@
.KEEP_STATE:
.SUFFIXES:
-SRCS += mdb.c \
+SRCS += \
+ ffs.c \
+ mdb.c \
mdb_addrvec.c \
mdb_argvec.c \
mdb_callb.c \
@@ -130,6 +132,8 @@ mdb_conf.o := CPPFLAGS += -DMDB_VERSION='$(MDB_VERSION)'
inet_ntop.o := CPPFLAGS += -Dsnprintf=mdb_snprintf
+ffs.o ffs.ln := CPPFLAGS += -Dffs=mdb_ffs
+
%.o: ../../../common/mdb/%.c
$(COMPILE.c) $<
$(CTFCONVERT_O)
@@ -146,6 +150,10 @@ inet_ntop.o := CPPFLAGS += -Dsnprintf=mdb_snprintf
$(COMPILE.c) $<
$(CTFCONVERT_O)
+%.o: $(SRC)/common/util/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
clean.lint:
$(RM) $(LINTFILES)
@@ -168,6 +176,9 @@ clobber: clean clean.lint
%.ln: $(SRC)/common/net/util/%.c
$(LINT.c) -c $<
+%.ln: $(SRC)/common/util/%.c
+ $(LINT.c) -c $<
+
lint: $(LINTFILES)
$(LINT) $(LINTFLAGS) $(LINTFILES)
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_ks.h b/usr/src/cmd/mdb/common/mdb/mdb_ks.h
index f543a0cee3..bb3038dcd8 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_ks.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_ks.h
@@ -129,6 +129,14 @@ extern uintptr_t mdb_qwnext(const queue_t *);
extern uintptr_t mdb_qrnext_default(const queue_t *);
extern uintptr_t mdb_qwnext_default(const queue_t *);
+extern int mdb_mblk_count(const mblk_t *);
+
+/* DLPI primitive to string; returns NULL for unknown primitives */
+extern const char *mdb_dlpi_prim(int);
+
+/* Generic function for working with MAC (network layer 2) addresses. */
+extern void mdb_mac_addr(const uint8_t *, size_t, char *, size_t);
+
/*
* Target-specific interfaces
*
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
index c6573f2afb..470196de93 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
@@ -235,6 +235,8 @@ extern void mdb_printf(const char *, ...);
extern void mdb_warn(const char *, ...);
extern void mdb_flush(void);
+extern int mdb_ffs(uintmax_t);
+
extern void mdb_nhconvert(void *, const void *, size_t);
#define MDB_DUMP_RELATIVE 0x0001 /* Start numbering at 0 */
diff --git a/usr/src/cmd/mdb/common/modules/arp/arp.c b/usr/src/cmd/mdb/common/modules/arp/arp.c
new file mode 100644
index 0000000000..06bd8887e0
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/arp/arp.c
@@ -0,0 +1,700 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/dlpi.h>
+#include <inet/led.h>
+#include <inet/common.h>
+#include <inet/mi.h>
+#include <inet/arp.h>
+#include <inet/arp_impl.h>
+#include <inet/ip.h>
+#include <netinet/arp.h>
+
+#include <mdb/mdb_modapi.h>
+#include <mdb/mdb_ks.h>
+
+typedef struct { /* one row of the act_list command table */
+ uint32_t act_cmd; /* AR_* command code */
+ char *act_name; /* printable name of the command */
+ char *act_type; /* structure type for the command, written "arp`type" */
+} arp_cmd_tbl;
+
+/*
+ * Table of ARP commands and structure types used for messages between ARP and
+ * IP.  Each row pairs an AR_* command code with its printable name and the
+ * name of the structure type listed for that command.  The last row has
+ * command code 0 and the name "unknown command"; presumably it is the
+ * fallback for codes not found in the table -- TODO confirm against the
+ * lookup code.
+ */
+static const arp_cmd_tbl act_list[] = {
+ { AR_ENTRY_ADD, "AR_ENTRY_ADD", "arp`area_t" },
+ { AR_ENTRY_DELETE, "AR_ENTRY_DELETE", "arp`ared_t" },
+ { AR_ENTRY_QUERY, "AR_ENTRY_QUERY", "arp`areq_t" },
+ { AR_ENTRY_SQUERY, "AR_ENTRY_SQUERY", "arp`area_t" },
+ { AR_MAPPING_ADD, "AR_MAPPING_ADD", "arp`arma_t" },
+ { AR_CLIENT_NOTIFY, "AR_CLIENT_NOTIFY", "arp`arcn_t" },
+ { AR_INTERFACE_UP, "AR_INTERFACE_UP", "arp`arc_t" },
+ { AR_INTERFACE_DOWN, "AR_INTERFACE_DOWN", "arp`arc_t" },
+ { AR_INTERFACE_ON, "AR_INTERFACE_ON", "arp`arc_t" },
+ { AR_INTERFACE_OFF, "AR_INTERFACE_OFF", "arp`arc_t" },
+ { AR_DLPIOP_DONE, "AR_DLPIOP_DONE", "arp`arc_t" },
+ { AR_ARP_CLOSING, "AR_ARP_CLOSING", "arp`arc_t" },
+ { AR_ARP_EXTEND, "AR_ARP_EXTEND", "arp`arc_t" },
+ { 0, "unknown command", "arp`arc_t" }
+};
+
+/*
+ * State information kept during walk over ACE hash table and unhashed mask
+ * list.  ace_walk_init() fills awd_hash_tbl from the 'ar_ce_hash_tbl' symbol
+ * and awd_masks from the 'ar_ce_mask_entries' variable, and starts awd_idx
+ * at -1.  ace_walk_step() advances awd_idx through the hash buckets; when it
+ * reaches ARP_HASH_SIZE the awd_masks list is walked, after which the walk
+ * is done.
+ */
+typedef struct ace_walk_data {
+ ace_t *awd_hash_tbl[ARP_HASH_SIZE]; /* bucket heads read at walk init */
+ ace_t *awd_masks; /* head of the mask entry list */
+ int awd_idx; /* bucket in progress; ARP_HASH_SIZE == mask list */
+} ace_walk_data_t;
+
+/*
+ * Begin a walk of arl_t structures.  A starting address supplied by the
+ * caller is used as-is; otherwise the walk starts at the value of the
+ * 'arl_g_head' variable.  Returns WALK_ERR if that variable cannot be read.
+ */
+static int
+arl_walk_init(mdb_walk_state_t *wsp)
+{
+	/* An explicit starting address needs no lookup. */
+	if (wsp->walk_addr != NULL)
+		return (WALK_NEXT);
+
+	if (mdb_readvar(&wsp->walk_addr, "arl_g_head") != -1)
+		return (WALK_NEXT);
+
+	mdb_warn("failed to read 'arl_g_head'");
+	return (WALK_ERR);
+}
+
+/*
+ * Visit the arl_t at the current walk address: copy it in, advance the walk
+ * to its arl_next link, and pass the copy to the walk callback.  Returns
+ * WALK_DONE when the walk address is NULL and WALK_ERR if the read fails;
+ * otherwise returns whatever the callback returns.
+ */
+static int
+arl_walk_step(mdb_walk_state_t *wsp)
+{
+	const uintptr_t cur = wsp->walk_addr;
+	arl_t entry;
+
+	if (cur == NULL)
+		return (WALK_DONE);
+
+	if (mdb_vread(&entry, sizeof (entry), cur) == -1) {
+		mdb_warn("failed to read arl_t at %p", cur);
+		return (WALK_ERR);
+	}
+
+	/* Record the successor before the callback runs. */
+	wsp->walk_addr = (uintptr_t)entry.arl_next;
+	return (wsp->walk_callback(cur, &entry, wsp->walk_cbdata));
+}
+
+/*
+ * Begin a global walk of ace_t structures.  Local walks (a non-NULL starting
+ * address) are rejected.  The walk state holds a copy of 'ar_ce_hash_tbl'
+ * and the value of 'ar_ce_mask_entries'; it is stored in wsp->walk_data.
+ * Returns WALK_ERR, with the state freed, if either symbol cannot be read.
+ */
+static int
+ace_walk_init(mdb_walk_state_t *wsp)
+{
+	ace_walk_data_t *state;
+
+	if (wsp->walk_addr != NULL) {
+		mdb_warn("ace supports only global walks\n");
+		return (WALK_ERR);
+	}
+
+	state = mdb_alloc(sizeof (*state), UM_SLEEP);
+
+	if (mdb_readsym(state->awd_hash_tbl, sizeof (state->awd_hash_tbl),
+	    "ar_ce_hash_tbl") == -1) {
+		mdb_warn("failed to read 'ar_ce_hash_tbl'");
+		goto fail;
+	}
+
+	if (mdb_readvar(&state->awd_masks, "ar_ce_mask_entries") == -1) {
+		mdb_warn("failed to read 'ar_ce_mask_entries'");
+		goto fail;
+	}
+
+	/*
+	 * Start one before the first bucket: the step routine increments the
+	 * index before it looks at a bucket.
+	 */
+	state->awd_idx = -1;
+	wsp->walk_data = state;
+	wsp->walk_addr = 0;
+	return (WALK_NEXT);
+
+fail:
+	mdb_free(state, sizeof (*state));
+	return (WALK_ERR);
+}
+
+/*
+ * Visit the next ace_t.  Entries are taken from each hash bucket chain in
+ * index order and then from the mask entry list.  Returns WALK_DONE once the
+ * mask list is exhausted, WALK_ERR if an entry cannot be read, and otherwise
+ * whatever the walk callback returns.
+ */
+static int
+ace_walk_step(mdb_walk_state_t *wsp)
+{
+	ace_walk_data_t *state = wsp->walk_data;
+	uintptr_t cur;
+	ace_t entry;
+
+	/*
+	 * The current chain has run out (or none has been started yet), so
+	 * skip ahead to the next non-empty one.  Index ARP_HASH_SIZE stands
+	 * for the mask list, which is the last chain.
+	 */
+	while (wsp->walk_addr == NULL) {
+		if (state->awd_idx == ARP_HASH_SIZE)
+			return (WALK_DONE);
+
+		state->awd_idx++;
+		wsp->walk_addr = (state->awd_idx == ARP_HASH_SIZE) ?
+		    (uintptr_t)state->awd_masks :
+		    (uintptr_t)state->awd_hash_tbl[state->awd_idx];
+	}
+
+	cur = wsp->walk_addr;
+	if (mdb_vread(&entry, sizeof (entry), cur) == -1) {
+		mdb_warn("failed to read ace_t at %p", cur);
+		return (WALK_ERR);
+	}
+
+	/* Record the successor before the callback runs. */
+	wsp->walk_addr = (uintptr_t)entry.ace_next;
+	return (wsp->walk_callback(cur, &entry, wsp->walk_cbdata));
+}
+
+/*
+ * Free the ace_walk_data_t kept in wsp->walk_data (ace_walk_init() stores
+ * it there).
+ */
+static void
+ace_walk_fini(mdb_walk_state_t *wsp)
+{
+	ace_walk_data_t *state = wsp->walk_data;
+
+	mdb_free(state, sizeof (*state));
+}
+
+/*
+ * Common routine to produce an 'ar' text description in buf.
+ *
+ * If ar_arl is set, the text is "ARP <arl_name> ".  When addmac is set and
+ * the arl has a hardware address that fits in ARP_MAX_ADDR_LEN bytes, the
+ * address is appended with mdb_mac_addr().
+ *
+ * If ar_arl is NULL, the queue after ar_wq is examined.  When the ar is not
+ * on an ill stream the text is "Client" if that queue's q_qinfo matches
+ * ip`ipwinit and "Unknown" otherwise.  When it is on an ill stream and the
+ * next queue is IP, the text is "IP <ill name>".
+ *
+ * Every failed read, and the on-ill-stream case where the next queue is not
+ * IP, returns early and leaves buf with whatever has been written so far,
+ * which may be nothing at all.
+ */
+static void
+ar_describe(const ar_t *ar, char *buf, size_t nbytes, boolean_t addmac)
+{
+	queue_t arwq, nextq;
+	ill_t ill;
+	char ifname[LIFNAMSIZ];
+	GElf_Sym ipwinit;
+	boolean_t above_ip;
+
+	if (ar->ar_arl != NULL) {
+		arl_t arl;
+		uchar_t hwaddr[ARP_MAX_ADDR_LEN];
+		uint32_t hwlen;
+		ssize_t used;
+
+		if (mdb_vread(&arl, sizeof (arl), (uintptr_t)ar->ar_arl) == -1)
+			return;
+
+		used = mdb_snprintf(buf, nbytes, "ARP %s ", arl.arl_name);
+		if (!addmac || used >= nbytes)
+			return;
+
+		hwlen = arl.arl_hw_addr_length;
+		if (hwlen == 0 || hwlen > sizeof (hwaddr) ||
+		    arl.arl_hw_addr == NULL)
+			return;
+		if (mdb_vread(hwaddr, hwlen, (uintptr_t)arl.arl_hw_addr) == -1)
+			return;
+
+		mdb_mac_addr(hwaddr, hwlen, buf + used, nbytes - used);
+		return;
+	}
+
+	if (mdb_vread(&arwq, sizeof (arwq), (uintptr_t)ar->ar_wq) == -1)
+		return;
+	if (mdb_vread(&nextq, sizeof (nextq), (uintptr_t)arwq.q_next) == -1)
+		return;
+
+	/* The next queue is IP when its q_qinfo is ip`ipwinit. */
+	above_ip = B_FALSE;
+	if (mdb_lookup_by_obj("ip", "ipwinit", &ipwinit) == 0 &&
+	    (uintptr_t)ipwinit.st_value == (uintptr_t)nextq.q_qinfo)
+		above_ip = B_TRUE;
+
+	if (!ar->ar_on_ill_stream) {
+		(void) strcpy(buf, above_ip ? "Client" : "Unknown");
+		return;
+	}
+
+	if (!above_ip)
+		return;
+	if (mdb_vread(&ill, sizeof (ill), (uintptr_t)nextq.q_ptr) == -1)
+		return;
+	if (mdb_readstr(ifname, sizeof (ifname),
+	    (uintptr_t)ill.ill_name) == -1)
+		return;
+
+	(void) mdb_snprintf(buf, nbytes, "IP %s", ifname);
+}
+
+/*
+ * Walk callback that prints one ar_t as a row: its address, ar_wq, ar_arl
+ * and the text description produced by ar_describe().  Always returns
+ * WALK_NEXT.
+ */
+/* ARGSUSED2 */
+static int
+ar_cb(uintptr_t addr, const void *arptr, void *dummy)
+{
+	const ar_t *ar = arptr;
+	char ardesc[sizeof ("ARP ") + LIFNAMSIZ];
+
+	/*
+	 * ar_describe() has early returns that never write to the buffer
+	 * (for example when a read fails).  Seed it so that mdb_printf()
+	 * does not print uninitialized stack contents in those cases.
+	 */
+	(void) strcpy(ardesc, "?");
+	ar_describe(ar, ardesc, sizeof (ardesc), B_FALSE);
+	mdb_printf("%?p %?p %?p %s\n", addr, ar->ar_wq, ar->ar_arl, ardesc);
+	return (WALK_NEXT);
+}
+
+/*
+ * Print out ARP client structures.
+ *
+ * With an address, the ar_t at that address is read and printed; without
+ * one, every ar_t found by the "ar" walker is printed.  A column header is
+ * printed first when DCMD_HDRSPEC() asks for one and output is not going to
+ * a pipeline.  Returns DCMD_ERR if the read or the walk fails.
+ */
+/* ARGSUSED2 */
+static int
+ar_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	ar_t one;
+
+	if (DCMD_HDRSPEC(flags) && (flags & DCMD_PIPE_OUT) == 0) {
+		mdb_printf("%<u>%?s %?s %?s %s%</u>\n",
+		    "AR", "WQ", "ARL", "TYPE");
+	}
+
+	if (!(flags & DCMD_ADDRSPEC)) {
+		if (mdb_walk("ar", ar_cb, NULL) == -1) {
+			mdb_warn("cannot walk ar_t structures");
+			return (DCMD_ERR);
+		}
+		return (DCMD_OK);
+	}
+
+	if (mdb_vread(&one, sizeof (one), addr) == -1) {
+		mdb_warn("failed to read ar_t at %p", addr);
+		return (DCMD_ERR);
+	}
+	(void) ar_cb(addr, &one, NULL);
+	return (DCMD_OK);
+}
+
+/*
+ * Walk callback that prints one arl_t as a row: its address, the pending
+ * DLPI primitive, the mdb_mblk_count() of arl_dlpi_deferred, a flag string,
+ * the interface name and the hardware address.  Always returns WALK_NEXT.
+ */
+/* ARGSUSED2 */
+static int
+arl_cb(uintptr_t addr, const void *arlptr, void *dummy)
+{
+	const arl_t *arl = arlptr;
+	uchar_t hwaddr[ARP_MAX_ADDR_LEN];
+	char hwstr[ARP_MAX_ADDR_LEN*3];
+	char flagstr[4];
+	char *fp = flagstr;
+	const char *prim;
+	int ndeferred;
+
+	mdb_printf("%?p ", addr);
+
+	/* Pending primitive: "--" if none, its name if known, else hex. */
+	if (arl->arl_dlpi_pending == DL_PRIM_INVAL) {
+		mdb_printf("%16s", "--");
+	} else {
+		prim = mdb_dlpi_prim(arl->arl_dlpi_pending);
+		if (prim != NULL)
+			mdb_printf("%16s", prim);
+		else
+			mdb_printf("%16x", arl->arl_dlpi_pending);
+	}
+
+	/* Hardware address: "--" if absent or oversized, "?" if unreadable. */
+	if (arl->arl_hw_addr_length == 0 ||
+	    arl->arl_hw_addr_length > sizeof (hwaddr)) {
+		(void) strcpy(hwstr, "--");
+	} else if (mdb_vread(hwaddr, arl->arl_hw_addr_length,
+	    (uintptr_t)arl->arl_hw_addr) == -1) {
+		(void) strcpy(hwstr, "?");
+	} else {
+		mdb_mac_addr(hwaddr, arl->arl_hw_addr_length, hwstr,
+		    sizeof (hwstr));
+	}
+
+	/* Flag string: optional 'N' for NOARP, then one link-state letter. */
+	if (arl->arl_flags & ARL_F_NOARP)
+		*fp++ = 'N';
+	switch (arl->arl_state) {
+	case ARL_S_DOWN:
+		*fp++ = 'd';
+		break;
+	case ARL_S_PENDING:
+		*fp++ = 'P';
+		break;
+	case ARL_S_UP:
+		*fp++ = 'U';
+		break;
+	default:
+		*fp++ = '?';
+		break;
+	}
+	*fp = '\0';
+
+	ndeferred = mdb_mblk_count(arl->arl_dlpi_deferred);
+	mdb_printf(" %8d %-3s %-9s %s\n",
+	    ndeferred, flagstr, arl->arl_name, hwstr);
+	return (WALK_NEXT);
+}
+
+/*
+ * Print out ARP link-layer elements.
+ *
+ * With an address, the arl_t at that address is read and printed; without
+ * one, every arl_t found by the "arl" walker is printed.  A column header is
+ * printed first when DCMD_HDRSPEC() asks for one and output is not going to
+ * a pipeline.  Returns DCMD_ERR if the read or the walk fails.
+ */
+/* ARGSUSED2 */
+static int
+arl_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	arl_t one;
+
+	if (DCMD_HDRSPEC(flags) && (flags & DCMD_PIPE_OUT) == 0) {
+		mdb_printf("%<u>%?s %16s %8s %3s %9s %s%</u>\n",
+		    "ARL", "DLPI REQ", "DLPI CNT", "FLG", "INTERFACE",
+		    "HW ADDR");
+	}
+
+	if (!(flags & DCMD_ADDRSPEC)) {
+		if (mdb_walk("arl", arl_cb, NULL) == -1) {
+			mdb_warn("cannot walk arl_t structures");
+			return (DCMD_ERR);
+		}
+		return (DCMD_OK);
+	}
+
+	if (mdb_vread(&one, sizeof (one), addr) == -1) {
+		mdb_warn("failed to read arl_t at %p", addr);
+		return (DCMD_ERR);
+	}
+	(void) arl_cb(addr, &one, NULL);
+	return (DCMD_OK);
+}
+
+/* ARGSUSED2 */
+static int
+ace_cb(uintptr_t addr, const void *aceptr, void *dummy)
+{
+ const ace_t *ace = aceptr;
+ uchar_t macaddr[ARP_MAX_ADDR_LEN];
+ char macstr[ARP_MAX_ADDR_LEN*3];
+ /* The %b format isn't compact enough for long listings */
+ static const char ace_flags[] = "SPDRMLdA ofya";
+ const char *cp;
+ char flags[sizeof (ace_flags)], *fp;
+ int flg;
+ in_addr_t inaddr, mask;
+ char addrstr[sizeof ("255.255.255.255/32")];
+
+ /* Walk the list of flags and produce a string */
+ cp = ace_flags;
+ fp = flags;
+ for (flg = 1; *cp != '\0'; flg <<= 1, cp++) {
+ if ((flg & ace->ace_flags) && *cp != ' ')
+ *fp++ = *cp;
+ }
+ *fp = '\0';
+
+ /* If it's not resolved, then it has no hardware address */
+ if (!(ace->ace_flags & ACE_F_RESOLVED) ||
+ ace->ace_hw_addr_length == 0 ||
+ ace->ace_hw_addr_length > sizeof (macaddr)) {
+ (void) strcpy(macstr, "--");
+ } else if (mdb_vread(macaddr, ace->ace_hw_addr_length,
+ (uintptr_t)ace->ace_hw_addr) == -1) {
+ (void) strcpy(macstr, "?");
+ } else {
+ mdb_mac_addr(macaddr, ace->ace_hw_addr_length, macstr,
+ sizeof (macstr));
+ }
+
+ /*
+ * Nothing other than IP uses ARP these days, so we don't try very hard
+ * here to switch out on ARP protocol type. (Note that ARP protocol
+ * types are roughly Ethertypes, but are allocated separately at IANA.)
+ */
+ if (ace->ace_proto != IP_ARP_PROTO_TYPE) {
+ (void) mdb_snprintf(addrstr, sizeof (addrstr),
+ "Unknown proto %x", ace->ace_proto);
+ } else if (mdb_vread(&inaddr, sizeof (inaddr),
+ (uintptr_t)ace->ace_proto_addr) != -1 &&
+ mdb_vread(&mask, sizeof (mask), (uintptr_t)ace->ace_proto_mask) !=
+ -1) {
+ /*
+ * If it's the standard host mask, then print it normally.
+ * Otherwise, use "/n" notation.
+ */
+ if (mask == (in_addr_t)~0) {
+ (void) mdb_snprintf(addrstr, sizeof (addrstr), "%I",
+ inaddr);
+ } else {
+ (void) mdb_snprintf(addrstr, sizeof (addrstr), "%I/%d",
+ inaddr, mask == 0 ? 0 : 33 - mdb_ffs(mask));
+ }
+ } else {
+ (void) strcpy(addrstr, "?");
+ }
+ mdb_printf("%?p %-18s %-8s %s\n", addr, addrstr, flags, macstr);
+ return (WALK_NEXT);
+}
+
+/*
+ * Print out ARP cache entry (ace_t) elements.
+ */
+/* ARGSUSED2 */
+static int
+ace_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ ace_t ace;
+
+ if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
+ mdb_printf("%<u>%?s %-18s %-8s %s%</u>\n",
+ "ACE", "PROTOADDR", "FLAGS", "HWADDR");
+ }
+
+ if (flags & DCMD_ADDRSPEC) {
+ if (mdb_vread(&ace, sizeof (ace), addr) == -1) {
+ mdb_warn("failed to read ace_t at %p", addr);
+ return (DCMD_ERR);
+ }
+ (void) ace_cb(addr, &ace, NULL);
+ } else {
+ if (mdb_walk("ace", ace_cb, NULL) == -1) {
+ mdb_warn("cannot walk ace_t structures");
+ return (DCMD_ERR);
+ }
+ }
+ return (DCMD_OK);
+}
+
+/*
+ * Print one ARP hardware/protocol address pair located at buf, labelled with
+ * field_id; the protocol address is decoded only when ptype is the IP type.
+ */
+static void
+print_arp(char field_id, const uchar_t *buf, const arh_t *arh, uint16_t ptype)
+{
+ char macstr[ARP_MAX_ADDR_LEN*3];
+ in_addr_t inaddr;
+
+ if (arh->arh_hlen == 0)
+ (void) strcpy(macstr, "(none)");
+ else
+ mdb_mac_addr(buf, arh->arh_hlen, macstr, sizeof (macstr));
+ mdb_printf("%?s ar$%cha %s\n", "", field_id, macstr);
+ if (arh->arh_plen == 0) {
+ mdb_printf("%?s ar$%cpa (none)\n", "", field_id);
+ } else if (ptype != IP_ARP_PROTO_TYPE) { /* only IP is decoded */
+ mdb_printf("%?s ar$%cpa (unknown)\n", "", field_id);
+ } else if (arh->arh_plen == sizeof (in_addr_t)) {
+ (void) memcpy(&inaddr, buf + arh->arh_hlen, sizeof (inaddr));
+ mdb_printf("%?s ar$%cpa %I\n", "", field_id, inaddr);
+ } else {
+ mdb_printf("%?s ar$%cpa (malformed IP)\n", "", field_id);
+ }
+}
+
+/*
+ * Decode an ARP message and display it.
+ */
+/* ARGSUSED2 */
+static int
+arphdr_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ struct {
+ arh_t arh;
+ uchar_t addrs[4 * ARP_MAX_ADDR_LEN];
+ } arp;
+ size_t blen;
+ uint16_t htype, ptype, op;
+ const char *cp;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ mdb_warn("address required to print ARP header\n");
+ return (DCMD_ERR);
+ }
+
+ if (mdb_vread(&arp.arh, sizeof (arp.arh), addr) == -1) {
+ mdb_warn("unable to read ARP header at %p", addr);
+ return (DCMD_ERR);
+ }
+ mdb_nhconvert(&htype, arp.arh.arh_hardware, sizeof (htype));
+ mdb_nhconvert(&ptype, arp.arh.arh_proto, sizeof (ptype));
+ mdb_nhconvert(&op, arp.arh.arh_operation, sizeof (op));
+
+ switch (htype) {
+ case ARPHRD_ETHER:
+ cp = "Ether";
+ break;
+ case ARPHRD_IEEE802:
+ cp = "IEEE802";
+ break;
+ case ARPHRD_IB:
+ cp = "InfiniBand";
+ break;
+ default:
+ cp = "Unknown";
+ break;
+ }
+ mdb_printf("%?p: ar$hrd %x (%s)\n", addr, htype, cp);
+ mdb_printf("%?s ar$pro %x (%s)\n", "", ptype,
+ ptype == IP_ARP_PROTO_TYPE ? "IP" : "Unknown");
+
+ switch (op) {
+ case ARPOP_REQUEST:
+ cp = "ares_op$REQUEST";
+ break;
+ case ARPOP_REPLY:
+ cp = "ares_op$REPLY";
+ break;
+ case REVARP_REQUEST:
+ cp = "arev_op$REQUEST";
+ break;
+ case REVARP_REPLY:
+ cp = "arev_op$REPLY";
+ break;
+ default:
+ cp = "Unknown";
+ break;
+ }
+ mdb_printf("%?s ar$op %d (%s)\n", "", op, cp);
+
+ /*
+ * Note that we go to some length to attempt to print out the fixed
+ * header data before trying to decode the variable-length data. This
+ * is done to maximize the amount of useful information shown when the
+ * buffer is truncated or otherwise corrupt.
+ */
+ blen = 2 * (arp.arh.arh_hlen + arp.arh.arh_plen);
+ if (mdb_vread(&arp.addrs, blen, addr + sizeof (arp.arh)) == -1) {
+ mdb_warn("unable to read ARP body at %p", addr);
+ return (DCMD_ERR);
+ }
+
+ print_arp('s', arp.addrs, &arp.arh, ptype);
+ print_arp('t', arp.addrs + arp.arh.arh_hlen + arp.arh.arh_plen,
+ &arp.arh, ptype);
+ return (DCMD_OK);
+}
+
+/*
+ * Print out an arp command formatted in a reasonable manner. This implements
+ * the type switch used by ARP.
+ *
+ * It could also dump the data that follows the header (using offset and length
+ * in the various structures), but it currently does not.
+ */
+/* ARGSUSED2 */
+static int
+arpcmd_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ arc_t arc;
+ const arp_cmd_tbl *tp;
+ mdb_arg_t subargv;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ mdb_warn("address required to print ARP command\n");
+ return (DCMD_ERR);
+ }
+ if (mdb_vread(&arc, sizeof (arc), addr) == -1) {
+ mdb_warn("unable to read arc_t at %p", addr);
+ return (DCMD_ERR);
+ }
+ for (tp = act_list; tp->act_cmd != 0; tp++)
+ if (tp->act_cmd == arc.arc_cmd)
+ break;
+ mdb_printf("%p %s (%s) = ", addr, tp->act_name, tp->act_type);
+ subargv.a_type = MDB_TYPE_STRING;
+ subargv.a_un.a_str = tp->act_type;
+ if (mdb_call_dcmd("print", addr, DCMD_ADDRSPEC, 1, &subargv) == -1)
+ return (DCMD_ERR);
+ else
+ return (DCMD_OK);
+}
+
+static size_t
+mi_osize(const queue_t *q)
+{
+ /*
+ * The code in common/inet/mi.c allocates an extra word to store the
+ * size of the allocation, so what precedes q_ptr is a size_t plus an mi_o_s.
+ */
+ struct mi_block {
+ size_t mi_nbytes;
+ struct mi_o_s mi_o;
+ } m;
+
+ if (mdb_vread(&m, sizeof (m), (uintptr_t)q->q_ptr - sizeof (m)) != -1)
+ return (m.mi_nbytes - sizeof (m));
+
+ return (0); /* header could not be read */
+}
+
+/*
+ * This is called when ::stream is used and an ARP module is seen on the
+ * stream. Determine what sort of ARP usage is involved and show an
+ * appropriate message.
+ */
+static void
+arp_qinfo(const queue_t *qp, char *buf, size_t nbytes)
+{
+ size_t size = mi_osize(qp);
+ ar_t ar;
+
+ if (size != sizeof (ar_t))
+ return;
+ if (mdb_vread(&ar, sizeof (ar), (uintptr_t)qp->q_ptr) == -1)
+ return;
+ ar_describe(&ar, buf, nbytes, B_TRUE);
+}
+
+static uintptr_t
+arp_rnext(const queue_t *q)
+{
+ size_t size = mi_osize(q);
+ ar_t ar;
+
+ if (size == sizeof (ar_t) && mdb_vread(&ar, sizeof (ar),
+ (uintptr_t)q->q_ptr) != -1)
+ return ((uintptr_t)ar.ar_rq);
+
+ return (NULL);
+}
+
+static uintptr_t
+arp_wnext(const queue_t *q)
+{
+ size_t size = mi_osize(q);
+ ar_t ar;
+
+ if (size == sizeof (ar_t) && mdb_vread(&ar, sizeof (ar),
+ (uintptr_t)q->q_ptr) != -1)
+ return ((uintptr_t)ar.ar_wq);
+
+ return (NULL);
+}
+
+static const mdb_dcmd_t dcmds[] = {
+ { "ar", "?", "display ARP client streams", ar_cmd, NULL },
+ { "arl", "?", "display ARP link layers", arl_cmd, NULL },
+ { "ace", "?", "display ARP cache entries", ace_cmd, NULL },
+ { "arphdr", ":", "display an ARP header", arphdr_cmd, NULL },
+ { "arpcmd", ":", "display an ARP command", arpcmd_cmd, NULL },
+ { NULL }
+};
+
+/* Note: ar_t walker is in genunix.c and net.c; generic MI walker */
+static const mdb_walker_t walkers[] = {
+ { "arl", "walk list of arl_t links",
+ arl_walk_init, arl_walk_step, NULL },
+ { "ace", "walk list of ace_t entries",
+ ace_walk_init, ace_walk_step, ace_walk_fini },
+ { NULL }
+};
+
+static const mdb_qops_t arp_qops = { arp_qinfo, arp_rnext, arp_wnext };
+static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
+
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ GElf_Sym sym;
+
+ if (mdb_lookup_by_obj("arp", "winit", &sym) == 0)
+ mdb_qops_install(&arp_qops, (uintptr_t)sym.st_value);
+
+ return (&modinfo);
+}
+
+void
+_mdb_fini(void)
+{
+ GElf_Sym sym;
+
+ if (mdb_lookup_by_obj("arp", "winit", &sym) == 0)
+ mdb_qops_remove(&arp_qops, (uintptr_t)sym.st_value);
+}
diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c b/usr/src/cmd/mdb/common/modules/ip/ip.c
index 451a78aa24..8320fee2a5 100644
--- a/usr/src/cmd/mdb/common/modules/ip/ip.c
+++ b/usr/src/cmd/mdb/common/modules/ip/ip.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -869,7 +868,7 @@ _mdb_init(void)
{
GElf_Sym sym;
- if (mdb_lookup_by_obj("ip", "winit", &sym) == 0)
+ if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
mdb_qops_install(&ip_qops, (uintptr_t)sym.st_value);
return (&modinfo);
@@ -880,6 +879,6 @@ _mdb_fini(void)
{
GElf_Sym sym;
- if (mdb_lookup_by_obj("ip", "winit", &sym) == 0)
+ if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
mdb_qops_remove(&ip_qops, (uintptr_t)sym.st_value);
}
diff --git a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c
index e5498db180..869c58c6e0 100644
--- a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c
+++ b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c
@@ -53,6 +53,7 @@
#include <sys/ddi_impldefs.h>
#include <sys/refstr_impl.h>
#include <sys/cpuvar.h>
+#include <sys/dlpi.h>
#include <errno.h>
#include <vm/seg_vn.h>
@@ -1484,3 +1485,105 @@ mdb_read_refstr(uintptr_t refstr_addr, char *str, size_t nbytes)
return (mdb_readstr(str, nbytes, (uintptr_t)r->rs_string));
}
+
+/*
+ * Chase an mblk list by b_next and return the length.
+ */
+int
+mdb_mblk_count(const mblk_t *mb)
+{
+ int count;
+ mblk_t mblk;
+
+ if (mb == NULL)
+ return (0);
+
+ count = 1;
+ while (mb->b_next != NULL) {
+ count++;
+ if (mdb_vread(&mblk, sizeof (mblk), (uintptr_t)mb->b_next) ==
+ -1)
+ break;
+ mb = &mblk;
+ }
+ return (count);
+}
+
+/*
+ * Write the given MAC address as a printable string in the usual colon-
+ * separated format. Assumes that buflen is at least 2.
+ */
+void
+mdb_mac_addr(const uint8_t *addr, size_t alen, char *buf, size_t buflen)
+{
+ int slen;
+
+ if (alen == 0 || buflen < 4) {
+ (void) strcpy(buf, "?");
+ return;
+ }
+ for (;;) {
+ /*
+ * If there are more MAC address bytes available, but we won't
+ * have any room to print them, then add "..." to the string
+ * instead. See below for the 'magic number' explanation.
+ */
+ if ((alen == 2 && buflen < 6) || (alen > 2 && buflen < 7)) {
+ (void) strcpy(buf, "...");
+ break;
+ }
+ slen = mdb_snprintf(buf, buflen, "%02x", *addr++);
+ buf += slen;
+ if (--alen == 0)
+ break;
+ *buf++ = ':';
+ buflen -= slen + 1;
+ /*
+ * At this point, based on the first 'if' statement above,
+ * either alen == 1 and buflen >= 3, or alen > 1 and
+ * buflen >= 4. The first case leaves room for the final "xx"
+ * number and trailing NUL byte. The second leaves room for at
+ * least "...". Thus the apparently 'magic' numbers chosen for
+ * that statement.
+ */
+ }
+}
+
+/*
+ * Produce a string that represents a DLPI primitive, or NULL if no such string
+ * is possible.
+ */
+const char *
+mdb_dlpi_prim(int prim)
+{
+ switch (prim) {
+ case DL_INFO_REQ: return ("DL_INFO_REQ");
+ case DL_INFO_ACK: return ("DL_INFO_ACK");
+ case DL_ATTACH_REQ: return ("DL_ATTACH_REQ");
+ case DL_DETACH_REQ: return ("DL_DETACH_REQ");
+ case DL_BIND_REQ: return ("DL_BIND_REQ");
+ case DL_BIND_ACK: return ("DL_BIND_ACK");
+ case DL_UNBIND_REQ: return ("DL_UNBIND_REQ");
+ case DL_OK_ACK: return ("DL_OK_ACK");
+ case DL_ERROR_ACK: return ("DL_ERROR_ACK");
+ case DL_ENABMULTI_REQ: return ("DL_ENABMULTI_REQ");
+ case DL_DISABMULTI_REQ: return ("DL_DISABMULTI_REQ");
+ case DL_PROMISCON_REQ: return ("DL_PROMISCON_REQ");
+ case DL_PROMISCOFF_REQ: return ("DL_PROMISCOFF_REQ");
+ case DL_UNITDATA_REQ: return ("DL_UNITDATA_REQ");
+ case DL_UNITDATA_IND: return ("DL_UNITDATA_IND");
+ case DL_UDERROR_IND: return ("DL_UDERROR_IND");
+ case DL_PHYS_ADDR_REQ: return ("DL_PHYS_ADDR_REQ");
+ case DL_PHYS_ADDR_ACK: return ("DL_PHYS_ADDR_ACK");
+ case DL_SET_PHYS_ADDR_REQ: return ("DL_SET_PHYS_ADDR_REQ");
+ case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ");
+ case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK");
+ case DL_NOTIFY_IND: return ("DL_NOTIFY_IND");
+ case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ");
+ case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK");
+ case DL_CONTROL_REQ: return ("DL_CONTROL_REQ");
+ case DL_CONTROL_ACK: return ("DL_CONTROL_ACK");
+ case DL_PASSIVE_REQ: return ("DL_PASSIVE_REQ");
+ default: return (NULL);
+ }
+}
diff --git a/usr/src/cmd/mdb/intel/amd64/arp/Makefile b/usr/src/cmd/mdb/intel/amd64/arp/Makefile
new file mode 100644
index 0000000000..82804206f8
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/arp/Makefile
@@ -0,0 +1,35 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+MODULE = arp.so
+MDBTGT = kvm
+
+MODSRCS = arp.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.amd64
+include ../../../Makefile.module
diff --git a/usr/src/cmd/mdb/intel/ia32/arp/Makefile b/usr/src/cmd/mdb/intel/ia32/arp/Makefile
new file mode 100644
index 0000000000..b64c3de8ea
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/ia32/arp/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+MODULE = arp.so
+MDBTGT = kvm
+
+MODSRCS = arp.c
+
+include ../../../../Makefile.cmd
+include ../../Makefile.ia32
+include ../../../Makefile.module
diff --git a/usr/src/cmd/mdb/sparc/v9/arp/Makefile b/usr/src/cmd/mdb/sparc/v9/arp/Makefile
new file mode 100644
index 0000000000..0e0b290b84
--- /dev/null
+++ b/usr/src/cmd/mdb/sparc/v9/arp/Makefile
@@ -0,0 +1,35 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+MODULE = arp.so
+MDBTGT = kvm
+
+MODSRCS = arp.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.sparcv9
+include ../../../Makefile.module
diff --git a/usr/src/lib/libc/port/gen/ffs.c b/usr/src/common/util/ffs.c
index 19da6ceb22..b37c661b82 100644
--- a/usr/src/lib/libc/port/gen/ffs.c
+++ b/usr/src/common/util/ffs.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,34 +18,39 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
-/* Copyright (c) 1988 AT&T */
-/* All Rights Reserved */
-
+/*
+ * Common implementation of ffs for kernel, mdb, and libc. Note that mdb
+ * renames ffs into mdb_ffs to avoid user-space clashes with the signature of
+ * ffs(3C).
+ */
+#if defined(_KERNEL) || defined(ffs)
+#include <sys/int_types.h>
+#define arg_t uintmax_t
+#else
#pragma weak ffs = _ffs
-
+#define arg_t int
#include "synonyms.h"
-#include <sys/types.h>
-#include <string.h>
+#endif
int
-ffs(int field)
+ffs(arg_t bits)
{
- int idx = 1;
+ int i;
- if (field == 0)
+ if (bits == 0)
return (0);
- for (;;) {
- if (field & 1)
- return (idx);
- field >>= 1;
- ++idx;
+ for (i = 1; ; i++, bits >>= 1) {
+ if (bits & 1)
+ break;
}
+ return (i);
}
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index 3e758a94b2..4293c30a14 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -90,6 +90,7 @@ COMOBJS= \
bcopy.o \
bsearch.o \
bzero.o \
+ ffs.o \
qsort.o \
strtol.o \
strtoul.o
@@ -388,7 +389,6 @@ PORTGEN= \
fattach.o \
fdetach.o \
fdopendir.o \
- ffs.o \
fmtmsg.o \
ftime.o \
ftok.o \
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index 9560631ae8..d90e6d05f8 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -88,6 +88,7 @@ COMOBJS= \
bcopy.o \
bsearch.o \
bzero.o \
+ ffs.o \
qsort.o \
strtol.o \
strtoul.o
@@ -417,7 +418,6 @@ PORTGEN= \
fattach.o \
fdetach.o \
fdopendir.o \
- ffs.o \
fmtmsg.o \
ftime.o \
ftok.o \
diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile
index e7f51d287d..2d23d8087c 100644
--- a/usr/src/lib/libc/sparc/Makefile
+++ b/usr/src/lib/libc/sparc/Makefile
@@ -107,6 +107,7 @@ COMOBJS= \
bcopy.o \
bzero.o \
bsearch.o \
+ ffs.o \
memccpy.o \
qsort.o \
strtol.o \
@@ -440,7 +441,6 @@ PORTGEN= \
fattach.o \
fdetach.o \
fdopendir.o \
- ffs.o \
fmtmsg.o \
ftime.o \
ftok.o \
diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile
index 81de2b2bb9..df5eb2f5e3 100644
--- a/usr/src/lib/libc/sparcv9/Makefile
+++ b/usr/src/lib/libc/sparcv9/Makefile
@@ -112,6 +112,7 @@ COMOBJS= \
bcopy.o \
bsearch.o \
bzero.o \
+ ffs.o \
memccpy.o \
qsort.o \
strtol.o \
@@ -405,7 +406,6 @@ PORTGEN= \
fattach.o \
fdetach.o \
fdopendir.o \
- ffs.o \
fmtmsg.o \
ftime.o \
ftok.o \
diff --git a/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h b/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h
index 64df2431de..b509917dff 100644
--- a/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h
+++ b/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -170,6 +169,7 @@ typedef enum {
INIT, /* nothing done yet */
SELECTING, /* sent DISCOVER, waiting for OFFERs */
REQUESTING, /* sent REQUEST, waiting for ACK/NAK */
+ PRE_BOUND, /* have ACK, setting up interface */
BOUND, /* have a valid lease */
RENEWING, /* have lease, but trying to renew */
REBINDING, /* have lease, but trying to rebind */
diff --git a/usr/src/lib/libdhcpagent/common/dhcpagent_util.c b/usr/src/lib/libdhcpagent/common/dhcpagent_util.c
index 18189a14cf..eab4a98edb 100644
--- a/usr/src/lib/libdhcpagent/common/dhcpagent_util.c
+++ b/usr/src/lib/libdhcpagent/common/dhcpagent_util.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -61,6 +60,7 @@ dhcp_state_to_string(DHCPSTATE state)
"INIT",
"SELECTING",
"REQUESTING",
+ "PRE_BOUND",
"BOUND",
"RENEWING",
"REBINDING",
diff --git a/usr/src/lib/libinetcfg/Makefile.com b/usr/src/lib/libinetcfg/Makefile.com
index 1eb8a4aee4..d0d667091a 100644
--- a/usr/src/lib/libinetcfg/Makefile.com
+++ b/usr/src/lib/libinetcfg/Makefile.com
@@ -18,7 +18,6 @@
#
# CDDL HEADER END
#
-#
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -28,8 +27,7 @@
LIBRARY = libinetcfg.a
VERS = .1
OBJECTS = inetcfg.o \
- inetcfg_nic.o \
- inetcfg_dad.o
+ inetcfg_nic.o
include ../../Makefile.lib
@@ -45,11 +43,6 @@ $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
CFLAGS += $(CCVERBOSE)
CPPFLAGS += -I$(SRCDIR) -D_REENTRANT
-#
-# Turn off argument list checking because inetcfg_dad.c is written using
-# libxnet and the rest is compiled to use libsocket (yikes!)
-#
-LINTFLAGS += -erroff=E_INCONS_ARG_DECL2 -erroff=E_INCONS_ARG_USED2
.KEEP_STATE:
diff --git a/usr/src/lib/libinetcfg/common/inetcfg.c b/usr/src/lib/libinetcfg/common/inetcfg.c
index 5a79b36cc8..62b118b894 100644
--- a/usr/src/lib/libinetcfg/common/inetcfg.c
+++ b/usr/src/lib/libinetcfg/common/inetcfg.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,15 +36,14 @@
#include <sys/sockio.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <net/route.h>
#include <netinet/in.h>
#include <inet/ip.h>
#include <arpa/inet.h>
-#include <netdb.h>
#include <libintl.h>
#include <inetcfg.h>
#include "inetcfg_nic.h"
-#include "inetcfg_dad.h"
#define ICFG_FAMILY(handle) handle->ifh_interface.if_protocol
@@ -60,6 +58,12 @@
#define ICFG_LOGICAL_SEP ':'
/*
+ * Maximum amount of time (in milliseconds) to wait for Duplicate Address
+ * Detection to complete in the kernel.
+ */
+#define DAD_WAIT_TIME 5000
+
+/*
* Note: must be kept in sync with error codes in <inetcfg.h>
*/
static char *errmsgs[ICFG_NERR] = {
@@ -670,13 +674,67 @@ icfg_get_tunnel_upper(icfg_handle_t handle, int *protocol)
}
/*
+ * Any time that flags are changed on an interface where either the new or the
+ * existing flags have IFF_UP set, we'll get at least one RTM_IFINFO message to
+ * announce the flag status. Typically, there are two such messages: one
+ * saying that the interface is going down, and another saying that it's coming
+ * back up.
+ *
+ * We wait here for that second message, which can take one of two forms:
+ * either IFF_UP or IFF_DUPLICATE. If something's amiss with the kernel,
+ * though, we don't wait forever. (Note that IFF_DUPLICATE is a high-order
+ * bit, and we can't see it in the routing socket messages.)
+ */
+static int
+dad_wait(icfg_handle_t handle, int rtsock)
+{
+ struct pollfd fds[1];
+ union {
+ struct if_msghdr ifm;
+ char buf[1024];
+ } msg;
+ int index;
+ int retv;
+ uint64_t flags;
+ hrtime_t starttime, now;
+
+ fds[0].fd = rtsock;
+ fds[0].events = POLLIN;
+ fds[0].revents = 0;
+
+ if ((retv = icfg_get_index(handle, &index)) != ICFG_SUCCESS)
+ return (retv);
+
+ starttime = gethrtime();
+ for (;;) {
+ now = gethrtime();
+ now = (now - starttime) / 1000000;
+ if (now >= DAD_WAIT_TIME)
+ break;
+ if (poll(fds, 1, DAD_WAIT_TIME - (int)now) <= 0)
+ break;
+ if (read(rtsock, &msg, sizeof (msg)) <= 0)
+ break;
+ if (msg.ifm.ifm_type != RTM_IFINFO)
+ continue;
+ /* Note that ifm_index is just 16 bits */
+ if (index == msg.ifm.ifm_index && (msg.ifm.ifm_flags & IFF_UP))
+ return (ICFG_SUCCESS);
+ if ((retv = icfg_get_flags(handle, &flags)) != ICFG_SUCCESS)
+ return (retv);
+ if (flags & IFF_DUPLICATE)
+ return (ICFG_DAD_FOUND);
+ }
+ return (ICFG_DAD_FAILED);
+}
+
+/*
* Sets the flags for the interface represented by the 'handle'
* argument to the value contained in the 'flags' argument.
*
- * If the interface is an IPv6 interface and the new flags value
- * would transition the interface from "down" to "up", then
- * duplicate address detection is performed and succeeds only if
- * the no duplicate address is detected.
+ * If the new flags value will transition the interface from "down" to "up,"
+ * then duplicate address detection is performed by the kernel. This routine
+ * waits to get the outcome of that test.
*
* Returns: ICFG_SUCCESS, ICFG_DAD_FOUND, ICFG_DAD_FAILED or ICFG_FAILURE.
*/
@@ -686,48 +744,39 @@ icfg_set_flags(icfg_handle_t handle, uint64_t flags)
struct lifreq lifr;
uint64_t oflags;
int ret;
+ int rtsock;
(void) strlcpy(lifr.lifr_name, handle->ifh_interface.if_name,
sizeof (lifr.lifr_name));
lifr.lifr_addr.ss_family = ICFG_FAMILY(handle);
+ if ((ret = icfg_get_flags(handle, &oflags)) != ICFG_SUCCESS)
+ return (ret);
+ if (oflags == flags)
+ return (ICFG_SUCCESS);
+
/*
- * If we are transitioning an IPv6 interface from being down
- * to being up and a local address is set, then we must perform
- * duplicate address detection.
+ * Any time flags are changed on an interface that has IFF_UP set,
+ * you'll get a routing socket message. We care about the status,
+ * though, only when the new flags are marked "up."
*/
- if ((ICFG_FAMILY(handle) == AF_INET6) &&
- (!(flags & IFF_NOLOCAL)) && (flags & IFF_UP)) {
- /*
- * Get the old flags
- */
- if ((ret = icfg_get_flags(handle, &oflags)) != ICFG_SUCCESS) {
- return (ret);
- }
-
- if (!(oflags & IFF_UP)) {
- struct sockaddr_in6 *sin6;
-
- if (ioctl(handle->ifh_sock, SIOCGLIFADDR,
- (caddr_t)&lifr) < 0) {
- return (ICFG_FAILURE);
- }
-
- sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
-
- ret = dad_test(handle, oflags, sin6);
- if (ret != ICFG_SUCCESS) {
- return (ret);
- }
- }
- }
+ rtsock = (flags & IFF_UP) ?
+ socket(PF_ROUTE, SOCK_RAW, ICFG_FAMILY(handle)) : -1;
lifr.lifr_flags = flags;
if (ioctl(handle->ifh_sock, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
+ if (rtsock != -1)
+ (void) close(rtsock);
return (ICFG_FAILURE);
}
- return (ICFG_SUCCESS);
+ if (rtsock == -1) {
+ return (ICFG_SUCCESS);
+ } else {
+ ret = dad_wait(handle, rtsock);
+ (void) close(rtsock);
+ return (ret);
+ }
}
/*
@@ -945,6 +994,7 @@ icfg_set_addr(icfg_handle_t handle, const struct sockaddr *addr,
struct lifreq lifr;
uint64_t flags;
int ret;
+ int rtsock;
(void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr));
if ((ret = to_sockaddr_storage(ICFG_FAMILY(handle), addr, addrlen,
@@ -953,32 +1003,33 @@ icfg_set_addr(icfg_handle_t handle, const struct sockaddr *addr,
}
/*
- * Need to do duplicate address detection for IPv6
+ * Need to check the duplicate address detection results if the
+ * interface is up.
*/
- if (ICFG_FAMILY(handle) == AF_INET6) {
- if ((ret = icfg_get_flags(handle, &flags)) != ICFG_SUCCESS) {
- return (ret);
- }
-
- if (flags & IFF_UP) {
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
- &lifr.lifr_addr;
- ret = dad_test(handle, flags, sin6);
- if (ret != ICFG_SUCCESS) {
- return (ret);
- }
- }
+ if ((ret = icfg_get_flags(handle, &flags)) != ICFG_SUCCESS) {
+ return (ret);
}
+ rtsock = (flags & IFF_UP) ?
+ socket(PF_ROUTE, SOCK_RAW, ICFG_FAMILY(handle)) : -1;
+
(void) strlcpy(lifr.lifr_name, handle->ifh_interface.if_name,
sizeof (lifr.lifr_name));
lifr.lifr_addr.ss_family = ICFG_FAMILY(handle);
if (ioctl(handle->ifh_sock, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
+ if (rtsock != -1)
+ (void) close(rtsock);
return (ICFG_FAILURE);
}
- return (ICFG_SUCCESS);
+ if (rtsock == -1) {
+ return (ICFG_SUCCESS);
+ } else {
+ ret = dad_wait(handle, rtsock);
+ (void) close(rtsock);
+ return (ret);
+ }
}
/*
diff --git a/usr/src/lib/libinetcfg/common/inetcfg_dad.c b/usr/src/lib/libinetcfg/common/inetcfg_dad.c
deleted file mode 100644
index 926f116ee7..0000000000
--- a/usr/src/lib/libinetcfg/common/inetcfg_dad.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * This module uses the ancillary data feature that is made available
- * though the UNIX 98 standards version of the Socket interface. This
- * interface is normally accessed via libxnet. However, to use libxnet,
- * this library would have to be compiled with _XOPEN_SOURCE=500 and
- * __EXTENSIONS__. Unfortunately, this makes linting both the library
- * and its consumers impractical. Therefore, this module is itself compiled
- * for use with the UNIX 98 version of the Socket interface and the
- * xnet versions of the Socket interfaces are called directly.
- * Hopefully, our Socket implementation will one day support the ancillary
- * data feature directly and this hack will no longer be needed. In the
- * meantime, changes to this file should be made with the knowledge that the
- * data types used by this module may differ in defintion fron the same data
- * types in the other modules.
- */
-#define _XOPEN_SOURCE 500
-#define __EXTENSIONS__ 1
-
-#include <stdio.h>
-#include <errno.h>
-#include <assert.h>
-#include <string.h>
-#include <unistd.h>
-#include <stropts.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/icmp6.h>
-#include <netinet/ip.h>
-#include <inetcfg.h>
-
-#define IPV6_MAX_HOPS 255
-
-static int dup_addr_detect_transmits = 1;
-
-static struct in6_addr all_nodes_mcast = { { 0xff, 0x2, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x1 } };
-
-static struct in6_addr solicited_prefix = { { 0xff, 0x2, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x1,
- 0xff, 0x0, 0x0, 0x0 } };
-
-extern int __xnet_socket(int family, int type, int protocol);
-extern int __xnet_recvmsg(int sock, struct msghdr *msg, int flags);
-extern int __xnet_sendto(int sock, const void *buf, size_t len,
- int flags, const struct sockaddr *addr, socklen_t addrlen);
-
-/*
- * Verifies that all options have a non-zero length and that
- * the options fit within the total length of the packet (optlen).
- *
- * Returns: _B_TRUE if valid, _B_FALSE otherwise.
- */
-static boolean_t
-dad_verify_optlen(struct nd_opt_hdr *opt, ssize_t optlen)
-{
- assert(opt != NULL);
- assert(optlen > 0);
-
- while (optlen > 0) {
- if ((opt->nd_opt_len == 0)) {
- return (_B_FALSE);
- }
- optlen -= 8 * opt->nd_opt_len;
- if (optlen < 0) {
- return (_B_FALSE);
- }
- opt = (struct nd_opt_hdr *)((char *)opt +
- 8 * opt->nd_opt_len);
- }
- return (_B_TRUE);
-}
-
-/*
- * Returns a pointer to the specified option buffer.
- *
- * Returns: A pointer to the option buffer or NULL if not found.
- */
-static void *
-dad_find_ancillary(struct msghdr *msg, int cmsg_type)
-{
- struct cmsghdr *cmsg;
-
- assert(msg != NULL);
-
- for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(msg, cmsg)) {
- if (cmsg->cmsg_level == IPPROTO_IPV6 &&
- cmsg->cmsg_type == cmsg_type) {
- return (CMSG_DATA(cmsg));
- }
- }
-
- return (NULL);
-}
-
-/*
- * Receives an ICMP packet and tests it to see if it indicates that
- * testaddr is a duplicate address. This routine returns ICFG_SUCCESS
- * if no duplicate address is detected. If an unexpected error is
- * encountered receiving the packet, then ICFG_FAILURE is returned.
- * And of course ICFG_DAD_FOUND is returned if a duplicate address
- * is detected.
- *
- * Returns: ICFG_SUCCESS, ICFG_FAILURE or ICFG_DAD_FOUND.
- */
-static int
-dad_receive(int sock, struct sockaddr_in6 *testaddr, int ifindex)
-{
- struct sockaddr_in6 from;
- struct icmp6_hdr *icmp;
- struct nd_neighbor_solicit *ns;
- struct nd_neighbor_advert *na;
- static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
- static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
- ssize_t len;
- struct msghdr msg;
- struct iovec iov;
- void *opt;
- uint_t hoplimit;
- struct in6_addr dst;
- int rcv_ifindex;
-
- iov.iov_base = (char *)in_packet;
- iov.iov_len = sizeof (in_packet);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_name = (struct sockaddr *)&from;
- msg.msg_namelen = sizeof (from);
- msg.msg_control = ancillary_data;
- msg.msg_controllen = sizeof (ancillary_data);
-
- if ((len = __xnet_recvmsg(sock, &msg, 0)) < 0) {
- /* Error was encountered - return failure */
- return (ICFG_FAILURE);
- }
-
- if (len == 0) {
- /* Ignore zero length messages */
- return (ICFG_SUCCESS);
- }
-
- if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
- /* Ignore packets > 64k or control buffers that don't fit */
- return (ICFG_SUCCESS);
- }
-
- icmp = (struct icmp6_hdr *)in_packet;
-
- if (len < ICMP6_MINLEN) {
- /* Ignore packet if it is too small to be icmp */
- return (ICFG_SUCCESS);
- }
-
- opt = dad_find_ancillary(&msg, IPV6_HOPLIMIT);
- if (opt == NULL) {
- /* Unknown hoplimit - must drop */
- return (ICFG_SUCCESS);
- }
- hoplimit = *(uint_t *)opt;
-
- opt = dad_find_ancillary(&msg, IPV6_PKTINFO);
- if (opt == NULL) {
- /* Unknown destination address - must drop */
- return (ICFG_SUCCESS);
- }
- dst = ((struct in6_pktinfo *)opt)->ipi6_addr;
- rcv_ifindex = ((struct in6_pktinfo *)opt)->ipi6_ifindex;
-
- opt = dad_find_ancillary(&msg, IPV6_RTHDR);
- if (opt != NULL) {
- /* Can't allow routing headers in ND messages */
- return (ICFG_SUCCESS);
- }
-
- /*
- * We're only interested in neighbor solicitations (someone
- * else soliciting for the same address) and advertisements.
- * We must verify each. In either case, we assume that the
- * kernel verified the AH (if present) and the ICMP checksum.
- */
- switch (icmp->icmp6_type) {
- case ND_NEIGHBOR_SOLICIT:
-
- if (hoplimit != IPV6_MAX_HOPS) {
- /* Packet came from different subnet */
- return (ICFG_SUCCESS);
- }
-
- if (icmp->icmp6_code != 0) {
- /* There are no codes for neighbor solicitations */
- return (ICFG_SUCCESS);
- }
-
- if (len < sizeof (struct nd_neighbor_solicit)) {
- /* Packet is too small */
- return (ICFG_SUCCESS);
- }
-
- ns = (struct nd_neighbor_solicit *)icmp;
- if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target)) {
- /* NS target was multicast */
- return (ICFG_SUCCESS);
- }
-
- if (len > sizeof (struct nd_neighbor_solicit)) {
- /*
- * A neighbor solicitation packet has the form
- * of a header directly followed by options.
- */
- if (!dad_verify_optlen((struct nd_opt_hdr *)&ns[1],
- len - sizeof (struct nd_neighbor_solicit))) {
- /* Invalid options */
- return (ICFG_SUCCESS);
- }
- }
-
- if (!IN6_IS_ADDR_UNSPECIFIED(&from.sin6_addr)) {
- /* Sender is doing address resolution */
- return (ICFG_SUCCESS);
- }
-
- if (rcv_ifindex != ifindex) {
- /* Packet not received on test interface */
- return (ICFG_SUCCESS);
- }
-
- if (!IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr,
- &ns->nd_ns_target)) {
- /* NS wasn't for test address */
- return (ICFG_SUCCESS);
- }
-
- return (ICFG_DAD_FOUND);
-
- case ND_NEIGHBOR_ADVERT:
-
- if (hoplimit != IPV6_MAX_HOPS) {
- /* Packet came from different subnet */
- return (ICFG_SUCCESS);
- }
-
- if (icmp->icmp6_code != 0) {
- /* There are no codes for neighbor advertisements */
- return (ICFG_SUCCESS);
- }
-
- if (len < sizeof (struct nd_neighbor_advert)) {
- /* Packet is too small */
- return (ICFG_SUCCESS);
- }
-
- na = (struct nd_neighbor_advert *)icmp;
- if (IN6_IS_ADDR_MULTICAST(&na->nd_na_target)) {
- /* NA target was multicast */
- return (ICFG_SUCCESS);
- }
-
- if (IN6_IS_ADDR_MULTICAST(&dst) &&
- (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
- /* Dest was multicast and solicited flag not zero */
- return (ICFG_SUCCESS);
- }
-
- if (len > sizeof (struct nd_neighbor_advert)) {
- /*
- * A neighbor advertisement packet has the form
- * of a header directly followed by options.
- */
- if (!dad_verify_optlen((struct nd_opt_hdr *)&na[1],
- len - sizeof (struct nd_neighbor_advert))) {
- return (ICFG_SUCCESS);
- }
- }
-
- if (!IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr,
- &na->nd_na_target)) {
- /* NA wasn't for test address */
- return (ICFG_SUCCESS);
- }
- return (ICFG_DAD_FOUND);
-
- default:
- return (ICFG_SUCCESS);
- }
-}
-
-/*
- * Sends a DAD neighbor solicitation packet. Assumes the socket has been
- * configured correctly (i.e., an IPV6_UNSPEC_SRC and an IPV6_BOUND_IF have
- * been done by the caller, etc.).
- *
- * Returns: ICFG_SUCCESS or ICFG_FAILURE.
- */
-static int
-dad_send_probe(int sock, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc)
-{
- static uint64_t outpack[(IP_MAXPACKET + 1)/8];
- struct nd_neighbor_solicit *ns = (struct nd_neighbor_solicit *)outpack;
- int packetlen = 0;
- int cc;
-
- ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
- ns->nd_ns_code = 0;
- ns->nd_ns_cksum = 0;
- ns->nd_ns_reserved = 0;
- ns->nd_ns_target = testaddr->sin6_addr;
- packetlen = sizeof (struct nd_neighbor_solicit);
- cc = __xnet_sendto(sock, (char *)outpack, packetlen, 0,
- (struct sockaddr *)solicited_mc, sizeof (*solicited_mc));
- if (cc != packetlen) {
- return (ICFG_FAILURE);
- }
-
- return (ICFG_SUCCESS);
-}
-
-/*
- * Build a solicited node multicast address for a given address.
- */
-static void
-in6_solmulti_addr(struct in6_addr *addr, struct in6_addr *multi)
-{
- int i;
-
- *multi = solicited_prefix;
- for (i = 13; i < 16; i++) {
- multi->s6_addr[i] = addr->s6_addr[i];
- }
-}
-
-/*
- * Loops sending DAD probes and polling for responses.
- *
- * Returns: ICFG_SUCCESS, ICFG_FAILURE, ICFG_DAD_FOUND or ICFG_DAD_FAILED.
- */
-static int
-dad_loop(int sock, struct sockaddr_in6 *testaddr,
- struct sockaddr_in6 *solicited_mc, int ifindex, int retrans_timer)
-{
- int time_left; /* In milliseconds */
- struct timeval starttime;
- struct timeval curtime;
- struct pollfd fds;
- int i;
- int ret;
-
- /*
- * Perform duplicate address detection sequence
- * 1. Send a neighbor solicitation with an unspecified source
- * address to the solicited node MC address with the testaddr
- * being the target.
- * 2. Wait for up to retrans_timer milliseconds for either a
- * neighbor advertisement (sent to all-nodes) or a DAD neighbor
- * solicitation for the testaddr.
- * 3. Perform step 1 and 2 dup_addr_detect_transmits times.
- */
- for (i = 0; i < dup_addr_detect_transmits; i++) {
- ret = dad_send_probe(sock, testaddr, solicited_mc);
- if (ret != ICFG_SUCCESS) {
- return (ret);
- }
-
- /*
- * Track time to make sure total wait is retrans_timer
- * even though random packet will awake poll.
- */
- (void) gettimeofday(&starttime, NULL);
- /* CONSTCOND */
- while (1) {
- (void) gettimeofday(&curtime, NULL);
- time_left = retrans_timer -
- (curtime.tv_sec - starttime.tv_sec) * 1000 -
- (curtime.tv_usec - starttime.tv_usec) / 1000;
-
- if (time_left <= 0) {
- break;
- }
- fds.fd = sock;
- fds.events = POLLIN;
-
- switch (poll(&fds, 1, time_left)) {
- case -1:
- return (ICFG_FAILURE);
- case 0:
- /* Need loop will break */
- break;
- case 1:
- if (fds.revents & POLLIN) {
- ret = dad_receive(sock, testaddr,
- ifindex);
- if (ret != ICFG_SUCCESS) {
- return (ret);
- }
- }
- break;
- default:
- return (ICFG_DAD_FAILED);
- }
- }
- }
- return (ICFG_SUCCESS);
-}
-
-/*
- * Configures a socket for DAD.
- *
- * Returns: ICFG_SUCCESS or ICFG_FAILURE.
- */
-static int
-dad_configure_socket(int sock, int ifindex, struct sockaddr_in6 *solicited_mc)
-{
- struct ipv6_mreq v6mcastr;
- int hops = IPV6_MAX_HOPS;
- int on = 1;
- int off = 0;
-
- /*
- * IPV6_BOUND_PIF prevents load spreading from happening. If we
- * just do IPV6_BOUND_IF, the packet can go out on a different
- * interface other than "ifindex", if interface is part of
- * a group. In that case, we will get back the copy of NS that
- * we sent and think it is a duplicate(Switch loops back the
- * copy on all interfaces other than the one we sent the packet on).
- */
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_BOUND_PIF, (char *)&ifindex,
- sizeof (ifindex)) < 0) {
- return (ICFG_FAILURE);
- }
-
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
- (char *)&hops, sizeof (hops)) < 0) {
- return (ICFG_FAILURE);
- }
-
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_UNSPEC_SRC,
- (char *)&on, sizeof (on)) < 0) {
- return (ICFG_FAILURE);
- }
-
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
- (char *)&off, sizeof (off)) < 0) {
- return (ICFG_FAILURE);
- }
-
- /*
- * Enable receipt of ancillary data
- */
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
- (char *)&on, sizeof (on)) < 0) {
- return (ICFG_FAILURE);
- }
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVPKTINFO,
- (char *)&on, sizeof (on)) < 0) {
- return (ICFG_FAILURE);
- }
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVRTHDR,
- (char *)&on, sizeof (on)) < 0) {
- return (ICFG_FAILURE);
- }
-
- /*
- * Join the solicited node multicast address and all-nodes.
- */
- v6mcastr.ipv6mr_multiaddr = solicited_mc->sin6_addr;
- v6mcastr.ipv6mr_interface = ifindex;
-
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP,
- (char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- return (ICFG_FAILURE);
- }
-
- v6mcastr.ipv6mr_multiaddr = all_nodes_mcast;
- v6mcastr.ipv6mr_interface = ifindex;
-
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP,
- (char *)&v6mcastr, sizeof (v6mcastr)) < 0) {
- return (ICFG_FAILURE);
- }
- return (ICFG_SUCCESS);
-}
-
-/*
- * Performs duplicate address detection.
- *
- * Returns: ICFG_SUCCESS, ICFG_FAILURE, ICFG_DAD_FOUND or ICFG_DAD_FAILED.
- *
- * Note: the state of the interface name is unchanged.
- */
-int
-dad_test(icfg_handle_t handle, uint64_t flags, struct sockaddr_in6 *testaddr)
-{
- struct sockaddr_in6 solicited_mc;
- lif_ifinfo_req_t linkinfo;
- int retrans_timer = ND_RETRANS_TIMER;
- int ifindex;
- int sock;
- int syserr = 0;
- int restore_ret;
- int ret;
-
- /*
- * Check the address assigned to the interface.
- * Skip the check if IFF_NOLOCAL, IFF_NONUD, IFF_ANYCAST, or
- * IFF_LOOPBACK. Note that IFF_NONUD turns of both NUD and DAD.
- * DAD is not possible if not IFF_MULTICAST.
- */
- if (flags & (IFF_NOLOCAL|IFF_LOOPBACK|IFF_NONUD|IFF_ANYCAST) ||
- !(flags & IFF_MULTICAST)) {
- return (ICFG_SUCCESS);
- }
-
- /*
- * If the address is all zeroes, then just return success.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&testaddr->sin6_addr)) {
- return (ICFG_SUCCESS);
- }
-
- /*
- * Determine interface index (for IPV6_BOUND_PIF) and
- * save the flag values so they can be restored on return.
- */
- if ((ret = icfg_get_index(handle, &ifindex)) != ICFG_SUCCESS) {
- return (ret);
- }
-
- if ((ret = icfg_get_linkinfo(handle, &linkinfo)) != ICFG_SUCCESS) {
- return (ret);
- }
-
- if (linkinfo.lir_reachretrans != 0) {
- retrans_timer = linkinfo.lir_reachretrans;
- }
-
- /*
- * Set NOLOCAL and UP flags.
- * This prevents the use of the interface except when the user binds
- * to unspecified IPv6 address, and sends to a link local multicast
- * address.
- */
- ret = icfg_set_flags(handle, flags | IFF_NOLOCAL | IFF_UP);
- if (ret != ICFG_SUCCESS) {
- return (ret);
- }
-
- /*
- * Extract the address and determine the solicited node multicast
- * address to use.
- */
- (void) memset(&solicited_mc, 0, sizeof (solicited_mc));
- solicited_mc.sin6_family = AF_INET6;
- in6_solmulti_addr(&testaddr->sin6_addr, &solicited_mc.sin6_addr);
-
- /*
- * Get a socket to use to send and receive neighbor solicitations
- * for DAD. Also used for ioctls below.
- */
- if ((sock = __xnet_socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0) {
- syserr = errno;
- ret = ICFG_FAILURE;
- goto restore;
- }
-
- ret = dad_configure_socket(sock, ifindex, &solicited_mc);
- if (ret != ICFG_SUCCESS) {
- syserr = errno;
- (void) close(sock);
- goto restore;
- }
-
- ret = dad_loop(sock, testaddr, &solicited_mc, ifindex,
- retrans_timer);
- if (ret == ICFG_FAILURE) {
- syserr = errno;
- }
- (void) close(sock);
-
-restore:
- /* Restore flags */
- if ((restore_ret = icfg_set_flags(handle, flags)) != ICFG_SUCCESS) {
- if (ret == ICFG_SUCCESS) {
- syserr = errno;
- ret = restore_ret;
- }
- }
-
- if (ret == ICFG_FAILURE) {
- errno = syserr;
- }
-
- return (ret);
-}
diff --git a/usr/src/lib/libinetcfg/common/inetcfg_dad.h b/usr/src/lib/libinetcfg/common/inetcfg_dad.h
deleted file mode 100644
index b65fbb8b2a..0000000000
--- a/usr/src/lib/libinetcfg/common/inetcfg_dad.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _INETCFG_DAD_H
-#define _INETCFG_DAD_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <netinet/in.h>
-#include <inetcfg.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern int dad_test(icfg_handle_t, uint64_t, struct sockaddr_in6 *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _INETCFG_DAD_H */
diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_i386 b/usr/src/pkgdefs/SUNWmdb/prototype_i386
index 7a1fb04fd3..1fa3d16b03 100644
--- a/usr/src/pkgdefs/SUNWmdb/prototype_i386
+++ b/usr/src/pkgdefs/SUNWmdb/prototype_i386
@@ -47,6 +47,7 @@ f none usr/platform/i86pc/lib/mdb/kvm/pcplusmp.so 555 root sys
f none usr/platform/i86pc/lib/mdb/kvm/uppc.so 555 root sys
f none usr/platform/i86pc/lib/mdb/kvm/unix.so 555 root sys
d none usr/lib/mdb/kvm/amd64 755 root sys
+f none usr/lib/mdb/kvm/amd64/arp.so 555 root sys
f none usr/lib/mdb/kvm/amd64/audiosup.so 555 root sys
f none usr/lib/mdb/kvm/amd64/cpc.so 555 root sys
f none usr/lib/mdb/kvm/amd64/crypto.so 555 root sys
@@ -71,6 +72,7 @@ f none usr/lib/mdb/kvm/amd64/sppp.so 555 root sys
f none usr/lib/mdb/kvm/amd64/ufs.so 555 root sys
f none usr/lib/mdb/kvm/amd64/uhci.so 555 root sys
f none usr/lib/mdb/kvm/amd64/usba.so 555 root sys
+f none usr/lib/mdb/kvm/arp.so 555 root sys
f none usr/lib/mdb/kvm/audiosup.so 555 root sys
f none usr/lib/mdb/kvm/cpc.so 555 root sys
f none usr/lib/mdb/kvm/crypto.so 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_sparc b/usr/src/pkgdefs/SUNWmdb/prototype_sparc
index a5b29115a6..f67cdbdcc8 100644
--- a/usr/src/pkgdefs/SUNWmdb/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWmdb/prototype_sparc
@@ -38,6 +38,7 @@ d none usr/bin/sparcv9 755 root bin
f none usr/bin/sparcv9/mdb 555 root bin
l none usr/bin/sparcv9/adb=../../../usr/bin/sparcv9/mdb
d none usr/lib/mdb/kvm/sparcv9 755 root sys
+f none usr/lib/mdb/kvm/sparcv9/arp.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/audiosup.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/cpc.so 555 root sys
f none usr/lib/mdb/kvm/sparcv9/crypto.so 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_i386 b/usr/src/pkgdefs/SUNWmdbr/prototype_i386
index bd9ea07f75..d6a2291873 100644
--- a/usr/src/pkgdefs/SUNWmdbr/prototype_i386
+++ b/usr/src/pkgdefs/SUNWmdbr/prototype_i386
@@ -28,6 +28,7 @@
!include prototype_com
d none kernel/kmdb/amd64 755 root sys
+f none kernel/kmdb/amd64/arp 555 root sys
f none kernel/kmdb/amd64/audiosup 555 root sys
f none kernel/kmdb/amd64/cpc 555 root sys
f none kernel/kmdb/amd64/crypto 555 root sys
@@ -52,6 +53,7 @@ f none kernel/kmdb/amd64/sppp 555 root sys
f none kernel/kmdb/amd64/ufs 555 root sys
f none kernel/kmdb/amd64/uhci 555 root sys
f none kernel/kmdb/amd64/usba 555 root sys
+f none kernel/kmdb/arp 555 root sys
f none kernel/kmdb/audiosup 555 root sys
f none kernel/kmdb/cpc 555 root sys
f none kernel/kmdb/crypto 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
index 154c47733f..4b57aaacbf 100644
--- a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
@@ -28,6 +28,7 @@
!include prototype_com
#
d none kernel/kmdb/sparcv9 755 root sys
+f none kernel/kmdb/sparcv9/arp 555 root sys
f none kernel/kmdb/sparcv9/audiosup 555 root sys
f none kernel/kmdb/sparcv9/cpc 555 root sys
f none kernel/kmdb/sparcv9/crypto 555 root sys
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index fa5d0c132d..ef97e42257 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -131,6 +131,7 @@ GENUNIX_OBJS += \
fdbuffer.o \
fdsync.o \
fem.o \
+ ffs.o \
fio.o \
flock.o \
fm.o \
diff --git a/usr/src/uts/common/inet/arp.h b/usr/src/uts/common/inet/arp.h
index c773d6354b..71fd056afc 100644
--- a/usr/src/uts/common/inet/arp.h
+++ b/usr/src/uts/common/inet/arp.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 1992,1997-2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -30,10 +29,18 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/types.h>
+
#ifdef __cplusplus
extern "C" {
#endif
+/*
+ * Warning: the interfaces described in this file are private to the
+ * implementation. They may change at any time without notice and are not
+ * documented. Do not depend on them.
+ */
+
#define ARP_REQUEST 1
#define ARP_RESPONSE 2
#define RARP_REQUEST 3
@@ -41,52 +48,40 @@ extern "C" {
#define AR_IOCTL (((unsigned)'A' & 0xFF)<<8)
#define CMD_IN_PROGRESS 0x10000
-/*
- * The following ARP commands are private, and not part of a supported
- * interface. They are subject to change without notice in any release.
- */
+
#define AR_ENTRY_ADD (AR_IOCTL + 1)
#define AR_ENTRY_DELETE (AR_IOCTL + 2)
#define AR_ENTRY_QUERY (AR_IOCTL + 3)
-#define AR_XMIT_REQUEST (AR_IOCTL + 4)
-#define AR_XMIT_TEMPLATE (AR_IOCTL + 5)
#define AR_ENTRY_SQUERY (AR_IOCTL + 6)
#define AR_MAPPING_ADD (AR_IOCTL + 7)
#define AR_CLIENT_NOTIFY (AR_IOCTL + 8)
#define AR_INTERFACE_UP (AR_IOCTL + 9)
#define AR_INTERFACE_DOWN (AR_IOCTL + 10)
-#define AR_XMIT_RESPONSE (AR_IOCTL + 11)
#define AR_INTERFACE_ON (AR_IOCTL + 12)
#define AR_INTERFACE_OFF (AR_IOCTL + 13)
#define AR_DLPIOP_DONE (AR_IOCTL + 14)
-#define AR_ENTRY_LLAQUERY (AR_IOCTL + 15)
/*
* This is not an ARP command per se, it is used to interface between
* ARP and IP during close.
*/
#define AR_ARP_CLOSING (AR_IOCTL + 16)
+#define AR_ARP_EXTEND (AR_IOCTL + 17)
-/*
- * The following ACE flags are private, and not part of a supported
- * interface. They are subject to change without notice in any release.
- */
-#define ACE_F_PERMANENT 0x1
-#define ACE_F_PUBLISH 0x2
-#define ACE_F_DYING 0x4
-#define ACE_F_RESOLVED 0x8
+/* Both ace_flags and area_flags; must also modify arp.c in mdb */
+#define ACE_F_PERMANENT 0x0001
+#define ACE_F_PUBLISH 0x0002
+#define ACE_F_DYING 0x0004
+#define ACE_F_RESOLVED 0x0008
/* Using bit mask extraction from target address */
-#define ACE_F_MAPPING 0x10
-#define ACE_F_MYADDR 0x20 /* Strong check for duplicate MACs */
-
-/* ARP Cmd Table entry */
-typedef struct arct_s {
- pfi_t arct_pfi;
- uint32_t arct_cmd;
- int arct_min_len;
- uint32_t arct_flags;
- int arct_priv_req; /* Privilege required for this cmd */
- const char *arct_txt;
-} arct_t;
+#define ACE_F_MAPPING 0x0010
+#define ACE_F_MYADDR 0x0020 /* IP claims to own this address */
+#define ACE_F_UNVERIFIED 0x0040 /* DAD not yet complete */
+#define ACE_F_AUTHORITY 0x0080 /* check for duplicate MACs */
+#define ACE_F_DEFEND 0x0100 /* single transmit (area_flags only) */
+#define ACE_F_OLD 0x0200 /* should revalidate when IP asks */
+#define ACE_F_FAST 0x0400 /* fast probe enabled */
+#define ACE_F_DELAYED 0x0800 /* rescheduled on arp_defend_rate */
+#define ACE_F_DAD_ABORTED 0x1000 /* DAD was aborted on link down */
/* ARP Command Structures */
@@ -98,12 +93,6 @@ typedef struct ar_cmd_s {
} arc_t;
/*
- * The following ARP command structures are private, and not
- * part of a supported interface. They are subject to change
- * without notice in any release.
- */
-
-/*
* NOTE: when using area_t for an AR_ENTRY_SQUERY, the area_hw_addr_offset
* field isn't what you might think. See comments in ip_multi.c where
* the routine ill_create_squery() is called, and also in the routine
@@ -196,13 +185,10 @@ typedef struct ar_client_notify_s {
} arcn_t;
/* Client Notification Codes */
-/*
- * The following Client Notification codes are private, and not
- * part of a supported interface. They are subject to change
- * without notice in any release.
- */
#define AR_CN_BOGON 1
#define AR_CN_ANNOUNCE 2
+#define AR_CN_READY 3 /* DAD complete; address usable */
+#define AR_CN_FAILED 4 /* DAD failed; address unusable */
/* ARP Header */
typedef struct arh_s {
diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c
index fd7d086933..17c81b9513 100644
--- a/usr/src/uts/common/inet/arp/arp.c
+++ b/usr/src/uts/common/inet/arp/arp.c
@@ -28,8 +28,6 @@
/* AR - Address Resolution Protocol */
-#define ARP_DEBUG
-
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
@@ -47,6 +45,9 @@
#include <sys/strsun.h>
#include <sys/policy.h>
#include <sys/ethernet.h>
+#include <sys/zone.h>
+#include <sys/random.h>
+#include <sys/sdt.h>
#include <inet/common.h>
#include <inet/optcom.h>
@@ -56,24 +57,52 @@
#include <net/if.h>
#include <inet/arp.h>
#include <netinet/ip6.h>
+#include <netinet/arp.h>
#include <inet/ip.h>
#include <inet/ip_ire.h>
+#include <inet/ip_ndp.h>
#include <inet/mib2.h>
#include <inet/arp_impl.h>
-#ifdef ARP_DEBUG
-#define arp0dbg(a) printf a
-#define arp1dbg(a) if (arp_debug) printf a
-#define arp2dbg(a) if (arp_debug > 1) printf a
-#define arp3dbg(a) if (arp_debug > 2) printf a
-#else
-#define arp0dbg(a) /* */
-#define arp1dbg(a) /* */
-#define arp2dbg(a) /* */
-#define arp3dbg(a) /* */
-#endif
+/*
+ * ARP entry lifetime and design notes
+ * -----------------------------------
+ *
+ * ARP entries (ACEs) must last at least as long as IP knows about a given
+ * MAC-IP translation (i.e., as long as the IRE cache entry exists). It's ok
+ * if the ARP entry lasts longer, but not ok if it is removed before the IP
+ * entry. The reason for this is that if ARP doesn't have an entry, we will be
+ * unable to detect the difference between an ARP broadcast that represents no
+ * change (same, known address of sender) and one that represents a change (new
+ * address for existing entry). In the former case, we must not notify IP, or
+ * we can suffer hurricane attack. In the latter case, we must notify IP, or
+ * IP will drift out of sync with the network.
+ *
+ * Note that IP controls the lifetime of entries, not ARP.
+ *
+ * We don't attempt to reconfirm aging entries. If the system is no longer
+ * talking to a given peer, then it doesn't matter if we have the right mapping
+ * for that peer. It would be possible to send queries on aging entries that
+ * are active, but this isn't done.
+ */
+
+/*
+ * This is used when scanning for "old" (least recently broadcast) ACEs. We
+ * don't want to have to walk the list for every single one, so we gather up
+ * batches at a time.
+ */
+#define ACE_RESCHED_LIST_LEN 8
+
+typedef struct {
+ arl_t *art_arl;
+ uint_t art_naces;
+ ace_t *art_aces[ACE_RESCHED_LIST_LEN];
+} ace_resched_t;
#define ACE_RESOLVED(ace) ((ace)->ace_flags & ACE_F_RESOLVED)
+#define ACE_NONPERM(ace) \
+ (((ace)->ace_flags & (ACE_F_RESOLVED | ACE_F_PERMANENT)) == \
+ ACE_F_RESOLVED)
#define AR_DEF_XMIT_INTERVAL 500 /* time in milliseconds */
#define AR_LL_HDR_SLACK 32 /* Leave the lower layer some room */
@@ -82,6 +111,13 @@
#define AR_DRAINING (void *)0x11
/*
+ * The IPv4 Link Local address space is special; we do extra duplicate checking
+ * there, as the entire assignment mechanism rests on random numbers.
+ */
+#define IS_IPV4_LL_SPACE(ptr) (((uchar_t *)(ptr))[0] == 169 && \
+ ((uchar_t *)(ptr))[1] == 254)
+
+/*
* Check if the command needs to be enqueued by seeing if there are other
* commands ahead of us or if some DLPI response is being awaited. Usually
* there would be an enqueued command in the latter case, however if the
@@ -94,33 +130,9 @@
(mp->b_prev != AR_DRAINING && (arl->arl_queue != NULL || \
arl->arl_dlpi_pending != DL_PRIM_INVAL))
-/* Ugly check to determine whether the module below is IP */
-#define MODULE_BELOW_IS_IP(q) \
- ((WR(q)->q_next != NULL && WR(q)->q_next->q_next != NULL) && \
- (strcmp(WR(q)->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0))
-
-/* ARP Cache Entry */
-typedef struct ace_s {
- struct ace_s *ace_next; /* Hash chain next pointer */
- struct ace_s **ace_ptpn; /* Pointer to previous next */
- struct arl_s *ace_arl; /* Associated arl */
- uint32_t ace_proto; /* Protocol for this ace */
- uint32_t ace_flags;
- uchar_t *ace_proto_addr;
- uint32_t ace_proto_addr_length;
- uchar_t *ace_proto_mask; /* Mask for matching addr */
- uchar_t *ace_proto_extract_mask; /* For mappings */
- uchar_t *ace_hw_addr;
- uint32_t ace_hw_addr_length;
- uint32_t ace_hw_extract_start; /* For mappings */
- mblk_t *ace_mp; /* mblk we are in */
- uint32_t ace_query_count;
- mblk_t *ace_query_mp; /* Head of outstanding query chain */
- int ace_publish_count;
-} ace_t;
-
#define ACE_EXTERNAL_FLAGS_MASK \
-(ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR)
+ (ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR | \
+ ACE_F_AUTHORITY)
#define ARH_FIXED_LEN 8
@@ -165,8 +177,8 @@ static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr,
uchar_t *proto_extract_mask, uint32_t hw_extract_start,
uint32_t flags);
static void ar_ce_delete(ace_t *ace);
-static void ar_ce_delete_per_arl(ace_t *ace, arl_t *arl);
-static ace_t **ar_ce_hash(uint32_t proto, uchar_t *proto_addr,
+static void ar_ce_delete_per_arl(ace_t *ace, void *arg);
+static ace_t **ar_ce_hash(uint32_t proto, const uchar_t *proto_addr,
uint32_t proto_addr_length);
static ace_t *ar_ce_lookup(arl_t *arl, uint32_t proto,
uchar_t *proto_addr, uint32_t proto_addr_length);
@@ -175,14 +187,12 @@ static ace_t *ar_ce_lookup_entry(arl_t *arl, uint32_t proto,
static ace_t *ar_ce_lookup_from_area(mblk_t *mp, ace_t *matchfn());
static ace_t *ar_ce_lookup_mapping(arl_t *arl, uint32_t proto,
uchar_t *proto_addr, uint32_t proto_addr_length);
-static int ar_ce_report(queue_t *q, mblk_t *mp, caddr_t data, cred_t *cr);
-static void ar_ce_report1(ace_t *ace, uchar_t *mp_arg);
-static void ar_ce_resolve(ace_t *ace, uchar_t *hw_addr,
+static boolean_t ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr,
uint32_t hw_addr_length);
-static void ar_ce_walk(pfi_t pfi, void *arg1);
+static void ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1);
static void ar_cleanup(void);
-static void ar_client_notify(arl_t *arl, mblk_t *mp, int code);
+static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code);
static int ar_close(queue_t *q);
static int ar_cmd_dispatch(queue_t *q, mblk_t *mp);
static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size);
@@ -215,7 +225,7 @@ static int ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t ar_param_register(arpparam_t *arppa, int cnt);
static int ar_param_set(queue_t *q, mblk_t *mp, char *value,
caddr_t cp, cred_t *cr);
-static int ar_query_delete(ace_t *ace, uchar_t *ar);
+static void ar_query_delete(ace_t *ace, void *ar);
static void ar_query_reply(ace_t *ace, int ret_val,
uchar_t *proto_addr, uint32_t proto_addr_len);
static clock_t ar_query_xmit(ace_t *ace, ace_t *src_ace);
@@ -227,25 +237,16 @@ static int ar_slifname(queue_t *q, mblk_t *mp);
static int ar_set_ppa(queue_t *q, mblk_t *mp);
static int ar_snmp_msg(queue_t *q, mblk_t *mp_orig);
static void ar_snmp_msg2(ace_t *, void *);
-static void ar_timer_init(queue_t *q);
-static int ar_trash(ace_t *ace, uchar_t *arg);
static void ar_wput(queue_t *q, mblk_t *mp);
static void ar_wsrv(queue_t *q);
static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto,
- uint32_t plen, uchar_t *haddr1, uchar_t *paddr1,
- uchar_t *haddr2, uchar_t *paddr2);
-static int ar_xmit_request(queue_t *q, mblk_t *mp);
-static int ar_xmit_response(queue_t *q, mblk_t *mp);
+ uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1,
+ const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr);
static uchar_t *ar_snmp_msg_element(mblk_t **, uchar_t *, size_t);
static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q,
ushort_t cmd, boolean_t);
static mblk_t *ar_cmd_dequeue(arl_t *arl);
-#if 0
-static void show_ace(char *str, ace_t *ace);
-static void show_arp(char *str, mblk_t *mp);
-#endif
-
/*
* All of these are alterable, within the min/max values given,
* at run time. arp_publish_interval and arp_publish_count are
@@ -256,16 +257,34 @@ static void show_arp(char *str, mblk_t *mp);
*/
static arpparam_t arp_param_arr[] = {
/* min max value name */
- { 0, 10, 0, "arp_debug"},
{ 30000, 3600000, 300000, "arp_cleanup_interval"},
{ 1000, 20000, 2000, "arp_publish_interval"}, /* Spacing of repeated announcements (see ar_entry_add) */
{ 1, 20, 5, "arp_publish_count"}, /* Total announcements for an authoritative entry, counting the first */
+ { 0, 20000, 1000, "arp_probe_delay"}, /* 0 means the first probe is sent immediately (see ar_entry_add) */
+ { 10, 20000, 1500, "arp_probe_interval"}, /* Spacing of probes after the first one */
+ { 0, 20, 3, "arp_probe_count"}, /* 0 disables duplicate address detection for non-fast entries */
+ { 0, 20000, 100, "arp_fastprobe_delay"}, /* arp_fastprobe_*: used instead of arp_probe_* when ACE_F_FAST is set */
+ { 10, 20000, 150, "arp_fastprobe_interval"},
+ { 0, 20, 3, "arp_fastprobe_count"}, /* 0 disables duplicate address detection for ACE_F_FAST entries */
+ { 0, 3600000, 300000, "arp_defend_interval"},
+ { 0, 20000, 100, "arp_defend_rate"},
+ { 0, 3600000, 15000, "arp_broadcast_interval"},
+ { 5, 86400, 3600, "arp_defend_period"}
}; /* Row order matters: the accessor macros below index this array by position */
-#define arp_debug arp_param_arr[0].arp_param_value
-#define arp_timer_interval arp_param_arr[1].arp_param_value
-#define arp_publish_interval arp_param_arr[2].arp_param_value
-#define arp_publish_count arp_param_arr[3].arp_param_value
+#define arp_cleanup_interval arp_param_arr[0].arp_param_value /* Each index must match the row position above */
+#define arp_publish_interval arp_param_arr[1].arp_param_value
+#define arp_publish_count arp_param_arr[2].arp_param_value
+#define arp_probe_delay arp_param_arr[3].arp_param_value
+#define arp_probe_interval arp_param_arr[4].arp_param_value
+#define arp_probe_count arp_param_arr[5].arp_param_value
+#define arp_fastprobe_delay arp_param_arr[6].arp_param_value
+#define arp_fastprobe_interval arp_param_arr[7].arp_param_value
+#define arp_fastprobe_count arp_param_arr[8].arp_param_value
+#define arp_defend_interval arp_param_arr[9].arp_param_value
+#define arp_defend_rate arp_param_arr[10].arp_param_value
+#define arp_broadcast_interval arp_param_arr[11].arp_param_value
+#define arp_defend_period arp_param_arr[12].arp_param_value
static struct module_info info = {
0, "arp", 0, INFPSZ, 512, 128
@@ -289,27 +308,24 @@ static arl_t *arl_g_head; /* ARL List Head */
/*
* TODO: we need a better mechanism to set the ARP hardware type since
- * the DLPI mac type does not include enough prodefined values.
+ * the DLPI mac type does not include enough predefined values.
*/
static ar_m_t ar_m_tbl[] = { /* One row per DLPI mac type (DL_*); second column is the ARP hardware type */
- { DL_CSMACD, 1, -2, 6}, /* 802.3 */
- { DL_TPB, 6, -2, 6}, /* 802.4 */
- { DL_TPR, 6, -2, 6}, /* 802.5 */
- { DL_METRO, 6, -2, 6}, /* 802.6 */
- { DL_ETHER, 1, -2, 6}, /* Ethernet */
- { DL_FDDI, 1, -2, 6}, /* FDDI */
- { DL_IB, 32, -2, 20}, /* Infiniband */
- { DL_OTHER, 1, -2, 6}, /* unknown */
+ { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3; NOTE(review): last two columns presumably SAP length and hw address length -- confirm against ar_m_t */
+ { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */
+ { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */
+ { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */
+ { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */
+ { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */
+ { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */
+ { DL_OTHER, ARPHRD_ETHER, -2, 6}, /* unknown */
};
/* ARP Cache Entry Hash Table */
-static ace_t *ar_ce_hash_tbl[256];
+static ace_t *ar_ce_hash_tbl[ARP_HASH_SIZE];
static ace_t *ar_ce_mask_entries; /* proto_mask not all ones */
-static mblk_t *ar_timer_mp; /* garbage collection timer */
-static queue_t *ar_timer_queue; /* queue for garbage collection */
-
/*
* Note that all routines which need to queue the message for later
* processing have to be ioctl_aware to be able to queue the complete message.
@@ -318,6 +334,16 @@ static queue_t *ar_timer_queue; /* queue for garbage collection */
#define ARF_IOCTL_AWARE 0x1 /* Arp command can come down as M_IOCTL */
#define ARF_ONLY_CMD 0x2 /* Command is exclusive to ARP */
+/* ARP Cmd Table entry: element type of ar_cmd_tbl[], one row per supported command */
+typedef struct arct_s {
+ int (*arct_pfi)(queue_t *, mblk_t *); /* Handler; ar_cmd_dispatch calls it as (*arct_pfi)(q, mp) */
+ uint32_t arct_cmd; /* Command code, e.g. AR_ENTRY_ADD */
+ int arct_min_len; /* Initialized to sizeof the command struct; presumably the minimum message length -- confirm */
+ uint32_t arct_flags; /* ARF_* flags (ARF_IOCTL_AWARE, ARF_ONLY_CMD) */
+ int arct_priv_req; /* Privilege required for this cmd */
+ const char *arct_txt; /* Command name as text */
+} arct_t;
+
static arct_t ar_cmd_tbl[] = {
{ ar_entry_add, AR_ENTRY_ADD, sizeof (area_t),
ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_ADD" },
@@ -327,10 +353,6 @@ static arct_t ar_cmd_tbl[] = {
ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_QUERY" },
{ ar_entry_squery, AR_ENTRY_SQUERY, sizeof (area_t),
ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_SQUERY" },
- { ar_xmit_request, AR_XMIT_REQUEST, sizeof (areq_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_XMIT_REQUEST" },
- { ar_xmit_response, AR_XMIT_RESPONSE, sizeof (areq_t),
- ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_XMIT_RESPONSE" },
{ ar_mapping_add, AR_MAPPING_ADD, sizeof (arma_t),
ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_MAPPING_ADD" },
{ ar_interface_up, AR_INTERFACE_UP, sizeof (arc_t),
@@ -372,7 +394,7 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len,
if ((flags & ~ACE_EXTERNAL_FLAGS_MASK) || arl == NULL)
return (EINVAL);
if (flags & ACE_F_MYADDR)
- flags |= ACE_F_PUBLISH;
+ flags |= ACE_F_PUBLISH | ACE_F_AUTHORITY;
if (!hw_addr && hw_addr_len == 0) {
if (flags == ACE_F_PERMANENT) { /* Not publish */
@@ -398,6 +420,17 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len,
return (EINVAL);
if (!proto_extract_mask && (flags & ACE_F_MAPPING))
return (EINVAL);
+
+ /*
+ * If the underlying link doesn't have reliable up/down notification or
+ * if we're working with the IPv4 169.254.0.0/16 Link Local Address
+ * space, then don't use the fast timers. Otherwise, use them.
+ */
+ if (arl->arl_notifies &&
+ !(proto == IP_ARP_PROTO_TYPE && IS_IPV4_LL_SPACE(proto_addr))) {
+ flags |= ACE_F_FAST;
+ }
+
/*
* Allocate the timer block to hold the ace.
* (ace + proto_addr + proto_addr_mask + proto_extract_mask + hw_addr)
@@ -425,15 +458,15 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len,
* subnet structure, if, for example, there are BSD4.2 systems lurking.
*/
ace->ace_proto_mask = dst;
- if (proto_mask) {
+ if (proto_mask != NULL) {
bcopy(proto_mask, dst, proto_addr_len);
dst += proto_addr_len;
} else {
- while (proto_addr_len--)
+ while (proto_addr_len-- > 0)
*dst++ = (uchar_t)~0;
}
- if (proto_extract_mask) {
+ if (proto_extract_mask != NULL) {
ace->ace_proto_extract_mask = dst;
bcopy(proto_extract_mask, dst, ace->ace_proto_addr_length);
dst += ace->ace_proto_addr_length;
@@ -443,21 +476,22 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len,
ace->ace_hw_extract_start = hw_extract_start;
ace->ace_hw_addr_length = hw_addr_len;
ace->ace_hw_addr = dst;
- if (hw_addr) {
+ if (hw_addr != NULL) {
bcopy(hw_addr, dst, hw_addr_len);
dst += hw_addr_len;
}
ace->ace_arl = arl;
ace->ace_flags = flags;
- ace->ace_publish_count = arp_publish_count;
+
if (ar_mask_all_ones(ace->ace_proto_mask,
ace->ace_proto_addr_length)) {
acep = ar_ce_hash(ace->ace_proto, ace->ace_proto_addr,
ace->ace_proto_addr_length);
- } else
+ } else {
acep = &ar_ce_mask_entries;
- if ((ace->ace_next = *acep) != 0)
+ }
+ if ((ace->ace_next = *acep) != NULL)
ace->ace_next->ace_ptpn = &ace->ace_next;
*acep = ace;
ace->ace_ptpn = acep;
@@ -488,9 +522,9 @@ ar_ce_delete(ace_t *ace)
* that is going away.
*/
static void
-ar_ce_delete_per_arl(ace_t *ace, arl_t *arl)
+ar_ce_delete_per_arl(ace_t *ace, void *arl)
{
- if (ace != NULL && ace->ace_arl == arl) {
+ if (ace->ace_arl == arl) {
ace->ace_flags &= ~ACE_F_PERMANENT;
ar_ce_delete(ace);
}
@@ -498,9 +532,10 @@ ar_ce_delete_per_arl(ace_t *ace, arl_t *arl)
/* Cache entry hash routine, based on protocol and protocol address. */
static ace_t **
-ar_ce_hash(uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length)
+ar_ce_hash(uint32_t proto, const uchar_t *proto_addr,
+ uint32_t proto_addr_length)
{
- uchar_t *up = proto_addr;
+ const uchar_t *up = proto_addr;
unsigned int hval = proto;
int len = proto_addr_length;
@@ -647,194 +682,170 @@ ar_ce_lookup_permanent(uint32_t proto, uchar_t *proto_addr,
}
/*
- * Pass a cache report back out via NDD.
- * TODO: Right now this report assumes IP proto address formatting.
- */
-/* ARGSUSED */
-static int
-ar_ce_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cr)
-{
- (void) mi_mpprintf(mp,
- "ifname proto addr proto mask hardware addr flags");
- /* abcdefgh xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx xx:xx:xx:xx:xx:xx */
- ar_ce_walk((pfi_t)ar_ce_report1, mp);
- return (0);
-}
-
-/*
- * Add a single line to the ARP Cache Entry Report.
- * TODO: Right now this report assumes IP proto address formatting.
+ * ar_ce_resolve is called when a response comes in to an outstanding request.
+ * Returns 'true' if the address has changed and we need to tell the client.
+ * (We don't need to tell the client if there's still an outstanding query.)
*/
-static void
-ar_ce_report1(ace_t *ace, uchar_t *mp_arg)
+static boolean_t
+ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length)
{
- static uchar_t zero_array[8];
- uint32_t flags = ace->ace_flags;
- mblk_t *mp = (mblk_t *)mp_arg;
- uchar_t *p = ace->ace_proto_addr;
- uchar_t *h = ace->ace_hw_addr;
- uchar_t *m = ace->ace_proto_mask;
- const char *name = "unknown";
-
- if (ace->ace_arl != NULL)
- name = ace->ace_arl->arl_name;
- if (p == NULL)
- p = zero_array;
- if (h == NULL)
- h = zero_array;
- if (m == NULL)
- m = zero_array;
- (void) mi_mpprintf(mp,
- "%8s %03d.%03d.%03d.%03d "
- "%03d.%03d.%03d.%03d %02x:%02x:%02x:%02x:%02x:%02x",
- name,
- p[0] & 0xFF, p[1] & 0xFF, p[2] & 0xFF, p[3] & 0xFF,
- m[0] & 0xFF, m[1] & 0xFF, m[2] & 0xFF, m[3] & 0xFF,
- h[0] & 0xFF, h[1] & 0xFF, h[2] & 0xFF, h[3] & 0xFF,
- h[4] & 0xFF, h[5] & 0xFF);
- if (flags & ACE_F_PERMANENT)
- (void) mi_mpprintf_nr(mp, " PERM");
- if (flags & ACE_F_PUBLISH)
- (void) mi_mpprintf_nr(mp, " PUBLISH");
- if (flags & ACE_F_DYING)
- (void) mi_mpprintf_nr(mp, " DYING");
- if (!(flags & ACE_F_RESOLVED))
- (void) mi_mpprintf_nr(mp, " UNRESOLVED");
- if (flags & ACE_F_MAPPING)
- (void) mi_mpprintf_nr(mp, " MAPPING");
- if (flags & ACE_F_MYADDR)
- (void) mi_mpprintf_nr(mp, " MYADDR");
-}
+ boolean_t hwchanged; /* B_TRUE when hw_addr differs from the cached hardware address */
-/*
- * ar_ce_resolve is called when a response comes in to an outstanding
- * request.
- */
-static void
-ar_ce_resolve(ace_t *ace, uchar_t *hw_addr, uint32_t hw_addr_length)
-{
if (hw_addr_length == ace->ace_hw_addr_length) { /* A length mismatch leaves the ace untouched */
- if (ace->ace_hw_addr)
+ ASSERT(ace->ace_hw_addr != NULL);
+ hwchanged = bcmp(hw_addr, ace->ace_hw_addr,
+ hw_addr_length) != 0;
+ if (hwchanged) /* Rewrite the cached address only when it actually differs */
bcopy(hw_addr, ace->ace_hw_addr, hw_addr_length);
/*
- * ar_query_reply() blows away soft entries.
- * Do not call it unless something is waiting.
+ * No need to bother with ar_query_reply if no queries are
+ * waiting.
*/
ace->ace_flags |= ACE_F_RESOLVED; /* Set on every length-matched call, changed or not */
- if (ace->ace_query_mp)
+ if (ace->ace_query_mp != NULL)
ar_query_reply(ace, 0, NULL, (uint32_t)0);
+ else if (hwchanged) /* No query pending: report the change so the caller can tell the client */
+ return (B_TRUE);
}
+ return (B_FALSE); /* Length mismatch, unchanged address, or a pending query was answered instead */
}
/*
* There are 2 functions performed by this function.
* 1. Resolution of unresolved entries and update of resolved entries.
- * 2. Detection of hosts with (duplicate) our own IP address
+ * 2. Detection of nodes with our own IP address (duplicates).
+ *
+ * This is complicated by ill groups. We don't currently have knowledge of ill
+ * groups, so we can't distinguish between a packet that comes in on one of the
+ * arls that's part of the group versus one that's on an unrelated arl. Thus,
+ * we take a conservative approach. If the arls match, then we update resolved
+ * and unresolved entries alike. If they don't match, then we update only
+ * unresolved entries.
*
- * Resolution of unresolved entries and update of resolved entries.
+ * For all entries, we first check to see if this is a duplicate (probable
+ * loopback) message. If so, then just ignore it.
*
- * case A. The packet has been received on the same interface as this ace's
- * arl. We blindly call ar_ce_resolve(). The relevant checks for duplicate
- * detection (ACE_F_MYADDR) and trying to update published entries have
- * already happened in ar_rput(). Both resolved and unresolved entries are
- * updated now. This allows a published entry to be updated by an arp
- * request, from the node for which we are a proxy arp server, as for eg.
- * when a mobile node returns home.
+ * Next, check to see if the entry has completed DAD. If not, then we've
+ * failed, because someone is already using the address. Notify IP of the DAD
+ * failure and remove the broken ace.
*
- * case B. The interface on which the packet arrived does not match the
- * ace's arl. In this case we update only unresolved entries.
- * Look whether we have an unresolved entry for src_paddr and if so
- * resolve it. We need to look at all the aces that matches the
- * src_haddr because with ill groups we could have unresolved ace
- * across the whole group. As we don't have knowledge of groups,
- * look across all of them. Note that this logic does not update published
- * arp entries, as for eg. when we proxy arp across 2 subnets with
- * differing subnet masks.
+ * Next, we check if we're the authority for this address. If so, then it's
+ * time to defend it, because the other node is a duplicate. Report it as a
+ * 'bogon' and let IP decide how to defend.
*
- * Detection of hosts with (duplicate) our own IP address.
+ * Finally, if it's unresolved or if the arls match, we just update the MAC
+ * address. This allows a published 'static' entry to be updated by an ARP
+ * request from the node for which we're a proxy ARP server -- e.g., when a
+ * mobile node returns home. If the address has changed, then tell IP.
*
- * case A is handled in ar_rput(). case B is handled here. We return AR_BOGON,
- * if we detect duplicate, and caller will send BOGON message to IP.
- * If hme0 and hme1 are in a IPMP group. hme1 will receive broadcast arp
- * packets sent from hme0. Both IP address and Hardware address of the
- * packet match the ace. So we return AR_LOOPBACK.
+ * Note that this logic does not update published ARP entries for mismatched
+ * arls, as for example when we proxy arp across 2 subnets with differing
+ * subnet masks.
*
* Return Values below
*/
-#define AR_NORMAL 1 /* Usual return value. */
-#define AR_LOOPBACK 2 /* Our own broadcast arp packet was received */
-#define AR_BOGON 3 /* Another host has our IP addr. */
+#define AR_NOTFOUND 1 /* No matching ace found in cache */
+#define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */
+#define AR_LOOPBACK 3 /* Our own arp packet was received */
+#define AR_BOGON 4 /* Another host has our IP addr. */
+#define AR_FAILED 5 /* Duplicate Address Detection has failed */
+#define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */
static int
-ar_ce_resolve_all(arl_t *arl, uint32_t proto, uchar_t *src_haddr,
- uint32_t hlen, uchar_t *src_paddr, uint32_t plen)
+ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr,
+ uint32_t hlen, const uchar_t *src_paddr, uint32_t plen)
{
ace_t *ace;
ace_t *ace_next;
+ int i1;
+ const uchar_t *paddr;
+ uchar_t *ace_addr;
+ uchar_t *mask;
+ int retv = AR_NOTFOUND; /* Result so far; raised to AR_MERGED or AR_CHANGED as entries are updated */
ace = *ar_ce_hash(proto, src_paddr, plen); /* Only the hash chain for this proto/address is walked */
for (; ace != NULL; ace = ace_next) {
+ /* ar_ce_resolve may delete the ace; fetch next pointer now */
ace_next = ace->ace_next;
- if (ace->ace_proto_addr_length == plen &&
- ace->ace_proto == proto) {
- int i1 = plen;
- uchar_t *ace_addr = ace->ace_proto_addr;
- uchar_t *mask = ace->ace_proto_mask;
+ if (ace->ace_proto_addr_length != plen ||
+ ace->ace_proto != proto) {
+ continue;
+ }
- /*
- * Note that the ace_proto_mask is applied to the
- * proto_addr before comparing to the ace_addr.
- */
- do {
- if (--i1 < 0) {
- /*
- * Limit updating across other
- * ills to unresolved entries only.
- * We don't want to inadvertently
- * update published entries or our
- * own entries.
- */
- if ((ace->ace_arl == arl) ||
- (!ACE_RESOLVED(ace))) {
- ar_ce_resolve(ace, src_haddr, hlen);
- } else {
- /*
- * If both IP addr and hardware
- * address match our's then this
- * is a broadcast packet emitted by
- * one of our interfaces, reflected
- * by the switch, and received on
- * another interface. We return
- * AR_LOOPBACK. If only IP addr.
- * matches our's then some other node
- * is using our IP addr, return
- * AR_BOGON.
- */
- if (ace->ace_flags & ACE_F_MYADDR) {
- if (bcmp(ace->ace_hw_addr,
- src_haddr,
- ace->ace_hw_addr_length) != 0) {
- return (AR_BOGON);
- } else {
- return (AR_LOOPBACK);
- }
-
- }
- }
+ /*
+ * Note that the ace_proto_mask is applied to the proto_addr
+ * before comparing to the ace_addr.
+ */
+ paddr = src_paddr;
+ i1 = plen;
+ ace_addr = ace->ace_proto_addr;
+ mask = ace->ace_proto_mask;
+ while (--i1 >= 0) {
+ if ((*paddr++ & *mask++) != *ace_addr++)
break;
- }
- } while ((src_paddr[i1] & mask[i1]) == ace_addr[i1]);
+ }
+ if (i1 >= 0) /* Loop broke early: the masked address does not match this ace */
+ continue;
+
+ /*
+ * If both IP addr and hardware address match what we already
+ * have, then this is a broadcast packet emitted by one of our
+ * interfaces, reflected by the switch and received on another
+ * interface. We return AR_LOOPBACK.
+ */
+ if ((ace->ace_flags & ACE_F_MYADDR) &&
+ hlen == ace->ace_hw_addr_length &&
+ bcmp(ace->ace_hw_addr, src_haddr,
+ ace->ace_hw_addr_length) == 0) {
+ return (AR_LOOPBACK); /* Ends the walk at once; any retv gathered so far is discarded */
+ }
+
+ /*
+ * If the entry is unverified, then we've just verified that
+ * someone else already owns this address, because this is a
+ * message with the same protocol address but different
+ * hardware address.
+ */
+ if (ace->ace_flags & ACE_F_UNVERIFIED) {
+ ar_ce_delete(ace);
+ return (AR_FAILED); /* The ace has been deleted; the caller must not use it */
+ }
+
+ /*
+ * If the IP address matches ours and we're authoritative for
+ * this entry, then some other node is using our IP addr, so
+ * return AR_BOGON. Also reset the transmit count to zero so
+ * that, if we're currently in initial announcement mode, we
+ * switch back to the lazier defense mode. Knowing that
+ * there's at least one duplicate out there, we ought not
+ * blindly announce.
+ */
+ if (ace->ace_flags & ACE_F_AUTHORITY) {
+ ace->ace_xmit_count = 0;
+ return (AR_BOGON);
+ }
+
+ /*
+ * Limit updating across other ills to unresolved
+ * entries only. We don't want to inadvertently update
+ * published entries.
+ */
+ if (ace->ace_arl == arl || !ACE_RESOLVED(ace)) {
+ if (ar_ce_resolve(ace, src_haddr, hlen))
+ retv = AR_CHANGED;
+ else if (retv == AR_NOTFOUND) /* Never downgrade an earlier AR_CHANGED */
+ retv = AR_MERGED;
}
}
- return (AR_NORMAL);
+ return (retv); /* AR_NOTFOUND, AR_MERGED or AR_CHANGED; the other codes return early above */
}
/* Pass arg1 to the pfi supplied, along with each ace in existence. */
static void
-ar_ce_walk(pfi_t pfi, void *arg1)
+ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1)
{
ace_t *ace;
ace_t *ace1;
@@ -870,7 +881,7 @@ ar_cleanup(void)
* DEV (i.e. ARL).
*/
static void
-ar_client_notify(arl_t *arl, mblk_t *mp, int code)
+ar_client_notify(const arl_t *arl, mblk_t *mp, int code)
{
ar_t *ar = ((ar_t *)arl->arl_rq->q_ptr)->ar_arl_ip_assoc;
arcn_t *arcn;
@@ -904,6 +915,39 @@ ar_client_notify(arl_t *arl, mblk_t *mp, int code)
putnext(ar->ar_wq, mp1);
}
+/*
+ * Send a delete-notify message down to IP. We've determined that IP doesn't
+ * have a cache entry for the IP address itself, but it may have other cache
+ * entries with the same hardware address, and we don't want to see those grow
+ * stale. (The alternative is sending down updates for every ARP message we
+ * get that doesn't match an existing ace. That's much more expensive than an
+ * occasional delete and reload.)
+ */
+static void
+ar_delete_notify(const ace_t *ace)
+{
+ const arl_t *arl = ace->ace_arl;
+ mblk_t *mp;
+ size_t len;
+ arh_t *arh;
+
+ len = sizeof (*arh) + 2 * ace->ace_proto_addr_length; /* ARP header plus two protocol addresses; no hardware addresses */
+ mp = allocb(len, BPRI_MED);
+ if (mp == NULL)
+ return; /* Best effort: the notification is silently skipped if allocation fails */
+ arh = (arh_t *)mp->b_rptr;
+ mp->b_wptr = (uchar_t *)arh + len;
+ U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware);
+ U16_TO_BE16(ace->ace_proto, arh->arh_proto);
+ arh->arh_hlen = 0; /* Hardware address length of zero: none is carried in this message */
+ arh->arh_plen = ace->ace_proto_addr_length;
+ U16_TO_BE16(ARP_RESPONSE, arh->arh_operation);
+ bcopy(ace->ace_proto_addr, arh + 1, ace->ace_proto_addr_length);
+ bcopy(ace->ace_proto_addr, (uchar_t *)(arh + 1) +
+ ace->ace_proto_addr_length, ace->ace_proto_addr_length); /* Same protocol address fills both address slots */
+ ar_client_notify(arl, mp, AR_CN_ANNOUNCE); /* NOTE(review): sent with the ANNOUNCE code; presumably the client treats arh_hlen == 0 as a delete -- confirm */
+}
+
/* ARP module close routine. */
static int
ar_close(queue_t *q)
@@ -926,7 +970,7 @@ ar_close(queue_t *q)
* an ack. This helps to make sure that messages
* that are currently being sent up by IP are not lost.
*/
- if (MODULE_BELOW_IS_IP(q)) {
+ if (ar->ar_on_ill_stream) {
mp1 = allocb(sizeof (arc_t), BPRI_MED);
if (mp1 != NULL) {
DB_TYPE(mp1) = M_CTL;
@@ -963,7 +1007,7 @@ ar_close(queue_t *q)
* If this is the control stream for an arl, delete anything
* hanging off our arl.
*/
- ar_ce_walk((pfi_t)ar_ce_delete_per_arl, arl);
+ ar_ce_walk(ar_ce_delete_per_arl, arl);
/* Free any messages waiting for a bind_ack */
/* Get the arl out of the chain. */
for (arlp = &arl_g_head; arlp[0]; arlp = &arlp[0]->arl_next) {
@@ -984,21 +1028,6 @@ ar_close(queue_t *q)
ar->ar_arl_ip_assoc->ar_arl_ip_assoc = NULL;
ar->ar_arl_ip_assoc = NULL;
}
- if (WR(q) == ar_timer_queue) {
- /* We were using this one for the garbage collection timer. */
- for (arl = arl_g_head; arl; arl = arl->arl_next)
- if (arl->arl_rq != q)
- break;
- if (arl) {
- ar_timer_queue = arl->arl_wq;
- /* Ask mi_timer to switch to the new queue. */
- mi_timer(ar_timer_queue, ar_timer_mp, -2);
- } else {
- mi_timer_free(ar_timer_mp);
- ar_timer_mp = NULL;
- ar_timer_queue = NULL;
- }
- }
cr = ar->ar_credp;
/* mi_close_comm frees the instance data. */
(void) mi_close_comm(&ar_g_head, q);
@@ -1067,7 +1096,8 @@ ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig)
if (arct->arct_flags & ARF_IOCTL_AWARE)
mp = mp_orig;
- arp2dbg(("ar_cmd_dispatch: %s\n", arct->arct_txt));
+ DTRACE_PROBE3(cmd_dispatch, queue_t *, q, mblk_t *, mp,
+ arct_t *, arct);
return (*arct->arct_pfi)(q, mp);
}
@@ -1104,31 +1134,25 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size)
static void
ar_dlpi_send(arl_t *arl, mblk_t *mp)
{
- mblk_t **mpp;
- union DL_primitives *dlp;
-
ASSERT(arl != NULL);
-
ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
- dlp = (union DL_primitives *)mp->b_rptr;
if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { /* A DLPI primitive is already outstanding on this arl */
+ mblk_t **mpp;
+
/* Must queue message. Tail insertion */
mpp = &arl->arl_dlpi_deferred;
while (*mpp != NULL)
mpp = &((*mpp)->b_next);
-
- arp1dbg(("ar_dlpi_send: deferring DLPI message arl %p %x\n",
- (void *)arl, dlp->dl_primitive));
-
*mpp = mp;
+
+ DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp);
return; /* Deferred messages are picked up later by ar_dlpi_done() */
}
- arp1dbg(("ar_dlpi_send: sending DLPI message arl %p %x\n", (void *)arl,
- dlp->dl_primitive));
-
- arl->arl_dlpi_pending = dlp->dl_primitive;
+ arl->arl_dlpi_pending =
+ ((union DL_primitives *)mp->b_rptr)->dl_primitive; /* Remember which primitive we are waiting on */
+ DTRACE_PROBE2(dlpi_send, arl_t *, arl, mblk_t *, mp);
putnext(arl->arl_wq, mp);
}
@@ -1141,16 +1165,16 @@ ar_dlpi_send(arl_t *arl, mblk_t *mp)
static void
ar_dlpi_done(arl_t *arl, t_uscalar_t prim)
{
- mblk_t *mp;
- union DL_primitives *dlp;
+ mblk_t *mp;
if (arl->arl_dlpi_pending != prim) {
- arp0dbg(("ar_dlpi_done: spurious response arl %p\n",
- (void *)arl));
+ DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl,
+ t_uscalar_t, prim);
return;
}
if ((mp = arl->arl_dlpi_deferred) == NULL) {
+ DTRACE_PROBE2(dlpi_done_idle, arl_t *, arl, t_uscalar_t, prim);
arl->arl_dlpi_pending = DL_PRIM_INVAL;
ar_cmd_done(arl);
return;
@@ -1160,12 +1184,10 @@ ar_dlpi_done(arl_t *arl, t_uscalar_t prim)
mp->b_next = NULL;
ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
- dlp = (union DL_primitives *)mp->b_rptr;
- arp1dbg(("ar_dlpi_done: sending DLPI message arl %p %x\n",
- (void *)arl, dlp->dl_primitive));
-
- arl->arl_dlpi_pending = dlp->dl_primitive;
+ arl->arl_dlpi_pending =
+ ((union DL_primitives *)mp->b_rptr)->dl_primitive;
+ DTRACE_PROBE2(dlpi_done_next, arl_t *, arl, mblk_t *, mp);
putnext(arl->arl_wq, mp);
}
@@ -1268,8 +1290,8 @@ ar_cmd_done(arl_t *arl)
done:
if (dlpi_op_done_mp != NULL) {
- arp1dbg(("ar_dlpi_done: ardlpiopdone arl %p to q %p err %d\n",
- (void *)arl, (void *)dlpi_op_done_q, err));
+ DTRACE_PROBE3(cmd_done_next, arl_t *, arl,
+ queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp);
putnext(dlpi_op_done_q, dlpi_op_done_mp);
}
}
@@ -1295,9 +1317,6 @@ static void
ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd,
boolean_t tail_insert)
{
- arp1dbg(("ar_cmd_enqueue: arl %p from q %p cmd %d \n", (void *)arl,
- (void *)q, cmd));
-
mp->b_queue = q;
if (arl->arl_queue == NULL) {
ASSERT(arl->arl_queue_tail == NULL);
@@ -1336,6 +1355,38 @@ ar_cmd_dequeue(arl_t *arl)
}
/*
+ * Standard ACE timer handling: compute 'fuzz' around a central value or from 0
+ * up to a value, and then set the timer. The randomization is necessary to
+ * prevent groups of systems from falling into synchronization on the network
+ * and producing ARP packet storms.
+ */
+static void
+ace_set_timer(ace_t *ace, boolean_t initial_time)
+{
+ clock_t intv, rnd, frac;
+
+ (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd)); /* NOTE(review): result is discarded; rnd stays uninitialized if this fails -- confirm that is acceptable */
+ /* Note that clock_t is signed; clear the sign bit so rnd is non-negative */
+ rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1;
+ intv = ace->ace_xmit_interval; /* Callers set ace_xmit_interval before calling (see ar_entry_add) */
+ if (initial_time) {
+ /* Set intv to be anywhere in the [1 .. intv] range */
+ if (intv <= 0)
+ intv = 1;
+ else
+ intv = (rnd % intv) + 1;
+ } else {
+ /* Compute 'frac' as 20% of the configured interval */
+ if ((frac = intv / 5) <= 1)
+ frac = 2; /* Floor on the spread so very short intervals still get some jitter */
+ /* Set intv randomly in the range [intv-frac .. intv+frac] */
+ if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0)
+ intv = 1; /* Clamp to a positive timeout */
+ }
+ mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, intv); /* Schedule the ace's own mblk as a timer on the arl write queue */
+}
+
+/*
* Process entry add requests from external messages.
* It is also called by ip_rput_dlpi_writer() through
* ipif_resolver_up() to change hardware address when
@@ -1355,6 +1406,8 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig)
arl_t *arl;
mblk_t *mp = mp_orig;
int err;
+ uint_t aflags;
+ boolean_t unverified;
/* We handle both M_IOCTL and M_PROTO messages. */
if (DB_TYPE(mp) == M_IOCTL)
@@ -1366,16 +1419,32 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_entry_add: enqueue cmd on q %p \n", (void *)q));
+ DTRACE_PROBE3(eadd_enqueued, queue_t *, q, mblk_t *, mp_orig,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_ADD, B_TRUE);
return (EINPROGRESS);
}
mp_orig->b_prev = NULL;
area = (area_t *)mp->b_rptr;
- /* If this is a replacement, ditch the original. */
- if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != 0)
+ aflags = area->area_flags;
+
+ /*
+ * If this is a replacement, ditch the original, but remember the
+ * duplicate address detection state. If it's a new entry, then we're
+ * obligated to do duplicate address detection now.
+ */
+ if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != NULL) {
+ unverified = (ace->ace_flags & ACE_F_UNVERIFIED) != 0;
ar_ce_delete(ace);
+ } else {
+ unverified = (aflags & ACE_F_PUBLISH) != 0;
+ }
+
+ /* Allow client to request DAD restart */
+ if (aflags & ACE_F_UNVERIFIED)
+ unverified = B_TRUE;
+
/* Extract parameters from the message. */
hw_addr_len = area->area_hw_addr_length;
hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len);
@@ -1384,29 +1453,31 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig)
proto_addr_len);
proto_mask = mi_offset_paramc(mp, area->area_proto_mask_offset,
proto_addr_len);
- if (!proto_mask)
+ if (proto_mask == NULL) {
+ DTRACE_PROBE2(eadd_bad_mask, arl_t *, arl, area_t *, area);
return (EINVAL);
+ }
err = ar_ce_create(
arl,
- area->area_proto,
- hw_addr,
- hw_addr_len,
- proto_addr,
- proto_addr_len,
- proto_mask,
- NULL,
- (uint32_t)0,
- area->area_flags & ~ACE_F_MAPPING);
- if (err)
+ area->area_proto,
+ hw_addr,
+ hw_addr_len,
+ proto_addr,
+ proto_addr_len,
+ proto_mask,
+ NULL,
+ (uint32_t)0,
+ aflags & ~ACE_F_MAPPING & ~ACE_F_UNVERIFIED & ~ACE_F_DEFEND);
+ if (err != 0) {
+ DTRACE_PROBE3(eadd_create_failed, arl_t *, arl, area_t *, area,
+ int, err);
return (err);
- if (area->area_flags & ACE_F_PUBLISH) {
- /*
- * Transmit an arp request for this address to flush stale
- * information froma arp caches.
- */
+ }
+
+ if (aflags & ACE_F_PUBLISH) {
if (hw_addr == NULL || hw_addr_len == 0) {
hw_addr = arl->arl_hw_addr;
- } else if (area->area_flags & ACE_F_MYADDR) {
+ } else if (aflags & ACE_F_MYADDR) {
/*
* If hardware address changes, then make sure
* that the hardware address and hardware
@@ -1422,23 +1493,79 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig)
ace = ar_ce_lookup(arl, area->area_proto, proto_addr,
proto_addr_len);
ASSERT(ace != NULL);
- ar_xmit(arl, ARP_REQUEST, area->area_proto, proto_addr_len,
- hw_addr, proto_addr, arl->arl_arp_addr,
- proto_addr);
+
+ if (ace->ace_flags & ACE_F_FAST) {
+ ace->ace_xmit_count = arp_fastprobe_count;
+ ace->ace_xmit_interval = arp_fastprobe_delay;
+ } else {
+ ace->ace_xmit_count = arp_probe_count;
+ ace->ace_xmit_interval = arp_probe_delay;
+ }
+
+ /*
+ * If the user has disabled duplicate address detection for
+ * this kind of interface (fast or slow) by setting the probe
+ * count to zero, then pretend as if we've verified the
+ * address, and go right to address defense mode.
+ */
+ if (ace->ace_xmit_count == 0)
+ unverified = B_FALSE;
/*
- * If MYADDR is set - it is not a proxy arp entry. In that
- * case we send more than one copy, so that if this is
- * a case of failover, we send out multiple entries in case
- * the switch is very slow.
+ * If we need to do duplicate address detection, then kick that
+ * off. Otherwise, send out a gratuitous ARP message in order
+ * to update everyone's caches with the new hardware address.
*/
- if ((area->area_flags & ACE_F_MYADDR) &&
- ace->ace_publish_count != 0 && arp_publish_interval != 0) {
- /* Account for the xmit we just did */
- ace->ace_publish_count--;
- if (ace->ace_publish_count != 0) {
- mi_timer(arl->arl_wq, ace->ace_mp,
- arp_publish_interval);
+ if (unverified) {
+ ace->ace_flags |= ACE_F_UNVERIFIED;
+ if (ace->ace_xmit_interval == 0) {
+ /*
+ * User has configured us to send the first
+ * probe right away. Do so, and set up for
+ * the subsequent probes.
+ */
+ DTRACE_PROBE2(eadd_probe, ace_t *, ace,
+ area_t *, area);
+ ar_xmit(arl, ARP_REQUEST, area->area_proto,
+ proto_addr_len, hw_addr, NULL, NULL,
+ proto_addr, NULL);
+ ace->ace_xmit_count--;
+ ace->ace_xmit_interval =
+ (ace->ace_flags & ACE_F_FAST) ?
+ arp_fastprobe_interval :
+ arp_probe_interval;
+ ace_set_timer(ace, B_FALSE);
+ } else {
+ DTRACE_PROBE2(eadd_delay, ace_t *, ace,
+ area_t *, area);
+ /* Regular delay before initial probe */
+ ace_set_timer(ace, B_TRUE);
+ }
+ } else {
+ DTRACE_PROBE2(eadd_announce, ace_t *, ace,
+ area_t *, area);
+ ar_xmit(arl, ARP_REQUEST, area->area_proto,
+ proto_addr_len, hw_addr, proto_addr,
+ arl->arl_arp_addr, proto_addr, NULL);
+ ace->ace_last_bcast = ddi_get_lbolt();
+
+ /*
+ * If AUTHORITY is set, it is not just a proxy arp
+ * entry; we believe we're the authority for this
+ * entry. In that case, and if we're not just doing
+ * one-off defense of the address, we send more than
+ * one copy, so that if this is an IPMP failover, we'll
+ * still have a good chance of updating everyone even
+ * when there's a packet loss or two.
+ */
+ if ((aflags & ACE_F_AUTHORITY) &&
+ !(aflags & ACE_F_DEFEND) &&
+ arp_publish_count > 0) {
+ /* Account for the xmit we just did */
+ ace->ace_xmit_count = arp_publish_count - 1;
+ ace->ace_xmit_interval = arp_publish_interval;
+ if (ace->ace_xmit_count > 0)
+ ace_set_timer(ace, B_FALSE);
}
}
}
@@ -1463,7 +1590,8 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_entry_delete: enqueue on q %p\n", (void *)q));
+ DTRACE_PROBE3(edel_enqueued, queue_t *, q, mblk_t *, mp_orig,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_DELETE, B_TRUE);
return (EINPROGRESS);
}
@@ -1474,7 +1602,13 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig)
* match first.
*/
ace = ar_ce_lookup_from_area(mp, ar_ce_lookup);
- if (ace) {
+ if (ace != NULL) {
+ /*
+ * If it's a permanent entry, then the client is the one who
+ * told us to delete it, so there's no reason to notify.
+ */
+ if (ACE_NONPERM(ace))
+ ar_delete_notify(ace);
ar_ce_delete(ace);
return (0);
}
@@ -1511,6 +1645,7 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
}
arl = ar_ll_lookup_from_mp(mp);
if (arl == NULL) {
+ DTRACE_PROBE2(query_no_arl, queue_t *, q, mblk_t *, mp);
err = EINVAL;
goto err_ret;
}
@@ -1518,7 +1653,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_entry_query: enqueue on q %p\n", (void *)q));
+ DTRACE_PROBE3(query_enqueued, queue_t *, q, mblk_t *, mp_orig,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_QUERY, B_TRUE);
return (EINPROGRESS);
}
@@ -1528,7 +1664,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
proto_addr_len = areq->areq_target_addr_length;
proto_addr = mi_offset_paramc(mp, areq->areq_target_addr_offset,
proto_addr_len);
- if (proto_addr == 0) {
+ if (proto_addr == NULL) {
+ DTRACE_PROBE1(query_illegal_address, areq_t *, areq);
err = EINVAL;
goto err_ret;
}
@@ -1538,9 +1675,22 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
if (areq->areq_xmit_interval == 0)
areq->areq_xmit_interval = AR_DEF_XMIT_INTERVAL;
ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len);
- if (ace) {
+ if (ace != NULL && (ace->ace_flags & ACE_F_OLD)) {
+ /*
+ * This is a potentially stale entry that IP's asking about.
+ * Since IP is asking, it must not have an answer anymore,
+ * either due to periodic ARP flush or due to SO_DONTROUTE.
+ * Rather than go forward with what we've got, restart
+ * resolution.
+ */
+ DTRACE_PROBE2(query_stale_ace, ace_t *, ace, areq_t *, areq);
+ ar_ce_delete(ace);
+ ace = NULL;
+ }
+ if (ace != NULL) {
mblk_t **mpp;
uint32_t count = 0;
+
/*
* There is already a cache entry. This means there is either
* a permanent entry, or address resolution is in progress.
@@ -1550,6 +1700,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
*/
for (mpp = &ace->ace_query_mp; mpp[0]; mpp = &mpp[0]->b_next) {
if (++count > areq->areq_max_buffered) {
+ DTRACE_PROBE2(query_overflow, ace_t *, ace,
+ areq_t *, areq);
mp->b_prev = NULL;
err = EALREADY;
goto err_ret;
@@ -1562,6 +1714,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
* If a query was already queued up, then we must not
* have an answer yet.
*/
+ DTRACE_PROBE2(query_in_progress, ace_t *, ace,
+ areq_t *, areq);
return (EINPROGRESS);
}
if (ACE_RESOLVED(ace)) {
@@ -1572,6 +1726,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
*/
mblk_t *mp1;
+ DTRACE_PROBE2(query_resolved, ace_t *, ace,
+ areq_t *, areq);
mp1 = dupmsg(mp);
ar_query_reply(ace, 0, proto_addr, proto_addr_len);
freemsg(mp1);
@@ -1579,22 +1735,28 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
}
if (ace->ace_flags & ACE_F_MAPPING) {
/* Should never happen */
- arp0dbg(("ar_entry_query: unresolved mapping\n"));
+ DTRACE_PROBE2(query_unresolved_mapping, ace_t *, ace,
+ areq_t *, areq);
mpp[0] = mp->b_next;
err = ENXIO;
goto err_ret;
}
if (arl->arl_xmit_template == NULL) {
/* Can't get help if we don't know how. */
+ DTRACE_PROBE2(query_no_template, ace_t *, ace,
+ areq_t *, areq);
mpp[0] = NULL;
mp->b_prev = NULL;
err = ENXIO;
goto err_ret;
}
+ DTRACE_PROBE2(query_unresolved, ace_t, ace, areq_t *, areq);
} else {
/* No ace yet. Make one now. (This is the common case.) */
if (areq->areq_xmit_count == 0 ||
arl->arl_xmit_template == NULL) {
+ DTRACE_PROBE2(query_template, arl_t *, arl,
+ areq_t *, areq);
mp->b_prev = NULL;
err = ENXIO;
goto err_ret;
@@ -1607,6 +1769,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
areq->areq_sender_addr_offset,
areq->areq_sender_addr_length);
if (sender_addr == NULL) {
+ DTRACE_PROBE2(query_no_sender, arl_t *, arl,
+ areq_t *, areq);
mp->b_prev = NULL;
err = EINVAL;
goto err_ret;
@@ -1615,14 +1779,18 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
proto_addr, proto_addr_len, NULL,
NULL, (uint32_t)0,
areq->areq_flags);
- if (err) {
+ if (err != 0) {
+ DTRACE_PROBE3(query_create_failed, arl_t *, arl,
+ areq_t *, areq, int, err);
mp->b_prev = NULL;
goto err_ret;
}
ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr,
proto_addr_len);
- if (!ace || ace->ace_query_mp) {
+ if (ace == NULL || ace->ace_query_mp != NULL) {
/* Shouldn't happen! */
+ DTRACE_PROBE3(query_lookup_failed, arl_t *, arl,
+ areq_t *, areq, ace_t *, ace);
mp->b_prev = NULL;
err = ENXIO;
goto err_ret;
@@ -1637,10 +1805,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr,
areq->areq_sender_addr_length);
if (src_ace == NULL) {
- printf("ar_entry_query: Could not find the ace for "
- "source address %d.%d.%d.%d\n",
- sender_addr[0], sender_addr[1], sender_addr[2],
- sender_addr[3]);
+ DTRACE_PROBE3(query_source_missing, arl_t *, arl,
+ areq_t *, areq, ace_t *, ace);
ar_query_reply(ace, ENXIO, NULL, (uint32_t)0);
/*
* ar_query_reply has already freed the mp.
@@ -1659,7 +1825,9 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig)
areq->areq_proto, proto_addr, proto_addr_len);
if (dst_ace != NULL && ACE_RESOLVED(dst_ace)) {
- ar_ce_resolve(ace, dst_ace->ace_hw_addr,
+ DTRACE_PROBE3(query_other_arl, arl_t *, arl,
+ areq_t *, areq, ace_t *, dst_ace);
+ (void) ar_ce_resolve(ace, dst_ace->ace_hw_addr,
dst_ace->ace_hw_addr_length);
return (EINPROGRESS);
}
@@ -1701,7 +1869,8 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_entry_squery: enqueue on q %p\n", (void *)q));
+ DTRACE_PROBE3(squery_enqueued, queue_t *, q, mblk_t *, mp_orig,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_SQUERY, B_TRUE);
return (EINPROGRESS);
}
@@ -1714,13 +1883,17 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig)
proto_addr_len);
hw_addr_len = area->area_hw_addr_length;
hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len);
- if (!proto_addr || !hw_addr)
+ if (proto_addr == NULL || hw_addr == NULL) {
+ DTRACE_PROBE1(squery_illegal_address, area_t *, area);
return (EINVAL);
+ }
ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len);
- if (!ace)
+ if (ace == NULL) {
return (ENXIO);
- if (hw_addr_len < ace->ace_hw_addr_length)
+ }
+ if (hw_addr_len < ace->ace_hw_addr_length) {
return (EINVAL);
+ }
if (ACE_RESOLVED(ace)) {
/* Got it, prepare the response. */
ASSERT(area->area_hw_addr_length == ace->ace_hw_addr_length);
@@ -1736,8 +1909,9 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig)
if (mp == mp_orig) {
/* Non-ioctl case */
/* TODO: change message type? */
- arp1dbg(("ar_entry_squery: qreply\n"));
DB_TYPE(mp) = M_CTL; /* Caught by ip_wput */
+ DTRACE_PROBE3(squery_reply, queue_t *, q, mblk_t *, mp,
+ arl_t *, arl);
qreply(q, mp);
return (EINPROGRESS);
}
@@ -1751,10 +1925,9 @@ ar_interface_down(queue_t *q, mblk_t *mp)
{
arl_t *arl;
- arp1dbg(("ar_interface_down q %p\n", (void *)q));
arl = ar_ll_lookup_from_mp(mp);
- if ((arl == NULL) || (arl->arl_closing)) {
- arp1dbg(("ar_interface_down: no arl q %p \n", (void *)q));
+ if (arl == NULL || arl->arl_closing) {
+ DTRACE_PROBE2(down_no_arl, queue_t *, q, mblk_t *, mp);
return (EINVAL);
}
@@ -1762,6 +1935,8 @@ ar_interface_down(queue_t *q, mblk_t *mp)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp, arl)) {
+ DTRACE_PROBE3(down_enqueued, queue_t *, q, mblk_t *, mp,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_TRUE);
return (EINPROGRESS);
}
@@ -1784,7 +1959,7 @@ ar_interface_down(queue_t *q, mblk_t *mp)
ASSERT(arl->arl_state == ARL_S_UP);
/* Free all arp entries for this interface */
- ar_ce_walk((pfi_t)ar_ce_delete_per_arl, arl);
+ ar_ce_walk(ar_ce_delete_per_arl, arl);
ar_ll_down(arl);
/* Return EINPROGRESS so that ar_rput does not free the 'mp' */
@@ -1801,10 +1976,9 @@ ar_interface_up(queue_t *q, mblk_t *mp)
int err;
mblk_t *mp1;
- arp1dbg(("ar_interface_up q %p\n", (void *)q));
arl = ar_ll_lookup_from_mp(mp);
- if ((arl == NULL) || (arl->arl_closing)) {
- arp1dbg(("ar_interface_up: no arl %p\n", (void *)q));
+ if (arl == NULL || arl->arl_closing) {
+ DTRACE_PROBE2(up_no_arl, queue_t *, q, mblk_t *, mp);
err = EINVAL;
goto done;
}
@@ -1813,6 +1987,8 @@ ar_interface_up(queue_t *q, mblk_t *mp)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp, arl)) {
+ DTRACE_PROBE3(up_enqueued, queue_t *, q, mblk_t *, mp,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_TRUE);
return (EINPROGRESS);
}
@@ -1843,9 +2019,10 @@ done:
mp1 = ar_alloc(AR_DLPIOP_DONE, err);
if (mp1 != NULL) {
- arp1dbg(("ar_interface_up: send resp err %d q %p\n",
- err, (void *)q));
- putnext(WR(q), mp1);
+ q = WR(q);
+ DTRACE_PROBE3(up_send_err, queue_t *, q, mblk_t *, mp1,
+ int, err);
+ putnext(q, mp1);
}
return (err);
}
@@ -1860,13 +2037,13 @@ ar_interface_on(queue_t *q, mblk_t *mp)
{
arl_t *arl;
- arp1dbg(("ar_interface_on\n"));
arl = ar_ll_lookup_from_mp(mp);
if (arl == NULL) {
- arp1dbg(("ar_interface_on: no arl\n"));
+ DTRACE_PROBE2(on_no_arl, queue_t *, q, mblk_t *, mp);
return (EINVAL);
}
/* Turn off the IFF_NOARP flag and activate ARP */
+ DTRACE_PROBE3(on_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl);
arl->arl_flags = 0;
return (0);
}
@@ -1881,13 +2058,13 @@ ar_interface_off(queue_t *q, mblk_t *mp)
{
arl_t *arl;
- arp1dbg(("ar_interface_off\n"));
arl = ar_ll_lookup_from_mp(mp);
if (arl == NULL) {
- arp1dbg(("ar_interface_off: no arl\n"));
+ DTRACE_PROBE2(off_no_arl, queue_t *, q, mblk_t *, mp);
return (EINVAL);
}
/* Turn on the IFF_NOARP flag and deactivate ARP */
+ DTRACE_PROBE3(off_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl);
arl->arl_flags = ARL_F_NOARP;
return (0);
}
@@ -1978,6 +2155,7 @@ ar_ll_init(ar_t *ar, mblk_t *mp)
arl->arl_wq = ar->ar_wq;
arl->arl_dlpi_pending = DL_PRIM_INVAL;
+ arl->arl_link_up = B_TRUE;
ar->ar_arl = arl;
}
@@ -2127,8 +2305,6 @@ ar_ll_down(arl_t *arl)
mblk_t *mp;
ar_t *ar;
- arp1dbg(("ar_ll_down arl %p\n", (void *)arl));
-
ASSERT(arl->arl_state == ARL_S_UP);
/* Let's break the association between an ARL and IP instance */
@@ -2163,8 +2339,7 @@ ar_ll_up(arl_t *arl)
mblk_t *detach_mp = NULL;
mblk_t *unbind_mp = NULL;
mblk_t *info_mp = NULL;
-
- arp1dbg(("ar_ll_up arl %p \n", (void *)arl));
+ mblk_t *notify_mp = NULL;
ASSERT(arl->arl_state == ARL_S_DOWN);
@@ -2197,6 +2372,12 @@ ar_ll_up(arl_t *arl)
if (unbind_mp == NULL)
goto bad;
+ notify_mp = ar_dlpi_comm(DL_NOTIFY_REQ, sizeof (dl_notify_req_t));
+ if (notify_mp == NULL)
+ goto bad;
+ ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications =
+ DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN;
+
arl->arl_state = ARL_S_PENDING;
if (arl->arl_provider_style == DL_STYLE2) {
ar_dlpi_send(arl, attach_mp);
@@ -2206,18 +2387,16 @@ ar_ll_up(arl_t *arl)
ar_dlpi_send(arl, info_mp);
ar_dlpi_send(arl, bind_mp);
arl->arl_unbind_mp = unbind_mp;
+ ar_dlpi_send(arl, notify_mp);
return (0);
+
bad:
- if (attach_mp != NULL)
- freemsg(attach_mp);
- if (bind_mp != NULL)
- freemsg(bind_mp);
- if (detach_mp != NULL)
- freemsg(detach_mp);
- if (unbind_mp != NULL)
- freemsg(unbind_mp);
- if (info_mp != NULL)
- freemsg(info_mp);
+ freemsg(attach_mp);
+ freemsg(bind_mp);
+ freemsg(detach_mp);
+ freemsg(unbind_mp);
+ freemsg(info_mp);
+ freemsg(notify_mp);
return (ENOMEM);
}
@@ -2237,7 +2416,6 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig)
uint32_t hw_extract_start;
arl_t *arl;
- arp1dbg(("ar_mapping_add\n"));
/* We handle both M_IOCTL and M_PROTO messages. */
if (DB_TYPE(mp) == M_IOCTL)
mp = mp->b_cont;
@@ -2248,14 +2426,15 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig)
* Newly received commands from clients go to the tail of the queue.
*/
if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_mapping_add: enqueue on %p q\n", (void *)q));
+ DTRACE_PROBE3(madd_enqueued, queue_t *, q, mblk_t *, mp_orig,
+ arl_t *, arl);
ar_cmd_enqueue(arl, mp_orig, q, AR_MAPPING_ADD, B_TRUE);
return (EINPROGRESS);
}
mp_orig->b_prev = NULL;
arma = (arma_t *)mp->b_rptr;
- if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != 0)
+ if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != NULL)
ar_ce_delete(ace);
hw_addr_len = arma->arma_hw_addr_length;
hw_addr = mi_offset_paramc(mp, arma->arma_hw_addr_offset, hw_addr_len);
@@ -2267,8 +2446,8 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig)
proto_extract_mask = mi_offset_paramc(mp,
arma->arma_proto_extract_mask_offset, proto_addr_len);
hw_extract_start = arma->arma_hw_mapping_start;
- if (!proto_mask || !proto_extract_mask) {
- arp0dbg(("ar_mapping_add: not masks\n"));
+ if (proto_mask == NULL || proto_extract_mask == NULL) {
+ DTRACE_PROBE2(madd_illegal_mask, arl_t *, arl, arpa_t *, arma);
return (EINVAL);
}
return (ar_ce_create(
@@ -2327,6 +2506,7 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
ar_t *ar;
int err;
queue_t *tmp_q;
+ mblk_t *mp;
TRACE_1(TR_FAC_ARP, TR_ARP_OPEN,
"arp_open: q %p", q);
@@ -2335,10 +2515,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
return (0);
}
/* Load up the Named Dispatch tables, if not already done. */
- if (!ar_g_nd &&
- (!nd_load(&ar_g_nd, "arp_cache_report", ar_ce_report, NULL,
- NULL) ||
- !ar_param_register(arp_param_arr, A_CNT(arp_param_arr)))) {
+ if (ar_g_nd == NULL &&
+ !ar_param_register(arp_param_arr, A_CNT(arp_param_arr))) {
ar_cleanup();
return (ENOMEM);
}
@@ -2362,8 +2540,6 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
crhold(credp);
ar->ar_credp = credp;
- if (!ar_timer_mp)
- ar_timer_init(q);
/*
* Probe for the DLPI info if we are not pushed on IP. Wait for
* the reply. In case of error call ar_close() which will take
@@ -2371,6 +2547,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
* as freeing the arl, restarting the timer on a different queue etc.
*/
if (strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0) {
+ arc_t *arc;
+
/*
* We are pushed directly on top of IP. There is no need to
* send down a DL_INFO_REQ. Return success. This could
@@ -2378,7 +2556,25 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
* or a stream corresponding to an open of /dev/arp
* (i.e. <arp-IP> stream). Note that we don't support
* pushing some module in between arp and IP.
+ *
+ * Tell IP, though, that we're an extended implementation, so
+ * it knows to expect a DAD response after bringing an
+ * interface up. Old ATM drivers won't do this, and IP will
+ * just bring the interface up immediately.
*/
+ ar->ar_on_ill_stream = (q->q_next->q_next != NULL);
+ if (!ar->ar_on_ill_stream)
+ return (0);
+ mp = allocb(sizeof (arc_t), BPRI_MED);
+ if (mp == NULL) {
+ (void) ar_close(RD(q));
+ return (ENOMEM);
+ }
+ DB_TYPE(mp) = M_CTL;
+ arc = (arc_t *)mp->b_rptr;
+ mp->b_wptr = mp->b_rptr + sizeof (arc_t);
+ arc->arc_cmd = AR_ARP_EXTEND;
+ putnext(q, mp);
return (0);
}
tmp_q = q;
@@ -2390,8 +2586,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
if (strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "ip") == 0) {
/*
- * We don't support pushing ARP arbitrarily on an
- * IP driver stream. ARP has to be pushed directly above IP
+ * We don't support pushing ARP arbitrarily on an IP driver
+ * stream. ARP has to be pushed directly above IP.
*/
(void) ar_close(RD(q));
return (ENOTSUP);
@@ -2400,8 +2596,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
* Send down a DL_INFO_REQ so we can find out what we are
* talking to.
*/
- mblk_t *mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t));
- if (!mp) {
+ mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t));
+ if (mp == NULL) {
(void) ar_close(RD(q));
return (ENOMEM);
}
@@ -2547,19 +2743,18 @@ ar_plink_send(queue_t *q, mblk_t *mp)
* ar_ce_walk routine to delete any outstanding queries for an ar that is
* going away.
*/
-static int
-ar_query_delete(ace_t *ace, uchar_t *ar)
+static void
+ar_query_delete(ace_t *ace, void *arg)
{
+ ar_t *ar = arg;
mblk_t **mpp = &ace->ace_query_mp;
- mblk_t *mp = mpp[0];
+ mblk_t *mp;
- if (!mp)
- return (0);
- do {
+ while ((mp = *mpp) != NULL) {
/* The response queue was stored in the query b_prev. */
- if ((queue_t *)mp->b_prev == ((ar_t *)ar)->ar_wq ||
- (queue_t *)mp->b_prev == ((ar_t *)ar)->ar_rq) {
- mpp[0] = mp->b_next;
+ if ((queue_t *)mp->b_prev == ar->ar_wq ||
+ (queue_t *)mp->b_prev == ar->ar_rq) {
+ *mpp = mp->b_next;
if (DB_TYPE(mp) == M_PROTO &&
*(uint32_t *)mp->b_rptr == AR_ENTRY_QUERY) {
BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed);
@@ -2568,8 +2763,7 @@ ar_query_delete(ace_t *ace, uchar_t *ar)
} else {
mpp = &mp->b_next;
}
- } while ((mp = mpp[0]) != 0);
- return (0);
+ }
}
/*
@@ -2614,11 +2808,11 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr,
}
/* Complete the response based on how the request arrived. */
if (DB_TYPE(mp) == M_IOCTL) {
- struct iocblk *ioc =
- (struct iocblk *)mp->b_rptr;
+ struct iocblk *ioc = (struct iocblk *)mp->b_rptr;
+
ioc->ioc_error = ret_val;
- DB_TYPE(mp) = M_IOCACK;
if (ret_val != 0) {
+ DB_TYPE(mp) = M_IOCNAK;
ioc->ioc_count = 0;
putnext(q, mp);
continue;
@@ -2627,6 +2821,7 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr,
* Return the xmit template out with the successful
* IOCTL.
*/
+ DB_TYPE(mp) = M_IOCACK;
ioc->ioc_count = template->b_wptr - template->b_rptr;
/* Remove the areq mblk from the IOCTL. */
areq_mp = mp->b_cont;
@@ -2680,12 +2875,23 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr,
mp->b_cont = template;
putnext(q, mp);
}
+
/*
- * Unless we are responding from a permanent cache entry, delete
- * the ace.
+ * Unless we are responding from a permanent cache entry, start the
+ * cleanup timer or (on error) delete the entry.
*/
if (!(ace->ace_flags & (ACE_F_PERMANENT | ACE_F_DYING))) {
- ar_ce_delete(ace);
+ if (!ACE_RESOLVED(ace) || arl->arl_xmit_template == NULL) {
+ /*
+ * No need to notify IP here, because the entry was
+ * never resolved, so IP can't have any cached copies
+ * of the address.
+ */
+ ar_ce_delete(ace);
+ } else {
+ mi_timer(arl->arl_wq, ace->ace_mp,
+ arp_cleanup_interval);
+ }
}
}
@@ -2726,10 +2932,26 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace)
src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr,
areq->areq_sender_addr_length);
if (src_ace == NULL) {
- printf("ar_query_xmit: Could not find the ace\n");
+ DTRACE_PROBE3(xmit_no_source, ace_t *, ace,
+ areq_t *, areq, uchar_t *, sender_addr);
return (0);
}
}
+
+ /*
+ * If we haven't yet finished duplicate address checking on this source
+ * address, then do *not* use it on the wire. Doing so will corrupt
+ * the world's caches. Just allow the timer to restart. Note that
+ * duplicate address checking will eventually complete one way or the
+ * other, so this cannot go on "forever."
+ */
+ if (src_ace->ace_flags & ACE_F_UNVERIFIED) {
+ DTRACE_PROBE2(xmit_source_unverified, ace_t *, ace,
+ ace_t *, src_ace);
+ areq->areq_xmit_count++;
+ return (areq->areq_xmit_interval);
+ }
+
/*
* Transmit on src_arl. We should transmit on src_arl. Otherwise
* the switch will send back a copy on other interfaces of the
@@ -2737,9 +2959,12 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace)
* address + hardware address, ARP will treat this as a bogon.
*/
src_arl = src_ace->ace_arl;
+ DTRACE_PROBE3(xmit_send, ace_t *, ace, ace_t *, src_ace,
+ areq_t *, areq);
ar_xmit(src_arl, ARP_REQUEST, areq->areq_proto,
areq->areq_sender_addr_length, src_arl->arl_hw_addr, sender_addr,
- src_arl->arl_arp_addr, proto_addr);
+ src_arl->arl_arp_addr, proto_addr, NULL);
+ src_ace->ace_last_bcast = ddi_get_lbolt();
return (areq->areq_xmit_interval);
}
@@ -2758,11 +2983,10 @@ ar_rput(queue_t *q, mblk_t *mp)
int op;
uint32_t plen;
uint32_t proto;
- ace_t *src_ace;
uchar_t *src_haddr;
uchar_t *src_paddr;
- dl_unitdata_ind_t *dlui;
- boolean_t hwaddr_changed = B_TRUE;
+ boolean_t is_probe;
+ int i;
TRACE_1(TR_FAC_ARP, TR_ARP_RPUT_START,
"arp_rput_start: q %p", q);
@@ -2817,34 +3041,36 @@ ar_rput(queue_t *q, mblk_t *mp)
return;
case M_PCPROTO:
case M_PROTO:
+ if (MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&
+ ((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive ==
+ DL_UNITDATA_IND) {
+ arl = ((ar_t *)q->q_ptr)->ar_arl;
+ if (arl != NULL) {
+ /* Real messages from the wire! */
+ break;
+ }
+ putnext(q, mp);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "default");
+ return;
+ }
err = ar_cmd_dispatch(q, mp);
switch (err) {
case ENOENT:
+ /* Miscellaneous DLPI messages get shuffled off. */
+ ar_rput_dlpi(q, mp);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "proto/dlpi");
break;
case EINPROGRESS:
TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
"arp_rput_end: q %p (%S)", q, "proto");
- return;
+ break;
default:
inet_freemsg(mp);
- return;
- }
- if ((mp->b_wptr - mp->b_rptr) < sizeof (dl_unitdata_ind_t) ||
- ((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive
- != DL_UNITDATA_IND) {
- /* Miscellaneous DLPI messages get shuffled off. */
- ar_rput_dlpi(q, mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "proto/dlpi");
- return;
- }
- /* DL_UNITDATA_IND */
- arl = ((ar_t *)q->q_ptr)->ar_arl;
- if (arl != NULL) {
- /* Real messages from the wire! */
break;
}
- /* FALLTHRU */
+ return;
default:
putnext(q, mp);
TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
@@ -2867,15 +3093,14 @@ ar_rput(queue_t *q, mblk_t *mp)
* followed by an ARP packet. We do some initial checks and then
* get to work.
*/
- dlui = (dl_unitdata_ind_t *)mp->b_rptr;
mp1 = mp->b_cont;
- if (!mp1) {
+ if (mp1 == NULL) {
freemsg(mp);
TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
"arp_rput_end: q %p (%S)", q, "baddlpi");
return;
}
- if (!OK_32PTR(mp1->b_rptr) || mp1->b_cont) {
+ if (mp1->b_cont != NULL) {
/* No fooling around with funny messages. */
if (!pullupmsg(mp1, -1)) {
freemsg(mp);
@@ -2885,22 +3110,33 @@ ar_rput(queue_t *q, mblk_t *mp)
}
}
arh = (arh_t *)mp1->b_rptr;
- hlen = (uint32_t)arh->arh_hlen & 0xFF;
- plen = (uint32_t)arh->arh_plen & 0xFF;
- if ((mp1->b_wptr - mp1->b_rptr)
- < (ARH_FIXED_LEN + hlen + hlen + plen + plen)) {
+ hlen = arh->arh_hlen;
+ plen = arh->arh_plen;
+ if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) {
freemsg(mp);
TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
"arp_rput_end: q %p (%S)", q, "short");
return;
}
- if (hlen == 0 || plen == 0) {
- arp1dbg(("ar_rput: bogus arh\n"));
+ /*
+ * hlen 0 is used for RFC 1868 UnARP.
+ *
+ * Note that the rest of the code checks that hlen is what we expect
+ * for this hardware address type, so might as well discard packets
+ * here that don't match.
+ */
+ if ((hlen > 0 && hlen != arl->arl_hw_addr_length) || plen == 0) {
+ DTRACE_PROBE2(rput_bogus, arl_t *, arl, mblk_t *, mp1);
freemsg(mp);
TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
"arp_rput_end: q %p (%S)", q, "hlenzero/plenzero");
return;
}
+ /*
+ * Historically, Solaris has been lenient about hardware type numbers.
+ * We should check here, but don't.
+ */
+ DTRACE_PROBE2(rput_normal, arl_t *, arl, arh_t *, arh);
proto = (uint32_t)BE16_TO_U16(arh->arh_proto);
src_haddr = (uchar_t *)arh;
src_haddr = &src_haddr[ARH_FIXED_LEN];
@@ -2908,191 +3144,255 @@ ar_rput(queue_t *q, mblk_t *mp)
dst_paddr = &src_haddr[hlen + plen + hlen];
op = BE16_TO_U16(arh->arh_operation);
- /* Now see if we have a cache entry for the source address. */
- src_ace = ar_ce_lookup_entry(arl, proto, src_paddr, plen);
+ /* Determine if this is just a probe */
+ for (i = 0; i < plen; i++)
+ if (src_paddr[i] != 0)
+ break;
+ is_probe = i >= plen;
+
/*
- * If so, and it is the entry for one of our IP addresses,
- * we really don't expect to see this packet, so pretend we didn't.
- * Tell IP that we received a bogon.
- *
- * If is a "published" (proxy arp) entry we can receive requests
- * FROM the node but we should never see an ARP_RESPONSE. In this case
- * we process the response but also inform IP.
+ * RFC 826: first check if the <protocol, sender protocol address> is
+ * in the cache, if there is a sender protocol address. Note that this
+ * step also handles resolutions based on source.
*/
- if (src_ace) {
- if (src_ace->ace_flags & ACE_F_MYADDR) {
- freeb(mp);
- ar_client_notify(arl, mp1, AR_CN_BOGON);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "pubentry");
- return;
- }
- if ((src_ace->ace_flags & ACE_F_PUBLISH) &&
- op == ARP_RESPONSE) {
- mblk_t *mp2;
-
- mp2 = copymsg(mp1);
- if (mp2 != NULL)
- ar_client_notify(arl, mp2, AR_CN_BOGON);
- }
- if (src_ace->ace_hw_addr_length == hlen &&
- bcmp(src_ace->ace_hw_addr, src_haddr, hlen) == 0) {
- hwaddr_changed = B_FALSE;
- }
+ if (is_probe)
+ err = AR_NOTFOUND;
+ else
+ err = ar_ce_resolve_all(arl, proto, src_haddr, hlen, src_paddr,
+ plen);
+ switch (err) {
+ case AR_BOGON:
+ ar_client_notify(arl, mp1, AR_CN_BOGON);
+ mp1 = NULL;
+ break;
+ case AR_FAILED:
+ ar_client_notify(arl, mp1, AR_CN_FAILED);
+ mp1 = NULL;
+ break;
+ case AR_LOOPBACK:
+ DTRACE_PROBE2(rput_loopback, arl_t *, arl, arh_t *, arh);
+ freemsg(mp1);
+ mp1 = NULL;
+ break;
}
- switch (op) {
- case ARP_REQUEST:
- /*
- * If we know the answer, and it is "published", send out
- * the response.
- */
- dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen);
- if (dst_ace && (dst_ace->ace_flags & ACE_F_PUBLISH) &&
- ACE_RESOLVED(dst_ace)) {
- ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen,
- dst_ace->ace_hw_addr, dst_ace->ace_proto_addr,
- src_haddr, src_paddr);
- }
+ if (mp1 == NULL) {
+ freeb(mp);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "unneeded");
+ return;
+ }
+
+ /*
+ * Now look up the destination address. By RFC 826, we ignore the
+ * packet at this step if the target isn't one of our addresses. This
+ * is true even if the target is something we're trying to resolve and
+ * the packet is a response.
+ *
+ * Note that in order to do this correctly, we need to know when to
+ * notify IP of a change implied by the source address of the ARP
+ * message. That implies that the local ARP table has entries for all
+ * of the resolved entries cached in the client. This is why we must
+ * notify IP when we delete a resolved entry and we know that IP may
+ * have cached answers.
+ */
+ dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen);
+ if (dst_ace == NULL || !ACE_RESOLVED(dst_ace) ||
+ !(dst_ace->ace_flags & ACE_F_PUBLISH)) {
/*
- * Now fall through to the response side, and add a cache entry
- * for the sender so we will have it when we need it.
+ * Let the client know if the source mapping has changed, even
+ * if the destination provides no useful information for the
+ * client.
*/
- /* FALLTHRU */
- case ARP_RESPONSE:
+ if (err == AR_CHANGED)
+ ar_client_notify(arl, mp1, AR_CN_ANNOUNCE);
+ else
+ freemsg(mp1);
+ freeb(mp);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "nottarget");
+ return;
+ }
+
+ /*
+ * If the target is unverified by DAD, then one of two things is true:
+ * either it's someone else claiming this address (on a probe or an
+ * announcement) or it's just a regular request. The former is a
+ * DAD failure, but a regular request is not.
+ */
+ if (dst_ace->ace_flags & ACE_F_UNVERIFIED) {
/*
- * With ill groups, we need to look for request across
- * all the ills in the group. The request itself may
- * not be queued on this arl. See ar_query_xmit() for
- * details.
+ * Check for a reflection. Some misbehaving bridges will
+ * reflect our own transmitted packets back to us.
*/
- err = ar_ce_resolve_all(arl, proto, src_haddr, hlen,
- src_paddr, plen);
- if (err == AR_BOGON) {
- /*
- * Some other host has our IP address. Send a
- * BOGON message to IP.
- */
+ if (hlen == dst_ace->ace_hw_addr_length &&
+ bcmp(src_haddr, dst_ace->ace_hw_addr, hlen) == 0) {
+ DTRACE_PROBE3(rput_probe_reflected, arl_t *, arl,
+ arh_t *, arh, ace_t *, dst_ace);
freeb(mp);
- ar_client_notify(arl, mp1, AR_CN_BOGON);
+ freemsg(mp1);
TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "pubentry");
+ "arp_rput_end: q %p (%S)", q, "reflection");
return;
}
+ if (is_probe || op == ARP_RESPONSE) {
+ ar_client_notify(arl, mp1, AR_CN_FAILED);
+ ar_ce_delete(dst_ace);
+ } else if (err == AR_CHANGED) {
+ ar_client_notify(arl, mp1, AR_CN_ANNOUNCE);
+ } else {
+ DTRACE_PROBE3(rput_request_unverified, arl_t *, arl,
+ arh_t *, arh, ace_t *, dst_ace);
+ freemsg(mp1);
+ }
+ freeb(mp);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "unverified");
+ return;
+ }
+
+ /*
+ * If it's a request, then we reply to this, and if we think the
+ * sender's unknown, then we create an entry to avoid unnecessary ARPs.
+ * The design assumption is that someone ARPing us is likely to send us
+ * a packet soon, and that we'll want to reply to it.
+ */
+ if (op == ARP_REQUEST) {
+ const uchar_t *dstaddr = src_haddr;
+ clock_t now;
- if ((err != AR_LOOPBACK) && (src_ace == NULL)) {
+ /*
+ * This implements periodic address defense based on a modified
+ * version of the RFC 3927 requirements. Instead of sending a
+ * broadcasted reply every time, as demanded by the RFC, we
+ * send at most one broadcast reply per arp_broadcast_interval.
+ */
+ now = ddi_get_lbolt();
+ if ((now - dst_ace->ace_last_bcast) >
+ MSEC_TO_TICK(arp_broadcast_interval)) {
+ DTRACE_PROBE3(rput_bcast_reply, arl_t *, arl,
+ arh_t *, arh, ace_t *, dst_ace);
+ dst_ace->ace_last_bcast = now;
+ dstaddr = arl->arl_arp_addr;
/*
- * We may need this one sooner or later. The AR_LOOPBACK
- * check above ensures, that we don't create arp
- * entries for our own IP address, on another arl.
+ * If this is one of the long-suffering entries, then
+ * pull it out now. It no longer needs separate
+ * defense, because we're doing that now with this
+ * broadcasted reply.
*/
- (void) ar_ce_create(arl, proto, src_haddr, hlen,
- src_paddr, plen, NULL,
- NULL, (uint32_t)0,
- (uint32_t)0);
+ dst_ace->ace_flags &= ~ACE_F_DELAYED;
}
- /* Let's see if this is a system ARPing itself. */
- do {
- if (*src_paddr++ != *dst_paddr++)
- break;
- } while (--plen);
- if (plen == 0) {
- /*
- * An ARP message with identical src and dst
- * protocol addresses. This guy is trying to
- * tell us something that our clients might
- * find interesting.Essentially such packets are
- * sent when a m/c comes up or changes its h/w
- * address, so before notifying our client check the
- * h/w address if there is a cache entry and notify
- * the client only if the addresses differ.
- */
- if (hwaddr_changed) {
- freeb(mp);
- ar_client_notify(arl, mp1, AR_CN_ANNOUNCE);
- } else {
- /* Just discard it. */
- freemsg(mp);
- }
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "duplicate");
- return;
+ ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen,
+ dst_ace->ace_hw_addr, dst_ace->ace_proto_addr,
+ src_haddr, src_paddr, dstaddr);
+ if (!is_probe && err == AR_NOTFOUND &&
+ ar_ce_create(arl, proto, src_haddr, hlen, src_paddr, plen,
+ NULL, NULL, 0, 0) == 0) {
+ ace_t *ace;
+
+ ace = ar_ce_lookup(arl, proto, src_paddr, plen);
+ ASSERT(ace != NULL);
+ mi_timer(arl->arl_wq, ace->ace_mp,
+ arp_cleanup_interval);
}
+ }
+ if (err == AR_CHANGED) {
+ freeb(mp);
+ ar_client_notify(arl, mp1, AR_CN_ANNOUNCE);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "reqchange");
+ } else {
+ freemsg(mp);
+ TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
+ "arp_rput_end: q %p (%S)", q, "end");
+ }
+}
+
+static void
+ar_ce_restart_dad(ace_t *ace, void *arl)
+{
+ if ((ace->ace_arl == arl) &&
+ (ace->ace_flags & (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) ==
+ (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) {
/*
- * A broadcast response may also be interesting.
+ * Slight cheat here: we don't use the initial probe delay
+ * in this obscure case.
*/
- if (op == ARP_RESPONSE && dlui->dl_group_address) {
- freeb(mp);
- ar_client_notify(arl, mp1, AR_CN_ANNOUNCE);
- return;
+ if (ace->ace_flags & ACE_F_FAST) {
+ ace->ace_xmit_count = arp_fastprobe_count;
+ ace->ace_xmit_interval = arp_fastprobe_interval;
+ } else {
+ ace->ace_xmit_count = arp_probe_count;
+ ace->ace_xmit_interval = arp_probe_interval;
}
- break;
- default:
- break;
+ ace->ace_flags &= ~ACE_F_DAD_ABORTED;
+ ace_set_timer(ace, B_FALSE);
}
- freemsg(mp);
- TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END,
- "arp_rput_end: q %p (%S)", q, "end");
}
/* DLPI messages, other than DL_UNITDATA_IND are handled here. */
static void
ar_rput_dlpi(queue_t *q, mblk_t *mp)
{
- ar_t *ar = (ar_t *)q->q_ptr;
+ ar_t *ar = q->q_ptr;
arl_t *arl = ar->ar_arl;
- dl_bind_ack_t *dlba;
- dl_error_ack_t *dlea;
- dl_ok_ack_t *dloa;
- dl_uderror_ind_t *dluei;
- char *err_str;
+ union DL_primitives *dlp;
+ const char *err_str;
- if ((mp->b_wptr - mp->b_rptr) < sizeof (dloa->dl_primitive)) {
+ if (MBLKL(mp) < sizeof (dlp->dl_primitive)) {
putnext(q, mp);
return;
}
- dloa = (dl_ok_ack_t *)mp->b_rptr;
- dlea = (dl_error_ack_t *)dloa;
- switch (dloa->dl_primitive) {
+ dlp = (union DL_primitives *)mp->b_rptr;
+ switch (dlp->dl_primitive) {
case DL_ERROR_ACK:
- switch (dlea->dl_error_primitive) {
+ /*
+ * ce is confused about how DLPI works, so we have to interpret
+ * an "error" on DL_NOTIFY_ACK (which we never could have sent)
+ * as really meaning an error on DL_NOTIFY_REQ.
+ *
+ * Note that supporting DL_NOTIFY_REQ is optional, so printing
+ * out an error message on the console isn't warranted except
+ * for debug.
+ */
+ if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK ||
+ dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) {
+ ar_dlpi_done(arl, DL_NOTIFY_REQ);
+ freemsg(mp);
+ return;
+ }
+ err_str = dlpi_prim_str(dlp->error_ack.dl_error_primitive);
+ DTRACE_PROBE2(rput_dl_error, arl_t *, arl,
+ dl_error_ack_t *, &dlp->error_ack);
+ switch (dlp->error_ack.dl_error_primitive) {
case DL_UNBIND_REQ:
if (arl->arl_provider_style == DL_STYLE1)
arl->arl_state = ARL_S_DOWN;
- ar_dlpi_done(arl, DL_UNBIND_REQ);
- err_str = "DL_UNBIND_REQ";
break;
case DL_DETACH_REQ:
+ case DL_BIND_REQ:
arl->arl_state = ARL_S_DOWN;
- ar_dlpi_done(arl, DL_DETACH_REQ);
- err_str = "DL_DETACH_REQ";
break;
case DL_ATTACH_REQ:
- ar_dlpi_done(arl, DL_ATTACH_REQ);
- err_str = "DL_ATTACH_REQ";
- break;
- case DL_BIND_REQ:
- arl->arl_state = ARL_S_DOWN;
- ar_dlpi_done(arl, DL_BIND_REQ);
- err_str = "DL_BIND_REQ";
break;
default:
- err_str = "?";
- break;
+ /* If it's anything else, we didn't send it. */
+ putnext(q, mp);
+ return;
}
- arp0dbg(("ar_rput_dlpi: "
- "%s (%d) failed, dl_errno %d, dl_unix_errno %d\n",
- err_str, (int)dlea->dl_error_primitive,
- (int)dlea->dl_errno, (int)dlea->dl_unix_errno));
+ ar_dlpi_done(arl, dlp->error_ack.dl_error_primitive);
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"ar_rput_dlpi: %s failed, dl_errno %d, dl_unix_errno %d",
- err_str, dlea->dl_errno, dlea->dl_unix_errno);
+ err_str, dlp->error_ack.dl_errno,
+ dlp->error_ack.dl_unix_errno);
break;
case DL_INFO_ACK:
/*
* We have a response back from the driver. Go set up transmit
* defaults.
*/
+ DTRACE_PROBE2(rput_dl_info, arl_t *, arl,
+ dl_info_ack_t *, &dlp->info_ack);
if (arl != NULL) {
ar_ll_set_defaults(arl, mp);
ar_dlpi_done(arl, DL_INFO_REQ);
@@ -3103,48 +3403,75 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp)
qenable(WR(q));
break;
case DL_OK_ACK:
- arp1dbg(("ar_rput_dlpi: arl %p DL_OK_ACK for %d\n",
- (void *)arl, dloa->dl_correct_primitive));
- switch (dloa->dl_correct_primitive) {
+ DTRACE_PROBE2(rput_dl_ok, arl_t *, arl,
+ dl_ok_ack_t *, &dlp->ok_ack);
+ switch (dlp->ok_ack.dl_correct_primitive) {
case DL_UNBIND_REQ:
if (arl->arl_provider_style == DL_STYLE1)
arl->arl_state = ARL_S_DOWN;
- ar_dlpi_done(arl, DL_UNBIND_REQ);
break;
case DL_DETACH_REQ:
arl->arl_state = ARL_S_DOWN;
- ar_dlpi_done(arl, DL_DETACH_REQ);
break;
case DL_ATTACH_REQ:
- ar_dlpi_done(arl, DL_ATTACH_REQ);
break;
+ default:
+ putnext(q, mp);
+ return;
}
+ ar_dlpi_done(arl, dlp->ok_ack.dl_correct_primitive);
+ break;
+ case DL_NOTIFY_ACK:
+ DTRACE_PROBE2(rput_dl_notify, arl_t *, arl,
+ dl_notify_ack_t *, &dlp->notify_ack);
+ /*
+ * We mostly care about interface-up transitions, as this is
+ * when we need to redo duplicate address detection.
+ */
+ arl->arl_notifies =
+ (dlp->notify_ack.dl_notifications & DL_NOTE_LINK_UP) != 0;
+ ar_dlpi_done(arl, DL_NOTIFY_REQ);
break;
case DL_BIND_ACK:
- arp1dbg(("ar_rput: DL_BIND_ACK arl %p\n", (void *)arl));
- dlba = (dl_bind_ack_t *)dloa;
+ DTRACE_PROBE2(rput_dl_bind, arl_t *, arl,
+ dl_bind_ack_t *, &dlp->bind_ack);
if (arl->arl_sap_length < 0)
- bcopy((char *)dlba + dlba->dl_addr_offset,
+ bcopy((char *)dlp + dlp->bind_ack.dl_addr_offset,
arl->arl_hw_addr, arl->arl_hw_addr_length);
else
- bcopy((char *)dlba + dlba->dl_addr_offset +
+ bcopy((char *)dlp + dlp->bind_ack.dl_addr_offset +
arl->arl_sap_length, arl->arl_hw_addr,
arl->arl_hw_addr_length);
arl->arl_state = ARL_S_UP;
ar_dlpi_done(arl, DL_BIND_REQ);
break;
+ case DL_NOTIFY_IND:
+ DTRACE_PROBE2(rput_dl_notify_ind, arl_t *, arl,
+ dl_notify_ind_t *, &dlp->notify_ind);
+ switch (dlp->notify_ind.dl_notification) {
+ case DL_NOTE_LINK_UP:
+ arl->arl_link_up = B_TRUE;
+ ar_ce_walk(ar_ce_restart_dad, arl);
+ break;
+ case DL_NOTE_LINK_DOWN:
+ arl->arl_link_up = B_FALSE;
+ break;
+ }
+ break;
case DL_UDERROR_IND:
- dluei = (dl_uderror_ind_t *)dloa;
+ DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl,
+ dl_uderror_ind_t *, &dlp->uderror_ind);
(void) mi_strlog(q, 1, SL_ERROR | SL_TRACE,
"ar_rput_dlpi: "
"DL_UDERROR_IND, dl_dest_addr_length %d dl_errno %d",
- dluei->dl_dest_addr_length, dluei->dl_errno);
+ dlp->uderror_ind.dl_dest_addr_length,
+ dlp->uderror_ind.dl_errno);
putnext(q, mp);
return;
default:
- arp1dbg(("ar_rput_dlpi: default, primitive %d\n",
- (int)dloa->dl_primitive));
+ DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
+ union DL_primitives *, dlp);
putnext(q, mp);
return;
}
@@ -3158,14 +3485,12 @@ ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr,
uchar_t *mask, *to;
int len;
- if (!ace->ace_hw_addr)
- return;
+ ASSERT(ace->ace_hw_addr != NULL);
bcopy(ace->ace_hw_addr, addrpos, ace->ace_hw_addr_length);
if (ace->ace_flags & ACE_F_MAPPING &&
proto_addr != NULL &&
ace->ace_proto_extract_mask) { /* careful */
- arp1dbg(("ar_set_address: MAPPING\n"));
len = MIN((int)ace->ace_hw_addr_length
- ace->ace_hw_extract_start,
proto_addr_len);
@@ -3179,14 +3504,15 @@ ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr,
static int
ar_slifname(queue_t *q, mblk_t *mp_orig)
{
- ar_t *ar = (ar_t *)q->q_ptr;
+ ar_t *ar = q->q_ptr;
arl_t *arl = ar->ar_arl;
struct lifreq *lifr;
mblk_t *mp = mp_orig;
+ arl_t *old_arl;
+ mblk_t *ioccpy;
+ struct iocblk *iocp;
- arp1dbg(("ar_slifname\n"));
-
- if (MODULE_BELOW_IS_IP(q)) {
+ if (ar->ar_on_ill_stream) {
/*
* This command is for IP, since it is coming down
* the <arp-IP-driver> stream. Return ENOENT so that
@@ -3197,37 +3523,71 @@ ar_slifname(queue_t *q, mblk_t *mp_orig)
/* We handle both M_IOCTL and M_PROTO messages */
if (DB_TYPE(mp) == M_IOCTL)
mp = mp->b_cont;
- if (!q->q_next || arl == NULL) {
+ if (q->q_next == NULL || arl == NULL) {
/*
* If the interface was just opened and
* the info ack has not yet come back from the driver
*/
- arp1dbg(("ar_slifname no arl - queued\n"));
+ DTRACE_PROBE2(slifname_no_arl, queue_t *, q,
+ mblk_t *, mp_orig);
(void) putq(q, mp_orig);
return (EINPROGRESS);
}
- if (arl->arl_name[0] != '\0')
+
+ if (MBLKL(mp) < sizeof (struct lifreq)) {
+ DTRACE_PROBE2(slifname_malformed, queue_t *, q, mblk_t *, mp);
+ return (EINVAL); /* too short to hold a struct lifreq */
+ }
+
+ if (arl->arl_name[0] != '\0') {
+ DTRACE_PROBE1(slifname_already, arl_t *, arl);
return (EALREADY);
+ }
- lifr = (struct lifreq *)(mp->b_rptr);
+ lifr = (struct lifreq *)mp->b_rptr;
- if (strlen(lifr->lifr_name) >= LIFNAMSIZ)
+ if (strlen(lifr->lifr_name) >= LIFNAMSIZ) {
+ DTRACE_PROBE2(slifname_bad_name, arl_t *, arl,
+ struct lifreq *, lifr);
return (ENXIO);
+ }
/* Check whether the name is already in use. */
- if (ar_ll_lookup_by_name(lifr->lifr_name)) {
- arp1dbg(("ar_slifname: %s exists\n", lifr->lifr_name));
+
+ old_arl = ar_ll_lookup_by_name(lifr->lifr_name);
+ if (old_arl != NULL) {
+ DTRACE_PROBE2(slifname_exists, arl_t *, arl, arl_t *, old_arl);
return (EEXIST);
}
+
+ /* Make a copy of the message so we can send it downstream. */
+ if ((ioccpy = allocb(sizeof (struct iocblk), BPRI_MED)) == NULL ||
+ (ioccpy->b_cont = copymsg(mp)) == NULL) {
+ if (ioccpy != NULL)
+ freeb(ioccpy);
+ return (ENOMEM);
+ }
+
(void) strlcpy(arl->arl_name, lifr->lifr_name, sizeof (arl->arl_name));
/* The ppa is sent down by ifconfig */
arl->arl_ppa = lifr->lifr_ppa;
- arp1dbg(("ar_slifname: name is now %s, ppa %d\n", arl->arl_name,
- arl->arl_ppa));
/* Chain in the new arl. */
arl->arl_next = arl_g_head;
arl_g_head = arl;
+ DTRACE_PROBE1(slifname_set, arl_t *, arl);
+
+ /*
+ * Send along a copy of the ioctl; this is just for hitbox. Use
+ * M_CTL to avoid confusing anyone else who might be listening.
+ */
+ DB_TYPE(ioccpy) = M_CTL;
+ iocp = (struct iocblk *)ioccpy->b_rptr;
+ bzero(iocp, sizeof (*iocp));
+ iocp->ioc_cmd = SIOCSLIFNAME;
+ iocp->ioc_count = msgsize(ioccpy->b_cont);
+ ioccpy->b_wptr = (uchar_t *)(iocp + 1);
+ putnext(arl->arl_wq, ioccpy);
return (0);
}
@@ -3239,10 +3599,9 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig)
int ppa;
char *cp;
mblk_t *mp = mp_orig;
+ arl_t *old_arl;
- arp1dbg(("ar_set_ppa\n"));
-
- if (MODULE_BELOW_IS_IP(q)) {
+ if (ar->ar_on_ill_stream) {
/*
* This command is for IP, since it is coming down
* the <arp-IP-driver> stream. Return ENOENT so that
@@ -3254,35 +3613,40 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig)
/* We handle both M_IOCTL and M_PROTO messages. */
if (DB_TYPE(mp) == M_IOCTL)
mp = mp->b_cont;
- if (!q->q_next || arl == NULL) {
+ if (q->q_next == NULL || arl == NULL) {
/*
* If the interface was just opened and
* the info ack has not yet come back from the driver.
*/
- arp1dbg(("ar_set_ppa: no arl - queued\n"));
+ DTRACE_PROBE2(setppa_no_arl, queue_t *, q,
+ mblk_t *, mp_orig);
(void) putq(q, mp_orig);
return (EINPROGRESS);
}
- if (arl->arl_name[0] != '\0')
+ if (arl->arl_name[0] != '\0') {
+ DTRACE_PROBE1(setppa_already, arl_t *, arl);
return (EALREADY);
+ }
do {
q = q->q_next;
- } while (q->q_next);
+ } while (q->q_next != NULL);
cp = q->q_qinfo->qi_minfo->mi_idname;
ppa = *(int *)(mp->b_rptr);
(void) snprintf(arl->arl_name, sizeof (arl->arl_name), "%s%d", cp, ppa);
- if (ar_ll_lookup_by_name(arl->arl_name) != NULL) {
- arp1dbg(("ar_set_ppa: %s busy\n", arl->arl_name));
+
+ old_arl = ar_ll_lookup_by_name(arl->arl_name);
+ if (old_arl != NULL) {
+ DTRACE_PROBE2(setppa_exists, arl_t *, arl, arl_t *, old_arl);
/* Make it a null string again */
arl->arl_name[0] = '\0';
return (EBUSY);
}
- arp1dbg(("ar_set_ppa: %d\n", ppa));
arl->arl_ppa = ppa;
+ DTRACE_PROBE1(setppa_done, arl_t *, arl);
/* Chain in the new arl. */
arl->arl_next = arl_g_head;
arl_g_head = arl;
@@ -3357,10 +3721,8 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig)
* this is an ipNetToMediaTable msg from IP that needs (unique)
* arp cache entries appended...
*/
- if ((mpdata = mp->b_cont) == NULL) {
- arp0dbg(("ar_snmp_msg: b_cont == NULL for MIB2_IP msg\n"));
+ if ((mpdata = mp->b_cont) == NULL)
return (EINVAL);
- }
ar_snmp_hash_tbl = ar_create_snmp_hash(mpdata);
@@ -3368,7 +3730,7 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig)
args.m2a_hashb = ar_snmp_hash_tbl;
args.m2a_mpdata = NULL;
args.m2a_mptail = NULL;
- ar_ce_walk((pfi_t)ar_snmp_msg2, &args);
+ ar_ce_walk(ar_snmp_msg2, &args);
mi_free(ar_snmp_hash_tbl);
/*
@@ -3478,7 +3840,7 @@ ar_snmp_msg2(ace_t *ace, void *arg)
m2ap->m2a_mpdata = allocb(sizeof (mib2_ipNetToMediaEntry_t),
BPRI_HI);
if (m2ap->m2a_mpdata == NULL) {
- arp1dbg(("ar_snmp_msg2:allocb failed\n"));
+ DTRACE_PROBE(snmp_allocb_failure);
return;
}
}
@@ -3498,30 +3860,6 @@ ar_snmp_msg2(ace_t *ace, void *arg)
(char *)&ntme, sizeof (ntme));
}
-/* Start up the garbage collection timer on the queue provided. */
-static void
-ar_timer_init(queue_t *q)
-{
- if (ar_timer_mp)
- return;
- ar_timer_mp = mi_timer_alloc(0);
- if (!ar_timer_mp)
- return;
- ar_timer_queue = q;
- mi_timer(ar_timer_queue, ar_timer_mp, arp_timer_interval);
-}
-
-/* ar_ce_walk routine to trash all non-permanent resolved entries. */
-/* ARGSUSED */
-static int
-ar_trash(ace_t *ace, uchar_t *arg)
-{
- if ((ace->ace_flags & (ACE_F_RESOLVED|ACE_F_PERMANENT)) ==
- ACE_F_RESOLVED)
- ar_ce_delete(ace);
- return (0);
-}
-
/* Write side put procedure. */
static void
ar_wput(queue_t *q, mblk_t *mp)
@@ -3579,11 +3917,14 @@ ar_wput(queue_t *q, mblk_t *mp)
break;
}
ioc = (struct iocblk *)mp->b_rptr;
- ioc->ioc_error = err;
- if ((mp1 = mp->b_cont) != 0)
- ioc->ioc_count = msgdsize(mp1);
- else
- ioc->ioc_count = 0;
+ if (err != 0)
+ ioc->ioc_error = err;
+ if (ioc->ioc_error != 0) {
+ DB_TYPE(mp) = M_IOCNAK;
+ freemsg(mp->b_cont);
+ mp->b_cont = NULL;
+ }
+ ioc->ioc_count = msgdsize(mp->b_cont);
qreply(q, mp);
TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END,
"arp_wput_end: q %p (%S)", q, "ioctl");
@@ -3660,6 +4001,117 @@ ar_wput(queue_t *q, mblk_t *mp)
"arp_wput_end: q %p (%S)", q, "end");
}
+static boolean_t
+arp_say_ready(ace_t *ace)
+{
+ mblk_t *mp;
+ arl_t *arl;
+ arh_t *arh;
+ uchar_t *cp;
+
+ arl = ace->ace_arl;
+ mp = allocb(sizeof (*arh) + 2 * (arl->arl_hw_addr_length +
+ ace->ace_proto_addr_length), BPRI_MED);
+ if (mp == NULL) {
+ /* skip a beat on allocation trouble */
+ ace->ace_xmit_count = 1;
+ ace_set_timer(ace, B_FALSE);
+ return (B_FALSE);
+ }
+ /* Tell IP that the address is now usable */
+ arh = (arh_t *)mp->b_rptr;
+ U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware);
+ U16_TO_BE16(ace->ace_proto, arh->arh_proto);
+ arh->arh_hlen = arl->arl_hw_addr_length;
+ arh->arh_plen = ace->ace_proto_addr_length;
+ U16_TO_BE16(ARP_REQUEST, arh->arh_operation);
+ cp = (uchar_t *)(arh + 1);
+ bcopy(ace->ace_hw_addr, cp, arl->arl_hw_addr_length);
+ cp += arl->arl_hw_addr_length;
+ bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length);
+ cp += ace->ace_proto_addr_length;
+ bcopy(ace->ace_hw_addr, cp, arl->arl_hw_addr_length);
+ cp += arl->arl_hw_addr_length;
+ bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length);
+ cp += ace->ace_proto_addr_length;
+ mp->b_wptr = cp;
+ ar_client_notify(arl, mp, AR_CN_READY);
+ DTRACE_PROBE1(ready, ace_t *, ace);
+ return (B_TRUE);
+}
+
+/*
+ * Pick the longest-waiting aces for defense.
+ */
+static void
+ace_reschedule(ace_t *ace, void *arg)
+{
+ ace_resched_t *art = arg;
+ ace_t **aces;
+ ace_t **acemax;
+ ace_t *atemp;
+
+ if (ace->ace_arl != art->art_arl)
+ return;
+ /*
+ * Only published entries that are ready for announcement are eligible.
+ */
+ if ((ace->ace_flags & (ACE_F_PUBLISH | ACE_F_UNVERIFIED | ACE_F_DYING |
+ ACE_F_DELAYED)) != ACE_F_PUBLISH) {
+ return;
+ }
+ if (art->art_naces < ACE_RESCHED_LIST_LEN) {
+ art->art_aces[art->art_naces++] = ace;
+ } else {
+ aces = art->art_aces;
+ acemax = aces + ACE_RESCHED_LIST_LEN;
+ for (; aces < acemax; aces++) {
+ if ((*aces)->ace_last_bcast > ace->ace_last_bcast) {
+ atemp = *aces;
+ *aces = ace;
+ ace = atemp;
+ }
+ }
+ }
+}
+
+/*
+ * Reschedule the ARP defense of any long-waiting ACEs. It's assumed that this
+ * doesn't happen very often (if at all), and thus it needn't be highly
+ * optimized. (Note, though, that it's actually O(N) complexity, because the
+ * outer loop is bounded by a constant rather than by the length of the list.)
+ */
+static void
+arl_reschedule(arl_t *arl)
+{
+ ace_resched_t art;
+ int i;
+ ace_t *ace;
+
+ i = arl->arl_defend_count;
+ arl->arl_defend_count = 0;
+ /* If none could be sitting around, then don't reschedule */
+ if (i < arp_defend_rate) {
+ DTRACE_PROBE1(reschedule_none, arl_t *, arl);
+ return;
+ }
+ art.art_arl = arl;
+ while (arl->arl_defend_count < arp_defend_rate) {
+ art.art_naces = 0;
+ ar_ce_walk(ace_reschedule, &art);
+ for (i = 0; i < art.art_naces; i++) {
+ ace = art.art_aces[i];
+ ace->ace_flags |= ACE_F_DELAYED;
+ ace_set_timer(ace, B_FALSE);
+ if (++arl->arl_defend_count >= arp_defend_rate)
+ break;
+ }
+ if (art.art_naces < ACE_RESCHED_LIST_LEN)
+ break;
+ }
+ DTRACE_PROBE1(reschedule, arl_t *, arl);
+}
+
/*
* Write side service routine. The only action here is delivery of transmit
* timer events and delayed messages while waiting for the info_ack (ar_arl
@@ -3668,8 +4120,9 @@ ar_wput(queue_t *q, mblk_t *mp)
static void
ar_wsrv(queue_t *q)
{
- ace_t *ace;
- mblk_t *mp;
+ ace_t *ace;
+ arl_t *arl;
+ mblk_t *mp;
clock_t ms;
TRACE_1(TR_FAC_ARP, TR_ARP_WSRV_START,
@@ -3680,39 +4133,135 @@ ar_wsrv(queue_t *q)
case M_PCSIG:
if (!mi_timer_valid(mp))
continue;
- if (mp == ar_timer_mp) {
- /* Garbage collection time. */
- ar_ce_walk(ar_trash, NULL);
- mi_timer(ar_timer_queue, ar_timer_mp,
- arp_timer_interval);
+ ace = (ace_t *)mp->b_rptr;
+ if (ace->ace_flags & ACE_F_DYING)
continue;
+ arl = ace->ace_arl;
+ if (ace->ace_flags & ACE_F_UNVERIFIED) {
+ ASSERT(ace->ace_flags & ACE_F_PUBLISH);
+ ASSERT(ace->ace_query_mp == NULL);
+ /*
+ * If the link is down, give up for now. IP
+ * will give us the go-ahead to try again when
+ * the link restarts.
+ */
+ if (!arl->arl_link_up) {
+ DTRACE_PROBE1(timer_link_down,
+ ace_t *, ace);
+ ace->ace_flags |= ACE_F_DAD_ABORTED;
+ continue;
+ }
+ if (ace->ace_xmit_count > 0) {
+ DTRACE_PROBE1(timer_probe,
+ ace_t *, ace);
+ ace->ace_xmit_count--;
+ ar_xmit(arl, ARP_REQUEST,
+ ace->ace_proto,
+ ace->ace_proto_addr_length,
+ ace->ace_hw_addr, NULL, NULL,
+ ace->ace_proto_addr, NULL);
+ ace_set_timer(ace, B_FALSE);
+ continue;
+ }
+ if (!arp_say_ready(ace))
+ continue;
+ DTRACE_PROBE1(timer_ready, ace_t *, ace);
+ ace->ace_xmit_interval = arp_publish_interval;
+ ace->ace_xmit_count = arp_publish_count;
+ if (ace->ace_xmit_count == 0)
+ ace->ace_xmit_count++;
+ ace->ace_flags &= ~ACE_F_UNVERIFIED;
}
- ace = (ace_t *)mp->b_rptr;
- if (ace->ace_flags & (ACE_F_PUBLISH | ACE_F_MYADDR)) {
+ if (ace->ace_flags & ACE_F_PUBLISH) {
+ clock_t now;
+
+ /*
+ * If the defense period (arp_defend_period)
+ * has passed, then free up the entries that
+ * need defense by rescheduling them.
+ */
+ now = ddi_get_lbolt();
+ if (arp_defend_rate > 0 &&
+ now - arl->arl_defend_start >
+ SEC_TO_TICK(arp_defend_period)) {
+ arl->arl_defend_start = now;
+ arl_reschedule(arl);
+ }
/*
* Finish the job that we started in
- * ar_entry_add.
+ * ar_entry_add. When we get to zero
+ * announcement retransmits left, switch to
+ * address defense.
*/
ASSERT(ace->ace_query_mp == NULL);
- ASSERT(ace->ace_publish_count != 0);
- ace->ace_publish_count--;
- ar_xmit(ace->ace_arl, ARP_REQUEST,
+ if (ace->ace_xmit_count > 0) {
+ ace->ace_xmit_count--;
+ DTRACE_PROBE1(timer_announce,
+ ace_t *, ace);
+ } else if (ace->ace_flags & ACE_F_DELAYED) {
+ /*
+ * This guy was rescheduled as one of
+ * the really old entries needing
+ * on-going defense. Let him through
+ * now.
+ */
+ DTRACE_PROBE1(timer_send_delayed,
+ ace_t *, ace);
+ ace->ace_flags &= ~ACE_F_DELAYED;
+ } else if (arp_defend_rate > 0 &&
+ (arl->arl_defend_count >= arp_defend_rate ||
+ ++arl->arl_defend_count >=
+ arp_defend_rate)) {
+ /*
+ * If we're no longer allowed to send
+ * unbidden defense messages, then just
+ * wait for rescheduling.
+ */
+ DTRACE_PROBE1(timer_excess_defense,
+ ace_t *, ace);
+ ace_set_timer(ace, B_FALSE);
+ continue;
+ } else {
+ DTRACE_PROBE1(timer_defend,
+ ace_t *, ace);
+ }
+ ar_xmit(arl, ARP_REQUEST,
ace->ace_proto,
ace->ace_proto_addr_length,
ace->ace_hw_addr,
ace->ace_proto_addr,
- ace->ace_arl->arl_arp_addr,
- ace->ace_proto_addr);
- if (ace->ace_publish_count != 0 &&
- arp_publish_interval != 0) {
- mi_timer(ace->ace_arl->arl_wq,
- ace->ace_mp,
- arp_publish_interval);
- }
+ arl->arl_arp_addr,
+ ace->ace_proto_addr, NULL);
+ ace->ace_last_bcast = now;
+ if (ace->ace_xmit_count == 0)
+ ace->ace_xmit_interval =
+ arp_defend_interval;
+ if (ace->ace_xmit_interval != 0)
+ ace_set_timer(ace, B_FALSE);
continue;
}
- if (!ace->ace_query_mp)
+
+ /*
+ * If this is a non-permanent (regular) resolved ARP
+ * entry, then it's now time to check if it can be
+ * retired. As an optimization, we check with IP
+ * first, and just restart the timer if the address is
+ * still in use.
+ */
+ if (ACE_NONPERM(ace)) {
+ if (ace->ace_proto == IP_ARP_PROTO_TYPE &&
+ ndp_lookup_ipaddr(*(ipaddr_t *)
+ ace->ace_proto_addr)) {
+ ace->ace_flags |= ACE_F_OLD;
+ mi_timer(arl->arl_wq, ace->ace_mp,
+ arp_cleanup_interval);
+ } else {
+ ar_delete_notify(ace);
+ ar_ce_delete(ace);
+ }
continue;
+ }
+
/*
* ar_query_xmit returns the number of milliseconds to
* wait following this transmit. If the number of
@@ -3721,6 +4270,7 @@ ar_wsrv(queue_t *q)
* we complete the operation with a failure indication.
* Otherwise, we restart the timer.
*/
+ ASSERT(ace->ace_query_mp != NULL);
ms = ar_query_xmit(ace, NULL);
if (ms == 0)
ar_query_reply(ace, ENXIO, NULL, (uint32_t)0);
@@ -3739,43 +4289,50 @@ ar_wsrv(queue_t *q)
/* ar_xmit is called to transmit an ARP Request or Response. */
static void
ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen,
- uchar_t *haddr1, uchar_t *paddr1, uchar_t *haddr2, uchar_t *paddr2)
+ const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2,
+ const uchar_t *paddr2, const uchar_t *dstaddr)
{
arh_t *arh;
- char *cp;
- uint32_t hlen = arl->arl_hw_addr_length;
+ uint8_t *cp;
+ uint_t hlen;
mblk_t *mp;
- if (arl->arl_flags & ARL_F_NOARP) {
- /* IFF_NOARP flag is set. Do not send an arp request */
+ /* IFF_NOARP flag is set or interface down: do not send arp messages */
+ if ((arl->arl_flags & ARL_F_NOARP) || !arl->arl_link_up)
return;
- }
mp = arl->arl_xmit_template;
- if (!mp || !(mp = copyb(mp)))
+ if (mp == NULL || (mp = copyb(mp)) == NULL)
return;
+ hlen = arl->arl_hw_addr_length;
mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) +
plen + plen, BPRI_MED);
- if (!mp->b_cont) {
+ if (mp->b_cont == NULL) {
freeb(mp);
return;
}
+
+ /* Get the L2 destination address for the message */
+ if (haddr2 == NULL)
+ dstaddr = arl->arl_arp_addr;
+ else if (dstaddr == NULL)
+ dstaddr = haddr2;
+
/*
* Figure out where the target hardware address goes in the
* DL_UNITDATA_REQ header, and copy it in.
*/
-
- cp = (char *)mi_offset_param(mp, arl->arl_xmit_template_addr_offset,
- hlen);
- if (!cp) {
+ cp = mi_offset_param(mp, arl->arl_xmit_template_addr_offset, hlen);
+ ASSERT(cp != NULL);
+ if (cp == NULL) {
freemsg(mp);
return;
}
- bcopy(haddr2, cp, hlen);
+ bcopy(dstaddr, cp, hlen);
/* Fill in the ARP header. */
- cp = (char *)mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
- mp->b_cont->b_rptr = (uchar_t *)cp;
+ cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
+ mp->b_cont->b_rptr = cp;
arh = (arh_t *)cp;
U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware);
U16_TO_BE16(proto, arh->arh_proto);
@@ -3785,13 +4342,19 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen,
cp += ARH_FIXED_LEN;
bcopy(haddr1, cp, hlen);
cp += hlen;
- bcopy(paddr1, cp, plen);
+ if (paddr1 == NULL)
+ bzero(cp, plen);
+ else
+ bcopy(paddr1, cp, plen);
cp += plen;
- bcopy(haddr2, cp, hlen);
+ if (haddr2 == NULL)
+ bzero(cp, hlen);
+ else
+ bcopy(haddr2, cp, hlen);
cp += hlen;
bcopy(paddr2, cp, plen);
cp += plen;
- mp->b_cont->b_wptr = (uchar_t *)cp;
+ mp->b_cont->b_wptr = cp;
/* Ship it out. */
if (canputnext(arl->arl_wq))
putnext(arl->arl_wq, mp);
@@ -3799,209 +4362,6 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen,
freemsg(mp);
}
-/*
- * Handle an external request to broadcast an ARP request. This is used
- * by configuration programs to broadcast a request advertising our own
- * hardware and protocol addresses.
- */
-static int
-ar_xmit_request(queue_t *q, mblk_t *mp_orig)
-{
- areq_t *areq;
- arl_t *arl;
- uchar_t *sender;
- uint32_t sender_length;
- uchar_t *target;
- uint32_t target_length;
- mblk_t *mp = mp_orig;
-
- /* We handle both M_IOCTL and M_PROTO messages. */
- if (DB_TYPE(mp) == M_IOCTL)
- mp = mp->b_cont;
- arl = ar_ll_lookup_from_mp(mp);
- if (arl == NULL)
- return (EINVAL);
- /*
- * Newly received commands from clients go to the tail of the queue.
- */
- if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_xmit_request: enqueue on q %p\n", (void *)q));
- ar_cmd_enqueue(arl, mp_orig, q, AR_XMIT_REQUEST, B_TRUE);
- return (EINPROGRESS);
- }
- mp_orig->b_prev = NULL;
-
- areq = (areq_t *)mp->b_rptr;
- sender_length = areq->areq_sender_addr_length;
- sender = mi_offset_param(mp, areq->areq_sender_addr_offset,
- sender_length);
- target_length = areq->areq_target_addr_length;
- target = mi_offset_param(mp, areq->areq_target_addr_offset,
- target_length);
- if (!sender || !target)
- return (EINVAL);
- ar_xmit(arl, ARP_REQUEST, areq->areq_proto, sender_length,
- arl->arl_hw_addr, sender, arl->arl_arp_addr, target);
- return (0);
-}
-
-/*
- * Handle an external request to broadcast an ARP response. This is used
- * by configuration programs to broadcast a response advertising our own
- * hardware and protocol addresses.
- */
-static int
-ar_xmit_response(queue_t *q, mblk_t *mp_orig)
-{
- areq_t *areq;
- arl_t *arl;
- uchar_t *sender;
- uint32_t sender_length;
- uchar_t *target;
- uint32_t target_length;
- mblk_t *mp = mp_orig;
-
- /* We handle both M_IOCTL and M_PROTO messages. */
- if (DB_TYPE(mp) == M_IOCTL)
- mp = mp->b_cont;
- arl = ar_ll_lookup_from_mp(mp);
- if (arl == NULL)
- return (EINVAL);
- /*
- * Newly received commands from clients go to the tail of the queue.
- */
- if (CMD_NEEDS_QUEUEING(mp_orig, arl)) {
- arp1dbg(("ar_xmit_response: enqueue on q %p \n", (void *)q));
- ar_cmd_enqueue(arl, mp_orig, q, AR_XMIT_RESPONSE, B_TRUE);
- return (EINPROGRESS);
- }
- mp_orig->b_prev = NULL;
-
- areq = (areq_t *)mp->b_rptr;
- sender_length = areq->areq_sender_addr_length;
- sender = mi_offset_param(mp, areq->areq_sender_addr_offset,
- sender_length);
- target_length = areq->areq_target_addr_length;
- target = mi_offset_param(mp, areq->areq_target_addr_offset,
- target_length);
- if (!sender || !target)
- return (EINVAL);
- ar_xmit(arl, ARP_RESPONSE, areq->areq_proto, sender_length,
- arl->arl_hw_addr, sender, arl->arl_arp_addr, target);
- return (0);
-}
-
-#if 0
-/*
- * Debug routine to display a particular ARP Cache Entry with an
- * accompanying text message.
- */
-static void
-show_ace(char *msg, ace_t *ace)
-{
- if (msg)
- printf("%s", msg);
- printf("ace 0x%p:\n", ace);
- printf("\tace_next 0x%p, ace_ptpn 0x%p, ace_arl 0x%p\n",
- ace->ace_next, ace->ace_ptpn, ace->ace_arl);
- printf("\tace_proto %x, ace_flags %x\n", ace->ace_proto,
- ace->ace_flags);
- if (ace->ace_proto_addr && ace->ace_proto_addr_length)
- printf("\tace_proto_addr %x %x %x %x, len %d\n",
- ace->ace_proto_addr[0], ace->ace_proto_addr[1],
- ace->ace_proto_addr[2], ace->ace_proto_addr[3],
- ace->ace_proto_addr_length);
- if (ace->ace_proto_mask)
- printf("\tace_proto_mask %x %x %x %x\n",
- ace->ace_proto_mask[0], ace->ace_proto_mask[1],
- ace->ace_proto_mask[2], ace->ace_proto_mask[3]);
- if (ace->ace_hw_addr && ace->ace_hw_addr_length)
- printf("\tace_hw_addr %x %x %x %x %x %x, len %d\n",
- ace->ace_hw_addr[0], ace->ace_hw_addr[1],
- ace->ace_hw_addr[2], ace->ace_hw_addr[3],
- ace->ace_hw_addr[4], ace->ace_hw_addr[5],
- ace->ace_hw_addr_length);
- printf("\tace_mp 0x%p\n", ace->ace_mp);
- printf("\tace_query_count %d, ace_query_mp 0x%x\n",
- ace->ace_query_count, ace->ace_query_mp);
-}
-
-/* Debug routine to display an ARP packet with an accompanying text message. */
-static void
-show_arp(char *msg, mblk_t *mp)
-{
- uchar_t *up = mp->b_rptr;
- int len;
- int hlen = up[4] & 0xFF;
- char fmt[64];
- char buf[128];
- char *op;
- int plen = up[5] & 0xFF;
- uint_t proto;
-
- if (msg && *msg)
- printf("%s", msg);
- len = mp->b_wptr - up;
- if (len < 8) {
- printf("ARP packet of %d bytes too small\n", len);
- return;
- }
- switch (BE16_TO_U16(&up[6])) {
- case ARP_REQUEST:
- op = "ARP request";
- break;
- case ARP_RESPONSE:
- op = "ARP response";
- break;
- case RARP_REQUEST:
- op = "RARP request";
- break;
- case RARP_RESPONSE:
- op = "RARP response";
- break;
- default:
- op = "unknown";
- break;
- }
- proto = (uint_t)BE16_TO_U16(&up[2]);
- printf("len %d, hardware %d, proto %d, hlen %d, plen %d, op %s\n",
- len, (int)BE16_TO_U16(up), proto, hlen, plen, op);
- if (len < (8 + hlen + hlen + plen + plen))
- printf("ARP packet of %d bytes too small!\n", len);
- up += 8;
-
- (void) mi_sprintf(fmt, "sender hardware address %%%dM\n", hlen);
- (void) mi_sprintf(buf, fmt, up);
- printf(buf);
- up += hlen;
- if (proto == 0x800) {
- printf("sender proto address %d.%d.%d.%d\n",
- up[0] & 0xFF, up[1] & 0xFF, up[2] & 0xFF,
- up[3] & 0xFF);
- } else {
- (void) mi_sprintf(fmt, "sender proto address %%%dM\n", plen);
- (void) mi_sprintf(buf, fmt, up);
- printf(buf);
- }
- up += plen;
-
- (void) mi_sprintf(fmt, "target hardware address %%%dM\n", hlen);
- (void) mi_sprintf(buf, fmt, up);
- printf(buf);
- up += hlen;
- if (proto == 0x800) {
- printf("target proto address %d.%d.%d.%d\n",
- up[0] & 0xFF, up[1] & 0xFF, up[2] & 0xFF,
- up[3] & 0xFF);
- } else {
- (void) mi_sprintf(fmt, "target proto address %%%dM\n", plen);
- (void) mi_sprintf(buf, fmt, up);
- printf(buf);
- }
- up += plen;
-}
-#endif
-
static mblk_t *
ar_alloc(uint32_t cmd, int err)
{
diff --git a/usr/src/uts/common/inet/arp_impl.h b/usr/src/uts/common/inet/arp_impl.h
index 84756488f8..e87fc69ab3 100644
--- a/usr/src/uts/common/inet/arp_impl.h
+++ b/usr/src/uts/common/inet/arp_impl.h
@@ -36,6 +36,10 @@ extern "C" {
#include <sys/types.h>
#include <sys/stream.h>
+#include <net/if.h>
+
+/* ARP kernel hash size; used for mdb support */
+#define ARP_HASH_SIZE 256
/* ARL Structure, one per link level device */
typedef struct arl_s {
@@ -43,7 +47,6 @@ typedef struct arl_s {
queue_t *arl_rq; /* Read queue pointer */
queue_t *arl_wq; /* Write queue pointer */
t_uscalar_t arl_ppa; /* DL_ATTACH parameter */
- t_scalar_t arl_mac_sap;
uchar_t *arl_arp_addr; /* multicast address to use */
uchar_t *arl_hw_addr; /* Our hardware address */
uint32_t arl_hw_addr_length;
@@ -56,8 +59,6 @@ typedef struct arl_s {
mblk_t *arl_unbind_mp;
mblk_t *arl_detach_mp;
t_uscalar_t arl_provider_style; /* From DL_INFO_ACK */
- mblk_t *arl_dlpiop_done; /* DLPI opertion done */
- queue_t *arl_ip_pending_queue; /* Pending queue */
mblk_t *arl_queue; /* Queued commands head */
mblk_t *arl_queue_tail; /* Queued commands tail */
uint32_t arl_flags; /* Used for IFF_NOARP */
@@ -65,7 +66,12 @@ typedef struct arl_s {
mblk_t *arl_dlpi_deferred; /* Deferred DLPI messages */
uint_t arl_state; /* lower interface state */
char *arl_data; /* address data pointer */
- uint32_t arl_closing : 1;
+ clock_t arl_defend_start; /* start of 1-hour period */
+ uint_t arl_defend_count; /* # of unbidden broadcasts */
+ uint_t
+ arl_closing : 1, /* stream is closing */
+ arl_notifies : 1, /* handles DL_NOTE_LINK */
+ arl_link_up : 1; /* DL_NOTE status */
} arl_t;
#define ARL_F_NOARP 0x01
@@ -81,9 +87,32 @@ typedef struct ar_s {
arl_t *ar_arl; /* Associated arl */
cred_t *ar_credp; /* Credentials associated w/ open */
struct ar_s *ar_arl_ip_assoc; /* ARL - IP association */
- uint32_t ar_ip_acked_close : 1; /* IP has acked the close */
+ uint32_t
+ ar_ip_acked_close : 1, /* IP has acked the close */
+ ar_on_ill_stream : 1; /* Module below is IP */
} ar_t;
+/* ARP Cache Entry */
+typedef struct ace_s {
+ struct ace_s *ace_next; /* Hash chain next pointer */
+ struct ace_s **ace_ptpn; /* Pointer to previous next */
+ struct arl_s *ace_arl; /* Associated arl */
+ uint32_t ace_proto; /* Protocol for this ace */
+ uint32_t ace_flags;
+ uchar_t *ace_proto_addr;
+ uint32_t ace_proto_addr_length;
+ uchar_t *ace_proto_mask; /* Mask for matching addr */
+ uchar_t *ace_proto_extract_mask; /* For mappings */
+ uchar_t *ace_hw_addr;
+ uint32_t ace_hw_addr_length;
+ uint32_t ace_hw_extract_start; /* For mappings */
+ mblk_t *ace_mp; /* mblk we are in */
+ mblk_t *ace_query_mp; /* outstanding query chain */
+ clock_t ace_last_bcast; /* last broadcast Response */
+ clock_t ace_xmit_interval;
+ int ace_xmit_count;
+} ace_t;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h
index a732b92585..67af1bf688 100644
--- a/usr/src/uts/common/inet/ip.h
+++ b/usr/src/uts/common/inet/ip.h
@@ -39,11 +39,9 @@ extern "C" {
#include <inet/mib2.h>
#include <inet/nd.h>
#include <sys/atomic.h>
-#include <sys/socket.h>
#include <net/if_dl.h>
#include <net/if.h>
#include <netinet/ip.h>
-#include <sys/dlpi.h>
#include <netinet/igmp.h>
#ifdef _KERNEL
@@ -1284,7 +1282,9 @@ typedef struct ipif_s {
ipif_replace_zero : 1, /* Replacement for zero */
ipif_was_up : 1, /* ipif was up before */
- ipif_pad_to_31 : 28;
+ ipif_addr_ready : 1, /* DAD is done */
+ ipif_was_dup : 1, /* DAD had failed */
+ ipif_pad_to_31 : 26;
int ipif_orig_ifindex; /* ifindex before SLIFFAILOVER */
uint_t ipif_seqid; /* unique index across all ills */
@@ -1295,6 +1295,7 @@ typedef struct ipif_s {
uint_t ipif_saved_ire_cnt;
zoneid_t
ipif_zoneid; /* zone ID number */
+ timeout_id_t ipif_recovery_id; /* Timer for DAD recovery */
#ifdef ILL_DEBUG
#define IP_TR_HASH_MAX 64
th_trace_t *ipif_trace[IP_TR_HASH_MAX];
@@ -1740,6 +1741,7 @@ typedef struct ill_s {
uint_t ill_ipif_up_count; /* Number of IPIFs currently up. */
uint_t ill_max_frag; /* Max IDU from DLPI. */
char *ill_name; /* Our name. */
+ uint_t ill_ipif_dup_count; /* Number of duplicate addresses. */
uint_t ill_name_length; /* Name length, incl. terminator. */
char *ill_ndd_name; /* Name + ":ip?_forwarding" for NDD. */
uint_t ill_net_type; /* IRE_IF_RESOLVER/IRE_IF_NORESOLVER. */
@@ -1807,7 +1809,9 @@ typedef struct ill_s {
ill_dl_up : 1,
ill_up_ipifs : 1,
- ill_pad_to_bit_31 : 20;
+ ill_note_link : 1, /* supports link-up notification */
+
+ ill_pad_to_bit_31 : 19;
/* Following bit fields protected by ill_lock */
uint_t
@@ -1818,7 +1822,8 @@ typedef struct ill_s {
ill_arp_bringup_pending : 1,
ill_mtu_userspecified : 1, /* SIOCSLNKINFO has set the mtu */
- ill_pad_bit_31 : 26;
+ ill_arp_extend : 1, /* ARP has DAD extensions */
+ ill_pad_bit_31 : 25;
/*
* Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'.
@@ -2501,12 +2506,8 @@ typedef struct ire_s {
/* source ip-addr of incoming packet */
clock_t ire_last_used_time; /* Last used time */
struct ire_s *ire_fastpath; /* Pointer to next ire in fastpath */
- zoneid_t ire_zoneid; /* for local address discrimination */
tsol_ire_gw_secattr_t *ire_gw_secattr; /* gateway security attributes */
-#ifdef IRE_DEBUG
- th_trace_t *ire_trace[IP_TR_HASH_MAX];
- boolean_t ire_trace_disable; /* True when alloc fails */
-#endif
+ zoneid_t ire_zoneid; /* for local address discrimination */
/*
* ire's that are embedded inside mblk_t and sent to the external
* resolver use the ire_stq_ifindex to track the ifindex of the
@@ -2514,6 +2515,12 @@ typedef struct ire_s {
* for cleanup in the esbfree routine when arp failure occurs
*/
uint_t ire_stq_ifindex;
+ uint_t ire_defense_count; /* number of ARP conflicts */
+ uint_t ire_defense_time; /* last time defended (secs) */
+#ifdef IRE_DEBUG
+ th_trace_t *ire_trace[IP_TR_HASH_MAX];
+ boolean_t ire_trace_disable; /* True when alloc fails */
+#endif
} ire_t;
/* IPv4 compatiblity macros */
@@ -2822,23 +2829,37 @@ extern int ipv6_forward;
extern vmem_t *ip_minor_arena;
#define ip_respond_to_address_mask_broadcast ip_param_arr[0].ip_param_value
+#define ip_g_resp_to_echo_bcast ip_param_arr[1].ip_param_value
+#define ip_g_resp_to_echo_mcast ip_param_arr[2].ip_param_value
+#define ip_g_resp_to_timestamp ip_param_arr[3].ip_param_value
+#define ip_g_resp_to_timestamp_bcast ip_param_arr[4].ip_param_value
#define ip_g_send_redirects ip_param_arr[5].ip_param_value
+#define ip_g_forward_directed_bcast ip_param_arr[6].ip_param_value
#define ip_debug ip_param_arr[7].ip_param_value
#define ip_mrtdebug ip_param_arr[8].ip_param_value
#define ip_timer_interval ip_param_arr[9].ip_param_value
#define ip_ire_arp_interval ip_param_arr[10].ip_param_value
+#define ip_ire_redir_interval ip_param_arr[11].ip_param_value
#define ip_def_ttl ip_param_arr[12].ip_param_value
+#define ip_forward_src_routed ip_param_arr[13].ip_param_value
#define ip_wroff_extra ip_param_arr[14].ip_param_value
+#define ip_ire_pathmtu_interval ip_param_arr[15].ip_param_value
+#define ip_icmp_return ip_param_arr[16].ip_param_value
#define ip_path_mtu_discovery ip_param_arr[17].ip_param_value
#define ip_ignore_delete_time ip_param_arr[18].ip_param_value
+#define ip_ignore_redirect ip_param_arr[19].ip_param_value
#define ip_output_queue ip_param_arr[20].ip_param_value
#define ip_broadcast_ttl ip_param_arr[21].ip_param_value
#define ip_icmp_err_interval ip_param_arr[22].ip_param_value
#define ip_icmp_err_burst ip_param_arr[23].ip_param_value
#define ip_reass_queue_bytes ip_param_arr[24].ip_param_value
+#define ip_strict_dst_multihoming ip_param_arr[25].ip_param_value
#define ip_addrs_per_if ip_param_arr[26].ip_param_value
#define ipsec_override_persocket_policy ip_param_arr[27].ip_param_value
#define icmp_accept_clear_messages ip_param_arr[28].ip_param_value
+#define igmp_accept_clear_messages ip_param_arr[29].ip_param_value
+
+/* IPv6 configuration knobs */
#define delay_first_probe_time ip_param_arr[30].ip_param_value
#define max_unicast_solicit ip_param_arr[31].ip_param_value
#define ipv6_def_hops ip_param_arr[32].ip_param_value
@@ -2850,6 +2871,7 @@ extern vmem_t *ip_minor_arena;
#define ipv6_strict_dst_multihoming ip_param_arr[38].ip_param_value
#define ip_ire_reclaim_fraction ip_param_arr[39].ip_param_value
#define ipsec_policy_log_interval ip_param_arr[40].ip_param_value
+#define pim_accept_clear_messages ip_param_arr[41].ip_param_value
#define ip_ndp_unsolicit_interval ip_param_arr[42].ip_param_value
#define ip_ndp_unsolicit_count ip_param_arr[43].ip_param_value
#define ipv6_ignore_home_address_opt ip_param_arr[44].ip_param_value
@@ -2857,8 +2879,14 @@ extern vmem_t *ip_minor_arena;
#define ip_multirt_resolution_interval ip_param_arr[46].ip_param_value
#define ip_multirt_ttl ip_param_arr[47].ip_param_value
#define ip_multidata_outbound ip_param_arr[48].ip_param_value
+#define ip_ndp_defense_interval ip_param_arr[49].ip_param_value
+#define ip_max_temp_idle ip_param_arr[50].ip_param_value
+#define ip_max_temp_defend ip_param_arr[51].ip_param_value
+#define ip_max_defend ip_param_arr[52].ip_param_value
+#define ip_defend_interval ip_param_arr[53].ip_param_value
+#define ip_dup_recovery ip_param_arr[54].ip_param_value
#ifdef DEBUG
-#define ipv6_drop_inbound_icmpv6 ip_param_arr[49].ip_param_value
+#define ipv6_drop_inbound_icmpv6 ip_param_arr[55].ip_param_value
#else
#define ipv6_drop_inbound_icmpv6 0
#endif
@@ -2934,6 +2962,9 @@ extern uint32_t ipsechw_debug;
#define ip3dbg(a) /* */
#endif /* IP_DEBUG */
+/* Default MAC-layer address string length for mac_colon_addr */
+#define MAC_STR_LEN 128
+
struct ipsec_out_s;
extern const char *dlpi_prim_str(int);
@@ -2945,6 +2976,7 @@ extern void ill_frag_timer_start(ill_t *);
extern mblk_t *ip_carve_mp(mblk_t **, ssize_t);
extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t);
extern char *ip_dot_addr(ipaddr_t, char *);
+extern const char *mac_colon_addr(const uint8_t *, size_t, char *, size_t);
extern void ip_lwput(queue_t *, mblk_t *);
extern boolean_t icmp_err_rate_limit(void);
extern void icmp_time_exceeded(queue_t *, mblk_t *, uint8_t);
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index 036748e78c..7872cfd212 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -956,6 +956,12 @@ static ipparam_t lcl_param_arr[] = {
{ 1000, 60000, 1000, "ip_multirt_resolution_interval" },
{ 0, 255, 1, "ip_multirt_ttl" },
{ 0, 1, 1, "ip_multidata_outbound" },
+ { 0, 3600000, 300000, "ip_ndp_defense_interval" },
+ { 0, 999999, 60*60*24, "ip_max_temp_idle" },
+ { 0, 1000, 1, "ip_max_temp_defend" },
+ { 0, 1000, 3, "ip_max_defend" },
+ { 0, 999999, 30, "ip_defend_interval" },
+ { 0, 3600000, 300000, "ip_dup_recovery" },
#ifdef DEBUG
{ 0, 1, 0, "ip6_drop_inbound_icmpv6" },
#endif
@@ -1022,65 +1028,6 @@ int ip_g_forward = IP_FORWARD_DEFAULT;
int ipv6_forward = IP_FORWARD_DEFAULT;
-/* Following line is external, and in ip.h. Normally marked with * *. */
-#define ip_respond_to_address_mask_broadcast ip_param_arr[0].ip_param_value
-#define ip_g_resp_to_echo_bcast ip_param_arr[1].ip_param_value
-#define ip_g_resp_to_echo_mcast ip_param_arr[2].ip_param_value
-#define ip_g_resp_to_timestamp ip_param_arr[3].ip_param_value
-#define ip_g_resp_to_timestamp_bcast ip_param_arr[4].ip_param_value
-#define ip_g_send_redirects ip_param_arr[5].ip_param_value
-#define ip_g_forward_directed_bcast ip_param_arr[6].ip_param_value
-#define ip_debug ip_param_arr[7].ip_param_value /* */
-#define ip_mrtdebug ip_param_arr[8].ip_param_value /* */
-#define ip_timer_interval ip_param_arr[9].ip_param_value /* */
-#define ip_ire_arp_interval ip_param_arr[10].ip_param_value /* */
-#define ip_ire_redir_interval ip_param_arr[11].ip_param_value
-#define ip_def_ttl ip_param_arr[12].ip_param_value
-#define ip_forward_src_routed ip_param_arr[13].ip_param_value
-#define ip_wroff_extra ip_param_arr[14].ip_param_value
-#define ip_ire_pathmtu_interval ip_param_arr[15].ip_param_value
-#define ip_icmp_return ip_param_arr[16].ip_param_value
-#define ip_path_mtu_discovery ip_param_arr[17].ip_param_value /* */
-#define ip_ignore_delete_time ip_param_arr[18].ip_param_value /* */
-#define ip_ignore_redirect ip_param_arr[19].ip_param_value
-#define ip_output_queue ip_param_arr[20].ip_param_value
-#define ip_broadcast_ttl ip_param_arr[21].ip_param_value
-#define ip_icmp_err_interval ip_param_arr[22].ip_param_value
-#define ip_icmp_err_burst ip_param_arr[23].ip_param_value
-#define ip_reass_queue_bytes ip_param_arr[24].ip_param_value
-#define ip_strict_dst_multihoming ip_param_arr[25].ip_param_value
-#define ip_addrs_per_if ip_param_arr[26].ip_param_value
-#define ipsec_override_persocket_policy ip_param_arr[27].ip_param_value /* */
-#define icmp_accept_clear_messages ip_param_arr[28].ip_param_value
-#define igmp_accept_clear_messages ip_param_arr[29].ip_param_value
-
-/* IPv6 configuration knobs */
-#define delay_first_probe_time ip_param_arr[30].ip_param_value
-#define max_unicast_solicit ip_param_arr[31].ip_param_value
-#define ipv6_def_hops ip_param_arr[32].ip_param_value
-#define ipv6_icmp_return ip_param_arr[33].ip_param_value
-#define ipv6_forward_src_routed ip_param_arr[34].ip_param_value
-#define ipv6_resp_echo_mcast ip_param_arr[35].ip_param_value
-#define ipv6_send_redirects ip_param_arr[36].ip_param_value
-#define ipv6_ignore_redirect ip_param_arr[37].ip_param_value
-#define ipv6_strict_dst_multihoming ip_param_arr[38].ip_param_value
-#define ip_ire_reclaim_fraction ip_param_arr[39].ip_param_value
-#define ipsec_policy_log_interval ip_param_arr[40].ip_param_value
-#define pim_accept_clear_messages ip_param_arr[41].ip_param_value
-#define ip_ndp_unsolicit_interval ip_param_arr[42].ip_param_value
-#define ip_ndp_unsolicit_count ip_param_arr[43].ip_param_value
-#define ipv6_ignore_home_address_opt ip_param_arr[44].ip_param_value
-#define ip_policy_mask ip_param_arr[45].ip_param_value
-#define ip_multirt_resolution_interval ip_param_arr[46].ip_param_value
-#define ip_multirt_ttl ip_param_arr[47].ip_param_value
-#define ip_multidata_outbound ip_param_arr[48].ip_param_value
-#ifdef DEBUG
-#define ipv6_drop_inbound_icmpv6 ip_param_arr[49].ip_param_value
-#else
-#define ipv6_drop_inbound_icmpv6 0
-#endif
-
-
/*
* Table of IP ioctls encoding the various properties of the ioctl and
* indexed based on the last byte of the ioctl command. Occasionally there
@@ -1516,28 +1463,33 @@ struct module_info ip_mod_info = {
IP_MOD_ID, IP_MOD_NAME, 1, INFPSZ, 65536, 1024
};
-static struct qinit rinit = {
+/*
+ * Duplicate static symbols within a module confuse mdb, so we avoid the
+ * problem by making the symbols here distinct from those in udp.c.
+ */
+
+static struct qinit iprinit = {
(pfi_t)ip_rput, NULL, ip_open, ip_close, NULL,
&ip_mod_info
};
-static struct qinit winit = {
+static struct qinit ipwinit = {
(pfi_t)ip_wput, (pfi_t)ip_wsrv, ip_open, ip_close, NULL,
&ip_mod_info
};
-static struct qinit lrinit = {
+static struct qinit iplrinit = {
(pfi_t)ip_lrput, NULL, ip_open, ip_close, NULL,
&ip_mod_info
};
-static struct qinit lwinit = {
+static struct qinit iplwinit = {
(pfi_t)ip_lwput, NULL, ip_open, ip_close, NULL,
&ip_mod_info
};
struct streamtab ipinfo = {
- &rinit, &winit, &lrinit, &lwinit
+ &iprinit, &ipwinit, &iplrinit, &iplwinit
};
#ifdef DEBUG
@@ -3782,6 +3734,204 @@ icmp_unreachable(queue_t *q, mblk_t *mp, uint8_t code)
}
/*
+ * Attempt to start recovery of an IPv4 interface that's been shut down as a
+ * duplicate. As long as someone else holds the address, the interface will
+ * stay down. When that conflict goes away, the interface is brought back up.
+ * This is done so that accidental shutdowns of addresses aren't made
+ * permanent. Your server will recover from a failure.
+ *
+ * For DHCP, recovery is not done in the kernel. Instead, it's handled by a
+ * user space process (dhcpagent).
+ *
+ * Recovery completes if ARP reports that the address is now ours (via
+ * AR_CN_READY). In that case, we go to ip_arp_excl to finish the operation.
+ *
+ * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
+ *
+ * 'arg' is the ipif_t whose address is to be re-probed. Nothing is sent when
+ * ARP is closing on the ill, when the ipif is no longer marked IPIF_DUPLICATE,
+ * or when it is point-to-point. If either ARP message cannot be allocated,
+ * nothing is sent and the timer is re-armed, but only if ip_dup_recovery is
+ * greater than zero.
+ */
+static void
+ipif_dup_recovery(void *arg)
+{
+ ipif_t *ipif = arg;
+ ill_t *ill = ipif->ipif_ill;
+ mblk_t *arp_add_mp;
+ mblk_t *arp_del_mp;
+ area_t *area;
+
+ ipif->ipif_recovery_id = 0; /* this timeout has fired; none is pending now */
+
+ if (ill->ill_arp_closing || !(ipif->ipif_flags & IPIF_DUPLICATE) ||
+ (ipif->ipif_flags & IPIF_POINTOPOINT)) {
+ /* No reason to try to bring this address back. */
+ return;
+ }
+
+ if ((arp_add_mp = ipif_area_alloc(ipif)) == NULL)
+ goto alloc_fail;
+
+ if (ipif->ipif_arp_del_mp == NULL) { /* reuse a saved delete message if there is one */
+ if ((arp_del_mp = ipif_ared_alloc(ipif)) == NULL)
+ goto alloc_fail;
+ ipif->ipif_arp_del_mp = arp_del_mp;
+ }
+
+ /* Setting the 'unverified' flag restarts DAD */
+ area = (area_t *)arp_add_mp->b_rptr;
+ area->area_flags = ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MYADDR |
+ ACE_F_UNVERIFIED;
+ putnext(ill->ill_rq, arp_add_mp); /* passed up from the ill's read queue; presumably consumed by ARP */
+ return;
+
+alloc_fail:
+ /* On allocation failure, just restart the timer */
+ freemsg(arp_add_mp); /* NULL if the first allocation failed; assumes freemsg(NULL) is a no-op -- confirm */
+ if (ip_dup_recovery > 0) {
+ ipif->ipif_recovery_id = timeout(ipif_dup_recovery, ipif,
+ MSEC_TO_TICK(ip_dup_recovery));
+ }
+}
+
+/*
+ * This is for exclusive changes due to ARP. Either tear down an interface due
+ * to AR_CN_FAILED and AR_CN_BOGON, or bring one up for successful recovery.
+ *
+ * 'rq' is the read queue of the affected ill (its q_ptr is the ill_t). 'mp'
+ * carries the arcn_t notification, with the ARP packet that triggered it
+ * attached via b_cont; mp is freed here. 'ipsq' and 'dummy_arg' are unused.
+ *
+ * Every non-point-to-point ipif on the ill whose local address matches the
+ * address taken from the ARP packet is processed.
+ */
+/* ARGSUSED */
+static void
+ip_arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
+{
+ ill_t *ill = rq->q_ptr;
+ arh_t *arh;
+ ipaddr_t src;
+ ipif_t *ipif;
+ char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */
+ char hbuf[MAC_STR_LEN];
+ char sbuf[INET_ADDRSTRLEN];
+ const char *failtype;
+ boolean_t bring_up;
+
+ switch (((arcn_t *)mp->b_rptr)->arcn_code) {
+ case AR_CN_READY:
+ failtype = NULL;
+ bring_up = B_TRUE;
+ break;
+ case AR_CN_FAILED:
+ failtype = "in use";
+ bring_up = B_FALSE;
+ break;
+ default: /* any other code; the header comment names AR_CN_BOGON */
+ failtype = "claimed";
+ bring_up = B_FALSE;
+ break;
+ }
+
+ arh = (arh_t *)mp->b_cont->b_rptr;
+ bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); /* address after the first hardware address (the sender's in standard ARP layout) */
+
+ /* Handle failures due to probes */
+ if (src == 0) {
+ bcopy((char *)&arh[1] + 2 * arh->arh_hlen + IP_ADDR_LEN, &src,
+ IP_ADDR_LEN); /* skips both hardware addresses and the first protocol address */
+ }
+
+ (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf));
+ (void) mac_colon_addr((uint8_t *)(arh + 1), arh->arh_hlen, hbuf,
+ sizeof (hbuf));
+ (void) ip_dot_addr(src, sbuf);
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+
+ if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
+ ipif->ipif_lcl_addr != src) {
+ continue;
+ }
+
+ /*
+ * If we failed on a recovery probe, then restart the timer to
+ * try again later.
+ */
+ if (!bring_up && (ipif->ipif_flags & IPIF_DUPLICATE) &&
+ !(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
+ ill->ill_net_type == IRE_IF_RESOLVER &&
+ ip_dup_recovery > 0 && ipif->ipif_recovery_id == 0) {
+ ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
+ ipif, MSEC_TO_TICK(ip_dup_recovery));
+ continue;
+ }
+
+ /*
+ * If what we're trying to do has already been done, then do
+ * nothing.
+ */
+ if (bring_up == ((ipif->ipif_flags & IPIF_UP) != 0))
+ continue;
+
+ if (ipif->ipif_id != 0) {
+ (void) snprintf(ibuf + ill->ill_name_length - 1, /* write over the name's terminator and whatever an earlier pass appended */
+ sizeof (ibuf) - ill->ill_name_length + 1, ":%d",
+ ipif->ipif_id);
+ }
+ if (failtype == NULL) {
+ cmn_err(CE_NOTE, "recovered address %s on %s", sbuf,
+ ibuf);
+ } else {
+ cmn_err(CE_WARN, "%s has duplicate address %s (%s "
+ "by %s); disabled", ibuf, sbuf, failtype, hbuf);
+ }
+
+ if (bring_up) {
+ ASSERT(ill->ill_dl_up);
+ /*
+ * Free up the ARP delete message so we can allocate
+ * a fresh one through the normal path.
+ */
+ freemsg(ipif->ipif_arp_del_mp);
+ ipif->ipif_arp_del_mp = NULL;
+ if (ipif_resolver_up(ipif, Res_act_initial) !=
+ EINPROGRESS) {
+ ipif->ipif_addr_ready = 1; /* nothing left in progress, so mark the address ready now */
+ (void) ipif_up_done(ipif);
+ }
+ continue;
+ }
+
+ mutex_enter(&ill->ill_lock);
+ ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
+ ipif->ipif_flags |= IPIF_DUPLICATE;
+ ill->ill_ipif_dup_count++; /* flag and counter are changed together under ill_lock */
+ mutex_exit(&ill->ill_lock);
+ /*
+ * Already exclusive on the ill; no need to handle deferred
+ * processing here.
+ */
+ (void) ipif_down(ipif, NULL, NULL);
+ ipif_down_tail(ipif);
+ if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
+ ill->ill_net_type == IRE_IF_RESOLVER &&
+ ip_dup_recovery > 0) { /* same eligibility test as the timer restart above */
+ ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
+ ipif, MSEC_TO_TICK(ip_dup_recovery));
+ }
+ }
+ freemsg(mp);
+}
+/*
+ * Defend our address after ARP has reported a conflicting packet. 'rq' is the
+ * read queue of the ill (its q_ptr is the ill_t) and 'mp' carries the ARP
+ * packet in b_cont. For every ipif on the ill that is up and whose local
+ * address equals the address read from that packet, ipif_resolver_up is called
+ * with Res_act_defend. mp is freed before returning; 'ipsq' and 'dummy_arg'
+ * are unused.
+ */
+/* ARGSUSED */
+static void
+ip_arp_defend(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
+{
+ ill_t *ill = rq->q_ptr;
+ arh_t *arh;
+ ipaddr_t src;
+ ipif_t *ipif;
+
+ arh = (arh_t *)mp->b_cont->b_rptr;
+ bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); /* address after the first hardware address (the sender's in standard ARP layout) */
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+ if ((ipif->ipif_flags & IPIF_UP) && ipif->ipif_lcl_addr == src)
+ (void) ipif_resolver_up(ipif, Res_act_defend); /* result deliberately ignored */
+ }
+ freemsg(mp);
+}
+
+/*
* News from ARP. ARP sends notification of interesting events down
* to its clients using M_CTL messages with the interesting ARP packet
* attached via b_cont.
@@ -3796,15 +3946,14 @@ ip_arp_news(queue_t *q, mblk_t *mp)
{
arcn_t *arcn;
arh_t *arh;
- char *cp1;
- uchar_t *cp2;
ire_t *ire = NULL;
- int i1;
- char hbuf[128];
- char sbuf[16];
+ char hbuf[MAC_STR_LEN];
+ char sbuf[INET_ADDRSTRLEN];
ipaddr_t src;
in6_addr_t v6src;
boolean_t isv6 = B_FALSE;
+ ipif_t *ipif;
+ ill_t *ill;
if ((mp->b_wptr - mp->b_rptr) < sizeof (arcn_t) || !mp->b_cont) {
if (q->q_next) {
@@ -3827,25 +3976,37 @@ ip_arp_news(queue_t *q, mblk_t *mp)
return;
}
+ ill = q->q_ptr;
+
arcn = (arcn_t *)mp->b_rptr;
switch (arcn->arcn_code) {
case AR_CN_BOGON:
/*
* Someone is sending ARP packets with a source protocol
- * address which we have published. Either they are
- * pretending to be us, or we have been asked to proxy
- * for a machine that can do fine for itself, or two
- * different machines are providing proxy service for the
- * same protocol address, or something. We try and do
- * something appropriate here.
- */
- cp2 = (uchar_t *)&arh[1];
- cp1 = hbuf;
- *cp1 = '\0';
- for (i1 = arh->arh_hlen; i1--; cp1 += 3)
- (void) sprintf(cp1, "%02x:", *cp2++ & 0xff);
- if (cp1 != hbuf)
- cp1[-1] = '\0';
+ * address that we have published and for which we believe our
+ * entry is authoritative and (when ill_arp_extend is set)
+ * verified to be unique on the network.
+ *
+ * The ARP module internally handles the cases where the sender
+ * is just probing (for DAD) and where the hardware address of
+ * a non-authoritative entry has changed. Thus, these are the
+ * real conflicts, and we have to do resolution.
+ *
+ * We back away quickly from the address if it's from DHCP or
+ * otherwise temporary and hasn't been used recently (or at
+ * all). We'd like to include "deprecated" addresses here as
+ * well (as there's no real reason to defend something we're
+ * discarding), but IPMP "reuses" this flag to mean something
+ * other than the standard meaning.
+ *
+ * If the ARP module above is not extended (meaning that it
+ * doesn't know how to defend the address), then we just log
+ * the problem as we always did and continue on. It's not
+ * right, but there's little else we can do, and those old ATM
+ * users are going away anyway.
+ */
+ (void) mac_colon_addr((uint8_t *)(arh + 1), arh->arh_hlen,
+ hbuf, sizeof (hbuf));
(void) ip_dot_addr(src, sbuf);
if (isv6)
ire = ire_cache_lookup_v6(&v6src, ALL_ZONES, NULL);
@@ -3853,16 +4014,78 @@ ip_arp_news(queue_t *q, mblk_t *mp)
ire = ire_cache_lookup(src, ALL_ZONES, NULL);
if (ire != NULL && IRE_IS_LOCAL(ire)) {
- cmn_err(CE_WARN,
- "IP: Hardware address '%s' trying"
- " to be our address %s!",
- hbuf, sbuf);
- } else {
- cmn_err(CE_WARN,
- "IP: Proxy ARP problem? "
- "Hardware address '%s' thinks it is %s",
- hbuf, sbuf);
+ uint32_t now;
+ uint32_t maxage;
+ clock_t lused;
+ uint_t maxdefense;
+ uint_t defs;
+
+ /*
+ * First, figure out if this address hasn't been used
+ * in a while. If it hasn't, then it's a better
+ * candidate for abandoning.
+ */
+ ipif = ire->ire_ipif;
+ ASSERT(ipif != NULL);
+ now = gethrestime_sec();
+ maxage = now - ire->ire_create_time;
+ if (maxage > ip_max_temp_idle)
+ maxage = ip_max_temp_idle;
+ lused = drv_hztousec(ddi_get_lbolt() -
+ ire->ire_last_used_time) / MICROSEC + 1;
+ if (lused >= maxage && (ipif->ipif_flags &
+ (IPIF_DHCPRUNNING | IPIF_TEMPORARY)))
+ maxdefense = ip_max_temp_defend;
+ else
+ maxdefense = ip_max_defend;
+
+ /*
+ * Now figure out how many times we've defended
+ * ourselves. Ignore defenses that happened long in
+ * the past.
+ */
+ mutex_enter(&ire->ire_lock);
+ if ((defs = ire->ire_defense_count) > 0 &&
+ now - ire->ire_defense_time > ip_defend_interval) {
+ ire->ire_defense_count = defs = 0;
+ }
+ ire->ire_defense_count++;
+ ire->ire_defense_time = now;
+ mutex_exit(&ire->ire_lock);
+ ill_refhold(ill);
+ ire_refrele(ire);
+
+ /*
+ * If we've defended ourselves too many times already,
+ * then give up and tear down the interface(s) using
+ * this address. Otherwise, defend by sending out a
+ * gratuitous ARP.
+ */
+ if (defs >= maxdefense && ill->ill_arp_extend) {
+ (void) qwriter_ip(NULL, ill, q, mp,
+ ip_arp_excl, CUR_OP, B_FALSE);
+ } else {
+ cmn_err(CE_WARN,
+ "node %s is using our IP address %s on %s",
+ hbuf, sbuf, ill->ill_name);
+ /*
+ * If this is an old (ATM) ARP module, then
+ * don't try to defend the address. Remain
+ * compatible with the old behavior. Defend
+ * only with new ARP.
+ */
+ if (ill->ill_arp_extend) {
+ (void) qwriter_ip(NULL, ill, q, mp,
+ ip_arp_defend, CUR_OP, B_FALSE);
+ } else {
+ ill_refrele(ill);
+ }
+ }
+ return;
}
+ cmn_err(CE_WARN,
+ "proxy ARP problem? Node '%s' is using %s on %s",
+ hbuf, sbuf, ill->ill_name);
if (ire != NULL)
ire_refrele(ire);
break;
@@ -3884,53 +4107,79 @@ ip_arp_news(queue_t *q, mblk_t *mp)
ire_walk_v6(ire_delete_cache_gw_v6,
(char *)&v6src, ALL_ZONES);
}
- break;
+ } else {
+ nce_hw_map_t hwm;
+
+ /*
+ * ARP gives us a copy of any packet where it thinks
+ * the address has changed, so that we can update our
+ * caches. We're responsible for caching known answers
+ * in the current design. We check whether the
+ * hardware address really has changed in all of our
+ * entries that have cached this mapping, and if so, we
+ * blow them away. This way we will immediately pick
+ * up the rare case of a host changing hardware
+ * address.
+ */
+ if (src == 0)
+ break;
+ hwm.hwm_addr = src;
+ hwm.hwm_hwlen = arh->arh_hlen;
+ hwm.hwm_hwaddr = (uchar_t *)(arh + 1);
+ ndp_walk_common(&ndp4, NULL,
+ (pfi_t)nce_delete_hw_changed, &hwm, ALL_ZONES);
}
- /*
- * ARP gives us a copy of any broadcast packet with identical
- * sender and receiver protocol address, in
- * case we want to intuit something from it. Such a packet
- * usually means that a machine has just come up on the net.
- * If we have an IRE_CACHE, we blow it away. This way we will
- * immediately pick up the rare case of a host changing
- * hardware address. ip_ire_clookup_and_delete achieves this.
- *
- * The address in "src" may be an entry for a router.
- * (Default router, or non-default router.) If
- * that's true, then any off-net IRE_CACHE entries
- * that go through the router with address "src"
- * must be clobbered. Use ire_walk to achieve this
- * goal.
- *
- * It should be possible to determine if the address
- * in src is or is not for a router. This way,
- * the ire_walk() isn't called all of the time here.
- * Do not pass 'src' value of 0 to ire_delete_cache_gw,
- * as it would remove all IRE_CACHE entries for onlink
- * destinations. All onlink destinations have
- * ire_gateway_addr == 0.
- *
- *
- * The ip_ire_clookup_and_delete() call deletes
- * the nce and all relevant ire cache entries that
- * are associated with that nce.
- * The ire_walk_v4->ire_delete_cache_gw() call
- * will delete the appropriate redirect ires.
- */
- if ((ip_ire_clookup_and_delete(src, NULL) ||
- (ire = ire_ftable_lookup(src, 0, 0, 0, NULL, NULL, NULL,
- 0, NULL, MATCH_IRE_DSTONLY)) != NULL) && src != 0) {
- ire_walk_v4(ire_delete_cache_gw, (char *)&src,
- ALL_ZONES);
- }
- /* From ire_ftable_lookup */
- if (ire != NULL)
- ire_refrele(ire);
break;
- default:
- if (ire != NULL)
+ case AR_CN_READY:
+ /* No external v6 resolver has a contract to use this */
+ if (isv6)
+ break;
+ /* If the link is down, we'll retry this later */
+ if (!(ill->ill_phyint->phyint_flags & PHYI_RUNNING))
+ break;
+ ipif = ipif_lookup_addr(src, ill, ALL_ZONES, NULL, NULL,
+ NULL, NULL);
+ if (ipif != NULL) {
+ /*
+ * If this is a duplicate recovery, then we now need to
+ * go exclusive to bring this thing back up.
+ */
+ if ((ipif->ipif_flags & (IPIF_UP|IPIF_DUPLICATE)) ==
+ IPIF_DUPLICATE) {
+ ipif_refrele(ipif);
+ ill_refhold(ill);
+ (void) qwriter_ip(NULL, ill, q, mp,
+ ip_arp_excl, CUR_OP, B_FALSE);
+ return;
+ }
+ /*
+ * If this is the first notice that this address is
+ * ready, then let the user know now.
+ */
+ if ((ipif->ipif_flags & IPIF_UP) &&
+ !ipif->ipif_addr_ready) {
+ ipif_mask_reply(ipif);
+ ip_rts_ifmsg(ipif);
+ ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+ sctp_update_ipif(ipif, SCTP_IPIF_UP);
+ }
+ ipif->ipif_addr_ready = 1;
+ ipif_refrele(ipif);
+ }
+ ire = ire_cache_lookup(src, ALL_ZONES, MBLK_GETLABEL(mp));
+ if (ire != NULL) {
+ ire->ire_defense_count = 0;
ire_refrele(ire);
+ }
break;
+ case AR_CN_FAILED:
+ /* No external v6 resolver has a contract to use this */
+ if (isv6)
+ break;
+ ill_refhold(ill);
+ (void) qwriter_ip(NULL, ill, q, mp, ip_arp_excl, CUR_OP,
+ B_FALSE);
+ return;
}
freemsg(mp);
}
@@ -5598,25 +5847,57 @@ dlpi_err_str(int err)
* Debug formatting routine. Returns a character string representation of the
* addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address
- * in the form of a ipaddr_t and calls ip_dot_saddr with a pointer.
+ * in the form of an ipaddr_t and formats its four bytes directly into buf.
+ *
+ * Once the ndd table-printing interfaces are removed, this can be changed to
+ * standard dotted-decimal form.
*/
char *
ip_dot_addr(ipaddr_t addr, char *buf)
{
- return (ip_dot_saddr((uchar_t *)&addr, buf));
+ uint8_t *ap = (uint8_t *)&addr; /* the four bytes of addr, in memory order */
+
+ (void) mi_sprintf(buf, "%03d.%03d.%03d.%03d", /* 15 characters; assuming sprintf-like NUL termination, buf needs 16 bytes */
+ ap[0] & 0xFF, ap[1] & 0xFF, ap[2] & 0xFF, ap[3] & 0xFF);
+ return (buf); /* the caller's buffer is returned */
}
/*
- * Debug formatting routine. Returns a character string representation of the
- * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address
- * as a pointer. The "xxx" parts including left zero padding so the final
- * string will fit easily in tables. It would be nice to take a padding
- * length argument instead.
+ * Write the given MAC address as a printable string in the usual colon-
+ * separated format.
+ *
+ * 'addr' and 'alen' give the address bytes; 'buf' and 'buflen' give the
+ * output buffer. Each byte is written as two lower-case hex digits. Returns
+ * buf, except when alen is zero or buflen is less than 4, in which case the
+ * constant string "?" is returned and buf is not written. If buf cannot hold
+ * every byte, "..." is written in place of the bytes that do not fit.
*/
-static char *
-ip_dot_saddr(uchar_t *addr, char *buf)
+const char *
+mac_colon_addr(const uint8_t *addr, size_t alen, char *buf, size_t buflen)
{
- (void) mi_sprintf(buf, "%03d.%03d.%03d.%03d",
- addr[0] & 0xFF, addr[1] & 0xFF, addr[2] & 0xFF, addr[3] & 0xFF);
+ char *bp;
+
+ if (alen == 0 || buflen < 4)
+ return ("?");
+ bp = buf;
+ for (;;) {
+ /*
+ * If there are more MAC address bytes available, but we won't
+ * have any room to print them, then add "..." to the string
+ * instead. See below for the 'magic number' explanation.
+ */
+ if ((alen == 2 && buflen < 6) || (alen > 2 && buflen < 7)) {
+ (void) strcpy(bp, "...");
+ break;
+ }
+ (void) sprintf(bp, "%02x", *addr++); /* also writes a NUL, overwritten if more output follows */
+ bp += 2;
+ if (--alen == 0)
+ break;
+ *bp++ = ':';
+ buflen -= 3; /* two hex digits plus the ':' just written */
+ /*
+ * At this point, based on the first 'if' statement above,
+ * either alen == 1 and buflen >= 3, or alen > 1 and
+ * buflen >= 4. The first case leaves room for the final "xx"
+ * number and trailing NUL byte. The second leaves room for at
+ * least "...". Thus the apparently 'magic' numbers chosen for
+ * that statement.
+ */
+ }
return (buf);
}
@@ -9315,8 +9596,8 @@ ip_setqinfo(queue_t *q, minor_t minor, boolean_t bump_mib)
} else {
if (bump_mib)
BUMP_MIB(&ip_mib, ipOutSwitchIPv6);
- q->q_qinfo = &rinit;
- WR(q)->q_qinfo = &winit;
+ q->q_qinfo = &iprinit;
+ WR(q)->q_qinfo = &ipwinit;
(Q_TO_CONN(q))->conn_pkt_isv6 = B_FALSE;
}
@@ -14891,7 +15172,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
* v6 interfaces.
* Unlike ARP which has to do another bind
* and attach, once we get here we are
- * done withh NDP. Except in the case of
+ * done with NDP. Except in the case of
* ILLF_XRESOLV, in which case we send an
* AR_INTERFACE_UP to the external resolver.
* If all goes well, the ioctl will complete
@@ -14910,7 +15191,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
mutex_exit(&connp->conn_lock);
if (success) {
err = ipif_resolver_up(ipif,
- B_FALSE);
+ Res_act_initial);
if (err == EINPROGRESS) {
freemsg(mp);
return;
@@ -14939,7 +15220,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
mutex_exit(&ill->ill_lock);
mutex_exit(&connp->conn_lock);
if (success) {
- err = ipif_resolver_up(ipif, B_FALSE);
+ err = ipif_resolver_up(ipif, Res_act_initial);
if (err == EINPROGRESS) {
freemsg(mp);
return;
@@ -15061,13 +15342,13 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
/*
* IPv4 ARP case
*
- * Set B_TRUE, as we only want
+ * Set Res_act_move, as we only want
* ipif_resolver_up to send an
* AR_ENTRY_ADD request up to
* ARP.
*/
err = ipif_resolver_up(ipif,
- B_TRUE);
+ Res_act_move);
if (err) {
ip1dbg((
"ip_rput_dlpi_writer: "
@@ -15204,10 +15485,11 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
phyint_t *phyint = ill->ill_phyint;
uint64_t new_phyint_flags;
boolean_t changed = B_FALSE;
+ boolean_t went_up;
+ went_up = notify->dl_notification == DL_NOTE_LINK_UP;
mutex_enter(&phyint->phyint_lock);
- new_phyint_flags =
- (notify->dl_notification == DL_NOTE_LINK_UP) ?
+ new_phyint_flags = went_up ?
phyint->phyint_flags | PHYI_RUNNING :
phyint->phyint_flags & ~PHYI_RUNNING;
if (new_phyint_flags != phyint->phyint_flags) {
@@ -15216,18 +15498,12 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
}
mutex_exit(&phyint->phyint_lock);
/*
- * If the flags have changed, send a message to
- * the routing socket.
+ * ill_restart_dad handles the DAD restart and routing
+ * socket notification logic.
*/
if (changed) {
- if (phyint->phyint_illv4 != NULL) {
- ip_rts_ifmsg(
- phyint->phyint_illv4->ill_ipif);
- }
- if (phyint->phyint_illv6 != NULL) {
- ip_rts_ifmsg(
- phyint->phyint_illv6->ill_ipif);
- }
+ ill_restart_dad(phyint->phyint_illv4, went_up);
+ ill_restart_dad(phyint->phyint_illv6, went_up);
}
break;
}
@@ -15274,15 +15550,14 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
*/
break;
}
- case DL_NOTIFY_ACK:
- /*
- * Don't really need to check for what notifications
- * are supported; we'll process what gets sent upstream,
- * and we know it'll be something we support changing
- * based on our DL_NOTIFY_REQ.
- */
+ case DL_NOTIFY_ACK: {
+ dl_notify_ack_t *noteack = (dl_notify_ack_t *)mp->b_rptr;
+
+ if (noteack->dl_notifications & DL_NOTE_LINK_UP)
+ ill->ill_note_link = 1;
ill_dlpi_done(ill, DL_NOTIFY_REQ);
break;
+ }
case DL_PHYS_ADDR_ACK: {
/*
* We should have an IOCTL waiting on this when request
@@ -16198,7 +16473,7 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire)
rput_flags |= (IN6_IS_ADDR_MULTICAST(v6dstp) ?
IP6_IN_LLMCAST : 0);
ip_rput_data_v6(ill->ill_rq, ill, ipsec_mp, ip6h, rput_flags,
- NULL);
+ NULL, NULL);
}
if (ill_need_rele)
ill_refrele(ill);
@@ -25801,6 +26076,17 @@ nak:
freemsg(mp);
}
return;
+ case AR_ARP_EXTEND:
+ /*
+ * The ARP module above us is capable of duplicate
+ * address detection. Old ATM drivers will not send
+ * this message.
+ */
+ ASSERT(q->q_next != NULL);
+ ill = (ill_t *)q->q_ptr;
+ ill->ill_arp_extend = B_TRUE;
+ freemsg(mp);
+ return;
default:
break;
}
@@ -27308,7 +27594,7 @@ static void
ip_multirt_bad_mtu(ire_t *ire, uint32_t max_frag)
{
hrtime_t current = gethrtime();
- char buf[16];
+ char buf[INET_ADDRSTRLEN];
/* Convert interval in ms to hrtime in ns */
if (multirt_bad_mtu_last_time +
diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c
index b96a6a24ef..29afe371f7 100644
--- a/usr/src/uts/common/inet/ip/ip6.c
+++ b/usr/src/uts/common/inet/ip/ip6.c
@@ -102,6 +102,9 @@
#include <rpc/pmap_prot.h>
+/* Temporary; for CR 6451644 work-around */
+#include <sys/ethernet.h>
+
extern squeue_func_t ip_input_proc;
/*
@@ -326,7 +329,7 @@ struct qinit winit_ipv6 = {
*/
static void
icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length,
- boolean_t mctl_present, uint_t flags, zoneid_t zoneid)
+ boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp)
{
icmp6_t *icmp6;
ip6_t *ip6h;
@@ -603,7 +606,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length,
if (mctl_present)
freeb(first_mp);
/* XXX may wish to pass first_mp up to ndp_input someday. */
- ndp_input(ill, mp);
+ ndp_input(ill, mp, dl_mp);
return;
case ND_NEIGHBOR_ADVERT:
@@ -612,7 +615,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length,
if (mctl_present)
freeb(first_mp);
/* XXX may wish to pass first_mp up to ndp_input someday. */
- ndp_input(ill, mp);
+ ndp_input(ill, mp, dl_mp);
return;
case ND_REDIRECT: {
@@ -5910,26 +5913,6 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif,
}
goto err_ret;
}
- /* Use any ipif for source */
- for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL;
- src_ipif = src_ipif->ipif_next) {
- if ((src_ipif->ipif_flags & IPIF_UP) &&
- IN6_IS_ADDR_UNSPECIFIED(
- &src_ipif->ipif_v6src_addr))
- break;
- }
- if (src_ipif == NULL) {
- if (ip_debug > 2) {
- /* ip1dbg */
- pr_addr_dbg("ip_newroute_ipif_v6: "
- "no src for dst %s\n ",
- AF_INET6, v6dstp);
- printf("ip_newroute_ipif_v6: if %s"
- "(UNSPEC_SRC)\n",
- dst_ill->ill_name);
- }
- goto err_ret;
- }
src_ipif = ipif;
ipif_refhold(src_ipif);
}
@@ -6602,7 +6585,7 @@ bad_opt:
*/
static void
ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
- ill_t *ill, uint_t flags, mblk_t *hada_mp)
+ ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp)
{
ip6_rthdr0_t *rthdr;
uint_t ehdrlen;
@@ -6678,7 +6661,7 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
B_FALSE, B_FALSE);
return;
}
- ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp);
+ ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp);
return;
hada_drop:
/* IPsec kstats: bean counter? */
@@ -6692,12 +6675,15 @@ hada_drop:
static void
ip_rput_v6(queue_t *q, mblk_t *mp)
{
- mblk_t *mp1, *first_mp, *hada_mp = NULL;
+ mblk_t *first_mp;
+ mblk_t *hada_mp = NULL;
ip6_t *ip6h;
- boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE;
+ boolean_t ll_multicast = B_FALSE;
+ boolean_t mctl_present = B_FALSE;
ill_t *ill;
struct iocblk *iocp;
uint_t flags = 0;
+ mblk_t *dl_mp;
ill = (ill_t *)q->q_ptr;
if (ill->ill_state_flags & ILL_CONDEMNED) {
@@ -6719,9 +6705,59 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
}
}
+ dl_mp = NULL;
switch (mp->b_datap->db_type) {
- case M_DATA:
+ case M_DATA: {
+ int hlen;
+ uchar_t *ucp;
+ struct ether_header *eh;
+ dl_unitdata_ind_t *dui;
+
+ /*
+ * This is a work-around for CR 6451644, a bug in Nemo. It
+ * should be removed when that problem is fixed.
+ */
+ if (ill->ill_mactype == DL_ETHER &&
+ (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) &&
+ (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) &&
+ ucp[-2] == (IP6_DL_SAP >> 8)) {
+ if (hlen >= sizeof (struct ether_vlan_header) &&
+ ucp[-5] == 0 && ucp[-6] == 0x81)
+ ucp -= sizeof (struct ether_vlan_header);
+ else
+ ucp -= sizeof (struct ether_header);
+ /*
+ * If it's a group address, then fabricate a
+ * DL_UNITDATA_IND message.
+ */
+ if ((ll_multicast = (ucp[0] & 1)) != 0 &&
+ (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16,
+ BPRI_HI)) != NULL) {
+ eh = (struct ether_header *)ucp;
+ dui = (dl_unitdata_ind_t *)dl_mp->b_rptr;
+ DB_TYPE(dl_mp) = M_PROTO;
+ dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16;
+ dui->dl_primitive = DL_UNITDATA_IND;
+ dui->dl_dest_addr_length = 8;
+ dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE;
+ dui->dl_src_addr_length = 8;
+ dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE +
+ 8;
+ dui->dl_group_address = 1;
+ ucp = (uchar_t *)(dui + 1);
+ if (ill->ill_sap_length > 0)
+ ucp += ill->ill_sap_length;
+ bcopy(&eh->ether_dhost, ucp, 6);
+ bcopy(&eh->ether_shost, ucp + 8, 6);
+ ucp = (uchar_t *)(dui + 1);
+ if (ill->ill_sap_length < 0)
+ ucp += 8 + ill->ill_sap_length;
+ bcopy(&eh->ether_type, ucp, 2);
+ bcopy(&eh->ether_type, ucp + 8, 2);
+ }
+ }
break;
+ }
case M_PROTO:
case M_PCPROTO:
@@ -6734,10 +6770,10 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
#define dlur ((dl_unitdata_ind_t *)mp->b_rptr)
ll_multicast = dlur->dl_group_address;
#undef dlur
- /* Ditch the DLPI header. */
- mp1 = mp;
+ /* Save the DLPI header. */
+ dl_mp = mp;
mp = mp->b_cont;
- freeb(mp1);
+ dl_mp->b_cont = NULL;
break;
case M_BREAK:
panic("ip_rput_v6: got an M_BREAK");
@@ -6772,7 +6808,7 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
mutex_exit(&ill->ill_lock);
qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE);
return;
- case M_CTL: {
+ case M_CTL:
if ((MBLKL(mp) > sizeof (int)) &&
((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) {
ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t));
@@ -6781,7 +6817,6 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
}
putnext(q, mp);
return;
- }
case M_IOCNAK:
iocp = (struct iocblk *)mp->b_rptr;
switch (iocp->ioc_cmd) {
@@ -6824,8 +6859,8 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
mp1 = copymsg(mp);
freemsg(mp);
if (mp1 == NULL) {
- BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
- return;
+ first_mp = NULL;
+ goto discard;
}
mp = mp1;
}
@@ -6841,10 +6876,8 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
if (!OK_32PTR((uchar_t *)ip6h) ||
(mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) {
if (!pullupmsg(mp, IPV6_HDR_LEN)) {
- BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
ip1dbg(("ip_rput_v6: pullupmsg failed\n"));
- freemsg(first_mp);
- return;
+ goto discard;
}
ip6h = (ip6_t *)mp->b_rptr;
}
@@ -6857,31 +6890,32 @@ ip_rput_v6(queue_t *q, mblk_t *mp)
* TODO: Avoid this check for e.g. connected TCP sockets
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) {
- BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
ip1dbg(("ip_rput_v6: pkt with mapped src addr\n"));
- freemsg(first_mp);
- return;
+ goto discard;
}
if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) {
- BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
ip1dbg(("ip_rput_v6: pkt with loopback src"));
- freemsg(first_mp);
- return;
+ goto discard;
} else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) {
- BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
ip1dbg(("ip_rput_v6: pkt with loopback dst"));
- freemsg(first_mp);
- return;
+ goto discard;
}
flags |= (ll_multicast ? IP6_IN_LLMCAST : 0);
- ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp);
+ ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp);
} else {
BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4);
- BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
- freemsg(first_mp);
+ goto discard;
}
+ freemsg(dl_mp);
+ return;
+
+discard:
+ if (dl_mp != NULL)
+ freeb(dl_mp);
+ freemsg(first_mp);
+ BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
}
/*
@@ -7080,10 +7114,14 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present,
* actually arrived on. We need to remember this when saving the
* input interface index into potential IPV6_PKTINFO data in
* ip_add_info_v6().
+ *
+ * This routine doesn't free dl_mp; that's the caller's responsibility on
+ * return. (Note that the callers are complex enough that there's no tail
+ * recursion here anyway.)
*/
void
ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h,
- uint_t flags, mblk_t *hada_mp)
+ uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp)
{
ire_t *ire = NULL;
queue_t *rq;
@@ -7939,14 +7977,15 @@ tcp_fanout:
continue;
icmp_inbound_v6(q, first_mp1, ill,
hdr_len, mctl_present, 0,
- ilm->ilm_zoneid);
+ ilm->ilm_zoneid, dl_mp);
}
ILM_WALKER_RELE(ill);
} else {
first_mp1 = ip_copymsg(first_mp);
if (first_mp1 != NULL)
icmp_inbound_v6(q, first_mp1, ill,
- hdr_len, mctl_present, 0, zoneid);
+ hdr_len, mctl_present, 0, zoneid,
+ dl_mp);
}
}
/* FALLTHRU */
@@ -8181,7 +8220,7 @@ tcp_fanout:
return;
}
ip_process_rthdr(q, mp, ip6h, rthdr, ill,
- flags, hada_mp);
+ flags, hada_mp, dl_mp);
return;
}
used = ehdrlen;
@@ -10253,8 +10292,7 @@ send_from_ill:
&ip6h->ip6_src, ill, zoneid);
}
}
- if (ill != NULL)
- ill_refrele(ill);
+ ill_refrele(ill);
return;
}
if (need_decref) {
@@ -10284,8 +10322,7 @@ send_from_ill:
}
if (mp == NULL) {
BUMP_MIB(mibptr, ipv6OutDiscards);
- if (ill != NULL)
- ill_refrele(ill);
+ ill_refrele(ill);
return;
}
ip6i = (ip6i_t *)mp->b_rptr;
@@ -10333,8 +10370,7 @@ send_from_ill:
ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill,
zoneid);
}
- if (ill != NULL)
- ill_refrele(ill);
+ ill_refrele(ill);
return;
notv6:
@@ -10553,7 +10589,8 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp,
continue;
icmp_inbound_v6(q, first_mp1, ill,
hdr_length, mctl_present,
- IP6_NO_IPPOLICY, ilm->ilm_zoneid);
+ IP6_NO_IPPOLICY, ilm->ilm_zoneid,
+ NULL);
}
ILM_WALKER_RELE(ill);
} else {
@@ -10561,7 +10598,8 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp,
if (first_mp1 != NULL)
icmp_inbound_v6(q, first_mp1, ill,
hdr_length, mctl_present,
- IP6_NO_IPPOLICY, ire->ire_zoneid);
+ IP6_NO_IPPOLICY, ire->ire_zoneid,
+ NULL);
}
}
/* FALLTHRU */
diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c
index 2749b6b439..313d0bbdca 100644
--- a/usr/src/uts/common/inet/ip/ip6_if.c
+++ b/usr/src/uts/common/inet/ip/ip6_if.c
@@ -1317,12 +1317,12 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change)
* ND not supported on XRESOLV interfaces. If ND support (multicast)
* added later, take out this check.
*/
- if (ill->ill_flags & ILLF_XRESOLV)
- return (0);
-
- if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
- (!(ill->ill_net_type & IRE_INTERFACE)))
+ if ((ill->ill_flags & ILLF_XRESOLV) ||
+ IN6_IS_ADDR_UNSPECIFIED(addr) ||
+ (!(ill->ill_net_type & IRE_INTERFACE))) {
+ ipif->ipif_addr_ready = 1;
return (0);
+ }
/*
* Need to setup multicast mapping only when the first
@@ -1374,7 +1374,7 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change)
&ipv6_all_zeros,
0,
flags,
- ND_REACHABLE,
+ ND_PROBE, /* Causes Duplicate Address Detection to run */
&nce,
NULL,
NULL);
@@ -1382,6 +1382,11 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change)
case 0:
ip1dbg(("ipif_ndp_up: NCE created for %s\n",
ill->ill_name));
+ ipif->ipif_addr_ready = 1;
+ break;
+ case EINPROGRESS:
+ ip1dbg(("ipif_ndp_up: running DAD now for %s\n",
+ ill->ill_name));
break;
case EEXIST:
NCE_REFRELE(nce);
@@ -1401,6 +1406,9 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change)
}
return (err);
}
+ } else {
+ /* No local NCE for this entry */
+ ipif->ipif_addr_ready = 1;
}
if (nce != NULL)
NCE_REFRELE(nce);
@@ -1625,7 +1633,8 @@ ip_addr_xor_v6(const in6_addr_t *a1, const in6_addr_t *a2, in6_addr_t *res)
#define IPIF_VALID_IPV6_SOURCE(ipif) \
(((ipif)->ipif_flags & IPIF_UP) && \
- !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)))
+ !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \
+ (ipif)->ipif_addr_ready)
/* source address candidate */
typedef struct candidate {
@@ -3001,9 +3010,12 @@ ipif_up_done_v6(ipif_t *ipif)
}
}
+ if (ipif->ipif_addr_ready) {
+ ip_rts_ifmsg(ipif);
+ ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+ sctp_update_ipif(ipif, SCTP_IPIF_UP);
+ }
- ip_rts_ifmsg(ipif);
- ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
if (ipif_saved_irep != NULL) {
kmem_free(ipif_saved_irep,
ipif_saved_ire_cnt * sizeof (ire_t *));
@@ -3011,7 +3023,6 @@ ipif_up_done_v6(ipif_t *ipif)
if (src_ipif_held)
ipif_refrele(src_ipif);
- sctp_update_ipif(ipif, SCTP_IPIF_UP);
return (0);
bad:
diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c
index 179c1dd7ce..8edf1bb113 100644
--- a/usr/src/uts/common/inet/ip/ip_if.c
+++ b/usr/src/uts/common/inet/ip/ip_if.c
@@ -157,10 +157,8 @@ static void ipif_check_bcast_ires(ipif_t *test_ipif);
static void ipif_down_delete_ire(ire_t *ire, char *ipif);
static void ipif_delete_cache_ire(ire_t *, char *);
static int ipif_logical_down(ipif_t *ipif, queue_t *q, mblk_t *mp);
-static void ipif_down_tail(ipif_t *ipif);
static void ipif_free(ipif_t *ipif);
static void ipif_free_tail(ipif_t *ipif);
-static void ipif_mask_reply(ipif_t *);
static void ipif_mtu_change(ire_t *ire, char *ipif_arg);
static void ipif_multicast_down(ipif_t *ipif);
static void ipif_recreate_interface_routes(ipif_t *old_ipif, ipif_t *ipif);
@@ -180,6 +178,7 @@ static int ill_arp_off(ill_t *ill);
static int ill_arp_on(ill_t *ill);
static void ill_delete_interface_type(ill_if_t *);
static int ill_dl_up(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q);
+static void ill_dl_down(ill_t *ill);
static void ill_down(ill_t *ill);
static void ill_downi(ire_t *ire, char *ill_arg);
static void ill_downi_mrtun_srcif(ire_t *ire, char *ill_arg);
@@ -671,6 +670,20 @@ ill_arp_alloc(ill_t *ill, uchar_t *template, caddr_t addr)
return (mp);
}
+/*
+ * Allocate an ARP message for this ipif's local IPv4 address.
+ *
+ * This is a convenience wrapper: it passes the ipif's ill, the
+ * ip_area_template and the address of ipif_lcl_addr to ill_arp_alloc() and
+ * returns whatever that routine returns.  Callers must be prepared for a
+ * NULL result, since other users of ill_arp_alloc() in this file check for
+ * one.
+ *
+ * NOTE(review): ip_area_template is presumably the "add entry" request
+ * layout (area_t); confirm against the template definition.
+ */
+mblk_t *
+ipif_area_alloc(ipif_t *ipif)
+{
+ return (ill_arp_alloc(ipif->ipif_ill, (uchar_t *)&ip_area_template,
+ (char *)&ipif->ipif_lcl_addr));
+}
+
+/*
+ * Allocate an ARP message for this ipif's local IPv4 address using the
+ * ip_ared_template.
+ *
+ * Like ipif_area_alloc(), this only forwards the ipif's ill, the template
+ * and the address of ipif_lcl_addr to ill_arp_alloc() and returns its result
+ * unchanged; callers must be prepared for a NULL result.
+ *
+ * NOTE(review): ip_ared_template is presumably the "delete entry" request
+ * layout; confirm against the template definition.
+ */
+mblk_t *
+ipif_ared_alloc(ipif_t *ipif)
+{
+ return (ill_arp_alloc(ipif->ipif_ill, (uchar_t *)&ip_ared_template,
+ (char *)&ipif->ipif_lcl_addr));
+}
+
/*
* Completely vaporize a lower level tap and all associated interfaces.
* ill_delete is called only out of ip_close when the device control
@@ -751,6 +764,19 @@ ill_delete(ill_t *ill)
rw_exit(&ill_g_usesrc_lock);
}
+/*
+ * Clear the IPIF_DUPLICATE mark on an ipif, if it is set, and decrement the
+ * owning ill's ill_ipif_dup_count to match.  An ipif that is not marked as a
+ * duplicate is left untouched.
+ *
+ * The flag test, the flag update and the counter decrement are all performed
+ * while holding the ill's ill_lock, which this routine acquires and releases
+ * itself; the caller must therefore not already hold it.
+ */
+static void
+ipif_non_duplicate(ipif_t *ipif)
+{
+ ill_t *ill = ipif->ipif_ill;
+ mutex_enter(&ill->ill_lock);
+ if (ipif->ipif_flags & IPIF_DUPLICATE) {
+ ipif->ipif_flags &= ~IPIF_DUPLICATE;
+ /* A marked ipif must still be accounted for in the ill's count. */
+ ASSERT(ill->ill_ipif_dup_count > 0);
+ ill->ill_ipif_dup_count--;
+ }
+ mutex_exit(&ill->ill_lock);
+}
+
/*
* ill_delete_tail is called from ip_modclose after all references
* to the closing ill are gone. The wait is done in ip_modclose
@@ -761,8 +787,14 @@ ill_delete_tail(ill_t *ill)
mblk_t **mpp;
ipif_t *ipif;
- for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+ ipif_non_duplicate(ipif);
ipif_down_tail(ipif);
+ }
+
+ ASSERT(ill->ill_ipif_dup_count == 0 &&
+ ill->ill_arp_down_mp == NULL &&
+ ill->ill_arp_del_mapping_mp == NULL);
/*
* If polling capability is enabled (which signifies direct
@@ -1489,8 +1521,10 @@ ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
ipif_t *ipif;
ASSERT(IAM_WRITER_IPSQ(ipsq));
- for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+ ipif_non_duplicate(ipif);
ipif_down_tail(ipif);
+ }
ill_down_tail(ill);
freemsg(mp);
ipsq->ipsq_current_ipif = NULL;
@@ -5645,8 +5679,10 @@ ipif_is_quiescent(ipif_t *ipif)
}
ill = ipif->ipif_ill;
- if (ill->ill_ipif_up_count != 0 || ill->ill_logical_down)
+ if (ill->ill_ipif_up_count != 0 || ill->ill_ipif_dup_count != 0 ||
+ ill->ill_logical_down) {
return (B_TRUE);
+ }
/* This is the last ipif going down or being deleted on this ill */
if (ill->ill_ire_cnt != 0 || ill->ill_refcnt != 0) {
@@ -9144,6 +9180,8 @@ ip_sioctl_arp_common(ill_t *ill, queue_t *q, mblk_t *mp, sin_t *sin,
area->area_flags |= ACE_F_PERMANENT;
if (flags & ATF_PUBL)
area->area_flags |= ACE_F_PUBLISH;
+ if (flags & ATF_AUTHORITY)
+ area->area_flags |= ACE_F_AUTHORITY;
/*
* Up to ARP it goes. The response will come
@@ -10118,6 +10156,8 @@ errack:
*flagsp |= ATF_PERM;
if (area->area_flags & ACE_F_PUBLISH)
*flagsp |= ATF_PUBL;
+ if (area->area_flags & ACE_F_AUTHORITY)
+ *flagsp |= ATF_AUTHORITY;
if (area->area_hw_addr_length != 0) {
*flagsp |= ATF_COM;
/*
@@ -10524,10 +10564,11 @@ ip_sioctl_removeif(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
if (ipif->ipif_refcnt == 0 && ipif->ipif_ire_cnt == 0) {
mutex_exit(&ill->ill_lock);
mutex_exit(&connp->conn_lock);
+ ipif_non_duplicate(ipif);
ipif_down_tail(ipif);
ipif_free_tail(ipif);
return (0);
- }
+ }
success = ipsq_pending_mp_add(connp, ipif, CONNP_TO_WQ(connp), mp,
IPIF_FREE);
mutex_exit(&ill->ill_lock);
@@ -10565,6 +10606,7 @@ ip_sioctl_removeif_restart(ipif_t *ipif, sin_t *dummy_sin, queue_t *q,
ASSERT(IAM_WRITER_IPIF(ipif));
ASSERT(ipif->ipif_state_flags & IPIF_CONDEMNED);
+ ipif_non_duplicate(ipif);
ipif_down_tail(ipif);
ipif_free_tail(ipif);
@@ -10682,10 +10724,19 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
ipaddr_t addr;
sin6_t *sin6;
int err = 0;
+ ill_t *ill = ipif->ipif_ill;
+ boolean_t need_dl_down;
+ boolean_t need_arp_down;
ip1dbg(("ip_sioctl_addr_tail(%s:%u %p)\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif));
+ ill->ill_name, ipif->ipif_id, (void *)ipif));
ASSERT(IAM_WRITER_IPIF(ipif));
+
+ /* Must cancel any pending timer before taking the ill_lock */
+ if (ipif->ipif_recovery_id != 0)
+ (void) untimeout(ipif->ipif_recovery_id);
+ ipif->ipif_recovery_id = 0;
+
if (ipif->ipif_isv6) {
sin6 = (sin6_t *)sin;
v6addr = sin6->sin6_addr;
@@ -10693,17 +10744,37 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
addr = sin->sin_addr.s_addr;
IN6_IPADDR_TO_V4MAPPED(addr, &v6addr);
}
- mutex_enter(&ipif->ipif_ill->ill_lock);
+ mutex_enter(&ill->ill_lock);
ipif->ipif_v6lcl_addr = v6addr;
if (ipif->ipif_flags & (IPIF_ANYCAST | IPIF_NOLOCAL)) {
ipif->ipif_v6src_addr = ipv6_all_zeros;
} else {
ipif->ipif_v6src_addr = v6addr;
}
+ ipif->ipif_addr_ready = 0;
+
+ /*
+ * If the interface was previously marked as a duplicate, then since
+ * we've now got a "new" address, it should no longer be considered a
+ * duplicate -- even if the "new" address is the same as the old one.
+ * Note that if all ipifs are down, we may have a pending ARP down
+ * event to handle. This is because we want to recover from duplicates
+ * and thus delay tearing down ARP until the duplicates have been
+ * removed or disabled.
+ */
+ need_dl_down = need_arp_down = B_FALSE;
+ if (ipif->ipif_flags & IPIF_DUPLICATE) {
+ need_arp_down = !need_up;
+ ipif->ipif_flags &= ~IPIF_DUPLICATE;
+ if (--ill->ill_ipif_dup_count == 0 && !need_up &&
+ ill->ill_ipif_up_count == 0 && ill->ill_dl_up) {
+ need_dl_down = B_TRUE;
+ }
+ }
- if ((ipif->ipif_isv6) && IN6_IS_ADDR_6TO4(&v6addr) &&
- (!ipif->ipif_ill->ill_is_6to4tun)) {
- queue_t *wqp = ipif->ipif_ill->ill_wq;
+ if (ipif->ipif_isv6 && IN6_IS_ADDR_6TO4(&v6addr) &&
+ !ill->ill_is_6to4tun) {
+ queue_t *wqp = ill->ill_wq;
/*
* The local address of this interface is a 6to4 address,
@@ -10719,7 +10790,7 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
if (wqp->q_next->q_qinfo->qi_minfo->mi_idnum
== TUN6TO4_MODID) {
/* set for use in IP */
- ipif->ipif_ill->ill_is_6to4tun = 1;
+ ill->ill_is_6to4tun = 1;
break;
}
wqp = wqp->q_next;
@@ -10728,7 +10799,7 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
}
ipif_set_default(ipif);
- mutex_exit(&ipif->ipif_ill->ill_lock);
+ mutex_exit(&ill->ill_lock);
if (need_up) {
/*
@@ -10748,6 +10819,11 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
sctp_update_ipif(ipif, SCTP_IPIF_UPDATE);
}
+ if (need_dl_down)
+ ill_dl_down(ill);
+ if (need_arp_down)
+ ipif_arp_down(ipif);
+
return (err);
}
@@ -10872,9 +10948,17 @@ ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
in6_addr_t v6addr;
ill_t *ill = ipif->ipif_ill;
int err = 0;
+ boolean_t need_dl_down;
+ boolean_t need_arp_down;
+
+ ip1dbg(("ip_sioctl_dstaddr_tail(%s:%u %p)\n", ill->ill_name,
+ ipif->ipif_id, (void *)ipif));
+
+ /* Must cancel any pending timer before taking the ill_lock */
+ if (ipif->ipif_recovery_id != 0)
+ (void) untimeout(ipif->ipif_recovery_id);
+ ipif->ipif_recovery_id = 0;
- ip1dbg(("ip_sioctl_dstaddr_tail(%s:%u %p)\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif));
if (ipif->ipif_isv6) {
sin6_t *sin6;
@@ -10898,7 +10982,24 @@ ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
ipif->ipif_flags |= IPIF_POINTOPOINT;
ipif->ipif_flags &= ~IPIF_BROADCAST;
if (ipif->ipif_isv6)
- ipif->ipif_ill->ill_flags |= ILLF_NONUD;
+ ill->ill_flags |= ILLF_NONUD;
+ }
+
+ /*
+ * If the interface was previously marked as a duplicate, then since
+ * we've now got a "new" address, it should no longer be considered a
+ * duplicate -- even if the "new" address is the same as the old one.
+ * Note that if all ipifs are down, we may have a pending ARP down
+ * event to handle.
+ */
+ need_dl_down = need_arp_down = B_FALSE;
+ if (ipif->ipif_flags & IPIF_DUPLICATE) {
+ need_arp_down = !need_up;
+ ipif->ipif_flags &= ~IPIF_DUPLICATE;
+ if (--ill->ill_ipif_dup_count == 0 && !need_up &&
+ ill->ill_ipif_up_count == 0 && ill->ill_dl_up) {
+ need_dl_down = B_TRUE;
+ }
}
/* Set the new address. */
@@ -10918,6 +11019,12 @@ ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
*/
err = ipif_up(ipif, q, mp);
}
+
+ if (need_dl_down)
+ ill_dl_down(ill);
+
+ if (need_arp_down)
+ ipif_arp_down(ipif);
return (err);
}
@@ -12917,47 +13024,45 @@ void
ipif_arp_down(ipif_t *ipif)
{
mblk_t *mp;
+ ill_t *ill = ipif->ipif_ill;
- ip1dbg(("ipif_arp_down(%s:%u)\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id));
+ ip1dbg(("ipif_arp_down(%s:%u)\n", ill->ill_name, ipif->ipif_id));
ASSERT(IAM_WRITER_IPIF(ipif));
/* Delete the mapping for the local address */
mp = ipif->ipif_arp_del_mp;
if (mp != NULL) {
- ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n",
- dlpi_prim_str(*(int *)mp->b_rptr), *(int *)mp->b_rptr,
- ipif->ipif_ill->ill_name, ipif->ipif_id));
- putnext(ipif->ipif_ill->ill_rq, mp);
+ ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n",
+ *(unsigned *)mp->b_rptr, ill->ill_name, ipif->ipif_id));
+ putnext(ill->ill_rq, mp);
ipif->ipif_arp_del_mp = NULL;
}
/*
- * If this is the last ipif that is going down, we need
- * to clean up ARP completely.
+ * If this is the last ipif that is going down and there are no
+ * duplicate addresses we may yet attempt to re-probe, then we need to
+ * clean up ARP completely.
*/
- if (ipif->ipif_ill->ill_ipif_up_count == 0) {
+ if (ill->ill_ipif_up_count == 0 && ill->ill_ipif_dup_count == 0) {
/* Send up AR_INTERFACE_DOWN message */
- mp = ipif->ipif_ill->ill_arp_down_mp;
+ mp = ill->ill_arp_down_mp;
if (mp != NULL) {
- ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n",
- dlpi_prim_str(*(int *)mp->b_rptr),
- *(int *)mp->b_rptr, ipif->ipif_ill->ill_name,
+ ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n",
+ *(unsigned *)mp->b_rptr, ill->ill_name,
ipif->ipif_id));
- putnext(ipif->ipif_ill->ill_rq, mp);
- ipif->ipif_ill->ill_arp_down_mp = NULL;
+ putnext(ill->ill_rq, mp);
+ ill->ill_arp_down_mp = NULL;
}
/* Tell ARP to delete the multicast mappings */
- mp = ipif->ipif_ill->ill_arp_del_mapping_mp;
+ mp = ill->ill_arp_del_mapping_mp;
if (mp != NULL) {
- ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n",
- dlpi_prim_str(*(int *)mp->b_rptr),
- *(int *)mp->b_rptr, ipif->ipif_ill->ill_name,
+ ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n",
+ *(unsigned *)mp->b_rptr, ill->ill_name,
ipif->ipif_id));
- putnext(ipif->ipif_ill->ill_rq, mp);
- ipif->ipif_ill->ill_arp_del_mapping_mp = NULL;
+ putnext(ill->ill_rq, mp);
+ ill->ill_arp_del_mapping_mp = NULL;
}
}
}
@@ -13000,9 +13105,8 @@ ipif_arp_setup_multicast(ipif_t *ipif, mblk_t **arp_add_mapping_mp)
*/
mp = ill->ill_arp_del_mapping_mp;
if (mp != NULL) {
- ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n",
- dlpi_prim_str(*(int *)mp->b_rptr),
- *(int *)mp->b_rptr, ill->ill_name, ipif->ipif_id));
+ ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n",
+ *(unsigned *)mp->b_rptr, ill->ill_name, ipif->ipif_id));
putnext(ill->ill_rq, mp);
ill->ill_arp_del_mapping_mp = NULL;
}
@@ -13077,6 +13181,7 @@ ipif_arp_setup_multicast(ipif_t *ipif, mblk_t **arp_add_mapping_mp)
return (0);
}
ASSERT(add_mp != NULL && del_mp != NULL);
+ ASSERT(ill->ill_arp_del_mapping_mp == NULL);
ill->ill_arp_del_mapping_mp = del_mp;
if (arp_add_mapping_mp != NULL) {
/* The caller just wants the mblks allocated */
@@ -13095,15 +13200,18 @@ ipif_arp_setup_multicast(ipif_t *ipif, mblk_t **arp_add_mapping_mp)
* though it only sets up the resolver for v6
* if it's an xresolv interface (one using an external resolver).
* Honors ILLF_NOARP.
- * The boolean value arp_just_publish, if B_TRUE, indicates that
- * it only needs to send an AR_ENTRY_ADD message up to ARP for
- * IPv4 interfaces. Currently, B_TRUE is only set when this
- * function is called by ip_rput_dlpi_writer() to handle
- * asynchronous hardware address change notification.
+ * The enumerated value res_act is used to tune the behavior.
+ * If set to Res_act_initial, then we set up all the resolver
+ * structures for a new interface. If set to Res_act_move, then
+ * we just send an AR_ENTRY_ADD message up to ARP for IPv4
+ * interfaces; this is called by ip_rput_dlpi_writer() to handle
+ * asynchronous hardware address change notification. If set to
+ * Res_act_defend, then we tell ARP that it needs to send a single
+ * gratuitous message in defense of the address.
* Returns error on failure.
*/
int
-ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish)
+ipif_resolver_up(ipif_t *ipif, enum ip_resolver_action res_act)
{
caddr_t addr;
mblk_t *arp_up_mp = NULL;
@@ -13116,22 +13224,43 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish)
uchar_t *area_p = NULL;
uchar_t *ared_p = NULL;
int err = ENOMEM;
+ boolean_t was_dup;
ip1dbg(("ipif_resolver_up(%s:%u) flags 0x%x\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id,
- (uint_t)ipif->ipif_flags));
+ ill->ill_name, ipif->ipif_id, (uint_t)ipif->ipif_flags));
ASSERT(IAM_WRITER_IPIF(ipif));
- if ((ill->ill_net_type != IRE_IF_RESOLVER) ||
- (ill->ill_isv6 && !(ill->ill_flags & ILLF_XRESOLV))) {
+ was_dup = B_FALSE;
+ if (res_act == Res_act_initial) {
+ ipif->ipif_addr_ready = 0;
+ /*
+ * We're bringing an interface up here. There's no way that we
+ * should need to shut down ARP now.
+ */
+ mutex_enter(&ill->ill_lock);
+ if (ipif->ipif_flags & IPIF_DUPLICATE) {
+ ipif->ipif_flags &= ~IPIF_DUPLICATE;
+ ill->ill_ipif_dup_count--;
+ was_dup = B_TRUE;
+ }
+ mutex_exit(&ill->ill_lock);
+ }
+ if (ipif->ipif_recovery_id != 0)
+ (void) untimeout(ipif->ipif_recovery_id);
+ ipif->ipif_recovery_id = 0;
+ if (ill->ill_net_type != IRE_IF_RESOLVER) {
+ ipif->ipif_addr_ready = 1;
return (0);
}
+ /* NDP will set the ipif_addr_ready flag when it's ready */
+ if (ill->ill_isv6 && !(ill->ill_flags & ILLF_XRESOLV))
+ return (0);
if (ill->ill_isv6) {
/*
* External resolver for IPv6
*/
- ASSERT(!arp_just_publish);
+ ASSERT(res_act == Res_act_initial);
if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) {
addr = (caddr_t)&ipif->ipif_v6lcl_addr;
area_p = (uchar_t *)&ip6_area_template;
@@ -13149,7 +13278,8 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish)
err = EINVAL;
goto failed;
} else {
- if (ill->ill_ipif_up_count == 0)
+ if (ill->ill_ipif_up_count == 0 &&
+ ill->ill_ipif_dup_count == 0 && !was_dup)
ill->ill_arp_bringup_pending = 1;
mutex_exit(&ill->ill_lock);
}
@@ -13164,17 +13294,19 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish)
* Add an entry for the local address in ARP only if it
* is not UNNUMBERED and the address is not INADDR_ANY.
*/
- if (((ipif->ipif_flags & IPIF_UNNUMBERED) == 0) && area_p != NULL) {
+ if (!(ipif->ipif_flags & IPIF_UNNUMBERED) && area_p != NULL) {
+ area_t *area;
+
/* Now ask ARP to publish our address. */
arp_add_mp = ill_arp_alloc(ill, area_p, addr);
if (arp_add_mp == NULL)
goto failed;
- if (arp_just_publish) {
+ area = (area_t *)arp_add_mp->b_rptr;
+ if (res_act != Res_act_initial) {
/*
* Copy the new hardware address and length into
* arp_add_mp to be sent to ARP.
*/
- area_t *area = (area_t *)arp_add_mp->b_rptr;
area->area_hw_addr_length =
ill->ill_phys_addr_length;
bcopy((char *)ill->ill_phys_addr,
@@ -13182,10 +13314,20 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish)
area->area_hw_addr_length);
}
- ((area_t *)arp_add_mp->b_rptr)->area_flags =
- ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MYADDR;
+ area->area_flags = ACE_F_PERMANENT | ACE_F_PUBLISH |
+ ACE_F_MYADDR;
+
+ if (res_act == Res_act_defend) {
+ area->area_flags |= ACE_F_DEFEND;
+ /*
+ * If we're just defending our address now, then
+ * there's no need to set up ARP multicast mappings.
+ * The publish command is enough.
+ */
+ goto done;
+ }
- if (arp_just_publish)
+ if (res_act != Res_act_initial)
goto arp_setup_multicast;
/*
@@ -13197,15 +13339,17 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish)
goto failed;
} else {
- if (arp_just_publish)
+ if (res_act != Res_act_initial)
goto done;
}
/*
* Need to bring up ARP or setup multicast mapping only
* when the first interface is coming UP.
*/
- if (ill->ill_ipif_up_count != 0)
+ if (ill->ill_ipif_up_count != 0 || ill->ill_ipif_dup_count != 0 ||
+ was_dup) {
goto done;
+ }
/*
* Allocate an ARP down message (to be saved) and an ARP up
@@ -13236,7 +13380,7 @@ arp_setup_multicast:
ASSERT(arp_add_mapping_mp != NULL);
}
-done:;
+done:
if (arp_del_mp != NULL) {
ASSERT(ipif->ipif_arp_del_mp == NULL);
ipif->ipif_arp_del_mp = arp_del_mp;
@@ -13251,41 +13395,48 @@ done:;
}
if (arp_up_mp != NULL) {
ip1dbg(("ipif_resolver_up: ARP_UP for %s:%u\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id));
+ ill->ill_name, ipif->ipif_id));
putnext(ill->ill_rq, arp_up_mp);
}
if (arp_add_mp != NULL) {
ip1dbg(("ipif_resolver_up: ARP_ADD for %s:%u\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id));
+ ill->ill_name, ipif->ipif_id));
+ /*
+ * If it's an extended ARP implementation, then we'll wait to
+ * hear that DAD has finished before using the interface.
+ */
+ if (!ill->ill_arp_extend)
+ ipif->ipif_addr_ready = 1;
putnext(ill->ill_rq, arp_add_mp);
+ } else {
+ ipif->ipif_addr_ready = 1;
}
if (arp_add_mapping_mp != NULL) {
ip1dbg(("ipif_resolver_up: MAPPING_ADD for %s:%u\n",
- ipif->ipif_ill->ill_name, ipif->ipif_id));
+ ill->ill_name, ipif->ipif_id));
putnext(ill->ill_rq, arp_add_mapping_mp);
}
- if (arp_just_publish)
+ if (res_act != Res_act_initial)
return (0);
if (ill->ill_flags & ILLF_NOARP)
err = ill_arp_off(ill);
else
err = ill_arp_on(ill);
- if (err) {
+ if (err != 0) {
ip0dbg(("ipif_resolver_up: arp_on/off failed %d\n", err));
freemsg(ipif->ipif_arp_del_mp);
- if (arp_down_mp != NULL)
- freemsg(ill->ill_arp_down_mp);
- if (ill->ill_arp_del_mapping_mp != NULL)
- freemsg(ill->ill_arp_del_mapping_mp);
+ freemsg(ill->ill_arp_down_mp);
+ freemsg(ill->ill_arp_del_mapping_mp);
ipif->ipif_arp_del_mp = NULL;
ill->ill_arp_down_mp = NULL;
ill->ill_arp_del_mapping_mp = NULL;
return (err);
}
- return (ill->ill_ipif_up_count != 0 ? 0 : EINPROGRESS);
+ return ((ill->ill_ipif_up_count != 0 || was_dup ||
+ ill->ill_ipif_dup_count != 0) ? 0 : EINPROGRESS);
-failed:;
+failed:
ip1dbg(("ipif_resolver_up: FAILED\n"));
freemsg(arp_add_mp);
freemsg(arp_del_mp);
@@ -13297,6 +13448,143 @@ failed:;
}
/*
+ * This routine restarts IPv4 duplicate address detection (DAD) when a link has
+ * just gone back up.
+ *
+ * An ARP add request is built from ip_area_template for the ipif's local
+ * address, flagged ACE_F_UNVERIFIED, and handed to putnext() on ill_rq.
+ * If the ill is not of type IRE_IF_RESOLVER, ill_arp_closing is set, the
+ * ipif is IPIF_UNNUMBERED, the local address is INADDR_ANY, or the request
+ * cannot be allocated, no request is sent; instead the mask reply, routing
+ * socket messages and SCTP update are issued right away and ipif_addr_ready
+ * is set.
+ */
+static void
+ipif_arp_start_dad(ipif_t *ipif)
+{
+ ill_t *ill = ipif->ipif_ill;
+ mblk_t *arp_add_mp;
+ area_t *area;
+
+ /*
+ * The allocation is the last term of the || chain, so it is only
+ * attempted once every earlier test has passed.
+ */
+ if (ill->ill_net_type != IRE_IF_RESOLVER || ill->ill_arp_closing ||
+ (ipif->ipif_flags & IPIF_UNNUMBERED) ||
+ ipif->ipif_lcl_addr == INADDR_ANY ||
+ (arp_add_mp = ill_arp_alloc(ill, (uchar_t *)&ip_area_template,
+ (char *)&ipif->ipif_lcl_addr)) == NULL) {
+ /*
+ * If we can't contact ARP for some reason, that's not really a
+ * problem. Just send out the routing socket notification that
+ * DAD completion would have done, and continue.
+ */
+ ipif_mask_reply(ipif);
+ ip_rts_ifmsg(ipif);
+ ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+ sctp_update_ipif(ipif, SCTP_IPIF_UP);
+ ipif->ipif_addr_ready = 1;
+ return;
+ }
+
+ /* Setting the 'unverified' flag restarts DAD */
+ area = (area_t *)arp_add_mp->b_rptr;
+ area->area_flags = ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MYADDR |
+ ACE_F_UNVERIFIED;
+ putnext(ill->ill_rq, arp_add_mp);
+}
+
+/*
+ * Restart IPv6 duplicate address detection (DAD) for the local address of the
+ * given ipif.  Nothing is done when no NCE exists for that address.
+ */
+static void
+ipif_ndp_start_dad(ipif_t *ipif)
+{
+	nce_t *local_nce;
+	boolean_t dad_running;
+
+	local_nce = ndp_lookup_v6(ipif->ipif_ill, &ipif->ipif_v6lcl_addr,
+	    B_FALSE);
+	if (local_nce != NULL) {
+		dad_running = ndp_restart_dad(local_nce);
+		if (!dad_running) {
+			/*
+			 * DAD could not be restarted.  That is harmless:
+			 * issue the routing socket and SCTP notifications
+			 * that DAD completion would otherwise have produced,
+			 * and mark the address as ready.
+			 */
+			ip_rts_ifmsg(ipif);
+			ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+			sctp_update_ipif(ipif, SCTP_IPIF_UP);
+			ipif->ipif_addr_ready = 1;
+		}
+		/* Drop the reference taken by the lookup. */
+		NCE_REFRELE(local_nce);
+	}
+}
+
+/*
+ * Restart duplicate address detection on all interfaces on the given ill.
+ *
+ * This is called when an interface transitions from down to up
+ * (DL_NOTE_LINK_UP) or up to down (DL_NOTE_LINK_DOWN).
+ *
+ * Note that since the underlying physical link has transitioned, we must cause
+ * at least one routing socket message to be sent here, either via DAD
+ * completion or just by default on the first ipif. (If we don't do this, then
+ * in.mpathd will see long delays when doing link-based failure recovery.)
+ *
+ * ill: the ill whose ipif list is walked; a NULL ill is ignored.
+ * went_up: B_TRUE for a down-to-up transition, B_FALSE for up-to-down.
+ *
+ * When went_up is set, each ipif that is IPIF_UP has DAD restarted through
+ * ipif_ndp_start_dad (IPv6) or ipif_arp_start_dad (IPv4); a down IPv6 ipif
+ * marked IPIF_DUPLICATE is passed to ndp_do_recovery; any other ipif causes
+ * a routing socket message only if it is the first ipif on the ill. When
+ * went_up is clear, ipif_addr_ready is reset on every ipif and one routing
+ * socket message is sent for the first ipif.
+ */
+void
+ill_restart_dad(ill_t *ill, boolean_t went_up)
+{
+ ipif_t *ipif;
+
+ if (ill == NULL)
+ return;
+
+ /*
+ * If layer two doesn't support duplicate address detection, then just
+ * send the routing socket message now and be done with it.
+ */
+ if ((ill->ill_isv6 && (ill->ill_flags & ILLF_XRESOLV)) ||
+ (!ill->ill_isv6 && !ill->ill_arp_extend)) {
+ ip_rts_ifmsg(ill->ill_ipif);
+ return;
+ }
+
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+ if (went_up) {
+ if (ipif->ipif_flags & IPIF_UP) {
+ if (ill->ill_isv6)
+ ipif_ndp_start_dad(ipif);
+ else
+ ipif_arp_start_dad(ipif);
+ } else if (ill->ill_isv6 &&
+ (ipif->ipif_flags & IPIF_DUPLICATE)) {
+ /*
+ * For IPv4, the ARP module itself will
+ * automatically start the DAD process when it
+ * sees DL_NOTE_LINK_UP. We respond to the
+ * AR_CN_READY at the completion of that task.
+ * For IPv6, we must kick off the bring-up
+ * process now.
+ */
+ ndp_do_recovery(ipif);
+ } else {
+ /*
+ * Unfortunately, the first ipif is "special"
+ * and represents the underlying ill in the
+ * routing socket messages. Thus, when this
+ * one ipif is down, we must still notify so
+ * that the user knows the IFF_RUNNING status
+ * change. (If the first ipif is up, then
+ * we'll handle eventual routing socket
+ * notification via DAD completion.)
+ */
+ if (ipif == ill->ill_ipif)
+ ip_rts_ifmsg(ill->ill_ipif);
+ }
+ } else {
+ /*
+ * After link down, we'll need to send a new routing
+ * message when the link comes back, so clear
+ * ipif_addr_ready.
+ */
+ ipif->ipif_addr_ready = 0;
+ }
+ }
+
+ /*
+ * If we've torn down links, then notify the user right away.
+ */
+ if (!went_up)
+ ip_rts_ifmsg(ill->ill_ipif);
+}
+
+/*
* Wakeup all threads waiting to enter the ipsq, and sleeping
* on any of the ills in this ipsq. The ill_lock of the ill
* must be held so that waiters don't miss wakeups
@@ -13716,6 +14004,7 @@ ill_down_ipifs(ill_t *ill, mblk_t *mp, int index, boolean_t chk_nofailover)
if (!ipif->ipif_isv6)
ipif_check_bcast_ires(ipif);
(void) ipif_logical_down(ipif, NULL, NULL);
+ ipif_non_duplicate(ipif);
ipif_down_tail(ipif);
/*
* We don't do ipif_multicast_down for IPv4 in
@@ -16658,7 +16947,7 @@ ipif_move(ipif_t *ipif, ill_t *to_ill, queue_t *q, mblk_t *mp,
* move.
*/
rep_ipif->ipif_flags = ipif->ipif_flags | IPIF_NOFAILOVER;
- rep_ipif->ipif_flags &= ~IPIF_UP;
+ rep_ipif->ipif_flags &= ~IPIF_UP & ~IPIF_DUPLICATE;
rep_ipif->ipif_replace_zero = B_TRUE;
mutex_init(&rep_ipif->ipif_saved_ire_lock, NULL,
MUTEX_DEFAULT, NULL);
@@ -17796,7 +18085,7 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp)
return (EINPROGRESS);
}
-static void
+void
ipif_down_tail(ipif_t *ipif)
{
ill_t *ill = ipif->ipif_ill;
@@ -17809,11 +18098,10 @@ ipif_down_tail(ipif_t *ipif)
* there are other logical units that are up.
* This occurs e.g. when we change a "significant" IFF_ flag.
*/
- if (ipif->ipif_ill->ill_wq != NULL) {
- if (!ill->ill_logical_down && (ill->ill_ipif_up_count == 0) &&
- ill->ill_dl_up) {
- ill_dl_down(ill);
- }
+ if (ill->ill_wq != NULL && !ill->ill_logical_down &&
+ ill->ill_ipif_up_count == 0 && ill->ill_ipif_dup_count == 0 &&
+ ill->ill_dl_up) {
+ ill_dl_down(ill);
}
ill->ill_logical_down = 0;
@@ -17821,7 +18109,7 @@ ipif_down_tail(ipif_t *ipif)
* Have to be after removing the routes in ipif_down_delete_ire.
*/
if (ipif->ipif_isv6) {
- if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV)
+ if (ill->ill_flags & ILLF_XRESOLV)
ipif_arp_down(ipif);
} else {
ipif_arp_down(ipif);
@@ -18048,6 +18336,10 @@ ipif_free(ipif_t *ipif)
{
ASSERT(IAM_WRITER_IPIF(ipif));
+ if (ipif->ipif_recovery_id != 0)
+ (void) untimeout(ipif->ipif_recovery_id);
+ ipif->ipif_recovery_id = 0;
+
/* Remove conn references */
reset_conn_ipif(ipif);
@@ -18127,6 +18419,9 @@ ipif_free_tail(ipif_t *ipif)
rw_exit(&ill_g_lock);
mutex_destroy(&ipif->ipif_saved_ire_lock);
+
+ ASSERT(!(ipif->ipif_flags & (IPIF_UP | IPIF_DUPLICATE)));
+
/* Free the memory. */
mi_free((char *)ipif);
}
@@ -18344,7 +18639,7 @@ ipif_lookup_on_name(char *name, size_t namelen, boolean_t do_alloc,
* but might not make the system manager very popular. (May be called
* as writer.)
*/
-static void
+void
ipif_mask_reply(ipif_t *ipif)
{
icmph_t *icmph;
@@ -18900,13 +19195,14 @@ ipif_up(ipif_t *ipif, queue_t *q, mblk_t *mp)
err = ipif_ndp_up(ipif, &ipif->ipif_v6lcl_addr,
B_FALSE);
if (err != 0) {
- mp = ipsq_pending_mp_get(ipsq, &connp);
+ if (err != EINPROGRESS)
+ mp = ipsq_pending_mp_get(ipsq, &connp);
return (err);
}
}
/* Now, ARP */
- if ((err = ipif_resolver_up(ipif, B_FALSE)) ==
- EINPROGRESS) {
+ err = ipif_resolver_up(ipif, Res_act_initial);
+ if (err == EINPROGRESS) {
/* We will complete it in ip_arp_done */
return (err);
}
@@ -19455,7 +19751,6 @@ ipif_up_done(ipif_t *ipif)
}
-
/* This is the first interface on this ill */
if (ipif->ipif_ipif_up_count == 1 && !loopback) {
/*
@@ -19496,14 +19791,7 @@ ipif_up_done(ipif_t *ipif)
}
}
- /*
- * This had to be deferred until we had bound.
- * tell routing sockets that this interface is up
- */
- ip_rts_ifmsg(ipif);
- ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-
- if (!loopback) {
+ if (!loopback && ipif->ipif_addr_ready) {
/* Broadcast an address mask reply. */
ipif_mask_reply(ipif);
}
@@ -19513,8 +19801,19 @@ ipif_up_done(ipif_t *ipif)
}
if (src_ipif_held)
ipif_refrele(src_ipif);
- /* Let SCTP update the status for this ipif */
- sctp_update_ipif(ipif, SCTP_IPIF_UP);
+
+ /*
+ * This had to be deferred until we had bound. Tell routing sockets and
+ * others that this interface is up if it looks like the address has
+ * been validated. Otherwise, if it isn't ready yet, wait for
+ * duplicate address detection to do its thing.
+ */
+ if (ipif->ipif_addr_ready) {
+ ip_rts_ifmsg(ipif);
+ ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+ /* Let SCTP update the status for this ipif */
+ sctp_update_ipif(ipif, SCTP_IPIF_UP);
+ }
return (0);
bad:
@@ -19919,7 +20218,8 @@ retry:
/* Always skip NOLOCAL and ANYCAST interfaces */
if (ipif->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST))
continue;
- if (!(ipif->ipif_flags & IPIF_UP))
+ if (!(ipif->ipif_flags & IPIF_UP) ||
+ !ipif->ipif_addr_ready)
continue;
if (ipif->ipif_zoneid != zoneid &&
ipif->ipif_zoneid != ALL_ZONES)
@@ -20700,7 +21000,8 @@ ip_sioctl_slifname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
* This ill has not been inserted into the global list.
* So we are still single threaded and don't need any lock
*/
- ipif->ipif_flags = lifr->lifr_flags & IFF_LOGINT_FLAGS;
+ ipif->ipif_flags = lifr->lifr_flags & IFF_LOGINT_FLAGS &
+ ~IFF_DUPLICATE;
ill->ill_flags = lifr->lifr_flags & IFF_PHYINTINST_FLAGS;
ill->ill_phyint->phyint_flags = lifr->lifr_flags & IFF_PHYINT_FLAGS;
diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c
index efdb39b657..2b40b14d08 100644
--- a/usr/src/uts/common/inet/ip/ip_ndp.c
+++ b/usr/src/uts/common/inet/ip/ip_ndp.c
@@ -28,18 +28,23 @@
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
+#include <sys/strsun.h>
#include <sys/sysmacros.h>
#include <sys/errno.h>
#include <sys/dlpi.h>
#include <sys/socket.h>
#include <sys/ddi.h>
+#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/zone.h>
+#include <sys/ethernet.h>
+#include <sys/sdt.h>
#include <net/if.h>
+#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -58,13 +63,22 @@
#include <inet/ip_ndp.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsec_info.h>
+#include <inet/sctp_ip.h>
/*
* Function names with nce_ prefix are static while function
* names with ndp_ prefix are used by rest of the IP.
+ *
+ * Lock ordering:
+ *
+ * ndp_g_lock -> ill_lock -> nce_lock
+ *
+ * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
+ * nce_next. Nce_lock protects the contents of the NCE (particularly
+ * nce_refcnt).
*/
-static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr,
+static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr,
uint32_t ll_addr_len);
static void nce_fastpath(nce_t *nce);
static void nce_ire_delete(nce_t *nce);
@@ -84,7 +98,6 @@ static uint32_t nce_solicit(nce_t *nce, mblk_t *mp);
static boolean_t nce_xmit(ill_t *ill, uint32_t operation,
ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender,
const in6_addr_t *target, int flag);
-static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf);
extern void th_trace_rrecord(th_trace_t *);
static int ndp_lookup_then_add_v6(ill_t *, uchar_t *,
const in6_addr_t *, const in6_addr_t *, const in6_addr_t *,
@@ -131,6 +144,9 @@ ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr,
return (status);
}
+/*
+ * Non-tunable probe interval, based on link capabilities: the shorter value
+ * is used when ill_note_link is set on the ill.
+ * NOTE(review): units are presumably milliseconds, since the value is passed
+ * to NDP_RESTART_TIMER -- confirm against that macro.
+ */
+#define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500)
+
/*
* NDP Cache Entry creation routine.
* Mapped entries will never do NUD .
@@ -148,6 +164,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
mblk_t *mp;
mblk_t *template;
nce_t **ncep;
+ int err;
boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock));
@@ -237,6 +254,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
if (ill->ill_state_flags & ILL_CONDEMNED) {
mutex_exit(&ill->ill_lock);
freeb(mp);
+ freeb(template);
return (EINVAL);
}
if ((nce->nce_next = *ncep) != NULL)
@@ -251,13 +269,23 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
ill->ill_nce_cnt++;
mutex_exit(&ill->ill_lock);
- /*
- * Before we insert the nce, honor the UNSOL_ADV flag.
- * We cannot hold the ndp_g_lock and call nce_xmit
- * which does a putnext.
- */
- if (flags & NCE_F_UNSOL_ADV) {
- flags |= NDP_ORIDE;
+ err = 0;
+ if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) {
+ mutex_enter(&nce->nce_lock);
+ mutex_exit(&ndp6.ndp_g_lock);
+ nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
+ mutex_exit(&nce->nce_lock);
+ dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE,
+ &ipv6_all_zeros, addr, NDP_PROBE);
+ if (dropped) {
+ mutex_enter(&nce->nce_lock);
+ nce->nce_pcnt++;
+ mutex_exit(&nce->nce_lock);
+ }
+ NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
+ mutex_enter(&ndp6.ndp_g_lock);
+ err = EINPROGRESS;
+ } else if (flags & NCE_F_UNSOL_ADV) {
/*
* We account for the transmit below by assigning one
* less than the ndd variable. Subsequent decrements
@@ -273,7 +301,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
B_TRUE, /* use ill_nd_lla */
addr, /* Source and target of the advertisement pkt */
&ipv6_all_hosts_mcast, /* Destination of the packet */
- flags);
+ NDP_ORIDE);
mutex_enter(&nce->nce_lock);
if (dropped)
nce->nce_unsolicit_count++;
@@ -292,7 +320,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
*/
if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER)
nce_fastpath(nce);
- return (0);
+ return (err);
}
int
@@ -609,6 +637,41 @@ nce_ire_delete1(ire_t *ire, char *nce_arg)
}
/*
+ * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
+ *
+ * A NULL nce yields B_FALSE. An NCE already in ND_PROBE is left alone and
+ * reported as started. An NCE in ND_REACHABLE is moved back to ND_PROBE, one
+ * probe solicitation is sent from the unspecified address, and the NCE timer
+ * is restarted with ILL_PROBE_INTERVAL. Any other state yields B_FALSE.
+ */
+boolean_t
+ndp_restart_dad(nce_t *nce)
+{
+ boolean_t started;
+ boolean_t dropped;
+
+ if (nce == NULL)
+ return (B_FALSE);
+ mutex_enter(&nce->nce_lock);
+ if (nce->nce_state == ND_PROBE) {
+ mutex_exit(&nce->nce_lock);
+ started = B_TRUE;
+ } else if (nce->nce_state == ND_REACHABLE) {
+ nce->nce_state = ND_PROBE;
+ /* One less than the maximum: the transmit below is counted. */
+ nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1;
+ /* nce_lock is released before nce_xmit is called. */
+ mutex_exit(&nce->nce_lock);
+ dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL,
+ B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE);
+ if (dropped) {
+ /* The probe was not sent, so give the count back. */
+ mutex_enter(&nce->nce_lock);
+ nce->nce_pcnt++;
+ mutex_exit(&nce->nce_lock);
+ }
+ NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill));
+ started = B_TRUE;
+ } else {
+ mutex_exit(&nce->nce_lock);
+ started = B_FALSE;
+ }
+ return (started);
+}
+
+/*
* IPv6 Cache entry lookup. Try to find an nce matching the parameters passed.
* If one is found, the refcnt on the nce will be incremented.
*/
@@ -804,7 +867,7 @@ ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
}
return;
}
- ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len);
+ ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len);
if (!is_adv) {
/* If this is a SOLICITATION request only */
if (ll_changed)
@@ -1381,11 +1444,16 @@ nce_solicit(nce_t *nce, mblk_t *mp)
if (ipif != NULL)
break;
}
- if (src_ill == NULL) {
- /* May be a forwarding packet */
- src_ill = ill;
+ /*
+ * If no relevant ipif can be found, then it's not one of our
+ * addresses. Reset to :: and let nce_xmit choose a source. If an
+ * ipif can be found, but it's not yet done with DAD verification,
+ * then just postpone this transmission until later.
+ */
+ if (src_ill == NULL)
src = ipv6_all_zeros;
- }
+ else if (!ipif->ipif_addr_ready)
+ return (ill->ill_reachable_retrans_time);
}
dst = nce->nce_addr;
/*
@@ -1394,7 +1462,7 @@ nce_solicit(nce_t *nce, mblk_t *mp)
* appropriately.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&src))
- src_ill = NULL;
+ src_ill = NULL;
nce->nce_rcnt--;
mutex_exit(&nce->nce_lock);
rw_exit(&ill_g_lock);
@@ -1407,8 +1475,350 @@ nce_solicit(nce_t *nce, mblk_t *mp)
return (ill->ill_reachable_retrans_time);
}
+/*
+ * Attempt to recover an address on an interface that's been marked as a
+ * duplicate. Because NCEs are destroyed when the interface goes down, there's
+ * no easy way to just probe the address and have the right thing happen if
+ * it's no longer in use. Instead, we just bring it up normally and allow the
+ * regular interface start-up logic to probe for a remaining duplicate and take
+ * us back down if necessary.
+ * Neither DHCP nor temporary addresses arrive here; they're excluded by
+ * ip_ndp_excl.
+ *
+ * This is the callback that ndp_do_recovery hands to qwriter_ip. The ill is
+ * taken from rq->q_ptr, and mp carries the in6_addr_t to recover at b_rptr.
+ * mp is freed here. ipsq and dummy_arg are unused.
+ */
+/* ARGSUSED */
+static void
+ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
+{
+ ill_t *ill = rq->q_ptr;
+ ipif_t *ipif;
+ in6_addr_t *addr = (in6_addr_t *)mp->b_rptr;
+
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+ /*
+ * We do not support recovery of proxy ARP'd interfaces,
+ * because the system lacks a complete proxy ARP mechanism.
+ * NOTE(review): the test below skips IPIF_POINTOPOINT ipifs
+ * and ipifs whose local address differs from addr.
+ */
+ if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
+ !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) {
+ continue;
+ }
+
+ /*
+ * If we have already recovered, then ignore.
+ */
+ mutex_enter(&ill->ill_lock);
+ if (!(ipif->ipif_flags & IPIF_DUPLICATE)) {
+ mutex_exit(&ill->ill_lock);
+ continue;
+ }
+
+ /* The flag and the per-ill count change together under ill_lock. */
+ ipif->ipif_flags &= ~IPIF_DUPLICATE;
+ ill->ill_ipif_dup_count--;
+ mutex_exit(&ill->ill_lock);
+ /* ip_ndp_excl checks this to avoid warning again on a failed retry. */
+ ipif->ipif_was_dup = B_TRUE;
+
+ /* EINPROGRESS means completion is deferred; otherwise finish now. */
+ if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS)
+ (void) ipif_up_done_v6(ipif);
+ }
+ freeb(mp);
+}
+
+/*
+ * Timer callback that tries to bring back an IPv6 interface which was shut
+ * down because its address was found to be a duplicate.  While another node
+ * still holds the address the interface stays down; once the conflict is
+ * gone it comes back up, so an accidental shutdown is not permanent.
+ *
+ * DHCP-managed and temporary addresses are not recovered by the kernel; the
+ * user space daemons (dhcpagent and in.ndpd) take care of those.
+ *
+ * arg is the ipif; the pending timeout ID lives in ipif_recovery_id and is
+ * cleared on entry.
+ */
+static void
+ipif6_dup_recovery(void *arg)
+{
+	ipif_t *dup_ipif = arg;
+
+	dup_ipif->ipif_recovery_id = 0;
+
+	/*
+	 * Only act when the ipif is still marked duplicate and the link is
+	 * running.  If the link is down, recovery is retried later.
+	 */
+	if ((dup_ipif->ipif_flags & IPIF_DUPLICATE) &&
+	    (dup_ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
+		ndp_do_recovery(dup_ipif);
+}
+
+/*
+ * Perform interface recovery by forcing the duplicate interfaces up and
+ * allowing the system to determine which ones should stay up.
+ *
+ * Called both by recovery timer expiry and link-up notification.
+ *
+ * The ipif's IPv6 local address is copied into a freshly allocated mblk,
+ * which is passed to ip_ndp_recover through qwriter_ip after taking a hold
+ * on the ill. If the mblk cannot be allocated, the recovery timer is armed
+ * again with ip_dup_recovery so that the attempt is repeated later.
+ */
void
-ndp_input_solicit(ill_t *ill, mblk_t *mp)
+ndp_do_recovery(ipif_t *ipif)
+{
+ ill_t *ill = ipif->ipif_ill;
+ mblk_t *mp;
+
+ mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED);
+ if (mp == NULL) {
+ ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
+ ipif, MSEC_TO_TICK(ip_dup_recovery));
+ } else {
+ bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
+ sizeof (ipif->ipif_v6lcl_addr));
+ ill_refhold(ill);
+ (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover,
+ CUR_OP, B_FALSE);
+ }
+}
+
+/*
+ * Find the solicitation in the given message, and extract printable details
+ * (MAC and IP addresses) from it.
+ *
+ * mp:		the IPv6 packet; the ND message is taken to start IPV6_HDR_LEN
+ *		bytes past b_rptr.
+ * dl_mp:	if non-NULL, read as a dl_unitdata_ind_t describing the sender;
+ *		if NULL, the sender's link address is taken from the source
+ *		link-layer address option or, on Ethernet, from the MAC header
+ *		preceding the packet.
+ * ill:		the ill the message was received on.
+ * hbuf/hlen:	receive the sender's link address as text, or "?" if it
+ *		could not be determined.
+ * sbuf/slen:	receive the ND target address as text.
+ * haddr:	set to the raw sender link address, or NULL if unknown.
+ *
+ * Returns a pointer to the ND message inside mp.
+ */
+static nd_neighbor_solicit_t *
+ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf,
+    size_t hlen, char *sbuf, size_t slen, uchar_t **haddr)
+{
+	nd_neighbor_solicit_t *ns;
+	ip6_t *ip6h;
+	uchar_t *addr = NULL;
+	int alen = 0;
+
+	ip6h = (ip6_t *)mp->b_rptr;
+	if (dl_mp == NULL) {
+		nd_opt_hdr_t *opt;
+		int nslen;
+
+		/*
+		 * If it's from the fast-path, then it can't be a probe
+		 * message, and thus must include the source linkaddr option.
+		 * Extract that here.
+		 */
+		ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN);
+		nslen = mp->b_wptr - (uchar_t *)ns;
+		if ((nslen -= sizeof (*ns)) > 0) {
+			opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen,
+			    ND_OPT_SOURCE_LINKADDR);
+			/*
+			 * The option length is in units of 8 bytes and covers
+			 * the option header.  Compare as a sum rather than a
+			 * difference so that a zero-length option cannot wrap
+			 * around in unsigned arithmetic and pass the test.
+			 */
+			if (opt != NULL &&
+			    opt->nd_opt_len * 8 >=
+			    sizeof (*opt) + ill->ill_nd_lla_len) {
+				addr = (uchar_t *)(opt + 1);
+				alen = ill->ill_nd_lla_len;
+			}
+		}
+		/*
+		 * We cheat a bit here for the sake of printing usable log
+		 * messages in the rare case where the reply we got was unicast
+		 * without a source linkaddr option, and the interface is in
+		 * fastpath mode.  (Sigh.)
+		 */
+		if (alen == 0 && ill->ill_type == IFT_ETHER &&
+		    MBLKHEAD(mp) >= sizeof (struct ether_header)) {
+			struct ether_header *pether;
+
+			pether = (struct ether_header *)((char *)ip6h -
+			    sizeof (*pether));
+			addr = pether->ether_shost.ether_addr_octet;
+			alen = ETHERADDRL;
+		}
+	} else {
+		dl_unitdata_ind_t *dlu;
+
+		dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr;
+		alen = dlu->dl_src_addr_length;
+		if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) &&
+		    dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) {
+			addr = dl_mp->b_rptr + dlu->dl_src_addr_offset;
+			/* Strip the SAP, which sits before or after the MAC. */
+			if (ill->ill_sap_length < 0) {
+				alen += ill->ill_sap_length;
+			} else {
+				addr += ill->ill_sap_length;
+				alen -= ill->ill_sap_length;
+			}
+		} else {
+			/*
+			 * The advertised source address does not lie within
+			 * the message.  Treat the sender as unknown so that
+			 * addr is never used without having been set.
+			 */
+			alen = 0;
+		}
+	}
+	if (alen > 0) {
+		*haddr = addr;
+		(void) mac_colon_addr(addr, alen, hbuf, hlen);
+	} else {
+		*haddr = NULL;
+		(void) strlcpy(hbuf, "?", hlen);
+	}
+	ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN);
+	(void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen);
+	return (ns);
+}
+
+/*
+ * This is for exclusive changes due to NDP duplicate address detection
+ * failure.
+ *
+ * This is the callback that ip_ndp_failure hands to qwriter_ip. The ill is
+ * taken from rq->q_ptr. mp is either the packet itself (M_DATA) or a
+ * non-M_DATA block whose b_cont is the packet; both are freed here. Every
+ * ipif on the ill whose local address equals the ND target, and which is
+ * neither IPIF_POINTOPOINT nor already IPIF_DUPLICATE, is marked duplicate
+ * and taken down. ipsq and dummy_arg are unused.
+ */
+/* ARGSUSED */
+static void
+ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
+{
+ ill_t *ill = rq->q_ptr;
+ ipif_t *ipif;
+ char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */
+ char hbuf[MAC_STR_LEN];
+ char sbuf[INET6_ADDRSTRLEN];
+ nd_neighbor_solicit_t *ns;
+ mblk_t *dl_mp = NULL;
+ uchar_t *haddr;
+
+ /* Split off the leading non-data block, if there is one. */
+ if (DB_TYPE(mp) != M_DATA) {
+ dl_mp = mp;
+ mp = mp->b_cont;
+ }
+ ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf,
+ sizeof (sbuf), &haddr);
+ if (haddr != NULL &&
+ bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
+ /*
+ * Ignore conflicts generated by misbehaving switches that just
+ * reflect our own messages back to us.
+ */
+ goto ignore_conflict;
+ }
+ (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf));
+ for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
+
+ if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
+ !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
+ &ns->nd_ns_target)) {
+ continue;
+ }
+
+ /* If it's already marked, then don't do anything. */
+ if (ipif->ipif_flags & IPIF_DUPLICATE)
+ continue;
+
+ /*
+ * If this is a failure during duplicate recovery, then don't
+ * complain. It may take a long time to recover.
+ */
+ if (!ipif->ipif_was_dup) {
+ /* Append ":<id>" to the ill name for non-zero ipifs. */
+ if (ipif->ipif_id != 0) {
+ (void) snprintf(ibuf + ill->ill_name_length - 1,
+ sizeof (ibuf) - ill->ill_name_length + 1,
+ ":%d", ipif->ipif_id);
+ }
+ cmn_err(CE_WARN, "%s has duplicate address %s (in "
+ "use by %s); disabled", ibuf, sbuf, hbuf);
+ }
+ /* The flag and the per-ill count change together under ill_lock. */
+ mutex_enter(&ill->ill_lock);
+ ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
+ ipif->ipif_flags |= IPIF_DUPLICATE;
+ ill->ill_ipif_dup_count++;
+ mutex_exit(&ill->ill_lock);
+ (void) ipif_down(ipif, NULL, NULL);
+ ipif_down_tail(ipif);
+ /*
+ * Arm the kernel recovery timer, except for DHCP-managed and
+ * temporary addresses, non-resolver ills, or when
+ * ip_dup_recovery is not positive.
+ */
+ if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
+ ill->ill_net_type == IRE_IF_RESOLVER &&
+ ip_dup_recovery > 0)
+ ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
+ ipif, MSEC_TO_TICK(ip_dup_recovery));
+ }
+ignore_conflict:
+ /* dl_mp is freed alone with freeb; the packet chain is freed below. */
+ if (dl_mp != NULL)
+ freeb(dl_mp);
+ freemsg(mp);
+}
+
+/*
+ * Handle failure by tearing down the ipifs with the specified address. Note
+ * that tearing down the ipif also means deleting the nce through ipif_down, so
+ * it's not possible to do recovery by just restarting the nce timer. Instead,
+ * we start a timer on the ipif.
+ *
+ * mp and dl_mp (which may be NULL) are copied, never consumed: the copies
+ * are chained together and queued to ip_ndp_excl through qwriter_ip with a
+ * hold taken on the ill. If either copy fails, nothing is queued. The nce
+ * is passed to ndp_delete in every case.
+ */
+static void
+ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
+{
+ if ((mp = copymsg(mp)) != NULL) {
+ /* With no dl_mp, the packet copy alone is what gets queued. */
+ if (dl_mp == NULL)
+ dl_mp = mp;
+ else if ((dl_mp = copyb(dl_mp)) != NULL)
+ dl_mp->b_cont = mp;
+ if (dl_mp == NULL) {
+ /* copyb failed; release the packet copy made above. */
+ freemsg(mp);
+ } else {
+ ill_refhold(ill);
+ (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp,
+ ip_ndp_excl, CUR_OP, B_FALSE);
+ }
+ }
+ ndp_delete(nce);
+}
+
+/*
+ * Handle a discovered conflict: some other system is advertising that it owns
+ * one of our IP addresses. We need to defend ourselves, or just shut down the
+ * interface.
+ *
+ * ill, mp and dl_mp describe the received message; nce is the entry for
+ * the contested address. If ipif_lookup_addr_v6 finds no ipif for the nce's
+ * address on this ill, nothing is done. Otherwise the defense counter in
+ * the nce is updated, and the address is either defended with a Neighbor
+ * Advertisement sent to the all-hosts multicast address or handed to
+ * ip_ndp_failure.
+ */
+static void
+ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
+{
+ ipif_t *ipif;
+ uint32_t now;
+ uint_t maxdefense;
+ uint_t defs;
+
+ ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
+ NULL, NULL);
+ if (ipif == NULL)
+ return;
+ /*
+ * First, figure out if this address is disposable.
+ */
+ if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
+ maxdefense = ip_max_temp_defend;
+ else
+ maxdefense = ip_max_defend;
+
+ /*
+ * Now figure out how many times we've defended ourselves. Ignore
+ * defenses that happened long in the past.
+ */
+ now = gethrestime_sec();
+ mutex_enter(&nce->nce_lock);
+ /* defs keeps the count from before this conflict is recorded. */
+ if ((defs = nce->nce_defense_count) > 0 &&
+ now - nce->nce_defense_time > ip_defend_interval) {
+ nce->nce_defense_count = defs = 0;
+ }
+ nce->nce_defense_count++;
+ nce->nce_defense_time = now;
+ mutex_exit(&nce->nce_lock);
+ /* The ipif was only needed for its flags; drop the lookup's hold. */
+ ipif_refrele(ipif);
+
+ /*
+ * If we've defended ourselves too many times already, then give up and
+ * tear down the interface(s) using this address. Otherwise, defend by
+ * sending out an unsolicited Neighbor Advertisement.
+ */
+ if (defs >= maxdefense) {
+ ip_ndp_failure(ill, mp, dl_mp, nce);
+ } else {
+ char hbuf[MAC_STR_LEN];
+ char sbuf[INET6_ADDRSTRLEN];
+ uchar_t *haddr;
+
+ (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf,
+ sizeof (hbuf), sbuf, sizeof (sbuf), &haddr);
+ cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
+ hbuf, sbuf, ill->ill_name);
+ (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE,
+ &nce->nce_addr, &ipv6_all_hosts_mcast, NDP_ORIDE);
+ }
+}
+
+static void
+ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
nd_neighbor_solicit_t *ns;
uint32_t hlen = ill->ill_nd_lla_len;
@@ -1485,53 +1895,16 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp)
if (opt != NULL) {
opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
if (opt != NULL) {
- /*
- * No source link layer address option should
- * be present in a valid DAD request.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
- ip1dbg(("ndp_input_solicit: source link-layer "
- "address option present with an "
- "unspecified source. \n"));
- bad_solicit = B_TRUE;
- goto done;
- }
haddr = (uchar_t *)&opt[1];
- if (hlen > opt->nd_opt_len * 8 ||
+ if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
hlen == 0) {
+ ip1dbg(("ndp_input_advert: bad SLLA\n"));
bad_solicit = B_TRUE;
goto done;
}
}
}
- /*
- * haddr can be NULL if no options are present,
- * or no Source link layer address is present in,
- * recvd NDP options of solicitation message.
- */
- if (haddr == NULL) {
- nce_t *nnce;
- mutex_enter(&ndp6.ndp_g_lock);
- nnce = *((nce_t **)NCE_HASH_PTR_V6(src));
- nnce = nce_lookup_addr(ill, &src, nnce);
- mutex_exit(&ndp6.ndp_g_lock);
- if (nnce == NULL) {
- in6_addr_t dst = ipv6_solicited_node_mcast;
-
- /* Form solicited node multicast address */
- dst.s6_addr32[3] |= src.s6_addr32[3];
- (void) nce_xmit(ill,
- ND_NEIGHBOR_SOLICIT,
- ill,
- B_TRUE,
- &target,
- &dst,
- flag);
- bad_solicit = B_TRUE;
- goto done;
- }
- }
/* Set override flag, it will be reset later if need be. */
flag |= NDP_ORIDE;
if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
@@ -1544,10 +1917,39 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp)
* the source is unspecified address.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
- int err = 0;
+ int err;
nce_t *nnce;
ASSERT(ill->ill_isv6);
+ /*
+ * Regular solicitations *must* include the Source Link-Layer
+ * Address option. Ignore messages that do not.
+ */
+ if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
+ ip1dbg(("ndp_input_solicit: source link-layer address "
+ "option missing with a specified source.\n"));
+ bad_solicit = B_TRUE;
+ goto done;
+ }
+
+ /*
+ * This is a regular solicitation. If we're still in the
+ * process of verifying the address, then don't respond at all
+ * and don't keep track of the sender.
+ */
+ if (our_nce->nce_state == ND_PROBE)
+ goto done;
+
+ /*
+ * If the solicitation doesn't have sender hardware address
+ * (legal for unicast solicitation), then process without
+ * installing the return NCE. Either we already know it, or
+ * we'll be forced to look it up when (and if) we reply to the
+ * packet.
+ */
+ if (haddr == NULL)
+ goto no_source;
+
err = ndp_lookup_then_add(ill,
haddr,
&src, /* Soliciting nodes address */
@@ -1577,11 +1979,38 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp)
err));
goto done;
}
+no_source:
flag |= NDP_SOLICITED;
} else {
/*
- * This is a DAD req, multicast the advertisement
- * to the all-nodes address.
+ * No source link layer address option should be present in a
+ * valid DAD request.
+ */
+ if (haddr != NULL) {
+ ip1dbg(("ndp_input_solicit: source link-layer address "
+ "option present with an unspecified source.\n"));
+ bad_solicit = B_TRUE;
+ goto done;
+ }
+ if (our_nce->nce_state == ND_PROBE) {
+ /*
+ * Internally looped-back probes won't have DLPI
+ * attached to them. External ones (which are sent by
+ * multicast) always will. Just ignore our own
+ * transmissions.
+ */
+ if (dl_mp != NULL) {
+ /*
+ * If someone else is probing our address, then
+ * we've crossed wires. Declare failure.
+ */
+ ip_ndp_failure(ill, mp, dl_mp, our_nce);
+ }
+ goto done;
+ }
+ /*
+ * This is a DAD probe. Multicast the advertisement to the
+ * all-nodes address.
*/
src = ipv6_all_hosts_mcast;
}
@@ -1605,7 +2034,7 @@ done:
}
void
-ndp_input_advert(ill_t *ill, mblk_t *mp)
+ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
nd_neighbor_advert_t *na;
uint32_t hlen = ill->ill_nd_lla_len;
@@ -1639,6 +2068,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp)
opt = (nd_opt_hdr_t *)&na[1];
if (!ndp_verify_optlen(opt,
len - sizeof (nd_neighbor_advert_t))) {
+ ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
return;
}
@@ -1647,8 +2077,9 @@ ndp_input_advert(ill_t *ill, mblk_t *mp)
opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
if (opt != NULL) {
haddr = (uchar_t *)&opt[1];
- if (hlen > opt->nd_opt_len * 8 ||
+ if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
hlen == 0) {
+ ip1dbg(("ndp_input_advert: bad SLLA\n"));
BUMP_MIB(mib,
ipv6IfIcmpInBadNeighborAdvertisements);
return;
@@ -1676,13 +2107,41 @@ ndp_input_advert(ill_t *ill, mblk_t *mp)
/* We have to drop the lock since ndp_process calls put* */
rw_exit(&ill_g_lock);
if (dst_nce != NULL) {
- if (na->nd_na_flags_reserved &
- ND_NA_FLAG_ROUTER) {
- dst_nce->nce_flags |= NCE_F_ISROUTER;
+ if ((dst_nce->nce_flags & NCE_F_PERMANENT) &&
+ dst_nce->nce_state == ND_PROBE) {
+ /*
+ * Someone else sent an advertisement for an
+ * address that we're trying to configure.
+ * Tear it down. Note that dl_mp might be NULL
+ * if we're getting a unicast reply. This
+ * isn't typically done (multicast is the norm
+ * in response to a probe), but ip_ndp_failure
+ * will handle the dl_mp == NULL case as well.
+ */
+ ip_ndp_failure(ill, mp, dl_mp, dst_nce);
+ } else if (dst_nce->nce_flags & NCE_F_PERMANENT) {
+ /*
+ * Someone just announced one of our local
+ * addresses. If it wasn't us, then this is a
+ * conflict. Defend the address or shut it
+ * down.
+ */
+ if (dl_mp != NULL &&
+ (haddr == NULL ||
+ nce_cmp_ll_addr(dst_nce, haddr,
+ ill->ill_nd_lla_len))) {
+ ip_ndp_conflict(ill, mp, dl_mp,
+ dst_nce);
+ }
+ } else {
+ if (na->nd_na_flags_reserved &
+ ND_NA_FLAG_ROUTER) {
+ dst_nce->nce_flags |= NCE_F_ISROUTER;
+ }
+ /* B_TRUE indicates this an advertisement */
+ ndp_process(dst_nce, haddr,
+ na->nd_na_flags_reserved, B_TRUE);
}
- /* B_TRUE indicates this an advertisement */
- ndp_process(dst_nce, haddr,
- na->nd_na_flags_reserved, B_TRUE);
NCE_REFRELE(dst_nce);
}
rw_enter(&ill_g_lock, RW_READER);
@@ -1696,7 +2155,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp)
* The checksum has already checked o.k before reaching here.
*/
void
-ndp_input(ill_t *ill, mblk_t *mp)
+ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
icmp6_t *icmp_nd;
ip6_t *ip6h;
@@ -1747,9 +2206,9 @@ ndp_input(ill_t *ill, mblk_t *mp)
goto done;
}
if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
- ndp_input_solicit(ill, mp);
+ ndp_input_solicit(ill, mp, dl_mp);
} else {
- ndp_input_advert(ill, mp);
+ ndp_input_advert(ill, mp, dl_mp);
}
done:
freemsg(mp);
@@ -1758,9 +2217,13 @@ done:
/*
* nce_xmit is called to form and transmit a ND solicitation or
* advertisement ICMP packet.
- * If source address is unspecified, appropriate source address
- * and link layer address will be chosen here. This function
- * *always* sends the link layer option.
+ *
+ * If the source address is unspecified and this isn't a probe (used for
+ * duplicate address detection), an appropriate source address and link layer
+ * address will be chosen here. The link layer address option is included if
+ * the source is specified (i.e., all non-probe packets), and omitted (per the
+ * specification) otherwise.
+ *
* It returns B_FALSE only if it does a successful put() to the
* corresponding ill's ill_wq otherwise returns B_TRUE.
*/
@@ -1792,7 +2255,7 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
*/
ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL));
- if (IN6_IS_ADDR_UNSPECIFIED(sender)) {
+ if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
ASSERT(operation != ND_NEIGHBOR_ADVERT);
/*
* Pick a source address for this solicitation, but
@@ -1816,7 +2279,10 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
hwaddr_ill = src_ipif->ipif_ill;
}
- plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8;
+ if (flag & NDP_PROBE)
+ plen = 0;
+ else
+ plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8;
/*
* Always make sure that the NS/NA packets don't get load
* spread. This is needed so that the probe packets sent
@@ -1842,6 +2308,8 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6i->ip6i_nxt = IPPROTO_RAW;
ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT;
+ if (flag & NDP_PROBE)
+ ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;
ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
@@ -1858,7 +2326,8 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
if (operation == ND_NEIGHBOR_SOLICIT) {
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
- opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
+ if (!(flag & NDP_PROBE))
+ opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
ip6h->ip6_src = *sender;
ns->nd_ns_target = *target;
if (!(flag & NDP_UNICAST)) {
@@ -1870,6 +2339,7 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
} else {
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
+ ASSERT(!(flag & NDP_PROBE));
opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
ip6h->ip6_src = *sender;
na->nd_na_target = *sender;
@@ -1881,12 +2351,16 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
}
- /* Fill in link layer address and option len */
- opt->nd_opt_len = (uint8_t)plen;
- mutex_enter(&hwaddr_ill->ill_lock);
- bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr,
- &opt[1], hwaddr_ill->ill_nd_lla_len);
- mutex_exit(&hwaddr_ill->ill_lock);
+
+ if (!(flag & NDP_PROBE)) {
+ /* Fill in link layer address and option len */
+ opt->nd_opt_len = (uint8_t)plen;
+ mutex_enter(&hwaddr_ill->ill_lock);
+ bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla :
+ hwaddr_ill->ill_phys_addr, &opt[1],
+ hwaddr_ill->ill_nd_lla_len);
+ mutex_exit(&hwaddr_ill->ill_lock);
+ }
icmp6->icmp6_type = (uint8_t)operation;
icmp6->icmp6_code = 0;
/*
@@ -1950,30 +2424,6 @@ ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr)
}
/*
- * convert a link level address of arbitrary length
- * to an ascii string.
- * The caller *must* have already verified that the string buffer
- * is large enough to hold the entire string, including trailing NULL.
- */
-static void
-lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf)
-{
- uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */
- int i;
- size_t len;
-
- buf[0] = '\0';
- for (i = 0; i < addrlen; i++) {
- addrbyte[0] = '\0';
- (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff));
- len = strlen((const char *)addrbyte);
- bcopy(addrbyte, buf, len);
- buf = buf + len;
- }
- *--buf = '\0';
-}
-
-/*
* Add a single line to the NDP Cache Entry Report.
*/
static void
@@ -2013,7 +2463,7 @@ nce_report1(nce_t *nce, uchar_t *mp_arg)
if (ill->ill_net_type == IRE_IF_RESOLVER) {
size_t addrlen;
- uchar_t *addr_buf;
+ char *addr_buf;
dl_unitdata_req_t *dl;
mutex_enter(&nce->nce_lock);
@@ -2042,12 +2492,10 @@ nce_report1(nce_t *nce, uchar_t *mp_arg)
mutex_exit(&nce->nce_lock);
return;
}
- if (ill->ill_flags & ILLF_XRESOLV)
- lla2ascii((uint8_t *)h, dl->dl_dest_addr_length,
- addr_buf);
- else
- lla2ascii((uint8_t *)h, ill->ill_nd_lla_len,
- addr_buf);
+ (void) mac_colon_addr((uint8_t *)h,
+ (ill->ill_flags & ILLF_XRESOLV) ?
+ dl->dl_dest_addr_length : ill->ill_nd_lla_len,
+ addr_buf, addrlen);
mutex_exit(&nce->nce_lock);
(void) mi_mpprintf(mp, "%8s %17s %5s %s/%d",
ill->ill_name, addr_buf, (uchar_t *)&flags_buf,
@@ -2152,48 +2600,108 @@ ndp_timer(void *arg)
nce->nce_pcnt--;
ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
nce->nce_pcnt >= -1);
- if (nce->nce_pcnt == 0) {
+ if (nce->nce_pcnt > 0) {
+ /*
+ * As per RFC2461, the nce gets deleted after
+ * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
+ * Note that the first unicast solicitation is sent
+ * during the DELAY state.
+ */
+ ip2dbg(("ndp_timer: pcount=%x dst %s\n",
+ nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
+ addrbuf, sizeof (addrbuf))));
+ mutex_exit(&nce->nce_lock);
+ dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL,
+ B_FALSE, &ipv6_all_zeros, &nce->nce_addr,
+ (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
+ NDP_UNICAST);
+ if (dropped) {
+ mutex_enter(&nce->nce_lock);
+ nce->nce_pcnt++;
+ mutex_exit(&nce->nce_lock);
+ }
+ NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
+ } else if (nce->nce_pcnt < 0) {
+ /* No hope, delete the nce */
+ nce->nce_state = ND_UNREACHABLE;
+ mutex_exit(&nce->nce_lock);
+ if (ip_debug > 2) {
+ /* ip1dbg */
+ pr_addr_dbg("ndp_timer: Delete IRE for"
+ " dst %s\n", AF_INET6, &nce->nce_addr);
+ }
+ ndp_delete(nce);
+ } else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
/* Wait RetransTimer, before deleting the entry */
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
nce->nce_pcnt, inet_ntop(AF_INET6,
&nce->nce_addr, addrbuf, sizeof (addrbuf))));
mutex_exit(&nce->nce_lock);
+ /* Wait one interval before killing */
NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
- } else {
+ } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
+ ipif_t *ipif;
+
/*
- * As per RFC2461, the nce gets deleted after
- * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
- * Note that the first unicast solicitation is sent
- * during the DELAY state.
+ * We're done probing, and we can now declare this
+ * address to be usable. Let IP know that it's ok to
+ * use.
*/
- if (nce->nce_pcnt > 0) {
- ip2dbg(("ndp_timer: pcount=%x dst %s\n",
- nce->nce_pcnt, inet_ntop(AF_INET6,
- &nce->nce_addr,
- addrbuf, sizeof (addrbuf))));
- mutex_exit(&nce->nce_lock);
- dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT,
- NULL, B_FALSE, &ipv6_all_zeros,
- &nce->nce_addr, NDP_UNICAST);
- if (dropped) {
- mutex_enter(&nce->nce_lock);
- nce->nce_pcnt++;
- mutex_exit(&nce->nce_lock);
+ nce->nce_state = ND_REACHABLE;
+ mutex_exit(&nce->nce_lock);
+ ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill,
+ ALL_ZONES, NULL, NULL, NULL, NULL);
+ if (ipif != NULL) {
+ if (ipif->ipif_was_dup) {
+ char ibuf[LIFNAMSIZ + 10];
+ char sbuf[INET6_ADDRSTRLEN];
+
+ ipif->ipif_was_dup = B_FALSE;
+ (void) strlcpy(ibuf, ill->ill_name,
+ sizeof (ibuf));
+ (void) inet_ntop(AF_INET6,
+ &ipif->ipif_v6lcl_addr,
+ sbuf, sizeof (sbuf));
+ if (ipif->ipif_id != 0) {
+ (void) snprintf(ibuf +
+ ill->ill_name_length - 1,
+ sizeof (ibuf) -
+ ill->ill_name_length + 1,
+ ":%d", ipif->ipif_id);
+ }
+ cmn_err(CE_NOTE, "recovered address "
+ "%s on %s", sbuf, ibuf);
}
- NDP_RESTART_TIMER(nce,
- ill->ill_reachable_retrans_time);
- } else {
- /* No hope, delete the nce */
- nce->nce_state = ND_UNREACHABLE;
- mutex_exit(&nce->nce_lock);
- if (ip_debug > 2) {
- /* ip1dbg */
- pr_addr_dbg("ndp_timer: Delete IRE for"
- " dst %s\n", AF_INET6,
- &nce->nce_addr);
+ if ((ipif->ipif_flags & IPIF_UP) &&
+ !ipif->ipif_addr_ready) {
+ ip_rts_ifmsg(ipif);
+ ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+ sctp_update_ipif(ipif, SCTP_IPIF_UP);
}
- ndp_delete(nce);
+ ipif->ipif_addr_ready = 1;
+ ipif_refrele(ipif);
+ }
+ /* Begin defending our new address */
+ nce->nce_unsolicit_count = 0;
+ dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill,
+ B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast,
+ NDP_ORIDE);
+ if (dropped) {
+ nce->nce_unsolicit_count = 1;
+ NDP_RESTART_TIMER(nce,
+ ip_ndp_unsolicit_interval);
+ } else if (ip_ndp_defense_interval != 0) {
+ NDP_RESTART_TIMER(nce, ip_ndp_defense_interval);
}
+ } else {
+ /*
+ * This is an address we're probing to be our own, but
+ * the ill is down. Wait until it comes back before
+ * doing anything, but switch to reachable state so
+ * that the restart will work.
+ */
+ nce->nce_state = ND_REACHABLE;
+ mutex_exit(&nce->nce_lock);
}
NCE_REFRELE(nce);
return;
@@ -2262,9 +2770,12 @@ ndp_timer(void *arg)
break;
case ND_REACHABLE :
rw_exit(&ill_g_lock);
- if (nce->nce_flags & NCE_F_UNSOL_ADV &&
- nce->nce_unsolicit_count != 0) {
- nce->nce_unsolicit_count--;
+ if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
+ nce->nce_unsolicit_count != 0) ||
+ ((nce->nce_flags & NCE_F_PERMANENT) &&
+ ip_ndp_defense_interval != 0)) {
+ if (nce->nce_unsolicit_count > 0)
+ nce->nce_unsolicit_count--;
mutex_exit(&nce->nce_lock);
dropped = nce_xmit(ill,
ND_NEIGHBOR_ADVERT,
@@ -2272,7 +2783,7 @@ ndp_timer(void *arg)
B_FALSE, /* use ill_phys_addr */
&nce->nce_addr,
&ipv6_all_hosts_mcast,
- nce->nce_flags | NDP_ORIDE);
+ NDP_ORIDE);
if (dropped) {
mutex_enter(&nce->nce_lock);
nce->nce_unsolicit_count++;
@@ -2281,6 +2792,9 @@ ndp_timer(void *arg)
if (nce->nce_unsolicit_count != 0) {
NDP_RESTART_TIMER(nce,
ip_ndp_unsolicit_interval);
+ } else {
+ NDP_RESTART_TIMER(nce,
+ ip_ndp_defense_interval);
}
} else {
mutex_exit(&nce->nce_lock);
@@ -2339,7 +2853,7 @@ nce_set_ll(nce_t *nce, uchar_t *ll_addr)
}
static boolean_t
-nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len)
+nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
{
ill_t *ill = nce->nce_ill;
uchar_t *ll_offset;
@@ -2348,7 +2862,7 @@ nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len)
if (ll_addr == NULL)
return (B_FALSE);
ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
- if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0)
+ if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
return (B_TRUE);
return (B_FALSE);
}
@@ -3338,3 +3852,84 @@ nce_reinit(nce_t *nce)
NCE_REFRELE(nce);
return (newnce);
}
+
+/*
+ * ndp_walk routine: delete a REACHABLE entry whose v4-mapped address equals
+ * arg's hwm_addr but whose cached link-layer (MAC) address differs from
+ * hwm_hwaddr.  Used when ARP reports that a mapping may have changed.
+ */
+void
+nce_delete_hw_changed(nce_t *nce, void *arg)
+{
+ nce_hw_map_t *hwm = arg;
+ mblk_t *mp;
+ dl_unitdata_req_t *dlu;
+ uchar_t *macaddr;
+ ill_t *ill;
+ int saplen;
+ ipaddr_t nce_addr;
+
+ if (nce->nce_state != ND_REACHABLE)
+ return;
+
+ IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
+ if (nce_addr != hwm->hwm_addr)
+ return;
+
+ mutex_enter(&nce->nce_lock);
+ if ((mp = nce->nce_res_mp) == NULL) {
+ mutex_exit(&nce->nce_lock);
+ return;
+ }
+ dlu = (dl_unitdata_req_t *)mp->b_rptr;
+ macaddr = (uchar_t *)(dlu + 1);
+ ill = nce->nce_ill;
+ if ((saplen = ill->ill_sap_length) > 0)
+ macaddr += saplen; /* positive: SAP comes first, skip it */
+ else
+ saplen = -saplen; /* negative: SAP follows the address */
+
+ /*
+ * Same length and same bytes means the mapping is unchanged; keep the
+ * entry.  saplen is non-negative here (it was negated above if < 0).
+ */
+ if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
+ bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) {
+ mutex_exit(&nce->nce_lock);
+ return;
+ }
+ mutex_exit(&nce->nce_lock);
+
+ DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
+ ndp_delete(nce);
+}
+
+/*
+ * Returns B_TRUE if the IPv4 NCE table (ndp4) holds a non-condemned host
+ * entry for addr; INADDR_ANY never matches.  When B_TRUE, ARP must keep its
+ * ace_t so that it can continue to look for hardware changes on that address.
+ */
+boolean_t
+ndp_lookup_ipaddr(in_addr_t addr)
+{
+ nce_t *nce;
+ struct in_addr nceaddr;
+
+ if (addr == INADDR_ANY)
+ return (B_FALSE);
+
+ mutex_enter(&ndp4.ndp_g_lock);
+ nce = *(nce_t **)NCE_HASH_PTR_V4(addr);
+ for (; nce != NULL; nce = nce->nce_next) {
+ /* Note that only v4 mapped entries are in the table. */
+ IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
+ if (addr == nceaddr.s_addr &&
+ IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
+ /* Single flag check; nce_lock is not taken for it */
+ if (!(nce->nce_flags & NCE_F_CONDEMNED))
+ break;
+ }
+ }
+ mutex_exit(&ndp4.ndp_g_lock);
+ return (nce != NULL);
+}
diff --git a/usr/src/uts/common/inet/ip/ip_squeue.c b/usr/src/uts/common/inet/ip/ip_squeue.c
index 033b962b02..417b1580eb 100644
--- a/usr/src/uts/common/inet/ip/ip_squeue.c
+++ b/usr/src/uts/common/inet/ip/ip_squeue.c
@@ -110,16 +110,14 @@
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_if.h>
-#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
-#include <sys/ddi.h>
+#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
-
/*
* We allow multiple NICs to bind to the same CPU but want to preserve 1 <-> 1
* mapping between squeue and NIC (or Rx ring) for performance reasons so
diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h
index beae955d27..2cffc239b8 100644
--- a/usr/src/uts/common/inet/ip6.h
+++ b/usr/src/uts/common/inet/ip6.h
@@ -379,7 +379,7 @@ extern void ip_output_v6(void *, mblk_t *, void *, int);
extern void ip_xmit_v6(mblk_t *, ire_t *, uint_t, conn_t *, int,
struct ipsec_out_s *);
extern void ip_rput_data_v6(queue_t *, ill_t *, mblk_t *, ip6_t *,
- uint_t, mblk_t *);
+ uint_t, mblk_t *, mblk_t *);
extern void mld_input(queue_t *, mblk_t *, ill_t *);
extern void mld_joingroup(ilm_t *);
extern void mld_leavegroup(ilm_t *);
diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h
index 00b631b9e1..173930e3ee 100644
--- a/usr/src/uts/common/inet/ip_if.h
+++ b/usr/src/uts/common/inet/ip_if.h
@@ -91,7 +91,7 @@ extern "C" {
#define IFF_LOGINT_FLAGS (IFF_UP|IFF_BROADCAST|IFF_POINTOPOINT| \
IFF_UNNUMBERED|IFF_DHCPRUNNING|IFF_PRIVATE|IFF_NOXMIT|IFF_NOLOCAL| \
IFF_DEPRECATED|IFF_ADDRCONF|IFF_ANYCAST|IFF_MIPRUNNING|IFF_NOFAILOVER| \
- IFF_PREFERRED|IFF_TEMPORARY|IFF_FIXEDMTU)
+ IFF_PREFERRED|IFF_TEMPORARY|IFF_FIXEDMTU|IFF_DUPLICATE)
#define IPIF_REPL_CHECK(to_ipif, failback_cmd) \
(((to_ipif)->ipif_replace_zero) || ((failback_cmd) && \
@@ -138,14 +138,24 @@ extern "C" {
#define IPIF_PREFERRED IFF_PREFERRED /* Prefer as source address */
#define IPIF_TEMPORARY IFF_TEMPORARY /* RFC3041 */
#define IPIF_FIXEDMTU IFF_FIXEDMTU /* set with SIOCSLIFMTU */
+#define IPIF_DUPLICATE IFF_DUPLICATE /* address is in use */
/* Source selection values for ipif_select_source_v6 */
#define RESTRICT_TO_NONE 0x0 /* No restriction in source selection */
#define RESTRICT_TO_GROUP 0x1 /* Restrict to IPMP group */
#define RESTRICT_TO_ILL 0x2 /* Restrict to ILL */
+/* Action selector passed as the second argument to ipif_resolver_up() */
+enum ip_resolver_action {
+ Res_act_initial, /* initial address establishment */
+ Res_act_move, /* address move (IPMP, new DL addr) */
+ Res_act_defend /* address defense */
+};
+
extern ill_t *illgrp_scheduler(ill_t *);
extern mblk_t *ill_arp_alloc(ill_t *, uchar_t *, caddr_t);
+extern mblk_t *ipif_area_alloc(ipif_t *);
+extern mblk_t *ipif_ared_alloc(ipif_t *);
extern void ill_dlpi_done(ill_t *, t_uscalar_t);
extern void ill_dlpi_send(ill_t *, mblk_t *);
extern mblk_t *ill_dlur_gen(uchar_t *, uint_t, t_uscalar_t, t_scalar_t);
@@ -167,6 +177,7 @@ extern time_t ill_frag_timeout(ill_t *, time_t);
extern int ill_init(queue_t *, ill_t *);
extern int ill_nominate_mcast_rcv(ill_group_t *);
extern boolean_t ill_setdefaulttoken(ill_t *);
+extern void ill_restart_dad(ill_t *, boolean_t);
extern void ill_lock_ills(ill_t **, int);
extern mblk_t *ill_pending_mp_get(ill_t *, conn_t **, uint_t);
@@ -216,9 +227,10 @@ extern void ipif_refhold_locked(ipif_t *);
extern void ipif_refrele(ipif_t *);
extern boolean_t ipif_ire_active(ipif_t *);
extern void ipif_all_down_tail(ipsq_t *, queue_t *, mblk_t *, void *);
-extern int ipif_resolver_up(ipif_t *, boolean_t);
+extern int ipif_resolver_up(ipif_t *, enum ip_resolver_action);
extern int ipif_arp_setup_multicast(ipif_t *, mblk_t **);
extern int ipif_down(ipif_t *, queue_t *, mblk_t *);
+extern void ipif_down_tail(ipif_t *);
extern void ipif_multicast_up(ipif_t *);
extern void ipif_ndp_down(ipif_t *);
extern int ipif_ndp_up(ipif_t *, const in6_addr_t *, boolean_t);
@@ -238,6 +250,7 @@ extern ipif_t *ipif_lookup_on_ifindex(uint_t, boolean_t, zoneid_t, queue_t *,
extern ipif_t *ipif_get_next_ipif(ipif_t *curr, ill_t *ill);
extern void ipif_ill_refrele_tail(ill_t *ill);
extern void ipif_arp_down(ipif_t *ipif);
+extern void ipif_mask_reply(ipif_t *);
extern int illgrp_insert(ill_group_t **, ill_t *, char *, ill_group_t *,
boolean_t);
diff --git a/usr/src/uts/common/inet/ip_ndp.h b/usr/src/uts/common/inet/ip_ndp.h
index 05edcd3225..4d0dbd5428 100644
--- a/usr/src/uts/common/inet/ip_ndp.h
+++ b/usr/src/uts/common/inet/ip_ndp.h
@@ -28,6 +28,12 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/mutex.h>
+#include <sys/stream.h>
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+#include <inet/ip.h>
+
/*
* Internal definitions for the kernel implementation of the IPv6
* Neighbor Discovery Protocol (NDP).
@@ -69,6 +75,8 @@ typedef struct nce_s {
struct nce_s *nce_fastpath; /* for fastpath list */
timeout_id_t nce_timeout_id;
uchar_t nce_ipversion; /* IPv4(ARP)/IPv6(NDP) version */
+ uint_t nce_defense_count; /* number of NDP conflicts */
+ uint_t nce_defense_time; /* last time defended (secs) */
#ifdef NCE_DEBUG
th_trace_t *nce_trace[IP_TR_HASH_MAX];
boolean_t nce_trace_disable; /* True when alloc fails */
@@ -135,6 +143,7 @@ extern ndp_g_t ndp4, ndp6;
#define NDP_ISROUTER 0x2
#define NDP_SOLICITED 0x4
#define NDP_ORIDE 0x8
+#define NDP_PROBE 0x10
/* Number of packets queued in NDP for a neighbor */
#define ND_MAX_Q 4
@@ -219,6 +228,17 @@ typedef struct {
int ncr_host; /* Fraction for host entries */
} nce_cache_reclaim_t;
+/*
+ * Argument for nce_delete_hw_changed; specifies an IPv4 address to link-layer
+ * address mapping.  An nce_t in ND_REACHABLE state whose cached link-layer
+ * address for that IPv4 address differs from this mapping is deleted.
+ */
+typedef struct {
+ ipaddr_t hwm_addr; /* IPv4 address */
+ uint_t hwm_hwlen; /* Length of hardware address (may be 0) */
+ uchar_t *hwm_hwaddr; /* Pointer to new hardware address, if any */
+} nce_hw_map_t;
+
/* When SAP is greater than zero address appears before SAP */
#define NCE_LL_ADDR_OFFSET(ill) (((ill)->ill_sap_length) < 0 ? \
(sizeof (dl_unitdata_req_t)) : \
@@ -276,7 +296,8 @@ extern void ndp_fastpath_flush(nce_t *, char *);
extern boolean_t ndp_fastpath_update(nce_t *, void *);
extern nd_opt_hdr_t *ndp_get_option(nd_opt_hdr_t *, int, int);
extern void ndp_inactive(nce_t *);
-extern void ndp_input(ill_t *, mblk_t *);
+extern void ndp_input(ill_t *, mblk_t *, mblk_t *);
+extern boolean_t ndp_lookup_ipaddr(in_addr_t);
extern nce_t *ndp_lookup_v6(ill_t *, const in6_addr_t *, boolean_t);
extern nce_t *ndp_lookup_v4(ill_t *, const in_addr_t *, boolean_t);
extern int ndp_lookup_then_add(ill_t *, uchar_t *, const void *,
@@ -298,6 +319,8 @@ extern void ndp_walk_common(ndp_g_t *, ill_t *, pfi_t,
extern int ndp_add(ill_t *, uchar_t *, const void *,
const void *, const void *,
uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *);
+extern boolean_t ndp_restart_dad(nce_t *);
+extern void ndp_do_recovery(ipif_t *);
extern void nce_resolv_failed(nce_t *);
extern void arp_resolv_failed(nce_t *);
extern void nce_fastpath_list_add(nce_t *);
@@ -307,6 +330,7 @@ extern void nce_fastpath_list_dispatch(ill_t *,
extern void nce_queue_mp_common(nce_t *, mblk_t *, boolean_t);
extern void ndp_flush_qd_mp(nce_t *);
extern nce_t *nce_reinit(nce_t *);
+extern void nce_delete_hw_changed(nce_t *, void *);
#ifdef NCE_DEBUG
extern void nce_trace_inactive(nce_t *);
diff --git a/usr/src/uts/common/net/if.h b/usr/src/uts/common/net/if.h
index 8351c9b33a..f2be9114c5 100644
--- a/usr/src/uts/common/net/if.h
+++ b/usr/src/uts/common/net/if.h
@@ -163,6 +163,7 @@ struct ifnet {
#define IFF_FIXEDMTU 0x1000000000ll /* MTU manually set with SIOCSLIFMTU */
#define IFF_VIRTUAL 0x2000000000ll /* Does not send or receive packets */
+#define IFF_DUPLICATE 0x4000000000ll /* Local address already in use */
/*
* The IFF_MULTICAST flag indicates that the network can support the
@@ -177,7 +178,7 @@ struct ifnet {
(IFF_BROADCAST | IFF_POINTOPOINT | IFF_RUNNING | IFF_PROMISC | \
IFF_MULTICAST | IFF_MULTI_BCAST | IFF_UNNUMBERED | IFF_IPV4 | \
IFF_IPV6 | IFF_INACTIVE | IFF_FIXEDMTU | IFF_VIRTUAL | \
- IFF_LOOPBACK | IFF_ALLMULTI)
+ IFF_LOOPBACK | IFF_ALLMULTI | IFF_DUPLICATE)
/*
* Output queues (ifp->if_snd) and internetwork datagram level (pup level 1)
diff --git a/usr/src/uts/common/net/if_arp.h b/usr/src/uts/common/net/if_arp.h
index 7df505c710..9103b1d0b5 100644
--- a/usr/src/uts/common/net/if_arp.h
+++ b/usr/src/uts/common/net/if_arp.h
@@ -1,5 +1,5 @@
/*
- * Copyright 1997-2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -15,6 +15,9 @@
#pragma ident "%Z%%M% %I% %E% SMI"
/* if_arp.h 1.5 88/08/19 SMI; from UCB 7.1 1/24/86 */
+#include <sys/types.h>
+#include <sys/socket.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -32,7 +35,8 @@ extern "C" {
struct arphdr {
ushort_t ar_hrd; /* format of hardware address */
#define ARPHRD_ETHER 1 /* ethernet hardware address */
-#define ARPHRD_IB 32 /* IPoIB hardware address */
+#define ARPHRD_IEEE802 6 /* IEEE 802 hardware address */
+#define ARPHRD_IB 32 /* IPoIB hardware address */
ushort_t ar_pro; /* format of protocol address */
uchar_t ar_hln; /* length of hardware address */
uchar_t ar_pln; /* length of protocol address */
@@ -55,6 +59,9 @@ struct arphdr {
#endif /* notdef */
};
+/* Maximum hardware and protocol address length */
+#define ARP_MAX_ADDR_LEN 255
+
/*
* Extended ARP ioctl request
*/
@@ -72,12 +79,13 @@ struct arpreq {
struct sockaddr arp_ha; /* hardware address */
int arp_flags; /* flags */
};
-/* arp_flags and at_flags field values */
+/* arp_flags field values */
#define ATF_INUSE 0x01 /* entry in use */
#define ATF_COM 0x02 /* completed entry (enaddr valid) */
#define ATF_PERM 0x04 /* permanent entry */
#define ATF_PUBL 0x08 /* publish entry (respond for other host) */
#define ATF_USETRAILERS 0x10 /* has requested trailers */
+#define ATF_AUTHORITY 0x20 /* hardware address is authoritative */
/*
* This data structure is used by kernel protocol modules to register
diff --git a/usr/src/uts/common/netinet/arp.h b/usr/src/uts/common/netinet/arp.h
index 523f111c00..a3bf0e7761 100644
--- a/usr/src/uts/common/netinet/arp.h
+++ b/usr/src/uts/common/netinet/arp.h
@@ -1,5 +1,5 @@
/*
- * Copyright 1986-2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -14,6 +14,10 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/types.h>
+#include <sys/ethernet.h>
+#include <sys/socket.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -31,6 +35,7 @@ extern "C" {
struct arphdr {
ushort_t ar_hrd; /* format of hardware address */
#define ARPHRD_ETHER 1 /* ethernet hardware address */
+#define ARPHRD_IEEE802 6 /* IEEE 802 hardware address */
#define ARPHRD_IB 32 /* IPoIB hardware address */
ushort_t ar_pro; /* format of protocol address */
uchar_t ar_hln; /* length of hardware address */
@@ -54,6 +59,9 @@ struct arphdr {
#endif /* notdef */
};
+/* Maximum hardware and protocol address length */
+#define ARP_MAX_ADDR_LEN 255
+
/*
* Ethernet Address Resolution Protocol.
*
@@ -82,12 +90,13 @@ struct arpreq {
struct sockaddr arp_ha; /* hardware address */
int arp_flags; /* flags */
};
-/* arp_flags and at_flags field values */
+/* arp_flags field values */
#define ATF_INUSE 0x01 /* entry in use */
#define ATF_COM 0x02 /* completed entry (enaddr valid) */
#define ATF_PERM 0x04 /* permanent entry */
#define ATF_PUBL 0x08 /* publish entry (respond for other host) */
#define ATF_USETRAILERS 0x10 /* has requested trailers */
+#define ATF_AUTHORITY 0x20 /* hardware address is authoritative */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/os/subr.c b/usr/src/uts/common/os/subr.c
index 9c9942ec8c..4753f1152a 100644
--- a/usr/src/uts/common/os/subr.c
+++ b/usr/src/uts/common/os/subr.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -218,25 +217,6 @@ umax(uint_t a, uint_t b)
#endif /* !_LP64 */
/*
- * Return bit position of least significant bit set in mask,
- * starting numbering from 1.
- */
-int
-ffs(long mask)
-{
- int i;
-
- if (mask == 0)
- return (0);
- for (i = 1; i <= NBBY * sizeof (mask); i++) {
- if (mask & 1)
- return (i);
- mask >>= 1;
- }
- return (0);
-}
-
-/*
* Parse suboptions from a string.
* Same as getsubopt(3C).
*/
diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c
index 1709760d74..f16ae44426 100644
--- a/usr/src/uts/common/os/sunddi.c
+++ b/usr/src/uts/common/os/sunddi.c
@@ -5955,7 +5955,6 @@ ddi_in_panic()
int
ddi_ffs(long mask)
{
- extern int ffs(long mask);
return (ffs(mask));
}
@@ -5970,8 +5969,6 @@ ddi_ffs(long mask)
int
ddi_fls(long mask)
{
- extern int ffs(long);
-
while (mask) {
long nx;
diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h
index 9c34c3f895..c96ea5b4ac 100644
--- a/usr/src/uts/common/sys/systm.h
+++ b/usr/src/uts/common/sys/systm.h
@@ -230,7 +230,7 @@ int strident_valid(const char *);
void strident_canon(char *, size_t);
int getsubopt(char **optionsp, char * const *tokens, char **valuep);
char *append_subopt(const char *, size_t, char *, const char *);
-int ffs(long);
+int ffs(uintmax_t);
int copyin(const void *, void *, size_t);
void copyin_noerr(const void *, void *, size_t);
int xcopyin(const void *, void *, size_t);