diff options
Diffstat (limited to 'usr/src')
78 files changed, 4750 insertions, 4614 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index 7d7073b2aa..857b0e0638 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -72,6 +72,7 @@ COMMON_SUBDIRS = \ cmd/cmd-inet/usr.lib/dsvclockd \ cmd/cmd-inet/usr.lib/in.dhcpd \ cmd/cmd-inet/usr.lib/in.mpathd \ + cmd/cmd-inet/usr.lib/in.ndpd \ cmd/cmd-inet/usr.lib/inetd \ cmd/cmd-inet/usr.lib/mipagent \ cmd/cmd-inet/usr.lib/pppoe \ diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile b/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile index 8acf78f5a0..af6d01ccc7 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -30,7 +29,7 @@ PROG = dhcpagent ROOTFS_PROG = $(PROG) DEFAULTFILES = dhcpagent.dfl -LOCOBJS = adopt.o agent.o arp_check.o async.o bound.o class_id.o defaults.o \ +LOCOBJS = adopt.o agent.o async.o bound.o class_id.o defaults.o \ dlpi_io.o dlprims.o inform.o init_reboot.o interface.o ipc_action.o \ packet.o release.o renew.o request.o script_handler.o select.o util.o COMDIR = $(SRC)/common/net/dhcp diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c index ffd4d14f59..28b5d33004 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.c @@ -27,7 +27,6 @@ #include <sys/types.h> #include <stdlib.h> -#include <assert.h> #include <errno.h> #include <locale.h> #include <string.h> @@ -36,10 +35,11 @@ #include <stdio.h> #include <stdio_ext.h> #include <dhcp_hostconf.h> -#include <dhcp_symbol.h> #include <dhcpagent_ipc.h> #include <dhcpmsg.h> #include <netinet/dhcp.h> +#include <net/route.h> +#include <sys/sockio.h> #include "async.h" #include "agent.h" @@ -59,11 +59,12 @@ char *class_id; iu_eh_t *eh; iu_tq_t *tq; pid_t grandparent; +int rtsock_fd; static boolean_t shutdown_started = B_FALSE; static boolean_t do_adopt = B_FALSE; static unsigned int debug_level = 0; -static iu_eh_callback_t accept_event, ipc_event; +static iu_eh_callback_t accept_event, ipc_event, rtsock_event; /* * The ipc_cmd_allowed[] table indicates which IPC commands are allowed in @@ -89,6 +90,7 @@ static int ipc_cmd_allowed[DHCP_NSTATES][DHCP_NIPC] = { /* INIT */ { 1, 0, 1, 0, 1, 1, 1, 0 }, /* SELECTING */ { 1, 0, 1, 0, 1, 1, 0, 0 }, /* REQUESTING */ { 1, 0, 1, 0, 1, 1, 0, 0 }, + /* PRE_BOUND */ { 1, 1, 1, 1, 0, 1, 0, 1 }, /* BOUND */ { 1, 1, 1, 1, 0, 1, 0, 1 }, /* RENEWING */ { 1, 1, 1, 1, 0, 1, 0, 1 }, /* REBINDING */ { 1, 1, 1, 1, 0, 1, 0, 1 }, @@ -245,6 +247,22 @@ main(int argc, char **argv) } /* + * Create the global routing socket. 
This is used for monitoring + * interface transitions, so that we learn about the kernel's Duplicate + * Address Detection status, and for inserting and removing default + * routes as learned from DHCP servers. + */ + rtsock_fd = socket(PF_ROUTE, SOCK_RAW, AF_INET); + if (rtsock_fd == -1) { + dhcpmsg(MSG_ERR, "cannot open routing socket"); + return (EXIT_FAILURE); + } + if (iu_register_event(eh, rtsock_fd, POLLIN, rtsock_event, 0) == -1) { + dhcpmsg(MSG_ERR, "cannot register routing socket for messages"); + return (EXIT_FAILURE); + } + + /* * if the -a (adopt) option was specified, try to adopt the * kernel-managed interface before we start. Our grandparent * will be waiting for us to finish this, so signal him when @@ -844,3 +862,164 @@ load_option: return; } } + +/* + * check_rtm_addr(): determine if routing socket message matches interface + * address + * + * input: struct ifa_msghdr *: pointer to routing socket message + * int: length of the routing socket message, in bytes + * struct in_addr: IP address + * output: boolean_t + */ +static boolean_t +check_rtm_addr(struct ifa_msghdr *ifam, int msglen, struct in_addr addr) +{ + char *cp, *lim; + uint_t flag; + struct sockaddr *sa; + struct sockaddr_in *sinp; + + if (!(ifam->ifam_addrs & RTA_IFA)) + return (B_FALSE); + + cp = (char *)(ifam + 1); + lim = (char *)ifam + msglen; + for (flag = 1; flag < RTA_IFA; flag <<= 1) { + if (ifam->ifam_addrs & flag) { + /* LINTED: alignment */ + sa = (struct sockaddr *)cp; + if ((char *)(sa + 1) > lim) + return (B_FALSE); + switch (sa->sa_family) { + case AF_UNIX: + cp += sizeof (struct sockaddr_un); + break; + case AF_INET: + cp += sizeof (struct sockaddr_in); + break; + case AF_LINK: + cp += sizeof (struct sockaddr_dl); + break; + case AF_INET6: + cp += sizeof (struct sockaddr_in6); + break; + default: + cp += sizeof (struct sockaddr); + break; + } + } + } + /* LINTED: alignment */ + sinp = (struct sockaddr_in *)cp; + if ((char *)(sinp + 1) > lim) + return (B_FALSE); + return (sinp->sin_addr.s_addr == addr.s_addr); +} + +/* + * 
rtsock_event(): fetches routing socket messages and updates internal + * interface state based on those messages. + * + * input: iu_eh_t *: unused + * int: the routing socket file descriptor + * (other arguments unused) + * output: void + */ + +/* ARGSUSED */ +static void +rtsock_event(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg) +{ + struct ifslist *ifs; + union { + struct ifa_msghdr ifam; + char buf[1024]; + } msg; + uint16_t ifindex; + struct lifreq lifr; + char *fail; + int msglen; + + if ((msglen = read(fd, &msg, sizeof (msg))) <= 0) + return; + + /* + * These are the messages that can identify a particular logical + * interface by local IP address. + */ + if (msg.ifam.ifam_type != RTM_DELADDR && + msg.ifam.ifam_type != RTM_NEWADDR) + return; + + /* Note that ifam_index is just 16 bits */ + ifindex = msg.ifam.ifam_index; + + for (ifs = lookup_ifs_by_uindex(ifindex, NULL); + ifs != NULL; + ifs = lookup_ifs_by_uindex(ifindex, ifs)) { + + /* + * The if_sock_ip_fd is set to a non-negative integer by + * configure_bound(). If it's negative, then DHCP doesn't + * think we're bound. + * + * For pre-bound interfaces, we want to check to see if the + * IFF_UP bit has been reported. This means that DAD is + * complete. + */ + if (ifs->if_sock_ip_fd == -1 && ifs->if_state != PRE_BOUND) + continue; + + /* + * Since we cannot trust the flags reported by the routing + * socket (they're just 32 bits -- and thus never include + * IFF_DUPLICATE), and we can't trust the ifindex (it's only 16 + * bits and also doesn't reflect the alias in use), we get + * flags on all matching interfaces, and go by that. + */ + (void) strlcpy(lifr.lifr_name, ifs->if_name, + sizeof (lifr.lifr_name)); + if (ioctl(ifs->if_sock_fd, SIOCGLIFFLAGS, &lifr) == -1) { + fail = "unable to retrieve interface flags"; + } else if (!check_rtm_addr(&msg.ifam, msglen, ifs->if_addr)) { + /* + * If the message is not about this logical interface, + * then just ignore it. 
+ */ + continue; + } else if (lifr.lifr_flags & IFF_DUPLICATE) { + fail = "interface has duplicate address"; + } else { + /* + * If we're now up and we were waiting for that, then + * kick off this interface. DAD is done. + */ + if ((lifr.lifr_flags & IFF_UP) && + ifs->if_state == PRE_BOUND) + dhcp_bound_complete(ifs); + + continue; + } + + if (ifs->if_sock_ip_fd != -1) { + (void) close(ifs->if_sock_ip_fd); + ifs->if_sock_ip_fd = -1; + } + dhcpmsg(MSG_ERROR, fail); + + /* + * The binding has evidently failed, so it's as though it never + * happened. We need to switch back to PRE_BOUND state so + * that send_pkt_internal() uses DLPI instead of sockets. Our + * logical interface has already been torn down by the kernel, + * and thus we can't send DHCPDECLINE by way of regular IP. + */ + ifs->if_state = PRE_BOUND; + + if (ifs->if_ack->opts[CD_DHCP_TYPE] != NULL) + send_decline(ifs, fail, &ifs->if_addr); + + ifs->if_bad_offers++; + dhcp_restart(ifs); + } +} diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h index 281f376c96..b9bafe59bb 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/agent.h @@ -50,7 +50,7 @@ extern "C" { * timer id of the global inactivity timer, which shuts down the agent * if there are no interfaces to manage for DHCP_INACTIVITY_WAIT * seconds. `grandparent' is the pid of the original process when in - * adopt mode. + * adopt mode. `rtsock_fd' is the global routing socket file descriptor. 
*/ extern iu_tq_t *tq; @@ -59,6 +59,7 @@ extern char *class_id; extern int class_id_len; extern iu_timer_id_t inactivity_id; extern pid_t grandparent; +extern int rtsock_fd; boolean_t drain_script(iu_eh_t *, void *); diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c deleted file mode 100644 index f4925468d8..0000000000 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1999 by Sun Microsystems, Inc. - * All rights reserved. 
- */ - -#pragma ident "%W% %E% SMI" - -#include <sys/types.h> -#include <sys/socket.h> -#include <net/if.h> -#include <poll.h> -#include <netinet/in.h> -#include <netinet/if_ether.h> -#include <net/if_arp.h> -#include <sys/dlpi.h> -#include <stddef.h> -#include <string.h> -#include <unistd.h> -#include <stdlib.h> -#include <sys/pfmod.h> -#include <dhcpmsg.h> -#include <stddef.h> - -#include "defaults.h" -#include "util.h" -#include "interface.h" -#include "dlpi_io.h" -#include "arp_check.h" - -/* - * the struct arp_info is used by arp_reply_filter() to build a filter - * that only receives replies from the ARPed IP address. - */ - -struct arp_info { - - uchar_t send_addr_offset; /* from start of ARP frame */ - in_addr_t send_addr; /* arped IP address */ -}; - -/* - * arp_reply_filter(): builds a filter that permits ARP replies to our request - * - * input: ushort_t *: a place to store the packet filter code - * void *: a struct arp_info containing the requested IP address - * output: ushort_t *: two bytes past the last byte of the filter - */ - -static ushort_t * -arp_reply_filter(ushort_t *pfp, void *arg) -{ - struct arp_info *ai = (struct arp_info *)arg; - - *pfp++ = ENF_PUSHWORD + (offsetof(struct arphdr, ar_op) / 2); - *pfp++ = ENF_PUSHLIT | ENF_EQ; - *pfp++ = htons(ARPOP_REPLY); - - /* - * make sure this ARP reply is from the target IP address, - * which will be the "sender" IP address in the reply (even in - * the case of proxy ARP). the position of sender IP address - * depends on the link layer; so we can be link-layer - * independent, these values are calculated in arp_check(). - * - * the byteorder issues here are *really* subtle. suppose - * that the network address is 0x11223344 (as stored in the - * packet read off the wire) by an intel machine. then notice - * that since the packet filter operates 16 bits at a time - * that the high-order word will load as 0x2211 and the - * low-order word will load as 0x4433. 
so send_addr has the - * register value 0x44332211 on intel since that will store to - * the network address 0x11223344 in memory. thus, to compare - * the low-order word, we must first ntohl() send_addr, which - * changes its register-value to 0x11223344, and then mask - * off the high-order bits, getting 0x3344, and then convert - * that to network order, getting 0x4433, which is what we - * want. the same logic applies to the high-order word. you - * are not expected to understand this. - */ - - *pfp++ = ENF_PUSHWORD + (ai->send_addr_offset / 2) + 1; - *pfp++ = ENF_PUSHLIT | ENF_EQ; - *pfp++ = htons(ntohl(ai->send_addr) & 0xffff); - *pfp++ = ENF_AND; - - *pfp++ = ENF_PUSHWORD + (ai->send_addr_offset / 2); - *pfp++ = ENF_PUSHLIT | ENF_EQ; - *pfp++ = htons(ntohl(ai->send_addr) >> 16); - *pfp++ = ENF_AND; - - return (pfp); -} - -/* - * arp_check(): checks to see if a given IP address is already in use - * - * input: struct ifslist *: the interface to send the ARP request on - * in_addr_t: the IP address to send from, network order - * in_addr_t: the IP address to check on, network order - * uchar_t *: a scratch buffer that holds the hardware address - * of the machine that replied to our ARP request, - * if there was one. - * uint32_t: the length of the buffer - * uint32_t: how long to wait for an ARP reply, in milliseconds - * output: int: 1 if the IP address is in use, 0 if not in use. 
- */ - -int -arp_check(struct ifslist *ifsp, in_addr_t send_addr, in_addr_t target_addr, - uchar_t *target_hwaddr, uint32_t target_hwlen, uint32_t timeout_msec) -{ - uint32_t buf[DLPI_BUF_MAX / sizeof (uint32_t)]; - dl_info_ack_t *dlia = (dl_info_ack_t *)buf; - int fd; - struct arphdr *arp_pkt = NULL; - uchar_t *arp_daddr = NULL; - caddr_t arp_payload; - uchar_t arp_dlen; - size_t offset; - struct pollfd pollfd; - int retval; - struct arp_info ai; - unsigned int arp_pkt_len; - - fd = dlpi_open(ifsp->if_name, dlia, sizeof (buf), ETHERTYPE_ARP); - if (fd == -1) - goto failure; - - /* - * the packet consists of an ARP header, two IP addresses - * and two hardware addresses (each ifsp->if_hwlen bytes long). - */ - - arp_pkt_len = sizeof (struct arphdr) + (sizeof (ipaddr_t) * 2) + - (ifsp->if_hwlen * 2); - - arp_pkt = malloc(arp_pkt_len); - arp_daddr = build_broadcast_dest(dlia, &arp_dlen); - if (arp_pkt == NULL || arp_daddr == NULL) - goto failure; - - (void) memset(arp_pkt, 0xff, arp_pkt_len); - - arp_pkt->ar_hrd = htons(ifsp->if_hwtype); - arp_pkt->ar_pro = htons(ETHERTYPE_IP); - arp_pkt->ar_hln = ifsp->if_hwlen; - arp_pkt->ar_pln = sizeof (ipaddr_t); - arp_pkt->ar_op = htons(ARPOP_REQUEST); - - arp_payload = (caddr_t)&arp_pkt[1]; - (void) memcpy(arp_payload, ifsp->if_hwaddr, ifsp->if_hwlen); - offset = ifsp->if_hwlen; - - /* - * while we're at the appropriate offset for sender IP address, - * store it for use by the packet filter. - */ - - ai.send_addr = target_addr; - ai.send_addr_offset = offset + sizeof (struct arphdr); - - (void) memcpy(&arp_payload[offset], &send_addr, sizeof (ipaddr_t)); - offset += ifsp->if_hwlen + sizeof (ipaddr_t); - (void) memcpy(&arp_payload[offset], &target_addr, sizeof (ipaddr_t)); - - /* - * install the packet filter, send our ARP request, and wait - * for a reply. waiting usually isn't a good idea since the - * design of the agent is nonblocking. however, we can - * tolerate short waits (< 5 seconds). 
- */ - - set_packet_filter(fd, arp_reply_filter, &ai, "ARP reply"); - - if (dlpi_send_link(fd, arp_pkt, arp_pkt_len, arp_daddr, arp_dlen) == -1) - goto failure; - - pollfd.fd = fd; - pollfd.events = POLLIN; - - retval = poll(&pollfd, 1, timeout_msec); - if (retval > 0 && target_hwaddr != NULL) { - - /* - * try to grab the hardware address. if we fail, we'll - * just end up with some misleading diagnostics. the - * hardware address is at the start of the payload. - */ - - if (dlpi_recv_link(fd, arp_pkt, arp_pkt_len, DLPI_RECV_SHORT) == - arp_pkt_len) - (void) memcpy(target_hwaddr, arp_payload, target_hwlen); - } - - free(arp_daddr); - free(arp_pkt); - (void) close(fd); - return ((retval == 0) ? 0 : 1); - -failure: - free(arp_daddr); - free(arp_pkt); - (void) close(fd); - - if (df_get_bool(ifsp->if_name, DF_IGNORE_FAILED_ARP)) { - dhcpmsg(MSG_WARNING, "arp_check: cannot send ARP request: " - "assuming address is available"); - return (0); - } - - dhcpmsg(MSG_WARNING, "arp_check: cannot send ARP request: " - "assuming address is unavailable"); - return (1); -} diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h deleted file mode 100644 index c3fff1ba0c..0000000000 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1999 by Sun Microsystems, Inc. - * All rights reserved. - */ - -#ifndef ARP_CHECK_H -#define ARP_CHECK_H - -#pragma ident "%W% %E% SMI" - -#include <sys/types.h> -#include <netinet/in.h> - -#include "interface.h" - -/* - * arp_check.[ch] provide an interface for checking whether a given IP - * address is currently in use. see arp_check.c for documentation on - * how to use the exported function. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -int arp_check(struct ifslist *, in_addr_t, in_addr_t, uchar_t *, - uint32_t, uint32_t); - -#ifdef __cplusplus -} -#endif - -#endif /* ARP_CHECK_H */ diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c index 9033a96f14..0853456b5e 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/bound.c @@ -39,10 +39,7 @@ #include <sys/sysmacros.h> #include <dhcp_hostconf.h> #include <dhcpmsg.h> -#include <stdio.h> /* snprintf */ -#include "defaults.h" -#include "arp_check.h" #include "states.h" #include "packet.h" #include "util.h" @@ -53,6 +50,7 @@ #define IS_DHCP(plp) ((plp)->opts[CD_DHCP_TYPE] != NULL) static int configure_if(struct ifslist *); +static int configure_bound(struct ifslist *); static int configure_timers(struct ifslist *); /* @@ -122,15 +120,26 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack) (void) memcpy(ifsp->if_ack->opts[CD_LEASE_TIME]->value, &new_lease, sizeof (lease_t)); + if (configure_bound(ifsp) == 0) + return (0); + /* * we have no idea when the REQUEST that generated * this ACK was sent, but for diagnostic purposes * we'll assume its close to the current time. 
*/ - ifsp->if_newstart_monosec = monosec(); - /* FALLTHRU into REQUESTING/INIT_REBOOT */ + if (configure_timers(ifsp) == 0) + return (0); + + /* + * if the state is ADOPTING, event loop has not been started + * at this time; so don't run the EVENT_BOUND script. + */ + ifsp->if_curstart_monosec = ifsp->if_newstart_monosec; + ifsp->if_state = BOUND; + break; case REQUESTING: case INIT_REBOOT: @@ -142,21 +151,19 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack) return (0); /* - * if the state is ADOPTING, event loop has not been started - * at this time, so don't run the script. + * We will continue configuring this interface via + * dhcp_bound_complete, once kernel DAD completes. */ - - if (ifsp->if_state != ADOPTING) { - (void) script_start(ifsp, EVENT_BOUND, bound_event_cb, - NULL, NULL); - } - + ifsp->if_state = PRE_BOUND; break; + case PRE_BOUND: + /* This is just a duplicate ack; silently ignore it */ + return (1); + case RENEWING: case REBINDING: case BOUND: - cur_lease = ifsp->if_lease; if (configure_timers(ifsp) == 0) return (0); @@ -192,6 +199,8 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack) (void) script_start(ifsp, EVENT_EXTEND, bound_event_cb, NULL, NULL); + ifsp->if_state = BOUND; + ifsp->if_curstart_monosec = ifsp->if_newstart_monosec; break; case INFORM_SENT: @@ -206,11 +215,6 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack) return (0); } - if (ifsp->if_state != INFORMATION) { - ifsp->if_state = BOUND; - ifsp->if_curstart_monosec = ifsp->if_newstart_monosec; - } - /* * remove any stale hostconf file that might be lying around for * this interface. 
(in general, it's harmless, since we'll write a @@ -222,13 +226,32 @@ dhcp_bound(struct ifslist *ifsp, PKT_LIST *ack) } /* + * dhcp_bound_complete(): complete interface configuration after DAD + * + * input: struct ifslist *: the interface to configure + * output: none + */ + +void +dhcp_bound_complete(struct ifslist *ifsp) +{ + if (configure_bound(ifsp) == 0) + return; + + (void) script_start(ifsp, EVENT_BOUND, bound_event_cb, NULL, NULL); + + ifsp->if_state = BOUND; + ifsp->if_curstart_monosec = ifsp->if_newstart_monosec; +} + +/* * configure_timers(): configures the lease timers on an interface * * input: struct ifslist *: the interface to configure (with a valid if_ack) * output: int: 1 on success, 0 on failure */ -int +static int configure_timers(struct ifslist *ifsp) { lease_t lease, t1, t2; @@ -313,10 +336,6 @@ configure_if(struct ifslist *ifsp) struct ifreq ifr; struct sockaddr_in *sin; PKT_LIST *ack = ifsp->if_ack; - DHCP_OPT *router_list; - uchar_t *target_hwaddr; - int i; - char in_use[256] = "IP address already in use by"; /* * if we're using DHCP, then we'll have a valid CD_SERVER_ID @@ -331,27 +350,6 @@ configure_if(struct ifslist *ifsp) (void) memcpy(&ifsp->if_server.s_addr, ack->opts[CD_SERVER_ID]->value, sizeof (ipaddr_t)); - /* no big deal if this fails; we'll just have less diagnostics */ - target_hwaddr = malloc(ifsp->if_hwlen); - - if (arp_check(ifsp, 0, ack->pkt->yiaddr.s_addr, target_hwaddr, - ifsp->if_hwlen, df_get_int(ifsp->if_name, DF_ARP_WAIT)) == 1) { - - for (i = 0; i < ifsp->if_hwlen; i++) - (void) snprintf(in_use, sizeof (in_use), "%s %02x", - in_use, target_hwaddr[i]); - - dhcpmsg(MSG_ERROR, in_use); - - if (ifsp->if_ack->opts[CD_DHCP_TYPE] != NULL) - send_decline(ifsp, in_use, &ack->pkt->yiaddr); - - ifsp->if_bad_offers++; - free(target_hwaddr); - return (0); - } - free(target_hwaddr); - ifsp->if_addr.s_addr = ack->pkt->yiaddr.s_addr; if (ifsp->if_addr.s_addr == htonl(INADDR_ANY)) { dhcpmsg(MSG_ERROR, "configure_if: got invalid IP 
address"); @@ -498,6 +496,24 @@ configure_if(struct ifslist *ifsp) ifsp->if_broadcast = sin->sin_addr; dhcpmsg(MSG_INFO, "using broadcast address %s on %s", inet_ntoa(ifsp->if_broadcast), ifsp->if_name); + return (1); +} + +/* + * configure_bound(): configures routing with DHCP parameters from an ACK, + * and sets up the if_sock_ip_fd socket used for lease + * renewal. + * + * input: struct ifslist *: the interface to configure (with a valid if_ack) + * output: int: 1 on success, 0 on failure + */ + +static int +configure_bound(struct ifslist *ifsp) +{ + PKT_LIST *ack = ifsp->if_ack; + DHCP_OPT *router_list; + int i; /* * add each provided router; we'll clean them up when the @@ -510,7 +526,7 @@ configure_if(struct ifslist *ifsp) ifsp->if_nrouters = router_list->len / sizeof (ipaddr_t); ifsp->if_routers = malloc(router_list->len); if (ifsp->if_routers == NULL) { - dhcpmsg(MSG_ERR, "configure_if: cannot allocate " + dhcpmsg(MSG_ERR, "configure_bound: cannot allocate " "default router list, ignoring default routers"); ifsp->if_nrouters = 0; } @@ -523,7 +539,7 @@ configure_if(struct ifslist *ifsp) if (add_default_route(ifsp->if_name, &ifsp->if_routers[i]) == 0) { - dhcpmsg(MSG_ERR, "configure_if: cannot add " + dhcpmsg(MSG_ERR, "configure_bound: cannot add " "default router %s on %s", inet_ntoa( ifsp->if_routers[i]), ifsp->if_name); ifsp->if_routers[i].s_addr = htonl(INADDR_ANY); @@ -537,14 +553,14 @@ configure_if(struct ifslist *ifsp) ifsp->if_sock_ip_fd = socket(AF_INET, SOCK_DGRAM, 0); if (ifsp->if_sock_ip_fd == -1) { - dhcpmsg(MSG_ERR, "configure_if: cannot create socket on %s", + dhcpmsg(MSG_ERR, "configure_bound: cannot create socket on %s", ifsp->if_name); return (0); } if (bind_sock(ifsp->if_sock_ip_fd, IPPORT_BOOTPC, ntohl(ifsp->if_addr.s_addr)) == 0) { - dhcpmsg(MSG_ERR, "configure_if: cannot bind socket on %s", + dhcpmsg(MSG_ERR, "configure_bound: cannot bind socket on %s", ifsp->if_name); return (0); } @@ -558,8 +574,8 @@ configure_if(struct ifslist *ifsp) 
*/ if (bind_sock(ifsp->if_sock_fd, IPPORT_BOOTPC, INADDR_BROADCAST) == 0) { - dhcpmsg(MSG_ERR, "configure_if: cannot bind broadcast socket " - "on %s", ifsp->if_name); + dhcpmsg(MSG_ERR, "configure_bound: cannot bind broadcast " + "socket on %s", ifsp->if_name); return (0); } @@ -573,6 +589,6 @@ configure_if(struct ifslist *ifsp) if (ack->opts[CD_DHCP_TYPE] == NULL) ifsp->if_dflags |= DHCP_IF_BOOTP; - dhcpmsg(MSG_DEBUG, "configure_if: bound ifsp->if_sock_ip_fd"); + dhcpmsg(MSG_DEBUG, "configure_bound: bound ifsp->if_sock_ip_fd"); return (1); } diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c index 6edf5d6da5..7800163061 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -55,9 +54,9 @@ struct dhcp_default { static struct dhcp_default defaults[] = { { "RELEASE_ON_SIGTERM", "0", 0, 0 }, - { "IGNORE_FAILED_ARP", "1", 0, 0 }, + { "IGNORE_FAILED_ARP", "1", 0, -1 }, { "OFFER_WAIT", "3", 1, 20 }, - { "ARP_WAIT", "1000", 100, 4000 }, + { "ARP_WAIT", "1000", 0, -1 }, { "CLIENT_ID", NULL, 0, 0 }, { "PARAM_REQUEST_LIST", NULL, 0, 0 }, { "REQUEST_HOSTNAME", "1", 0, 0 } @@ -78,6 +77,7 @@ df_build_cache(void) char *param, *value, *end; FILE *fp; nvlist_t *nvlist; + struct dhcp_default *defp; if ((fp = fopen(DHCP_AGENT_DEFAULTS, "r")) == NULL) return (NULL); @@ -112,6 +112,18 @@ df_build_cache(void) else param++; + for (defp = defaults; + (char *)defp < (char *)defaults + sizeof (defaults); + defp++) { + if (strcasecmp(param, defp->df_name) == 0) { + if (defp->df_max == -1) { + dhcpmsg(MSG_WARNING, "parameter %s is " + "obsolete; ignored", defp->df_name); + } + break; + } + } + for (; *param != '\0'; param++) *param = toupper(*param); diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h index 4d58c2072d..95e14cfa34 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,8 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 1999-2001 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ #ifndef DEFAULTS_H @@ -48,9 +47,9 @@ extern "C" { enum { DF_RELEASE_ON_SIGTERM, /* send RELEASE on each if upon SIGTERM */ - DF_IGNORE_FAILED_ARP, /* what to do if agent can't ARP */ + _UNUSED_DF_IGNORE_FAILED_ARP, DF_OFFER_WAIT, /* how long to wait to collect offers */ - DF_ARP_WAIT, /* how long to wait for an ARP reply */ + _UNUSED_DF_ARP_WAIT, DF_CLIENT_ID, /* our client id */ DF_PARAM_REQUEST_LIST, /* our parameter request list */ DF_REQUEST_HOSTNAME /* request hostname associated with interface */ diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl b/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl index 4299f09136..406882c77f 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.dfl @@ -1,14 +1,9 @@ -#ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. # # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -25,6 +20,13 @@ # # +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +# # This file contains tunable parameters for dhcpagent(1M). # @@ -41,15 +43,6 @@ # # RELEASE_ON_SIGTERM=yes -# When the DHCP agent gets an ACK from the server, it sends an ARP -# request to verify that a given IP address is not already in use. If -# an ARP reply is received, the DHCP agent declines the server's -# offer. 
However, if the DHCP agent is unable to send the ARP request -# packet for whatever reason, it assumes the address is available. To -# be more cautious, uncomment the following parameter-value pair. -# -# IGNORE_FAILED_ARP=no - # By default, the DHCP agent waits 3 seconds to collect OFFER # responses to a DISCOVER. If it receives no OFFERs in this time, it # then waits for another 3 seconds, and so forth. To change this @@ -60,13 +53,6 @@ # # OFFER_WAIT= -# By default, the DHCP agent waits 1000 milliseconds to collect ARP -# replies to an ARP request when verifying that an IP address is not -# in use. To change this behavior, set and uncomment the following -# parameter-value pair. This parameter is specified in milliseconds. -# -# ARP_WAIT= - # By default, the DHCP agent does not send out a client identifier # (and hence, the chaddr field is used by the DHCP server as the # client identifier.) To make the DHCP agent send a client diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c index bdad88a741..d88f52f38b 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.c @@ -191,6 +191,16 @@ insert_ifs(const char *if_name, boolean_t is_adopting, int *error) /* step 6 */ (void) strlcpy(ifr.ifr_name, if_name, IFNAMSIZ); + if (ioctl(ifsp->if_sock_fd, SIOCGIFINDEX, &ifr) == -1) { + if (errno == ENXIO) + *error = DHCP_IPC_E_INVIF; + else + *error = DHCP_IPC_E_INT; + dhcpmsg(MSG_ERR, "insert_ifs: SIOCGIFINDEX for %s", if_name); + goto failure; + } + ifsp->if_index = ifr.ifr_index; + if (ioctl(ifsp->if_sock_fd, SIOCGIFFLAGS, &ifr) == -1) { if (errno == ENXIO) *error = DHCP_IPC_E_INVIF; @@ -564,6 +574,36 @@ lookup_ifs_by_xid(uint32_t xid) } /* + * lookup_ifs_by_uindex(): Looks up ifs entries given truncated index and + * previous ifs pointer (or NULL for list start). 
+ * Caller is expected to iterate through all + * potential matches to find interface of interest. + * + * input: int: the interface index + * struct ifslist *: the previous ifs, or NULL for list start + * output: struct ifslist *: the next matching ifs, or NULL if not found + * note: This operates using the 'truncated' (16-bit) ifindex as seen by + * routing socket clients. The value stored in if_index is the + * 32-bit ifindex from the ioctl interface. + */ + +struct ifslist * +lookup_ifs_by_uindex(uint16_t ifindex, struct ifslist *ifs) +{ + if (ifs == NULL) + ifs = ifsheadp; + else + ifs = ifs->next; + + for (; ifs != NULL; ifs = ifs->next) { + if ((ifs->if_index & 0xffff) == ifindex) + break; + } + + return (ifs); +} + +/* * remove_ifs(): removes a given ifs from the ifslist. marks the ifs * for being freed (but may not actually free it). * @@ -772,6 +812,16 @@ verify_ifs(struct ifslist *ifsp) goto abandon; break; } + switch (ioctl(ifsp->if_sock_fd, SIOCGIFINDEX, &ifr)) { + case 0: + if (ifr.ifr_index != ifsp->if_index) + goto abandon; + break; + case -1: + if (errno == ENXIO) + goto abandon; + break; + } /* FALLTHRU */ case INIT_REBOOT: diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h index b2fad40fb1..24b4598e31 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/interface.h @@ -112,6 +112,8 @@ struct ifslist { char if_name[IFNAMSIZ]; + uint32_t if_index; /* interface index */ + uint16_t if_max; /* largest DHCP packet on this if */ uint16_t if_min; /* minimum mtu size on this if */ uint16_t if_opt; /* amount of space for options in PKT */ @@ -367,6 +369,7 @@ void hold_ifs(struct ifslist *); struct ifslist *insert_ifs(const char *, boolean_t, int *); struct ifslist *lookup_ifs(const char *); struct ifslist *lookup_ifs_by_xid(uint32_t); +struct ifslist *lookup_ifs_by_uindex(uint16_t, struct ifslist *); void nuke_ifslist(boolean_t); void 
refresh_ifslist(iu_eh_t *, int, void *); int release_ifs(struct ifslist *); diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c index 8613a41245..751e6be5ac 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/renew.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -204,7 +203,7 @@ dhcp_rebind(iu_tq_t *tqp, void *arg) } /* - * dhcp_restart(): callback function to script_start + * dhcp_restart_lease(): callback function to script_start * * input: struct ifslist *: the interface to be restarted * const char *: unused @@ -213,7 +212,7 @@ dhcp_rebind(iu_tq_t *tqp, void *arg) /* ARGSUSED */ static int -dhcp_restart(struct ifslist *ifsp, const char *msg) +dhcp_restart_lease(struct ifslist *ifsp, const char *msg) { dhcpmsg(MSG_INFO, "lease expired on %s -- restarting DHCP", ifsp->if_name); @@ -292,7 +291,7 @@ dhcp_expire(iu_tq_t *tqp, void *arg) dhcpmsg(MSG_WARNING, "dhcp_expire: cannot start asynchronous " "transaction on %s, continuing...", ifsp->if_name); - (void) script_start(ifsp, EVENT_EXPIRE, dhcp_restart, NULL, NULL); + (void) script_start(ifsp, EVENT_EXPIRE, dhcp_restart_lease, NULL, NULL); } /* diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c index 3e8e66572c..a4f418059a 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c @@ -37,7 +37,6 @@ #include <dhcp_hostconf.h> #include <arpa/inet.h> #include <string.h> -#include <stdlib.h> #include <unistd.h> #include <dhcpmsg.h> @@ -46,10 +45,8 @@ #include "packet.h" #include "interface.h" #include "agent.h" -#include "defaults.h" static PKT_LIST *select_best(PKT_LIST **); -static void restart_dhcp(struct ifslist *); static stop_func_t stop_requesting; /* @@ -139,7 +136,7 @@ dhcp_requesting(iu_tq_t *tqp, void *arg) if (dhcp_bound(ifsp, offer) == 0) { dhcpmsg(MSG_WARNING, "dhcp_requesting: dhcp_bound " "failed for %s", ifsp->if_name); - restart_dhcp(ifsp); + dhcp_restart(ifsp); return; } @@ -408,7 +405,7 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg) ifsp->if_name); ifsp->if_bad_offers++; free_pkt_list(&plp); - restart_dhcp(ifsp); + dhcp_restart(ifsp); /* * remove any bogus cached configuration we might have @@ -426,7 +423,7 @@ 
dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg) "restarting DHCP on %s", ifsp->if_name); ifsp->if_bad_offers++; free_pkt_list(&plp); - restart_dhcp(ifsp); + dhcp_restart(ifsp); return; } @@ -436,7 +433,7 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg) if (dhcp_bound(ifsp, plp) == 0) { dhcpmsg(MSG_WARNING, "dhcp_acknak: dhcp_bound failed " "for %s", ifsp->if_name); - restart_dhcp(ifsp); + dhcp_restart(ifsp); return; } @@ -444,14 +441,14 @@ dhcp_acknak(iu_eh_t *ehp, int fd, short events, iu_event_id_t id, void *arg) } /* - * restart_dhcp(): restarts DHCP (from INIT) on a given interface + * dhcp_restart(): restarts DHCP (from INIT) on a given interface * * input: struct ifslist *: the interface to restart DHCP on * output: void */ -static void -restart_dhcp(struct ifslist *ifsp) +void +dhcp_restart(struct ifslist *ifsp) { if (iu_schedule_timer(tq, DHCP_RESTART_WAIT, dhcp_start, ifsp) == -1) { @@ -461,7 +458,7 @@ restart_dhcp(struct ifslist *ifsp) ipc_action_finish(ifsp, DHCP_IPC_E_MEMORY); async_finish(ifsp); - dhcpmsg(MSG_ERROR, "restart_dhcp: cannot schedule dhcp_start, " + dhcpmsg(MSG_ERROR, "dhcp_restart: cannot schedule dhcp_start, " "reverting to INIT state on %s", ifsp->if_name); } else hold_ifs(ifsp); diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h index 74190cdd13..32f880762a 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/states.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -49,6 +48,7 @@ extern "C" { void dhcp_acknak(iu_eh_t *, int, short, iu_event_id_t, void *); int dhcp_adopt(void); int dhcp_bound(struct ifslist *, PKT_LIST *); +void dhcp_bound_complete(struct ifslist *); int dhcp_drop(struct ifslist *, const char *); void dhcp_expire(iu_tq_t *, void *); int dhcp_extending(struct ifslist *); @@ -58,11 +58,11 @@ void dhcp_rebind(iu_tq_t *, void *); int dhcp_release(struct ifslist *, const char *); void dhcp_renew(iu_tq_t *, void *); void dhcp_requesting(iu_tq_t *, void *); +void dhcp_restart(struct ifslist *); void dhcp_selecting(struct ifslist *); void dhcp_start(iu_tq_t *, void *); void send_decline(struct ifslist *, char *, struct in_addr *); - #ifdef __cplusplus } #endif diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c index 481d867bad..ff2d574053 100644 --- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c +++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/util.c @@ -33,9 +33,7 @@ #include <netinet/dhcp.h> #include <signal.h> #include <sys/dlpi.h> -#include <sys/sockio.h> #include <sys/socket.h> -#include <errno.h> #include <net/route.h> #include <net/if_arp.h> #include <string.h> @@ -50,7 +48,6 @@ #include "interface.h" #include "util.h" #include "packet.h" -#include "defaults.h" /* * this file contains utility functions that have no real better home @@ -395,7 +392,6 @@ static int update_default_route(const char *ifname, int type, struct in_addr *gateway_nbo, int flags) { - static int rtsock_fd = -1; struct { struct rt_msghdr rm_mh; struct sockaddr_in rm_dst; @@ -404,15 +400,6 @@ update_default_route(const char *ifname, int type, struct in_addr *gateway_nbo, struct 
sockaddr_dl rm_ifp; } rtmsg; - if (rtsock_fd == -1) { - rtsock_fd = socket(PF_ROUTE, SOCK_RAW, 0); - if (rtsock_fd == -1) { - dhcpmsg(MSG_ERR, "update_default_route: " - "cannot create routing socket"); - return (0); - } - } - (void) memset(&rtmsg, 0, sizeof (rtmsg)); rtmsg.rm_mh.rtm_version = RTM_VERSION; rtmsg.rm_mh.rtm_msglen = sizeof (rtmsg); diff --git a/usr/src/cmd/cmd-inet/usr.bin/Makefile b/usr/src/cmd/cmd-inet/usr.bin/Makefile index a982e6a334..aa203a2216 100644 --- a/usr/src/cmd/cmd-inet/usr.bin/Makefile +++ b/usr/src/cmd/cmd-inet/usr.bin/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -19,8 +18,7 @@ # # CDDL HEADER END # -# -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -38,13 +36,13 @@ MSGSUBDIRS= nca talk # As programs get lint-clean, add them here. Eventually. # This hack should go away, and all in PROG should be lint-clean. -LINTCLEAN= rlogin.c rsh.c rcp.c rdate.c +LINTCLEAN= rlogin.c rsh.c rcp.c rdate.c rwho.c # Likewise, as subdirs get lint-clean, add them here. Once # they're all clean, replace the dependency of the lint target # with SUBDIRS. Also (sigh) deal with the commented-out build lines # for the lint rule. 
-LINTSUBDIRS= nca netstat pppd tftp +LINTSUBDIRS= nca netstat pppd pppstats tftp include ../../Makefile.cmd include ../Makefile.cmd-inet diff --git a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c index 8218eac44d..0f15eb05f5 100644 --- a/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c +++ b/usr/src/cmd/cmd-inet/usr.bin/netstat/netstat.c @@ -73,7 +73,6 @@ #include <net/if.h> #include <net/route.h> -#include <inet/common.h> #include <inet/mib2.h> #include <inet/ip.h> #include <inet/arp.h> @@ -3588,17 +3587,21 @@ arp_report(mib_item_t *item) (void) puts(v4compat ? "Net to Media Table" : "Net to Media Table: IPv4"); - (void) fputs("Device " - "IP Address Mask ", - stdout); - (void) puts("Flags Phys Addr "); - (void) puts("------ -------------------- " - "--------------- ----- ---------------"); + (void) puts("Device " + " IP Address Mask " + "Flags Phys Addr"); + (void) puts("------ " + "-------------------- --------------- " + "-------- ---------------"); first = B_FALSE; } flbuf[0] = '\0'; flags = np->ipNetToMediaInfo.ntm_flags; + /* + * Note that not all flags are possible at the same + * time. 
Patterns: SPLAy DUo + */ if (flags & ACE_F_PERMANENT) (void) strcat(flbuf, "S"); if (flags & ACE_F_PUBLISH) @@ -3609,7 +3612,17 @@ arp_report(mib_item_t *item) (void) strcat(flbuf, "U"); if (flags & ACE_F_MAPPING) (void) strcat(flbuf, "M"); - (void) printf("%-6s %-20s %-15s %-5s %s\n", + if (flags & ACE_F_MYADDR) + (void) strcat(flbuf, "L"); + if (flags & ACE_F_UNVERIFIED) + (void) strcat(flbuf, "d"); + if (flags & ACE_F_AUTHORITY) + (void) strcat(flbuf, "A"); + if (flags & ACE_F_OLD) + (void) strcat(flbuf, "o"); + if (flags & ACE_F_DELAYED) + (void) strcat(flbuf, "y"); + (void) printf("%-6s %-20s %-15s %-8s %s\n", octetstr(&np->ipNetToMediaIfIndex, 'a', ifname, sizeof (ifname)), pr_addr(np->ipNetToMediaNetAddress, diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile index b760270924..c7225e3e61 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -19,17 +18,16 @@ # # CDDL HEADER END # -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# +# ident "%Z%%M% %I% %E% SMI" +# # cmd/cmd-inet/usr.lib/in.ndpd/Makefile # PROG= in.ndpd -OBJS= config.o main.o ndp.o tables.o trace.o dupl_addr.o +OBJS= config.o main.o ndp.o tables.o trace.o SRCS= $(OBJS:%.o=%.c) include ../../../Makefile.cmd @@ -48,6 +46,14 @@ $(OBJS) := CPPFLAGS += $(_D_UNIX98_EXTN) LINTFLAGS += $(_D_UNIX98_EXTN) +# This shouldn't be necessary, but linking with libxnet requires it. +LINTFLAGS += -erroff=E_INCONS_ARG_DECL2 -erroff=E_INCONS_VAL_TYPE_DECL2 + +# This is needed to avoid littering the code with useless LINTED statements +# for each place where pointers are casted. It's not perfect, but being +# perfect would require a bigger rewrite. +LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN + .KEEP_STATE: .PARALLEL: $(OBJS) diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h index 26531e4d96..5e16cc1fa2 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/defs.h @@ -98,7 +98,6 @@ extern struct sockaddr_dl *rta_ifp; #define D_PKTBAD 0x0400 /* Malformed packet */ #define D_PKTOUT 0x0800 /* Sent packet */ #define D_TMP 0x1000 /* RFC3041 mechanism */ -#define D_DAD 0x2000 /* Duplciate Address Detection */ #define IF_SEPARATOR ':' #define IPV6_MAX_HOPS 255 diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c deleted file mode 100644 index 8c81c13901..0000000000 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Perform IPv6 duplicate address detection for a given interface - * and IPv6 address. - * - * This file is copied from usr/src/cmd/cmd-inet/usr.sbin/ifconfig. - * Only the modifications necessary to integrate into the message - * scheme of in.ndpd have been made. This is intended to be a - * temporary fix to allow Duplicate Address Detection to be performed - * by in.ndpd for temporary (rfc 3041) addresses; the long-term - * solution will be to use libinetcfg. - */ -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "defs.h" -#include <netinet/icmp6.h> -#include <netinet/in_systm.h> /* For IP_MAXPACKET */ -#include <netinet/ip.h> /* For IP_MAXPACKET */ - -int DupAddrDetectTransmits = 1; -int RetransTimer = ND_RETRANS_TIMER; /* Milliseconds. 
*/ - -#define IPV6_MAX_HOPS 255 - -extern struct in6_addr all_nodes_mcast; - -static void in6_solmulti_addr(struct in6_addr *addr, - struct in6_addr *multi); -static int run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc, int ifindex); -static int send_dad_probe(int s, char *phyname, - struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc); -static int recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, - int ifindex); -static boolean_t verify_opts(struct nd_opt_hdr *opt, int optlen, - struct sockaddr_in6 *from, boolean_t reject_dad_slla); -static void dad_failed(char *phyname, struct sockaddr_in6 *testaddr, - int code); -static void print_na(char *str, char *phyname, - struct nd_neighbor_advert *na, int len, - struct sockaddr_in6 *addr); -static void print_ns(char *str, char *phyname, - struct nd_neighbor_solicit *ns, int len, - struct sockaddr_in6 *addr); -static void print_opt(struct nd_opt_hdr *opt, int len); - - -/* - * Performing duplicate address detection. - * - * Returns 0 if the address is ok, 1 if there is a duplicate, - * and -1 (with errno set) if there is some internal error. - * As a side effect this does a syslog printf identifying any - * duplicate. - * Note that the state of the interface name is unchanged. - */ -int -do_dad(char *ifname, struct sockaddr_in6 *testaddr) -{ - int s; - struct lifreq lifr; - char *cp; - char phyname[LIFNAMSIZ]; - int ifindex; - int64_t saved_flags; - int ret = -1; /* Assume error by default */ - struct sockaddr_in6 solicited_mc; - - /* - * Truncate name at ':'. Needed for SIOCGLIFLNKINFO - * Keep untruncated ifname for other use. - */ - (void) strncpy(phyname, ifname, sizeof (phyname)); - cp = strchr(phyname, ':'); - if (cp != NULL) - *cp = '\0'; - - /* - * Get a socket to use to send and receive neighbor solicitations - * for DAD. Also used for ioctls below. 
- */ - if ((s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0) { - logperror("do_dad: socket"); - return (-1); - } - - /* - * Determine interface index (for IPV6_BOUND_PIF) and - * save the flag values so they can be restored on return. - */ - (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); - if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) < 0) { - logperror("do_dad: SIOCGLIFINDEX"); - goto done; - } - ifindex = lifr.lifr_index; - if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { - logperror("do_dad: SIOCGLIFFLAGS"); - goto done; - } - saved_flags = lifr.lifr_flags; - if (!(saved_flags & IFF_MULTICAST)) { - /* Not possible to do DAD. Pretend it is ok */ - ret = 0; - goto done; - } - (void) strncpy(lifr.lifr_name, phyname, sizeof (lifr.lifr_name)); - if (ioctl(s, SIOCGLIFLNKINFO, (caddr_t)&lifr) < 0) { - logperror("do_dad: SIOCGLIFLNKINFO"); - goto done; - } - if (lifr.lifr_ifinfo.lir_reachretrans != 0) { - RetransTimer = lifr.lifr_ifinfo.lir_reachretrans; - } - - /* - * Set NOLOCAL and UP flags. - * This prevents the use of the interface except when the user binds - * to unspecified IPv6 address, and sends to a link local multicast - * address. - */ - lifr.lifr_flags = saved_flags | IFF_NOLOCAL | IFF_UP; - - (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); - if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { - logperror("do_dad: SIOCSLIFFLAGS"); - goto restore; - } - - /* - * IPV6_BOUND_PIF prevents load spreading to happen. If we - * just do IPV6_BOUND_IF, the packet can go out on a different - * interface other than "ifindex", if interface is part of - * a group. In that case, we will get back the copy of NS that - * we sent and think it is a duplicate(Switch loops back the - * copy on all interfaces other than the one we sent the packet on). 
- */ - if (setsockopt(s, IPPROTO_IPV6, IPV6_BOUND_PIF, (char *)&ifindex, - sizeof (ifindex)) < 0) { - logperror("do_dad: IPV6_BOUND_PIF"); - goto restore; - } - - { - int hops = IPV6_MAX_HOPS; - int on = 1; - int off = 0; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, - (char *)&hops, sizeof (hops)) < 0) { - logperror("do_dad: IPV6_MULTICAST_HOPS"); - goto restore; - } - if (setsockopt(s, IPPROTO_IPV6, IPV6_UNSPEC_SRC, - (char *)&on, sizeof (on)) < 0) { - logperror("do_dad: IPV6_UNSPEC_SRC"); - goto restore; - } - - if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, - (char *)&off, sizeof (off)) < 0) { - logperror("do_dad: IPV6_MULTICAST_LOOP"); - goto restore; - } - - /* Enable receipt of ancillary data */ - if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, - (char *)&on, sizeof (on)) < 0) { - logperror("do_dad: IPV6_RECVHOPLIMIT"); - goto restore; - } - if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, - (char *)&on, sizeof (on)) < 0) { - logperror("do_dad: IPV6_RECVPKTINFO"); - goto restore; - } - if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVRTHDR, - (char *)&on, sizeof (on)) < 0) { - logperror("do_dad: IPV6_RECVRTHDR"); - goto restore; - } - } - - /* - * Extract the address and determine the solicited node multicast - * address to use. - */ - (void) memset(&solicited_mc, 0, sizeof (solicited_mc)); - solicited_mc.sin6_family = AF_INET6; - in6_solmulti_addr(&testaddr->sin6_addr, &solicited_mc.sin6_addr); - - /* Join the solicited node multicast address and all-nodes. 
*/ - { - struct ipv6_mreq v6mcastr; - - v6mcastr.ipv6mr_multiaddr = solicited_mc.sin6_addr; - v6mcastr.ipv6mr_interface = ifindex; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP, - (char *)&v6mcastr, sizeof (v6mcastr)) < 0) { - logperror("do_dad: IPV6_JOIN_GROUP"); - goto restore; - } - - v6mcastr.ipv6mr_multiaddr = all_nodes_mcast; - v6mcastr.ipv6mr_interface = ifindex; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP, - (char *)&v6mcastr, sizeof (v6mcastr)) < 0) { - logperror("do_dad: IPV6_JOIN_GROUP"); - goto restore; - } - } - - ret = run_dad(s, phyname, testaddr, &solicited_mc, ifindex); - -restore: - /* Restore flags */ - (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); - lifr.lifr_flags = saved_flags; - if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { - logperror("do_dad: SIOCSLIFFLAGS"); - ret = -1; - goto done; - } -done: - (void) close(s); - return (ret); -} - - -/* - * Determine the solicited node multicast address for a given address. - */ -static void -in6_solmulti_addr(struct in6_addr *addr, struct in6_addr *multi) -{ - struct in6_addr solicited_prefix = { - { 0xff, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x1, 0xFF, 0x0, 0x0, 0x0 } }; - int i; - - *multi = solicited_prefix; - for (i = 13; i < 16; i++) - multi->s6_addr[i] = addr->s6_addr[i]; -} - -static int -run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc, int ifindex) -{ - int time_left; /* In milliseconds */ - struct timeval starttime; - struct timeval curtime; - struct pollfd fds; - int i; - int ret; - - if (debug & D_DAD) - logmsg(LOG_DEBUG, "run_dad(%s)\n", phyname); - - /* - * Perform duplicate address detection sequence - * 1. Send a neighbor solicitation with an unspecified source - * address to the solicited node MC address with the testaddr - * being the target. - * 2. 
Wait for up to RetransTimer milliseconds for either a - * neighbor advertisement (sent to all-nodes) or a DAD neighbor - * solicitation for the testaddr. - * 3. Perform step 1 and 2 DupAddrDetectTransmits times. - */ - - /* XXX perform a random delay: 0 - MAX_RTR_SOLICITATION_DELAY */ - /* XXX use poll+recv logic for the random delay */ - - for (i = 0; i < DupAddrDetectTransmits; i++) { - if (send_dad_probe(s, phyname, testaddr, solicited_mc) < 0) - return (-1); - - /* - * Track time to make sure total wait is RetransTimer - * even though random packet will awake poll. - */ - (void) gettimeofday(&starttime, NULL); - /* CONSTCOND */ - while (1) { - (void) gettimeofday(&curtime, NULL); - time_left = RetransTimer - - (curtime.tv_sec - starttime.tv_sec) * 1000 - - (curtime.tv_usec - starttime.tv_usec) / 1000; - - - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "run_dad: time_left %d ms\n", - time_left); - } - if (time_left <= 0) { - if (debug & D_DAD) - logmsg(LOG_DEBUG, "run_dad: timeout\n"); - break; - } - fds.fd = s; - fds.events = POLLIN; - - switch (poll(&fds, 1, time_left)) { - case -1: - logperror("run_dad: poll"); - return (-1); - case 0: - /* Need loop will break */ - break; - default: - /* Huh? */ - logmsg(LOG_ERR, "poll returns > 1!\n"); - return (-1); - case 1: - if (fds.revents & POLLIN) { - ret = recv_dad(s, phyname, testaddr, - ifindex); - if (ret < 0) - return (-1); - if (ret > 0) { - dad_failed(phyname, testaddr, - ret); - return (1); - } - } - break; - } - } - } - return (0); -} - -/* - * Send a DAD NS packet. Assumes an IPV6_UNSPEC_SRC and an IPV6_BOUND_IF - * have been done by the caller. 
- */ -static int -send_dad_probe(int s, char *phyname, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc) -{ - static uint64_t outpack[(IP_MAXPACKET + 1)/8]; - struct nd_neighbor_solicit *ns = (struct nd_neighbor_solicit *)outpack; - int packetlen = 0; - int cc; - - ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; - ns->nd_ns_code = 0; - ns->nd_ns_cksum = 0; - ns->nd_ns_reserved = 0; - ns->nd_ns_target = testaddr->sin6_addr; - packetlen += sizeof (struct nd_neighbor_solicit); - cc = sendto(s, (char *)outpack, packetlen, 0, - (struct sockaddr *)solicited_mc, sizeof (*solicited_mc)); - if (cc < 0 || cc != packetlen) { - char abuf[INET6_ADDRSTRLEN]; - - if (cc < 0) { - logperror("DAD sendto"); - return (-1); - } - if (debug & D_DAD) { - (void) inet_ntop(solicited_mc->sin6_family, - (void *)&solicited_mc->sin6_addr, abuf, - sizeof (abuf)); - - logmsg(LOG_DEBUG, "wrote %s %d chars, ret=%d\n", - abuf, packetlen, cc); - } - return (-1); - } - if (debug & D_DAD) - print_ns("Sent NS", phyname, ns, packetlen, solicited_mc); - - return (0); -} - -/* - * Return a pointer to the specified option buffer. - * If not found return NULL. - */ -static void * -find_ancillary(struct msghdr *msg, int cmsg_type) -{ - struct cmsghdr *cmsg; - - for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - if (cmsg->cmsg_level == IPPROTO_IPV6 && - cmsg->cmsg_type == cmsg_type) { - return (CMSG_DATA(cmsg)); - } - } - return (NULL); -} - -/* - * Receive an ICMP packet. If the packet signals a duplicate address for - * testaddr then return a positive non-zero number. Otherwise return zero. - * Internal errors cause a return of -1. 
- */ -static int -recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, int ifindex) -{ - struct sockaddr_in6 from; - struct icmp6_hdr *icmp; - struct nd_neighbor_solicit *ns; - struct nd_neighbor_advert *na; - static uint64_t in_packet[(IP_MAXPACKET + 1)/8]; - static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8]; - int len; - char abuf[INET6_ADDRSTRLEN]; - struct msghdr msg; - struct iovec iov; - uchar_t *opt; - uint_t hoplimit; - struct in6_addr dst; - int rcv_ifindex; - - iov.iov_base = (char *)in_packet; - iov.iov_len = sizeof (in_packet); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_name = (struct sockaddr *)&from; - msg.msg_namelen = sizeof (from); - msg.msg_control = ancillary_data; - msg.msg_controllen = sizeof (ancillary_data); - - if ((len = recvmsg(s, &msg, 0)) < 0) { - logperror("DAD recvmsg"); - return (-1); - } - if (len == 0) - return (0); - - if (debug & D_DAD) { - (void) inet_ntop(AF_INET6, (void *)&from.sin6_addr, - abuf, sizeof (abuf)); - } - /* Ignore packets > 64k or control buffers that don't fit */ - if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "Truncated message: msg_flags " - "0x%x from %s\n", msg.msg_flags, abuf); - } - return (0); - } - - icmp = (struct icmp6_hdr *)in_packet; - - if (len < ICMP6_MINLEN) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "Too short ICMP packet: %d bytes " - "from %s\n", len, abuf); - } - return (0); - } - - opt = find_ancillary(&msg, IPV6_HOPLIMIT); - if (opt == NULL) { - /* Unknown hoplimit - must drop */ - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "Unknown hop limit from %s\n", abuf); - } - return (0); - } - hoplimit = *(uint_t *)opt; - opt = find_ancillary(&msg, IPV6_PKTINFO); - if (opt == NULL) { - /* Unknown destination address - must drop */ - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "Unknown destination from %s\n", - abuf); - } - return (0); - } - dst = ((struct in6_pktinfo *)opt)->ipi6_addr; - rcv_ifindex = ((struct in6_pktinfo 
*)opt)->ipi6_ifindex; - opt = find_ancillary(&msg, IPV6_RTHDR); - if (opt != NULL) { - /* Can't allow routing headers in ND messages */ - if (debug & D_DAD) { - logmsg(LOG_DEBUG, - "ND message with routing header from %s\n", abuf); - } - return (0); - } - - switch (icmp->icmp6_type) { - case ND_NEIGHBOR_SOLICIT: - /* - * Assumes that the kernel has verified the AH (if present) - * and the ICMP checksum. - */ - if (hoplimit != IPV6_MAX_HOPS) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NS hop limit: %d from %s\n", - hoplimit, abuf); - } - return (0); - } - - if (icmp->icmp6_code != 0) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NS code: %d from %s\n", - icmp->icmp6_code, abuf); - } - return (0); - } - - if (len < sizeof (struct nd_neighbor_solicit)) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NS too short: %d bytes " - "from %s\n", len, abuf); - } - return (0); - } - ns = (struct nd_neighbor_solicit *)icmp; - if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target)) { - if (debug & D_DAD) { - char abuf2[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&ns->nd_ns_target, - abuf2, sizeof (abuf2)); - logmsg(LOG_DEBUG, "NS with multicast target: " - "%s from %s\n", abuf2, abuf); - } - return (0); - } - - if (len > sizeof (struct nd_neighbor_solicit)) { - /* - * For DAD neighbor solicitation type message, - * we need to further verify if SLLA option is present - * in received options, - * so we pass TRUE to reject_dad_slla argument. 
- */ - if (!verify_opts((struct nd_opt_hdr *)&ns[1], - len - sizeof (struct nd_neighbor_solicit), - &from, _B_TRUE)) - return (0); - } - - if (debug & D_DAD) - print_ns("Received valid NS", phyname, ns, len, &from); - if (!IN6_IS_ADDR_UNSPECIFIED(&from.sin6_addr) || - !IN6_IS_ADDR_MC_SOLICITEDNODE(&dst)) { - /* Sender is doing address resolution */ - return (0); - } - if (rcv_ifindex != ifindex) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "Received Neighbor " - "solicitation on ifindex %d, " - "expecting on %d\n", rcv_ifindex, ifindex); - } - return (0); - } - if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr, - &ns->nd_ns_target)) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NS - duplicate from %s\n", - abuf); - } - return (1); - } - return (0); - - case ND_NEIGHBOR_ADVERT: - /* - * Assumes that the kernel has verified the AH (if present) - * and the ICMP checksum. - */ - if (hoplimit != IPV6_MAX_HOPS) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NA hop limit: %d from %s\n", - hoplimit, abuf); - } - return (0); - } - - if (icmp->icmp6_code != 0) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NA code: %d from %s\n", - icmp->icmp6_code, abuf); - } - return (0); - } - - if (len < sizeof (struct nd_neighbor_advert)) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NA too short: %d bytes " - "from %s\n", len, abuf); - } - return (0); - } - na = (struct nd_neighbor_advert *)icmp; - if (IN6_IS_ADDR_MULTICAST(&na->nd_na_target)) { - if (debug & D_DAD) { - char abuf2[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&na->nd_na_target, - abuf2, sizeof (abuf2)); - logmsg(LOG_DEBUG, "NA with multicast target: " - "%s from %s\n", abuf2, abuf); - } - return (0); - } - - if (IN6_IS_ADDR_MULTICAST(&dst) && - (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { - if (debug & D_DAD) { - char abuf2[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&na->nd_na_target, - abuf2, sizeof (abuf2)); - logmsg(LOG_DEBUG, "NA solicited w/ mc target: " - "%s from %s\n", abuf2, 
abuf); - } - return (0); - } - - if (len > sizeof (struct nd_neighbor_advert)) { - /* - * Since this is a Neighbor advertisement - * we unset the reject_dad_slla flag, thus - * there is no need to verify the SLLA options. - */ - if (!verify_opts((struct nd_opt_hdr *)&na[1], - len - sizeof (struct nd_neighbor_advert), - &from, _B_FALSE)) - return (0); - } - - if (debug & D_DAD) - print_na("Received valid NA", phyname, na, len, &from); - - if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr, - &na->nd_na_target)) { - if (debug & D_DAD) { - logmsg(LOG_DEBUG, "NA - duplicate from %s\n", - abuf); - } - return (1); - } - return (0); - default: - return (0); - } -} - -/* - * Verify that all options have a non-zero length and that - * the options fit within the total length of the packet (optlen). - * If reject_dad_slla is set, we also verify that no SLLA option is present - * as mandated by section 7.1.1 of RFC 2461. - */ -static boolean_t -verify_opts(struct nd_opt_hdr *opt, int optlen, struct sockaddr_in6 *from, - boolean_t reject_dad_slla) -{ - while (optlen > 0) { - if (opt->nd_opt_len == 0) { - if (debug & D_DAD) { - char abuf[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&from->sin6_addr, - abuf, sizeof (abuf)); - - logmsg(LOG_DEBUG, "Zero length option type " - "0x%x from %s\n", opt->nd_opt_type, abuf); - } - return (_B_FALSE); - } - optlen -= 8 * opt->nd_opt_len; - if (optlen < 0) { - if (debug & D_DAD) { - char abuf[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&from->sin6_addr, - abuf, sizeof (abuf)); - - logmsg(LOG_DEBUG, "Too large option: type " - "0x%x len %u from %s\n", - opt->nd_opt_type, opt->nd_opt_len, abuf); - } - return (_B_FALSE); - } - if (reject_dad_slla && - opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) { - return (_B_FALSE); - } - opt = (struct nd_opt_hdr *)((char *)opt + - 8 * opt->nd_opt_len); - } - return (_B_TRUE); -} - - -static void -dad_failed(char *phyname, struct sockaddr_in6 *testaddr, int code) -{ - char 
abuf[INET6_ADDRSTRLEN]; - - (void) inet_ntop(testaddr->sin6_family, - (void *)&testaddr->sin6_addr, - abuf, sizeof (abuf)); - logmsg(LOG_CRIT, "Duplicate address detected on link %s for address " - "%s. Code %d\n", phyname, abuf, code); -} - -/* Printing functions */ - -static void -print_ns(char *str, char *phyname, - struct nd_neighbor_solicit *ns, int len, struct sockaddr_in6 *addr) -{ - struct nd_opt_hdr *opt; - char abuf[INET6_ADDRSTRLEN]; - - logmsg(LOG_DEBUG, "%s %s (%d bytes) on %s\n", str, - inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr, - abuf, sizeof (abuf)), - len, phyname); - logmsg(LOG_DEBUG, "\ttarget %s\n", - inet_ntop(addr->sin6_family, (void *)&ns->nd_ns_target, - abuf, sizeof (abuf))); - len -= sizeof (*ns); - opt = (struct nd_opt_hdr *)&ns[1]; - print_opt(opt, len); -} - -static void -print_na(char *str, char *phyname, - struct nd_neighbor_advert *na, int len, struct sockaddr_in6 *addr) -{ - struct nd_opt_hdr *opt; - char abuf[INET6_ADDRSTRLEN]; - - logmsg(LOG_DEBUG, "%s %s (%d bytes) on %s\n", str, - inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr, - abuf, sizeof (abuf)), - len, phyname); - logmsg(LOG_DEBUG, "\ttarget %s\n", - inet_ntop(addr->sin6_family, (void *)&na->nd_na_target, - abuf, sizeof (abuf))); - logmsg(LOG_DEBUG, "\tRouter: %s\n", - (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) ? - "Set" : "Not set"); - logmsg(LOG_DEBUG, "\tSolicited: %s\n", - (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED) ? - "Set" : "Not set"); - logmsg(LOG_DEBUG, "\tOverride: %s\n", - (na->nd_na_flags_reserved & ND_NA_FLAG_OVERRIDE) ? 
- "Set" : "Not set"); - - len -= sizeof (*na); - opt = (struct nd_opt_hdr *)&na[1]; - print_opt(opt, len); -} - -static void -print_opt(struct nd_opt_hdr *opt, int len) -{ - struct nd_opt_prefix_info *po; - struct nd_opt_mtu *mo; - struct nd_opt_lla *lo; - int optlen; - char abuf[INET6_ADDRSTRLEN]; - char llabuf[BUFSIZ]; - - while (len >= sizeof (struct nd_opt_hdr)) { - optlen = opt->nd_opt_len * 8; - if (optlen == 0) { - logmsg(LOG_DEBUG, "Zero length option!\n"); - break; - } - switch (opt->nd_opt_type) { - case ND_OPT_PREFIX_INFORMATION: - po = (struct nd_opt_prefix_info *)opt; - if (optlen != sizeof (*po) || - optlen > len) - break; - - logmsg(LOG_DEBUG, "\tOn link flag:%s\n", - (po->nd_opt_pi_flags_reserved & - ND_OPT_PI_FLAG_ONLINK) ? "Set" : "Not set"); - logmsg(LOG_DEBUG, "\tAuto addrconf flag:%s\n", - (po->nd_opt_pi_flags_reserved & - ND_OPT_PI_FLAG_AUTO) ? "Set" : "Not set"); - logmsg(LOG_DEBUG, "\tValid time: %u\n", - ntohl(po->nd_opt_pi_valid_time)); - logmsg(LOG_DEBUG, "\tPreferred time: %u\n", - ntohl(po->nd_opt_pi_preferred_time)); - logmsg(LOG_DEBUG, "\tPrefix: %s/%u\n", - inet_ntop(AF_INET6, (void *)&po->nd_opt_pi_prefix, - abuf, sizeof (abuf)), - po->nd_opt_pi_prefix_len); - break; - case ND_OPT_MTU: - mo = (struct nd_opt_mtu *)opt; - if (optlen != sizeof (*mo) || - optlen > len) - break; - logmsg(LOG_DEBUG, "\tMTU: %d\n", - ntohl(mo->nd_opt_mtu_mtu)); - break; - case ND_OPT_SOURCE_LINKADDR: - lo = (struct nd_opt_lla *)opt; - if (optlen < 8 || - optlen > len) - break; - (void) fmt_lla(llabuf, sizeof (llabuf), - (uchar_t *)lo->nd_opt_lla_hdw_addr, optlen - 2); - logmsg(LOG_DEBUG, "\tSource LLA: len %d <%s>\n", - optlen-2, llabuf); - break; - case ND_OPT_TARGET_LINKADDR: - lo = (struct nd_opt_lla *)opt; - if (optlen < 8|| - optlen > len) - break; - (void) fmt_lla(llabuf, sizeof (llabuf), - (uchar_t *)lo->nd_opt_lla_hdw_addr, optlen - 2); - logmsg(LOG_DEBUG, "\tTarget LLA: len %d <%s>\n", - optlen-2, llabuf); - break; - case ND_OPT_REDIRECTED_HEADER: 
- logmsg(LOG_DEBUG, "\tRedirected header option!\n"); - break; - default: - logmsg(LOG_DEBUG, "Unkown option %d (0x%x)\n", - opt->nd_opt_type, opt->nd_opt_type); - break; - } - opt = (struct nd_opt_hdr *)((char *)opt + optlen); - len -= optlen; - } -} diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c index 2ee75f5751..1cfbafa911 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/main.c @@ -533,6 +533,53 @@ if_process(int s, char *ifname, boolean_t first) /* Detect prefixes which are removed */ if (pr->pr_kernel_state != 0) pr->pr_in_use = _B_TRUE; + + if ((lifr.lifr_flags & IFF_DUPLICATE) && + (pr->pr_flags & IFF_TEMPORARY)) { + in6_addr_t *token; + int i; + char abuf[INET6_ADDRSTRLEN]; + + if (++pr->pr_attempts >= MAX_DAD_FAILURES) { + logmsg(LOG_ERR, "%s: token %s is duplicate after %d " + "attempts; disabling temporary addresses on %s", + pr->pr_name, inet_ntop(AF_INET6, + (void *)&pi->pi_tmp_token, abuf, sizeof (abuf)), + pr->pr_attempts, pi->pi_name); + pi->pi_TmpAddrsEnabled = 0; + tmptoken_delete(pi); + prefix_delete(pr); + return; + } + logmsg(LOG_WARNING, "%s: token %s is duplicate; trying again", + pr->pr_name, inet_ntop(AF_INET6, (void *)&pi->pi_tmp_token, + abuf, sizeof (abuf))); + if (!tmptoken_create(pi)) { + prefix_delete(pr); + return; + } + token = &pi->pi_tmp_token; + for (i = 0; i < 16; i++) { + /* + * prefix_create ensures that pr_prefix has all-zero + * bits after prefixlen. + */ + pr->pr_address.s6_addr[i] = pr->pr_prefix.s6_addr[i] | + token->s6_addr[i]; + } + if (prefix_lookup_addr_match(pr) != NULL) { + prefix_delete(pr); + return; + } + pr->pr_CreateTime = getcurrenttime() / MILLISEC; + /* + * We've got a new token. Clearing PR_AUTO causes + * prefix_update_k to bring the interface up and set the + * address. 
+ */ + pr->pr_kernel_state &= ~PR_AUTO; + prefix_update_k(pr); + } } static int ifsock = -1; @@ -1464,7 +1511,7 @@ setup_rtsock(void) /* * Retrieve one routing socket message. If RTM_IFINFO indicates * new phyint do a full scan of the interfaces. If RTM_IFINFO - * indicates an existing phyint only scan that phyint and asociated + * indicates an existing phyint, only scan that phyint and associated * prefixes. */ static void @@ -1963,7 +2010,7 @@ fprintdate(FILE *file) (void) fprintf(file, "%s ", buf); } -/* PRINTFLIKE1 */ +/* PRINTFLIKE2 */ void logmsg(int level, const char *fmt, ...) { diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c index 9146f8e97e..e02c12ff8c 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/ndp.c @@ -702,7 +702,7 @@ incoming_prefix_addrconf_process(struct phyint *pi, struct prefix *pr, char pbuf[INET6_ADDRSTRLEN]; uint32_t validtime, preftime; /* In seconds */ uint32_t recorded_validtime; /* In seconds */ - int plen, dadfails = 0; + int plen; struct prefix *other_pr; validtime = ntohl(po->nd_opt_pi_valid_time); @@ -825,7 +825,6 @@ incoming_prefix_addrconf_process(struct phyint *pi, struct prefix *pr, * Form a new local address if the lengths match. 
*/ if (pr->pr_flags && IFF_TEMPORARY) { -RETRY_TOKEN: if (IN6_IS_ADDR_UNSPECIFIED(&pi->pi_tmp_token)) { if (!tmptoken_create(pi)) { prefix_delete(pr); @@ -885,29 +884,6 @@ RETRY_TOKEN: validtime = preftime = 0; } if ((pr->pr_flags & IFF_TEMPORARY) && new_prefix) { - struct sockaddr_in6 sin6; - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = pr->pr_address; - if (do_dad(pi->pi_name, &sin6) != 0) { - /* DAD failed, need a new token */ - dadfails++; - logmsg(LOG_WARNING, - "incoming_prefix_addrconf_process: " - "deprecating temporary token %s\n", - inet_ntop(AF_INET6, - (void *)&pi->pi_tmp_token, abuf, - sizeof (abuf))); - tmptoken_delete(pi); - if (dadfails == MAX_DAD_FAILURES) { - logmsg(LOG_ERR, "Too many DAD " - "failures; disabling temporary " - "addresses on %s\n", pi->pi_name); - pi->pi_TmpAddrsEnabled = 0; - prefix_delete(pr); - return (_B_TRUE); - } - goto RETRY_TOKEN; - } pr->pr_CreateTime = getcurrenttime() / MILLISEC; if (debug & D_TMP) logmsg(LOG_DEBUG, diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c index b37ddade42..b77bd8cafd 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.c @@ -29,7 +29,6 @@ #include "tables.h" #include <time.h> -#include <inet/ip6.h> struct phyint *phyints = NULL; @@ -230,12 +229,9 @@ start_over: pi->pi_flags = lifr.lifr_flags; /* - * If the link local interface is not up yet or it's IFF_UP - * and the flag is set to IFF_NOLOCAL as Duplicate Address - * Detection is in progress. - * IFF_NOLOCAL is "normal" on other prefixes. + * If the link local interface is not up yet or it's IFF_UP and the + * IFF_NOLOCAL flag is set, then ignore the interface. 
*/ - if (!(pi->pi_flags & IFF_UP) || (pi->pi_flags & IFF_NOLOCAL)) { if (newsock) { (void) close(pi->pi_sock); @@ -243,7 +239,7 @@ start_over: } if (debug & D_PHYINT) { logmsg(LOG_DEBUG, "phyint_init_from_k(%s): " - "not IFF_UP\n", pi->pi_name); + "IFF_NOLOCAL or not IFF_UP\n", pi->pi_name); } return (0); } diff --git a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h index 20caf691c6..6bdfee9f80 100644 --- a/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h +++ b/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/tables.h @@ -203,6 +203,7 @@ struct prefix { uint_t pr_CreateTime; /* tmpaddr creation time */ /* in SECONDS */ + uint_t pr_attempts; /* attempts to configure */ }; /* diff --git a/usr/src/cmd/cmd-inet/usr.sbin/arp.c b/usr/src/cmd/cmd-inet/usr.sbin/arp.c index c028eaf77d..3ddc48405d 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/arp.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/arp.c @@ -1,5 +1,5 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* @@ -55,7 +55,6 @@ #include <netdb.h> #include <net/if.h> #include <net/if_arp.h> -#include <netinet/if_ether.h> #include <stdlib.h> #include <unistd.h> #include <string.h> @@ -151,14 +150,15 @@ main(int argc, char *argv[]) /* * Process a file to set standard arp entries */ -static int file(char *name) +static int +file(char *name) { /* * A line of input can be: - * <hostname> <macaddr> ["temp"] ["pub"] ["trail"] + * <hostname> <macaddr> ["temp"] ["pub"] ["trail"] ["permanent"] */ #define MAX_LINE_LEN (MAXHOSTNAMELEN + \ - sizeof (" xx:xx:xx:xx:xx:xx temp pub trail\n")) + sizeof (" xx:xx:xx:xx:xx:xx temp pub trail permanent\n")) #define MIN_ARGS 2 #define MAX_ARGS 5 @@ -214,7 +214,8 @@ static int file(char *name) /* * Set an individual arp entry */ -static int set(int argc, char *argv[]) +static int +set(int argc, char *argv[]) { struct xarpreq ar; struct hostent *hp; @@ -255,15 +256,28 @@ static int set(int argc, char *argv[]) ar.xarp_ha.sdl_family = AF_LINK; ar.xarp_flags = ATF_PERM; while (argc-- > 0) { - if (strncmp(argv[0], "temp", 4) == 0) + if (strncmp(argv[0], "temp", 4) == 0) { ar.xarp_flags &= ~ATF_PERM; - if (strncmp(argv[0], "pub", 3) == 0) + } else if (strncmp(argv[0], "pub", 3) == 0) { ar.xarp_flags |= ATF_PUBL; - if (strncmp(argv[0], "trail", 5) == 0) + } else if (strncmp(argv[0], "trail", 5) == 0) { ar.xarp_flags |= ATF_USETRAILERS; + } else if (strcmp(argv[0], "permanent") == 0) { + ar.xarp_flags |= ATF_AUTHORITY; + } else { + (void) fprintf(stderr, + "arp: unknown keyword '%s'\n", argv[0]); + return (1); + } argv++; } + if ((ar.xarp_flags & (ATF_PERM|ATF_AUTHORITY)) == ATF_AUTHORITY) { + (void) fprintf(stderr, "arp: 'temp' and 'permanent' flags are " + "not usable together.\n"); + return (1); + } + s = socket(AF_INET, SOCK_DGRAM, 0); if (s < 0) { perror("arp: socket"); @@ -277,11 +291,11 @@ static int set(int argc, char *argv[]) return (0); } - /* * Display an individual arp entry */ -static void get(char *host) +static void +get(char *host) 
{ struct xarpreq ar; struct hostent *hp; @@ -334,19 +348,22 @@ static void get(char *host) (void) printf("%s (%s) at (incomplete)", host, inet_ntoa(sin->sin_addr)); } - if (ar.xarp_flags & ATF_PERM) - (void) printf(" permanent"); + if (!(ar.xarp_flags & ATF_PERM)) + (void) printf(" temp"); if (ar.xarp_flags & ATF_PUBL) - (void) printf(" published"); + (void) printf(" pub"); if (ar.xarp_flags & ATF_USETRAILERS) - (void) printf(" trailers"); + (void) printf(" trail"); + if (ar.xarp_flags & ATF_AUTHORITY) + (void) printf(" permanent"); (void) printf("\n"); } /* * Delete an arp entry */ -static void delete(char *host) +static void +delete(char *host) { struct xarpreq ar; struct hostent *hp; @@ -385,12 +402,13 @@ static void delete(char *host) (void) printf("%s (%s) deleted\n", host, inet_ntoa(sin->sin_addr)); } -static void usage(void) +static void +usage(void) { (void) printf("Usage: arp hostname\n"); (void) printf(" arp -a [-n]\n"); (void) printf(" arp -d hostname\n"); (void) printf(" arp -s hostname ether_addr " - "[temp] [pub] [trail]\n"); + "[temp] [pub] [trail] [permanent]\n"); (void) printf(" arp -f filename\n"); } diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile index df8ddd5938..a008c55ac1 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -28,7 +27,7 @@ PROG = ifconfig ROOTFS_PROG = $(PROG) -LOCALOBJS= dupl_addr.o ifconfig.o revarp.o +LOCALOBJS= ifconfig.o revarp.o COMMONOBJS= compat.o OBJS= $(LOCALOBJS) $(COMMONOBJS) diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c deleted file mode 100644 index 3fc32c67a7..0000000000 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Perform IPv6 duplicate address detection for a given interface - * and IPv6 address. - */ -#pragma ident "%Z%%M% %I% %E% SMI" - -#include "defs.h" -#include "ifconfig.h" -#include <netinet/icmp6.h> -#include <netinet/in_systm.h> /* For IP_MAXPACKET */ -#include <netinet/ip.h> /* For IP_MAXPACKET */ - -/* XXX extract DupAddrDetectTransmits from LNKINFO? */ -int DupAddrDetectTransmits = 1; /* XXX Make configurable? 
*/ -int RetransTimer = ND_RETRANS_TIMER; /* Milliseconds. */ - -#define IPV6_MAX_HOPS 255 - -struct in6_addr all_nodes_mcast = { { 0xff, 0x2, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x1 } }; - -static void in6_solmulti_addr(struct in6_addr *addr, - struct in6_addr *multi); -static int run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc, int ifindex); -static int send_dad_probe(int s, char *phyname, - struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc); -static int recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, - int ifindex); -static boolean_t verify_opts(struct nd_opt_hdr *opt, int optlen, - struct sockaddr_in6 *from, boolean_t reject_dad_slla); -static void dad_failed(char *phyname, struct sockaddr_in6 *testaddr, - int code); -static void print_na(char *str, char *phyname, - struct nd_neighbor_advert *na, int len, - struct sockaddr_in6 *addr); -static void print_ns(char *str, char *phyname, - struct nd_neighbor_solicit *ns, int len, - struct sockaddr_in6 *addr); -static void print_opt(struct nd_opt_hdr *opt, int len); -static char *fmt_lla(char *llabuf, int bufsize, char *lla, int llalen); - - -/* - * Performing duplicate address detection. - * - * Returns 0 if the address is ok, 1 if there is a duplicate, - * and -1 (with errno set) if there is some internal error. - * As a side effect this does a syslog and a stderr printf - * identifying any duplicate. - * Note that the state of the interface name is unchanged. - */ -int -do_dad(char *ifname, struct sockaddr_in6 *testaddr) -{ - int s; - struct lifreq lifr; - char *cp; - char phyname[LIFNAMSIZ]; - int ifindex; - int64_t saved_flags; - int ret = -1; /* Assume error by default */ - struct sockaddr_in6 solicited_mc; - - /* - * Truncate name at ':'. Needed for SIOCGLIFLNKINFO - * Keep untruncated ifname for other use. 
- */ - (void) strncpy(phyname, ifname, sizeof (phyname)); - cp = strchr(phyname, ':'); - if (cp != NULL) - *cp = '\0'; - - /* - * Get a socket to use to send and receive neighbor solicitations - * for DAD. Also used for ioctls below. - */ - if ((s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0) { - Perror0("socket"); - return (-1); - } - - /* - * Determine interface index (for IPV6_BOUND_PIF) and - * save the flag values so they can be restored on return. - */ - (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); - if (ioctl(s, SIOCGLIFINDEX, (caddr_t)&lifr) < 0) { - Perror0("do_dad: SIOCGLIFINDEX"); - goto done; - } - ifindex = lifr.lifr_index; - if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { - Perror0("do_dad: SIOCGLIFFLAGS"); - goto done; - } - saved_flags = lifr.lifr_flags; - if (!(saved_flags & IFF_MULTICAST)) { - /* Not possible to do DAD. Pretend it is ok */ - ret = 0; - goto done; - } - (void) strncpy(lifr.lifr_name, phyname, sizeof (lifr.lifr_name)); - if (ioctl(s, SIOCGLIFLNKINFO, (caddr_t)&lifr) < 0) { - Perror0("do_dad: SIOCGLIFLNKINFO"); - goto done; - } - if (lifr.lifr_ifinfo.lir_reachretrans != 0) { - RetransTimer = lifr.lifr_ifinfo.lir_reachretrans; - } - - /* - * Set NOLOCAL and UP flags. - * This prevents the use of the interface except when the user binds - * to unspecified IPv6 address, and sends to a link local multicast - * address. - */ - lifr.lifr_flags = saved_flags | IFF_NOLOCAL | IFF_UP; - - (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); - if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { - Perror0("do_dad: SIOCSLIFFLAGS"); - goto restore; - } - - /* - * IPV6_BOUND_PIF prevents load spreading to happen. If we - * just do IPV6_BOUND_IF, the packet can go out on a different - * interface other than "ifindex", if interface is part of - * a group. 
In that case, we will get back the copy of NS that - * we sent and think it is a duplicate(Switch loops back the - * copy on all interfaces other than the one we sent the packet on). - */ - if (setsockopt(s, IPPROTO_IPV6, IPV6_BOUND_PIF, (char *)&ifindex, - sizeof (ifindex)) < 0) { - Perror0("IPV6_BOUND_PIF"); - goto restore; - } - - { - int hops = IPV6_MAX_HOPS; - int on = 1; - int off = 0; - - if (debug > 1) - off = 1; /* Force duplicate */ - - if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, - (char *)&hops, sizeof (hops)) < 0) { - Perror0("IPV6_MULTICAST_HOPS"); - goto restore; - } - if (setsockopt(s, IPPROTO_IPV6, IPV6_UNSPEC_SRC, - (char *)&on, sizeof (on)) < 0) { - Perror0("IPV6_UNSPEC_SRC"); - goto restore; - } - - if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, - (char *)&off, sizeof (off)) < 0) { - Perror0("IPV6_MULTICAST_LOOP"); - goto restore; - } - - /* Enable receipt of ancillary data */ - if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, - (char *)&on, sizeof (on)) < 0) { - Perror0("IPV6_RECVHOPLIMIT"); - goto restore; - } - if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, - (char *)&on, sizeof (on)) < 0) { - Perror0("IPV6_RECVPKTINFO"); - goto restore; - } - if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVRTHDR, - (char *)&on, sizeof (on)) < 0) { - Perror0("IPV6_RECVRTHDR"); - goto restore; - } - } - - /* - * Extract the address and determine the solicited node multicast - * address to use. - */ - (void) memset(&solicited_mc, 0, sizeof (solicited_mc)); - solicited_mc.sin6_family = AF_INET6; - in6_solmulti_addr(&testaddr->sin6_addr, &solicited_mc.sin6_addr); - - /* Join the solicited node multicast address and all-nodes. 
*/ - { - struct ipv6_mreq v6mcastr; - - v6mcastr.ipv6mr_multiaddr = solicited_mc.sin6_addr; - v6mcastr.ipv6mr_interface = ifindex; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP, - (char *)&v6mcastr, sizeof (v6mcastr)) < 0) { - Perror0("IPV6_JOIN_GROUP"); - goto restore; - } - - v6mcastr.ipv6mr_multiaddr = all_nodes_mcast; - v6mcastr.ipv6mr_interface = ifindex; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP, - (char *)&v6mcastr, sizeof (v6mcastr)) < 0) { - Perror0("IPV6_JOIN_GROUP"); - goto restore; - } - } - - ret = run_dad(s, phyname, testaddr, &solicited_mc, ifindex); - -restore: - /* Restore flags */ - (void) strncpy(lifr.lifr_name, ifname, sizeof (lifr.lifr_name)); - lifr.lifr_flags = saved_flags; - if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { - Perror0("do_dad: SIOCSLIFFLAGS"); - ret = -1; - goto done; - } -done: - (void) close(s); - return (ret); -} - - -/* - * Determine the solicited node multicast address for a given address. - */ -static void -in6_solmulti_addr(struct in6_addr *addr, struct in6_addr *multi) -{ - struct in6_addr solicited_prefix = { - { 0xff, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x1, 0xFF, 0x0, 0x0, 0x0 } }; - int i; - - *multi = solicited_prefix; - for (i = 13; i < 16; i++) - multi->s6_addr[i] = addr->s6_addr[i]; -} - -static int -run_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc, int ifindex) -{ - int time_left; /* In milliseconds */ - struct timeval starttime; - struct timeval curtime; - struct pollfd fds; - int i; - int ret; - - if (debug) - (void) printf("run_dad(%s)\n", phyname); - - /* - * Perform duplicate address detection sequence - * 1. Send a neighbor solicitation with an unspecified source - * address to the solicited node MC address with the testaddr - * being the target. - * 2. Wait for up to RetransTimer milliseconds for either a - * neighbor advertisement (sent to all-nodes) or a DAD neighbor - * solicitation for the testaddr. - * 3. 
Perform step 1 and 2 DupAddrDetectTransmits times. - */ - - /* XXX perform a random delay: 0 - MAX_RTR_SOLICITATION_DELAY */ - /* XXX use poll+recv logic for the random delay */ - - for (i = 0; i < DupAddrDetectTransmits; i++) { - if (send_dad_probe(s, phyname, testaddr, solicited_mc) < 0) - return (-1); - - /* - * Track time to make sure total wait is RetransTimer - * even though random packet will awake poll. - */ - (void) gettimeofday(&starttime, NULL); - /* CONSTCOND */ - while (1) { - (void) gettimeofday(&curtime, NULL); - time_left = RetransTimer - - (curtime.tv_sec - starttime.tv_sec) * 1000 - - (curtime.tv_usec - starttime.tv_usec) / 1000; - - if (debug) { - (void) printf("run_dad: time_left %d ms\n", - time_left); - } - if (time_left <= 0) { - if (debug) - (void) printf("run_dad: timeout\n"); - break; - } - fds.fd = s; - fds.events = POLLIN; - - switch (poll(&fds, 1, time_left)) { - case -1: - Perror0("poll"); - return (-1); - case 0: - /* Need loop will break */ - break; - default: - /* Huh? */ - (void) fprintf(stderr, "poll returns > 1!\n"); - return (-1); - case 1: - if (fds.revents & POLLIN) { - ret = recv_dad(s, phyname, testaddr, - ifindex); - if (ret < 0) - return (-1); - if (ret > 0) { - dad_failed(phyname, testaddr, - ret); - return (1); - } - } - break; - } - } - } - return (0); -} - -/* - * Send a DAD NS packet. Assumes an IPV6_UNSPEC_SRC and an IPV6_BOUND_IF - * have been done by the caller. 
- */ -static int -send_dad_probe(int s, char *phyname, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc) -{ - static uint64_t outpack[(IP_MAXPACKET + 1)/8]; - struct nd_neighbor_solicit *ns = (struct nd_neighbor_solicit *)outpack; - int packetlen = 0; - int cc; - - ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; - ns->nd_ns_code = 0; - ns->nd_ns_cksum = 0; - ns->nd_ns_reserved = 0; - ns->nd_ns_target = testaddr->sin6_addr; - packetlen += sizeof (struct nd_neighbor_solicit); - cc = sendto(s, (char *)outpack, packetlen, 0, - (struct sockaddr *)solicited_mc, sizeof (*solicited_mc)); - if (cc < 0 || cc != packetlen) { - char abuf[INET6_ADDRSTRLEN]; - - if (cc < 0) { - Perror0("DAD, sendto"); - return (-1); - } - (void) inet_ntop(solicited_mc->sin6_family, - (void *)&solicited_mc->sin6_addr, abuf, sizeof (abuf)); - - (void) fprintf(stderr, "wrote %s %d chars, ret=%d\n", - abuf, packetlen, cc); - return (-1); - } - if (debug) - print_ns("Sent NS", phyname, ns, packetlen, solicited_mc); - - return (0); -} - -/* - * Return a pointer to the specified option buffer. - * If not found return NULL. - */ -static void * -find_ancillary(struct msghdr *msg, int cmsg_type) -{ - struct cmsghdr *cmsg; - - for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - if (cmsg->cmsg_level == IPPROTO_IPV6 && - cmsg->cmsg_type == cmsg_type) { - return (CMSG_DATA(cmsg)); - } - } - return (NULL); -} - -/* - * Receive an ICMP packet. If the packet signals a duplicate address for - * testaddr then return a positive non-zero number. Otherwise return zero. - * Internal errors cause a return of -1. 
- */ -static int -recv_dad(int s, char *phyname, struct sockaddr_in6 *testaddr, int ifindex) -{ - struct sockaddr_in6 from; - struct icmp6_hdr *icmp; - struct nd_neighbor_solicit *ns; - struct nd_neighbor_advert *na; - static uint64_t in_packet[(IP_MAXPACKET + 1)/8]; - static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8]; - int len; - char abuf[INET6_ADDRSTRLEN]; - struct msghdr msg; - struct iovec iov; - uchar_t *opt; - uint_t hoplimit; - struct in6_addr dst; - int rcv_ifindex; - - iov.iov_base = (char *)in_packet; - iov.iov_len = sizeof (in_packet); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_name = (struct sockaddr *)&from; - msg.msg_namelen = sizeof (from); - msg.msg_control = ancillary_data; - msg.msg_controllen = sizeof (ancillary_data); - - if ((len = recvmsg(s, &msg, 0)) < 0) { - Perror0("DAD recvmsg"); - return (-1); - } - if (len == 0) - return (0); - - if (debug) { - (void) inet_ntop(AF_INET6, (void *)&from.sin6_addr, - abuf, sizeof (abuf)); - } - /* Ignore packets > 64k or control buffers that don't fit */ - if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) { - if (debug) { - (void) fprintf(stderr, - "Truncated message: msg_flags 0x%x from %s\n", - msg.msg_flags, abuf); - } - return (0); - } - - icmp = (struct icmp6_hdr *)in_packet; - - if (len < ICMP6_MINLEN) { - if (debug) { - (void) fprintf(stderr, - "Too short ICMP packet: %d bytes from %s\n", - len, abuf); - } - return (0); - } - - opt = find_ancillary(&msg, IPV6_HOPLIMIT); - if (opt == NULL) { - /* Unknown hoplimit - must drop */ - if (debug) { - (void) fprintf(stderr, - "Unknown hop limit from %s\n", abuf); - } - return (0); - } - hoplimit = *(uint_t *)opt; - opt = find_ancillary(&msg, IPV6_PKTINFO); - if (opt == NULL) { - /* Unknown destination address - must drop */ - if (debug) { - (void) fprintf(stderr, - "Unknown destination from %s\n", abuf); - } - return (0); - } - dst = ((struct in6_pktinfo *)opt)->ipi6_addr; - rcv_ifindex = ((struct in6_pktinfo *)opt)->ipi6_ifindex; - opt = 
find_ancillary(&msg, IPV6_RTHDR); - if (opt != NULL) { - /* Can't allow routing headers in ND messages */ - if (debug) { - (void) fprintf(stderr, - "ND message with routing header from %s\n", abuf); - } - return (0); - } - - switch (icmp->icmp6_type) { - case ND_NEIGHBOR_SOLICIT: - /* - * Assumes that the kernel has verified the AH (if present) - * and the ICMP checksum. - */ - if (hoplimit != IPV6_MAX_HOPS) { - if (debug) { - (void) fprintf(stderr, - "NS hop limit: %d from %s\n", - hoplimit, abuf); - } - return (0); - } - - if (icmp->icmp6_code != 0) { - if (debug) { - (void) fprintf(stderr, "NS code: %d from %s\n", - icmp->icmp6_code, abuf); - } - return (0); - } - - if (len < sizeof (struct nd_neighbor_solicit)) { - if (debug) { - (void) fprintf(stderr, - "NS too short: %d bytes from %s\n", - len, abuf); - } - return (0); - } - ns = (struct nd_neighbor_solicit *)icmp; - if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target)) { - if (debug) { - char abuf2[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&ns->nd_ns_target, - abuf2, sizeof (abuf2)); - (void) fprintf(stderr, - "NS with multicast target: %s from %s\n", - abuf2, abuf); - } - return (0); - } - - if (len > sizeof (struct nd_neighbor_solicit)) { - /* - * For DAD type neighbor solicitation message, - * we need to further verify if SLLA option is present - * in received options, - * so we pass TRUE to reject_dad_slla argument. 
- */ - if (!verify_opts((struct nd_opt_hdr *)&ns[1], - len - sizeof (struct nd_neighbor_solicit), - &from, _B_TRUE)) - return (0); - } - - if (debug) - print_ns("Received valid NS", phyname, ns, len, &from); - if (!IN6_IS_ADDR_UNSPECIFIED(&from.sin6_addr) || - !IN6_IS_ADDR_MC_SOLICITEDNODE(&dst)) { - /* Sender is doing address resolution */ - return (0); - } - if (rcv_ifindex != ifindex) { - if (debug) { - (void) fprintf(stderr, - "Received Neighbor solicitation on" - " ifindex %d, expecting on %d\n", - rcv_ifindex, ifindex); - } - return (0); - } - if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr, - &ns->nd_ns_target)) { - if (debug) { - (void) fprintf(stderr, - "NS - duplicate from %s\n", - abuf); - } - return (1); - } - return (0); - - case ND_NEIGHBOR_ADVERT: - /* - * Assumes that the kernel has verified the AH (if present) - * and the ICMP checksum. - */ - if (hoplimit != IPV6_MAX_HOPS) { - if (debug) { - (void) fprintf(stderr, - "NA hop limit: %d from %s\n", - hoplimit, abuf); - } - return (0); - } - - if (icmp->icmp6_code != 0) { - if (debug) { - (void) fprintf(stderr, "NA code: %d from %s\n", - icmp->icmp6_code, abuf); - } - return (0); - } - - if (len < sizeof (struct nd_neighbor_advert)) { - if (debug) { - (void) fprintf(stderr, - "NA too short: %d bytes from %s\n", - len, abuf); - } - return (0); - } - na = (struct nd_neighbor_advert *)icmp; - if (IN6_IS_ADDR_MULTICAST(&na->nd_na_target)) { - if (debug) { - char abuf2[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&na->nd_na_target, - abuf2, sizeof (abuf2)); - (void) fprintf(stderr, - "NA with multicast target: %s from %s\n", - abuf2, abuf); - } - return (0); - } - - if (IN6_IS_ADDR_MULTICAST(&dst) && - (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { - if (debug) { - char abuf2[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&na->nd_na_target, - abuf2, sizeof (abuf2)); - (void) fprintf(stderr, - "NA solicited w/ mc target: %s from %s\n", - abuf2, abuf); - } - return (0); - } - - 
if (len > sizeof (struct nd_neighbor_advert)) { - /* - * Since this is a Neighbor advertisement - * we unset the reject_dad_slla flag, thus - * there is no need to verify the SLLA options. - */ - if (!verify_opts((struct nd_opt_hdr *)&na[1], - len - sizeof (struct nd_neighbor_advert), - &from, _B_FALSE)) - return (0); - } - - if (debug) - print_na("Received valid NA", phyname, na, len, &from); - - if (IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr, - &na->nd_na_target)) { - if (debug) { - (void) fprintf(stderr, - "NA - duplicate from %s\n", - abuf); - } - return (1); - } - return (0); - default: - return (0); - } -} - -/* - * Verify that all options have a non-zero length and that - * the options fit within the total length of the packet (optlen). - * If reject_dad_slla is set, then we also verify that no SLLA option is - * present as mandated by section 7.1.1 of RFC 2461. - */ -static boolean_t -verify_opts(struct nd_opt_hdr *opt, int optlen, struct sockaddr_in6 *from, - boolean_t reject_dad_slla) -{ - while (optlen > 0) { - if (opt->nd_opt_len == 0) { - if (debug) { - char abuf[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&from->sin6_addr, - abuf, sizeof (abuf)); - - (void) fprintf(stderr, - "Zero length option type 0x%x from %s\n", - opt->nd_opt_type, abuf); - } - return (_B_FALSE); - } - optlen -= 8 * opt->nd_opt_len; - if (optlen < 0) { - if (debug) { - char abuf[INET6_ADDRSTRLEN]; - - (void) inet_ntop(AF_INET6, - (void *)&from->sin6_addr, - abuf, sizeof (abuf)); - - (void) fprintf(stderr, - "Too large option: type 0x%x len %u " - "from %s\n", - opt->nd_opt_type, opt->nd_opt_len, - abuf); - } - return (_B_FALSE); - } - if (reject_dad_slla && - opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) { - return (_B_FALSE); - } - opt = (struct nd_opt_hdr *)((char *)opt + - 8 * opt->nd_opt_len); - } - return (_B_TRUE); -} - - -static void -dad_failed(char *phyname, struct sockaddr_in6 *testaddr, int code) -{ - char abuf[INET6_ADDRSTRLEN]; - - (void) 
inet_ntop(testaddr->sin6_family, - (void *)&testaddr->sin6_addr, - abuf, sizeof (abuf)); - (void) fprintf(stderr, - "ifconfig: " - "Duplicate address detected on link %s for address %s. Code %d\n", - phyname, abuf, code); - - openlog("ifconfig", LOG_CONS, LOG_DAEMON); - syslog(LOG_CRIT, - "Duplicate address detected on link %s for address %s. Code %d\n", - phyname, abuf, code); - closelog(); -} - -/* Printing functions */ - -static void -print_ns(char *str, char *phyname, - struct nd_neighbor_solicit *ns, int len, struct sockaddr_in6 *addr) -{ - struct nd_opt_hdr *opt; - char abuf[INET6_ADDRSTRLEN]; - - (void) printf("%s %s (%d bytes) on %s\n", str, - inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr, - abuf, sizeof (abuf)), - len, phyname); - (void) printf("\ttarget %s\n", - inet_ntop(addr->sin6_family, (void *)&ns->nd_ns_target, - abuf, sizeof (abuf))); - len -= sizeof (*ns); - opt = (struct nd_opt_hdr *)&ns[1]; - print_opt(opt, len); -} - -static void -print_na(char *str, char *phyname, - struct nd_neighbor_advert *na, int len, struct sockaddr_in6 *addr) -{ - struct nd_opt_hdr *opt; - char abuf[INET6_ADDRSTRLEN]; - - (void) printf("%s %s (%d bytes) on %s\n", str, - inet_ntop(addr->sin6_family, (void *)&addr->sin6_addr, - abuf, sizeof (abuf)), - len, phyname); - (void) printf("\ttarget %s\n", - inet_ntop(addr->sin6_family, (void *)&na->nd_na_target, - abuf, sizeof (abuf))); - (void) printf("\tRouter: %s\n", - (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) ? - "Set" : "Not set"); - (void) printf("\tSolicited: %s\n", - (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED) ? - "Set" : "Not set"); - (void) printf("\tOverride: %s\n", - (na->nd_na_flags_reserved & ND_NA_FLAG_OVERRIDE) ? 
- "Set" : "Not set"); - - len -= sizeof (*na); - opt = (struct nd_opt_hdr *)&na[1]; - print_opt(opt, len); -} - -static void -print_opt(struct nd_opt_hdr *opt, int len) -{ - struct nd_opt_prefix_info *po; - struct nd_opt_mtu *mo; - struct nd_opt_lla *lo; - int optlen; - char abuf[INET6_ADDRSTRLEN]; - char llabuf[BUFSIZ]; - - while (len >= sizeof (struct nd_opt_hdr)) { - optlen = opt->nd_opt_len * 8; - if (optlen == 0) { - if (debug) - (void) printf("Zero length option!\n"); - break; - } - switch (opt->nd_opt_type) { - case ND_OPT_PREFIX_INFORMATION: - po = (struct nd_opt_prefix_info *)opt; - if (optlen != sizeof (*po) || - optlen > len) - break; - - (void) printf("\tOn link flag:%s\n", - (po->nd_opt_pi_flags_reserved & - ND_OPT_PI_FLAG_ONLINK) ? - "Set" : "Not set"); - (void) printf("\tAuto addrconf flag:%s\n", - (po->nd_opt_pi_flags_reserved & - ND_OPT_PI_FLAG_AUTO) ? - "Set" : "Not set"); - (void) printf("\tValid time: %u\n", - ntohl(po->nd_opt_pi_valid_time)); - (void) printf("\tPreferred time: %u\n", - ntohl(po->nd_opt_pi_preferred_time)); - (void) printf("\tPrefix: %s/%u\n", - inet_ntop(AF_INET6, (void *)&po->nd_opt_pi_prefix, - abuf, sizeof (abuf)), - po->nd_opt_pi_prefix_len); - break; - case ND_OPT_MTU: - mo = (struct nd_opt_mtu *)opt; - if (optlen != sizeof (*mo) || - optlen > len) - break; - (void) printf("\tMTU: %d\n", - ntohl(mo->nd_opt_mtu_mtu)); - break; - case ND_OPT_SOURCE_LINKADDR: - lo = (struct nd_opt_lla *)opt; - if (optlen < 8 || - optlen > len) - break; - (void) fmt_lla(llabuf, sizeof (llabuf), - (char *)lo->nd_opt_lla_hdw_addr, optlen - 2); - (void) printf("\tSource LLA: len %d <%s>\n", - optlen-2, llabuf); - break; - case ND_OPT_TARGET_LINKADDR: - lo = (struct nd_opt_lla *)opt; - if (optlen < 8|| - optlen > len) - break; - (void) fmt_lla(llabuf, sizeof (llabuf), - (char *)lo->nd_opt_lla_hdw_addr, optlen - 2); - (void) printf("\tTarget LLA: len %d <%s>\n", - optlen-2, llabuf); - break; - case ND_OPT_REDIRECTED_HEADER: - (void) 
printf("\tRedirected header option!\n"); - break; - default: - (void) printf("Unkown option %d (0x%x)\n", - opt->nd_opt_type, opt->nd_opt_type); - break; - } - opt = (struct nd_opt_hdr *)((char *)opt + optlen); - len -= optlen; - } -} - -static char * -fmt_lla(char *llabuf, int bufsize, char *lla, int llalen) -{ - int i; - char *cp = llabuf; - - for (i = 0; i < llalen; i++) { - if (i == llalen - 1) - (void) snprintf(cp, bufsize, "%02x", lla[i] & 0xFF); - else - (void) snprintf(cp, bufsize, "%02x:", lla[i] & 0xFF); - bufsize -= strlen(cp); - cp += strlen(cp); - } - return (llabuf); -} diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c index 69d25bbdc3..f71d1439f1 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c @@ -70,7 +70,8 @@ static if_flags_t if_flags_tbl[] = { { IFF_PREFERRED, "PREFERRED" }, { IFF_TEMPORARY, "TEMPORARY" }, { IFF_FIXEDMTU, "FIXEDMTU" }, - { IFF_VIRTUAL, "VIRTUAL"} + { IFF_VIRTUAL, "VIRTUAL" }, + { IFF_DUPLICATE, "DUPLICATE" } }; static struct lifreq lifr; @@ -172,7 +173,6 @@ static void foreachinterface(void (*func)(), int argc, char *argv[], int af, int64_t onflags, int64_t offflags, int64_t lifc_flags); static void ifconfig(int argc, char *argv[], int af, struct lifreq *lifrp); -static int ifdad(char *ifname, struct sockaddr_in6 *laddr); static boolean_t in_getmask(struct sockaddr_in *saddr, boolean_t addr_set); static int in_getprefixlen(char *addr, boolean_t slash, int plen); @@ -1006,25 +1006,6 @@ setifaddr(char *addr, int64_t param) sav_netmask = lifr.lifr_addr; /* - * Catch set of address for AF_INET6 to perform - * duplicate address detection. Check that the interface is - * up. 
- */ - if (afp->af_af == AF_INET6) { - if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { - Perror0_exit("ifsetaddr: SIOCGLIFFLAGS"); - } - if (lifr.lifr_flags & IFF_UP) { - if (debug) - (void) printf( - "setifaddr: Calling ifdad flags %llx\n", - lifr.lifr_flags); - if (ifdad(name, (struct sockaddr_in6 *)&laddr) == -1) - exit(3); - } - } - - /* * If setting the address and not the mask, clear any existing mask * and the kernel will then assign the default (netmask has been set * to 0 in this case). If setting both (either by using a prefix or @@ -1486,25 +1467,25 @@ setifflags(char *val, int64_t value) } } - /* - * Catch "up" transition for AF_INET6 to perform duplicate address - * detection. ifdad checks if an address has been set. - */ - if (afp->af_af == AF_INET6 && !(lifr.lifr_flags & IFF_UP) && - value == IFF_UP) { - if (debug) - (void) printf( - "setifaddr:Calling ifdad flags %llx value 0x%llx\n", - lifr.lifr_flags, value); - if (ifdad(name, NULL) == -1) - exit(1); - } - if (value < 0) { value = -value; lifr.lifr_flags &= ~value; - } else + if ((value & IFF_UP) && (lifr.lifr_flags & IFF_DUPLICATE)) { + /* + * If the user is trying to mark an interface with a + * duplicate address as "down," then fetch the address + * and set it. This will cause IP to clear the + * IFF_DUPLICATE flag and stop the automatic recovery + * timer. + */ + value = lifr.lifr_flags; + if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) >= 0) + (void) ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr); + lifr.lifr_flags = value; + } + } else { lifr.lifr_flags |= value; + } (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { Perror0_exit("setifflags: SIOCSLIFFLAGS"); @@ -1958,54 +1939,6 @@ removeif(char *str, int64_t param) } /* - * If laddr is non-NULL it is used - otherwise we use the address on - * the interface. 
- */ -/* ARGSUSED */ -static int -ifdad(char *ifname, struct sockaddr_in6 *laddr) -{ - struct sockaddr_in6 testaddr; - struct lifreq lifr2; /* Avoid overriting lifr */ - - if (debug) - (void) printf("ifdad(%s)\n", ifname); - - assert(afp->af_af == AF_INET6); - - /* - * Check the address assigned to the interface. - * Skip the check if IFF_NOLOCAL, IFF_NONUD, IFF_ANYCAST, or - * IFF_LOOPBACK. - * Note that IFF_NONUD turns of both NUD and DAD. - */ - (void) strncpy(lifr2.lifr_name, ifname, - sizeof (lifr2.lifr_name)); - if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr2) < 0) { - Perror0_exit("ifdad: SIOCGLIFFLAGS"); - } - if (lifr2.lifr_flags & (IFF_NOLOCAL|IFF_LOOPBACK|IFF_NONUD|IFF_ANYCAST)) - return (0); - - if (laddr != NULL) { - testaddr = *laddr; - } else { - if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr2) < 0) { - Perror0_exit("ifdad: SIOCGLIFADDR"); - } - testaddr = *(struct sockaddr_in6 *)&lifr2.lifr_addr; - } - - if (IN6_IS_ADDR_UNSPECIFIED(&testaddr.sin6_addr)) - return (0); - - if (do_dad(name, &testaddr) != 0) - return (-1); - else - return (0); -} - -/* * Set the address token for IPv6. */ /* ARGSUSED */ @@ -4134,6 +4067,7 @@ Perror2_exit(char *cmd, char *str) static void in_getaddr(char *s, struct sockaddr *saddr, int *plenp) { + /* LINTED: alignment */ struct sockaddr_in *sin = (struct sockaddr_in *)saddr; struct hostent *hp; struct netent *np; @@ -4202,6 +4136,7 @@ in_getaddr(char *s, struct sockaddr *saddr, int *plenp) static void in6_getaddr(char *s, struct sockaddr *saddr, int *plenp) { + /* LINTED: alignment */ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)saddr; struct hostent *hp; char str[BUFSIZ]; diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h index 9b96dde475..be5990ce13 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h +++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.h @@ -1,5 +1,5 @@ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -27,8 +27,10 @@ extern "C" { #define MAX_MODS 9 /* max modules that can be pushed on intr */ +/* No suitable header file defines this, though it's in libsocket */ +extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); + extern int debug; -extern uid_t euid; extern void Perror0(char *); extern void Perror0_exit(char *); @@ -36,13 +38,10 @@ extern void Perror2(char *, char *); extern void Perror2_exit(char *, char *); extern int doifrevarp(char *, struct sockaddr_in *); -extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); extern int dlpi_set_address(char *, uchar_t *, int); extern void dlpi_print_address(char *); -extern int do_dad(char *, struct sockaddr_in6 *); - #ifdef __cplusplus } #endif diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common index e91e250f6b..93bd2dc6c5 100644 --- a/usr/src/cmd/mdb/Makefile.common +++ b/usr/src/cmd/mdb/Makefile.common @@ -50,6 +50,7 @@ COMMON_MODULES_PROC_32BIT = \ # MDB modules used for debugging kernels. 
# COMMON_MODULES_KVM = \ + arp \ audiosup \ cpc \ crypto \ diff --git a/usr/src/cmd/mdb/Makefile.kmdb b/usr/src/cmd/mdb/Makefile.kmdb index 7e086edefb..4b835e1047 100644 --- a/usr/src/cmd/mdb/Makefile.kmdb +++ b/usr/src/cmd/mdb/Makefile.kmdb @@ -117,6 +117,8 @@ $(VERSTGTS) := CPPFLAGS += -DKMDB_VERSION='$(KMDB_VERSION)' $(KCTLTGTS) := CPPFLAGS += -D_KERNEL $(KCTLTGTS) := ASFLAGS += -D_KERNEL +ffs.o ffs.ln := CPPFLAGS += -Dffs=mdb_ffs + INS.dir.root.sys= $(INS) -s -d -m $(DIRMODE) $@ $(CH)INS.dir.root.sys= $(INS) -s -d -m $(DIRMODE) -u root -g sys $@ diff --git a/usr/src/cmd/mdb/Makefile.kmdb.files b/usr/src/cmd/mdb/Makefile.kmdb.files index 4f78efd06c..cd8ff1a89f 100644 --- a/usr/src/cmd/mdb/Makefile.kmdb.files +++ b/usr/src/cmd/mdb/Makefile.kmdb.files @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,13 +19,14 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #ident "%Z%%M% %I% %E% SMI" # KMDBSRCS += \ + ffs.c \ kaif_start.c \ mdb.c \ mdb_addrvec.c \ diff --git a/usr/src/cmd/mdb/Makefile.kmdb.targ b/usr/src/cmd/mdb/Makefile.kmdb.targ index b5181517af..ba95f41bdc 100644 --- a/usr/src/cmd/mdb/Makefile.kmdb.targ +++ b/usr/src/cmd/mdb/Makefile.kmdb.targ @@ -134,6 +134,10 @@ kmdb_modlinktest.c: $(MAPFILE_INTERMEDIATE) $(COMPILE.c) $< $(CTFCONVERT_O) +%.o: $(SRC)/common/util/%.c + $(COMPILE.c) $< + $(CTFCONVERT_O) + # # Lint # @@ -176,3 +180,6 @@ kmdb_modlinktest.c: $(MAPFILE_INTERMEDIATE) %.ln: $(SRC)/common/net/util/%.c $(LINT.c) -c $< + +%.ln: $(SRC)/common/util/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/Makefile.mdb b/usr/src/cmd/mdb/Makefile.mdb index 6757821ff2..81cb2dd9b5 100644 --- a/usr/src/cmd/mdb/Makefile.mdb +++ b/usr/src/cmd/mdb/Makefile.mdb @@ -28,7 +28,9 @@ .KEEP_STATE: .SUFFIXES: -SRCS += mdb.c \ +SRCS += \ + ffs.c \ + mdb.c \ mdb_addrvec.c \ mdb_argvec.c \ mdb_callb.c \ @@ -130,6 +132,8 @@ mdb_conf.o := CPPFLAGS += -DMDB_VERSION='$(MDB_VERSION)' inet_ntop.o := CPPFLAGS += -Dsnprintf=mdb_snprintf +ffs.o ffs.ln := CPPFLAGS += -Dffs=mdb_ffs + %.o: ../../../common/mdb/%.c $(COMPILE.c) $< $(CTFCONVERT_O) @@ -146,6 +150,10 @@ inet_ntop.o := CPPFLAGS += -Dsnprintf=mdb_snprintf $(COMPILE.c) $< $(CTFCONVERT_O) +%.o: $(SRC)/common/util/%.c + $(COMPILE.c) $< + $(CTFCONVERT_O) + clean.lint: $(RM) $(LINTFILES) @@ -168,6 +176,9 @@ clobber: clean clean.lint %.ln: $(SRC)/common/net/util/%.c $(LINT.c) -c $< +%.ln: $(SRC)/common/util/%.c + $(LINT.c) -c $< + lint: $(LINTFILES) $(LINT) $(LINTFLAGS) $(LINTFILES) diff --git a/usr/src/cmd/mdb/common/mdb/mdb_ks.h b/usr/src/cmd/mdb/common/mdb/mdb_ks.h index f543a0cee3..bb3038dcd8 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_ks.h +++ b/usr/src/cmd/mdb/common/mdb/mdb_ks.h @@ -129,6 +129,14 @@ extern uintptr_t mdb_qwnext(const queue_t *); extern uintptr_t mdb_qrnext_default(const queue_t *); extern uintptr_t mdb_qwnext_default(const queue_t *); +extern int mdb_mblk_count(const 
mblk_t *); + +/* DLPI primitive to string; returns NULL for unknown primitives */ +extern const char *mdb_dlpi_prim(int); + +/* Generic function for working with MAC (network layer 2) addresses. */ +extern void mdb_mac_addr(const uint8_t *, size_t, char *, size_t); + /* * Target-specific interfaces * diff --git a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h index c6573f2afb..470196de93 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h +++ b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h @@ -235,6 +235,8 @@ extern void mdb_printf(const char *, ...); extern void mdb_warn(const char *, ...); extern void mdb_flush(void); +extern int mdb_ffs(uintmax_t); + extern void mdb_nhconvert(void *, const void *, size_t); #define MDB_DUMP_RELATIVE 0x0001 /* Start numbering at 0 */ diff --git a/usr/src/cmd/mdb/common/modules/arp/arp.c b/usr/src/cmd/mdb/common/modules/arp/arp.c new file mode 100644 index 0000000000..06bd8887e0 --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/arp/arp.c @@ -0,0 +1,700 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/dlpi.h>
+#include <inet/led.h>
+#include <inet/common.h>
+#include <inet/mi.h>
+#include <inet/arp.h>
+#include <inet/arp_impl.h>
+#include <inet/ip.h>
+#include <netinet/arp.h>
+
+#include <mdb/mdb_modapi.h>
+#include <mdb/mdb_ks.h>
+
+typedef struct {
+	uint32_t act_cmd;	/* AR_* command code; 0 ends the table */
+	char *act_name;		/* command name shown to the user */
+	char *act_type;		/* type name handed to ::print */
+} arp_cmd_tbl;
+
+/*
+ * Table of ARP commands and structure types used for messages between ARP and
+ * IP.
+ *
+ * The final entry (act_cmd == 0) is both the terminator for the lookup loop
+ * in arpcmd_cmd() and the fallback description used when no command matches.
+ */
+static const arp_cmd_tbl act_list[] = {
+	{ AR_ENTRY_ADD, "AR_ENTRY_ADD", "arp`area_t" },
+	{ AR_ENTRY_DELETE, "AR_ENTRY_DELETE", "arp`ared_t" },
+	{ AR_ENTRY_QUERY, "AR_ENTRY_QUERY", "arp`areq_t" },
+	{ AR_ENTRY_SQUERY, "AR_ENTRY_SQUERY", "arp`area_t" },
+	{ AR_MAPPING_ADD, "AR_MAPPING_ADD", "arp`arma_t" },
+	{ AR_CLIENT_NOTIFY, "AR_CLIENT_NOTIFY", "arp`arcn_t" },
+	{ AR_INTERFACE_UP, "AR_INTERFACE_UP", "arp`arc_t" },
+	{ AR_INTERFACE_DOWN, "AR_INTERFACE_DOWN", "arp`arc_t" },
+	{ AR_INTERFACE_ON, "AR_INTERFACE_ON", "arp`arc_t" },
+	{ AR_INTERFACE_OFF, "AR_INTERFACE_OFF", "arp`arc_t" },
+	{ AR_DLPIOP_DONE, "AR_DLPIOP_DONE", "arp`arc_t" },
+	{ AR_ARP_CLOSING, "AR_ARP_CLOSING", "arp`arc_t" },
+	{ AR_ARP_EXTEND, "AR_ARP_EXTEND", "arp`arc_t" },
+	{ 0, "unknown command", "arp`arc_t" }
+};
+
+/*
+ * State information kept during walk over ACE hash table and unhashed mask
+ * list.
+ */
+typedef struct ace_walk_data {
+	ace_t *awd_hash_tbl[ARP_HASH_SIZE];	/* copy of ar_ce_hash_tbl */
+	ace_t *awd_masks;			/* copy of ar_ce_mask_entries */
+	int awd_idx;				/* current bucket; -1 at start */
+} ace_walk_data_t;
+
+/*
+ * Begin an arl_t walk.  When no starting address was supplied, start from the
+ * target's 'arl_g_head' variable.
+ */
+static int
+arl_walk_init(mdb_walk_state_t *wsp)
+{
+	if (wsp->walk_addr == NULL &&
+	    mdb_readvar(&wsp->walk_addr, "arl_g_head") == -1) {
+		mdb_warn("failed to read 'arl_g_head'");
+		return (WALK_ERR);
+	}
+	return (WALK_NEXT);
+}
+
+/*
+ * Read the arl_t at the current walk address, advance to arl_next, and hand
+ * the local copy to the walk callback.  A NULL address ends the walk.
+ */
+static int
+arl_walk_step(mdb_walk_state_t *wsp)
+{
+	uintptr_t addr = wsp->walk_addr;
+	arl_t arl;
+
+	if (wsp->walk_addr == NULL)
+		return (WALK_DONE);
+
+	if (mdb_vread(&arl, sizeof (arl), addr) == -1) {
+		mdb_warn("failed to read arl_t at %p", addr);
+		return (WALK_ERR);
+	}
+
+	wsp->walk_addr = (uintptr_t)arl.arl_next;
+
+	return ((*wsp->walk_callback)(addr, &arl, wsp->walk_cbdata));
+}
+
+/*
+ * Begin an ace_t walk.  Only global walks are supported: the hash table
+ * 'ar_ce_hash_tbl' and the list head 'ar_ce_mask_entries' are copied from the
+ * target into a freshly allocated ace_walk_data_t, which is released again on
+ * every failure path here and otherwise by ace_walk_fini().
+ */
+static int
+ace_walk_init(mdb_walk_state_t *wsp)
+{
+	ace_walk_data_t *aw;
+
+	if (wsp->walk_addr != NULL) {
+		mdb_warn("ace supports only global walks\n");
+		return (WALK_ERR);
+	}
+
+	aw = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP);
+
+	if (mdb_readsym(aw->awd_hash_tbl, sizeof (aw->awd_hash_tbl),
+	    "ar_ce_hash_tbl") == -1) {
+		mdb_warn("failed to read 'ar_ce_hash_tbl'");
+		mdb_free(aw, sizeof (ace_walk_data_t));
+		return (WALK_ERR);
+	}
+
+	if (mdb_readvar(&aw->awd_masks, "ar_ce_mask_entries") == -1) {
+		mdb_warn("failed to read 'ar_ce_mask_entries'");
+		mdb_free(aw, sizeof (ace_walk_data_t));
+		return (WALK_ERR);
+	}
+
+	/* The step routine will start off by incrementing to index 0 */
+	aw->awd_idx = -1;
+	wsp->walk_addr = 0;
+	wsp->walk_data = aw;
+
+	return (WALK_NEXT);
+}
+
+/*
+ * Visit the next ace_t.  Each hash bucket is followed via ace_next; once the
+ * last bucket is exhausted the mask list is walked (awd_idx ==
+ * ARP_HASH_SIZE), and when that list ends too the walk is done.
+ */
+static int
+ace_walk_step(mdb_walk_state_t *wsp)
+{
+	uintptr_t addr;
+	ace_walk_data_t *aw = wsp->walk_data;
+	ace_t ace;
+
+	/*
+	 * If we're at the end of the previous list, then find the start of the
+	 * next list to process.
+	 */
+	while (wsp->walk_addr == NULL) {
+		if (aw->awd_idx == ARP_HASH_SIZE)
+			return (WALK_DONE);
+		if (++aw->awd_idx == ARP_HASH_SIZE) {
+			wsp->walk_addr = (uintptr_t)aw->awd_masks;
+		} else {
+			wsp->walk_addr =
+			    (uintptr_t)aw->awd_hash_tbl[aw->awd_idx];
+		}
+	}
+
+	addr = wsp->walk_addr;
+	if (mdb_vread(&ace, sizeof (ace), addr) == -1) {
+		mdb_warn("failed to read ace_t at %p", addr);
+		return (WALK_ERR);
+	}
+
+	wsp->walk_addr = (uintptr_t)ace.ace_next;
+
+	return (wsp->walk_callback(addr, &ace, wsp->walk_cbdata));
+}
+
+/* Release the walk state allocated by ace_walk_init(). */
+static void
+ace_walk_fini(mdb_walk_state_t *wsp)
+{
+	mdb_free(wsp->walk_data, sizeof (ace_walk_data_t));
+}
+
+/*
+ * Common routine to produce an 'ar' text description
+ *
+ * When the ar_t has no arl, the queue following ar_wq is examined: "Client"
+ * or "Unknown" is produced for streams not on an ill, and "IP <name>" when
+ * the next queue belongs to ip (its q_qinfo matches ip`ipwinit).  When the
+ * ar_t has an arl, "ARP <name> " is produced, followed by the hardware
+ * address if addmac is set and the address can be read.
+ *
+ * At most nbytes bytes are written to buf.  If any read from the target
+ * fails, the function returns early and buf is left exactly as the caller
+ * passed it in, so callers that print buf unconditionally must pre-initialize
+ * it.
+ */
+static void
+ar_describe(const ar_t *ar, char *buf, size_t nbytes, boolean_t addmac)
+{
+	if (ar->ar_arl == NULL) {
+		queue_t wq, ipq;
+		ill_t ill;
+		char name[LIFNAMSIZ];
+		GElf_Sym sym;
+		boolean_t nextip;
+
+		if (mdb_vread(&wq, sizeof (wq), (uintptr_t)ar->ar_wq) == -1 ||
+		    mdb_vread(&ipq, sizeof (ipq), (uintptr_t)wq.q_next) == -1)
+			return;
+
+		nextip =
+		    (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0 &&
+		    (uintptr_t)sym.st_value == (uintptr_t)ipq.q_qinfo);
+
+		if (!ar->ar_on_ill_stream) {
+			/* Bounded copy: honor the caller's buffer size */
+			(void) mdb_snprintf(buf, nbytes, "%s",
+			    nextip ? "Client" : "Unknown");
+			return;
+		}
+
+		if (!nextip ||
+		    mdb_vread(&ill, sizeof (ill), (uintptr_t)ipq.q_ptr) == -1 ||
+		    mdb_readstr(name, sizeof (name),
+		    (uintptr_t)ill.ill_name) == -1) {
+			return;
+		}
+		(void) mdb_snprintf(buf, nbytes, "IP %s", name);
+	} else {
+		arl_t arl;
+		ssize_t retv;
+		uint32_t alen;
+		uchar_t macaddr[ARP_MAX_ADDR_LEN];
+
+		if (mdb_vread(&arl, sizeof (arl), (uintptr_t)ar->ar_arl) == -1)
+			return;
+		retv = mdb_snprintf(buf, nbytes, "ARP %s ", arl.arl_name);
+		/* Stop if the name alone filled the buffer or no MAC wanted */
+		if (retv >= nbytes || !addmac)
+			return;
+		alen = arl.arl_hw_addr_length;
+		if (arl.arl_hw_addr == NULL || alen == 0 ||
+		    alen > sizeof (macaddr))
+			return;
+		if (mdb_vread(macaddr, alen, (uintptr_t)arl.arl_hw_addr) == -1)
+			return;
+		mdb_mac_addr(macaddr, alen, buf + retv, nbytes - retv);
+	}
+}
+
+/*
+ * Walk callback for ::ar: print one line for the ar_t copy in arptr, which
+ * lives at target address addr.
+ */
+/* ARGSUSED2 */
+static int
+ar_cb(uintptr_t addr, const void *arptr, void *dummy)
+{
+	const ar_t *ar = arptr;
+	char ardesc[sizeof ("ARP ") + LIFNAMSIZ];
+
+	/*
+	 * ar_describe() leaves the buffer untouched when it cannot read the
+	 * target, so start with an empty string rather than printing
+	 * uninitialized stack contents.
+	 */
+	ardesc[0] = '\0';
+	ar_describe(ar, ardesc, sizeof (ardesc), B_FALSE);
+	mdb_printf("%?p %?p %?p %s\n", addr, ar->ar_wq, ar->ar_arl, ardesc);
+	return (WALK_NEXT);
+}
+
+/*
+ * Print out ARP client structures.
+ *
+ * With an address, the single ar_t at that address is shown; otherwise every
+ * ar_t reachable through the "ar" walker is shown.  The column header is
+ * suppressed when the output is being piped to another dcmd.
+ */
+/* ARGSUSED2 */
+static int
+ar_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	ar_t ar;
+
+	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
+		mdb_printf("%<u>%?s %?s %?s %s%</u>\n",
+		    "AR", "WQ", "ARL", "TYPE");
+	}
+
+	if (flags & DCMD_ADDRSPEC) {
+		if (mdb_vread(&ar, sizeof (ar), addr) == -1) {
+			mdb_warn("failed to read ar_t at %p", addr);
+			return (DCMD_ERR);
+		}
+		(void) ar_cb(addr, &ar, NULL);
+	} else {
+		if (mdb_walk("ar", ar_cb, NULL) == -1) {
+			mdb_warn("cannot walk ar_t structures");
+			return (DCMD_ERR);
+		}
+	}
+	return (DCMD_OK);
+}
+
+/*
+ * Walk callback for ::arl: print one line for the arl_t copy in arlptr.  The
+ * line shows the pending DLPI primitive (by name when mdb_dlpi_prim() knows
+ * it, in hex otherwise, "--" when none), the number of deferred DLPI
+ * messages, a short flag string, the interface name and the hardware address.
+ */
+/* ARGSUSED2 */
+static int
+arl_cb(uintptr_t addr, const void *arlptr, void *dummy)
+{
+	const arl_t *arl = arlptr;
+	uchar_t macaddr[ARP_MAX_ADDR_LEN];
+	char macstr[ARP_MAX_ADDR_LEN*3];
+	char flags[4];	/* optional 'N', one state letter, and the NUL */
+	const char *primstr;
+
+	mdb_printf("%?p ", addr);
+	if (arl->arl_dlpi_pending == DL_PRIM_INVAL)
+		mdb_printf("%16s", "--");
+	else if ((primstr = mdb_dlpi_prim(arl->arl_dlpi_pending)) != NULL)
+		mdb_printf("%16s", primstr);
+	else
+		mdb_printf("%16x", arl->arl_dlpi_pending);
+	/* "--" means no usable address; "?" means the read itself failed */
+	if (arl->arl_hw_addr_length == 0 ||
+	    arl->arl_hw_addr_length > sizeof (macaddr)) {
+		(void) strcpy(macstr, "--");
+	} else if (mdb_vread(macaddr, arl->arl_hw_addr_length,
+	    (uintptr_t)arl->arl_hw_addr) == -1) {
+		(void) strcpy(macstr, "?");
+	} else {
+		mdb_mac_addr(macaddr, arl->arl_hw_addr_length, macstr,
+		    sizeof (macstr));
+	}
+
+	/* Print both the link-layer state and the NOARP flag */
+	flags[0] = '\0';
+	if (arl->arl_flags & ARL_F_NOARP)
+		(void) strcat(flags, "N");
+	switch (arl->arl_state) {
+	case ARL_S_DOWN:
+		(void) strcat(flags, "d");
+		break;
+	case ARL_S_PENDING:
+		(void) strcat(flags, "P");
+		break;
+	case ARL_S_UP:
+		(void) strcat(flags, "U");
+		break;
+	default:
+		(void) strcat(flags, "?");
+		break;
+	}
+	mdb_printf(" %8d %-3s %-9s %s\n",
+	    mdb_mblk_count(arl->arl_dlpi_deferred), flags, arl->arl_name,
+	    macstr);
+	return (WALK_NEXT);
+}
+
+/*
+ * Print out ARP link-layer elements.
+ *
+ * With an address, the single arl_t at that address is shown; otherwise every
+ * arl_t reachable through the "arl" walker is shown.  The column header is
+ * suppressed when the output is being piped to another dcmd.
+ */
+/* ARGSUSED2 */
+static int
+arl_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	arl_t arl;
+
+	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
+		mdb_printf("%<u>%?s %16s %8s %3s %9s %s%</u>\n",
+		    "ARL", "DLPI REQ", "DLPI CNT", "FLG", "INTERFACE",
+		    "HW ADDR");
+	}
+
+	if (flags & DCMD_ADDRSPEC) {
+		if (mdb_vread(&arl, sizeof (arl), addr) == -1) {
+			mdb_warn("failed to read arl_t at %p", addr);
+			return (DCMD_ERR);
+		}
+		(void) arl_cb(addr, &arl, NULL);
+	} else {
+		if (mdb_walk("arl", arl_cb, NULL) == -1) {
+			mdb_warn("cannot walk arl_t structures");
+			return (DCMD_ERR);
+		}
+	}
+	return (DCMD_OK);
+}
+
+/*
+ * Walk callback for ::ace: print one line for the ace_t copy in aceptr,
+ * showing the protocol address (with "/n" suffix for non-host masks), a
+ * compact flag string and the hardware address.
+ */
+/* ARGSUSED2 */
+static int
+ace_cb(uintptr_t addr, const void *aceptr, void *dummy)
+{
+	const ace_t *ace = aceptr;
+	uchar_t macaddr[ARP_MAX_ADDR_LEN];
+	char macstr[ARP_MAX_ADDR_LEN*3];
+	/* The %b format isn't compact enough for long listings */
+	static const char ace_flags[] = "SPDRMLdA ofya";
+	const char *cp;
+	char flags[sizeof (ace_flags)], *fp;
+	int flg;
+	in_addr_t inaddr, mask;
+	char addrstr[sizeof ("255.255.255.255/32")];
+
+	/*
+	 * Walk the list of flags and produce a string: character i of
+	 * ace_flags is emitted when bit i of ace->ace_flags is set; a blank
+	 * in the table marks a bit that is never printed.
+	 */
+	cp = ace_flags;
+	fp = flags;
+	for (flg = 1; *cp != '\0'; flg <<= 1, cp++) {
+		if ((flg & ace->ace_flags) && *cp != ' ')
+			*fp++ = *cp;
+	}
+	*fp = '\0';
+
+	/* If it's not resolved, then it has no hardware address */
+	if (!(ace->ace_flags & ACE_F_RESOLVED) ||
+	    ace->ace_hw_addr_length == 0 ||
+	    ace->ace_hw_addr_length > sizeof (macaddr)) {
+		(void) strcpy(macstr, "--");
+	} else if (mdb_vread(macaddr, ace->ace_hw_addr_length,
+	    (uintptr_t)ace->ace_hw_addr) == -1) {
+		(void) strcpy(macstr, "?");
+	} else {
+		mdb_mac_addr(macaddr, ace->ace_hw_addr_length, macstr,
+		    sizeof (macstr));
+	}
+
+	/*
+	 * Nothing other than IP uses ARP these days, so we don't try very hard
+	 * here to switch out on ARP protocol type. (Note that ARP protocol
+	 * types are roughly Ethertypes, but are allocated separately at IANA.)
+	 */
+	if (ace->ace_proto != IP_ARP_PROTO_TYPE) {
+		(void) mdb_snprintf(addrstr, sizeof (addrstr),
+		    "Unknown proto %x", ace->ace_proto);
+	} else if (mdb_vread(&inaddr, sizeof (inaddr),
+	    (uintptr_t)ace->ace_proto_addr) != -1 &&
+	    mdb_vread(&mask, sizeof (mask), (uintptr_t)ace->ace_proto_mask) !=
+	    -1) {
+		/*
+		 * If it's the standard host mask, then print it normally.
+		 * Otherwise, use "/n" notation.
+		 *
+		 * NOTE(review): mdb_ffs() is applied to the mask exactly as
+		 * read from the target.  If ace_proto_mask is stored in
+		 * network byte order, the computed prefix length would be
+		 * wrong on little-endian hosts -- confirm and byte-swap
+		 * first if so.
+		 */
+		if (mask == (in_addr_t)~0) {
+			(void) mdb_snprintf(addrstr, sizeof (addrstr), "%I",
+			    inaddr);
+		} else {
+			(void) mdb_snprintf(addrstr, sizeof (addrstr), "%I/%d",
+			    inaddr, mask == 0 ? 0 : 33 - mdb_ffs(mask));
+		}
+	} else {
+		(void) strcpy(addrstr, "?");
+	}
+	mdb_printf("%?p %-18s %-8s %s\n", addr, addrstr, flags, macstr);
+	return (WALK_NEXT);
+}
+
+/*
+ * Print out ARP cache entry (ace_t) elements.
+ *
+ * With an address, the single ace_t at that address is shown; otherwise every
+ * ace_t reachable through the "ace" walker is shown.  The column header is
+ * suppressed when the output is being piped to another dcmd.
+ */
+/* ARGSUSED2 */
+static int
+ace_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	ace_t ace;
+
+	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
+		mdb_printf("%<u>%?s %-18s %-8s %s%</u>\n",
+		    "ACE", "PROTOADDR", "FLAGS", "HWADDR");
+	}
+
+	if (flags & DCMD_ADDRSPEC) {
+		if (mdb_vread(&ace, sizeof (ace), addr) == -1) {
+			mdb_warn("failed to read ace_t at %p", addr);
+			return (DCMD_ERR);
+		}
+		(void) ace_cb(addr, &ace, NULL);
+	} else {
+		if (mdb_walk("ace", ace_cb, NULL) == -1) {
+			mdb_warn("cannot walk ace_t structures");
+			return (DCMD_ERR);
+		}
+	}
+	return (DCMD_OK);
+}
+
+/*
+ * Print an ARP hardware and protocol address pair; used when printing an ARP
+ * message.
+ *
+ * field_id is the letter placed in the field label ('s' for sender, 't' for
+ * target), buf points at the hardware address, which is immediately followed
+ * by the protocol address, arh supplies the two address lengths, and ptype is
+ * the protocol type in host byte order.  The protocol address is decoded only
+ * for IP; any other protocol type is reported as "(unknown)".
+ */
+static void
+print_arp(char field_id, const uchar_t *buf, const arh_t *arh, uint16_t ptype)
+{
+	char macstr[ARP_MAX_ADDR_LEN*3];
+	in_addr_t inaddr;
+
+	if (arh->arh_hlen == 0)
+		(void) strcpy(macstr, "(none)");
+	else
+		mdb_mac_addr(buf, arh->arh_hlen, macstr, sizeof (macstr));
+	mdb_printf("%?s ar$%cha %s\n", "", field_id, macstr);
+	if (arh->arh_plen == 0) {
+		mdb_printf("%?s ar$%cpa (none)\n", "", field_id);
+	} else if (ptype != IP_ARP_PROTO_TYPE) {
+		/* Only IP protocol addresses can be decoded here */
+		mdb_printf("%?s ar$%cpa (unknown)\n", "", field_id);
+	} else if (arh->arh_plen == sizeof (in_addr_t)) {
+		/* Copy out: the address in buf may not be aligned */
+		(void) memcpy(&inaddr, buf + arh->arh_hlen, sizeof (inaddr));
+		mdb_printf("%?s ar$%cpa %I\n", "", field_id, inaddr);
+	} else {
+		mdb_printf("%?s ar$%cpa (malformed IP)\n", "", field_id);
+	}
+}
+
+/*
+ * Decode an ARP message and display it.
+ *
+ * An address is required.  The fixed header is read and printed first, then
+ * the sender and target address pairs that follow it.  Returns DCMD_ERR if
+ * no address was given, if either read fails, or if the address lengths in
+ * the header describe more data than the local buffer can hold.
+ */
+/* ARGSUSED2 */
+static int
+arphdr_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	struct {
+		arh_t arh;
+		uchar_t addrs[4 * ARP_MAX_ADDR_LEN];
+	} arp;
+	size_t blen;
+	uint16_t htype, ptype, op;
+	const char *cp;
+
+	if (!(flags & DCMD_ADDRSPEC)) {
+		mdb_warn("address required to print ARP header\n");
+		return (DCMD_ERR);
+	}
+
+	if (mdb_vread(&arp.arh, sizeof (arp.arh), addr) == -1) {
+		mdb_warn("unable to read ARP header at %p", addr);
+		return (DCMD_ERR);
+	}
+	/* Header fields are in network byte order; convert for display */
+	mdb_nhconvert(&htype, arp.arh.arh_hardware, sizeof (htype));
+	mdb_nhconvert(&ptype, arp.arh.arh_proto, sizeof (ptype));
+	mdb_nhconvert(&op, arp.arh.arh_operation, sizeof (op));
+
+	switch (htype) {
+	case ARPHRD_ETHER:
+		cp = "Ether";
+		break;
+	case ARPHRD_IEEE802:
+		cp = "IEEE802";
+		break;
+	case ARPHRD_IB:
+		cp = "InfiniBand";
+		break;
+	default:
+		cp = "Unknown";
+		break;
+	}
+	mdb_printf("%?p: ar$hrd %x (%s)\n", addr, htype, cp);
+	mdb_printf("%?s ar$pro %x (%s)\n", "", ptype,
+	    ptype == IP_ARP_PROTO_TYPE ? "IP" : "Unknown");
+
+	switch (op) {
+	case ARPOP_REQUEST:
+		cp = "ares_op$REQUEST";
+		break;
+	case ARPOP_REPLY:
+		cp = "ares_op$REPLY";
+		break;
+	case REVARP_REQUEST:
+		cp = "arev_op$REQUEST";
+		break;
+	case REVARP_REPLY:
+		cp = "arev_op$REPLY";
+		break;
+	default:
+		cp = "Unknown";
+		break;
+	}
+	mdb_printf("%?s ar$op %d (%s)\n", "", op, cp);
+
+	/*
+	 * Note that we go to some length to attempt to print out the fixed
+	 * header data before trying to decode the variable-length data. This
+	 * is done to maximize the amount of useful information shown when the
+	 * buffer is truncated or otherwise corrupt.
+	 */
+	blen = 2 * (arp.arh.arh_hlen + arp.arh.arh_plen);
+	/*
+	 * The lengths come straight from the (possibly corrupt) header, so
+	 * refuse anything that would overrun the local address buffer.
+	 */
+	if (blen > sizeof (arp.addrs)) {
+		mdb_warn("ARP address lengths too large (hlen %u, plen %u)\n",
+		    arp.arh.arh_hlen, arp.arh.arh_plen);
+		return (DCMD_ERR);
+	}
+	if (mdb_vread(&arp.addrs, blen, addr + sizeof (arp.arh)) == -1) {
+		mdb_warn("unable to read ARP body at %p", addr);
+		return (DCMD_ERR);
+	}
+
+	print_arp('s', arp.addrs, &arp.arh, ptype);
+	print_arp('t', arp.addrs + arp.arh.arh_hlen + arp.arh.arh_plen,
+	    &arp.arh, ptype);
+	return (DCMD_OK);
+}
+
+/*
+ * Print out an arp command formatted in a reasonable manner. This implements
+ * the type switch used by ARP.
+ *
+ * It could also dump the data that follows the header (using offset and length
+ * in the various structures), but it currently does not.
+ */ +/* ARGSUSED2 */ +static int +arpcmd_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + arc_t arc; + const arp_cmd_tbl *tp; + mdb_arg_t subargv; + + if (!(flags & DCMD_ADDRSPEC)) { + mdb_warn("address required to print ARP command\n"); + return (DCMD_ERR); + } + if (mdb_vread(&arc, sizeof (arc), addr) == -1) { + mdb_warn("unable to read arc_t at %p", addr); + return (DCMD_ERR); + } + for (tp = act_list; tp->act_cmd != 0; tp++) + if (tp->act_cmd == arc.arc_cmd) + break; + mdb_printf("%p %s (%s) = ", addr, tp->act_name, tp->act_type); + subargv.a_type = MDB_TYPE_STRING; + subargv.a_un.a_str = tp->act_type; + if (mdb_call_dcmd("print", addr, DCMD_ADDRSPEC, 1, &subargv) == -1) + return (DCMD_ERR); + else + return (DCMD_OK); +} + +static size_t +mi_osize(const queue_t *q) +{ + /* + * The code in common/inet/mi.c allocates an extra word to store the + * size of the allocation. An mi_o_s is thus a size_t plus an mi_o_s. + */ + struct mi_block { + size_t mi_nbytes; + struct mi_o_s mi_o; + } m; + + if (mdb_vread(&m, sizeof (m), (uintptr_t)q->q_ptr - sizeof (m)) != -1) + return (m.mi_nbytes - sizeof (m)); + + return (0); +} + +/* + * This is called when ::stream is used and an ARP module is seen on the + * stream. Determine what sort of ARP usage is involved and show an + * appropriate message. 
+ */ +static void +arp_qinfo(const queue_t *qp, char *buf, size_t nbytes) +{ + size_t size = mi_osize(qp); + ar_t ar; + + if (size != sizeof (ar_t)) + return; + if (mdb_vread(&ar, sizeof (ar), (uintptr_t)qp->q_ptr) == -1) + return; + ar_describe(&ar, buf, nbytes, B_TRUE); +} + +static uintptr_t +arp_rnext(const queue_t *q) +{ + size_t size = mi_osize(q); + ar_t ar; + + if (size == sizeof (ar_t) && mdb_vread(&ar, sizeof (ar), + (uintptr_t)q->q_ptr) != -1) + return ((uintptr_t)ar.ar_rq); + + return (NULL); +} + +static uintptr_t +arp_wnext(const queue_t *q) +{ + size_t size = mi_osize(q); + ar_t ar; + + if (size == sizeof (ar_t) && mdb_vread(&ar, sizeof (ar), + (uintptr_t)q->q_ptr) != -1) + return ((uintptr_t)ar.ar_wq); + + return (NULL); +} + +static const mdb_dcmd_t dcmds[] = { + { "ar", "?", "display ARP client streams", ar_cmd, NULL }, + { "arl", "?", "display ARP link layers", arl_cmd, NULL }, + { "ace", "?", "display ARP cache entries", ace_cmd, NULL }, + { "arphdr", ":", "display an ARP header", arphdr_cmd, NULL }, + { "arpcmd", ":", "display an ARP command", arpcmd_cmd, NULL }, + { NULL } +}; + +/* Note: ar_t walker is in genunix.c and net.c; generic MI walker */ +static const mdb_walker_t walkers[] = { + { "arl", "walk list of arl_t links", + arl_walk_init, arl_walk_step, NULL }, + { "ace", "walk list of ace_t entries", + ace_walk_init, ace_walk_step, ace_walk_fini }, + { NULL } +}; + +static const mdb_qops_t arp_qops = { arp_qinfo, arp_rnext, arp_wnext }; +static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers }; + +const mdb_modinfo_t * +_mdb_init(void) +{ + GElf_Sym sym; + + if (mdb_lookup_by_obj("arp", "winit", &sym) == 0) + mdb_qops_install(&arp_qops, (uintptr_t)sym.st_value); + + return (&modinfo); +} + +void +_mdb_fini(void) +{ + GElf_Sym sym; + + if (mdb_lookup_by_obj("arp", "winit", &sym) == 0) + mdb_qops_remove(&arp_qops, (uintptr_t)sym.st_value); +} diff --git a/usr/src/cmd/mdb/common/modules/ip/ip.c 
b/usr/src/cmd/mdb/common/modules/ip/ip.c index 451a78aa24..8320fee2a5 100644 --- a/usr/src/cmd/mdb/common/modules/ip/ip.c +++ b/usr/src/cmd/mdb/common/modules/ip/ip.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -869,7 +868,7 @@ _mdb_init(void) { GElf_Sym sym; - if (mdb_lookup_by_obj("ip", "winit", &sym) == 0) + if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0) mdb_qops_install(&ip_qops, (uintptr_t)sym.st_value); return (&modinfo); @@ -880,6 +879,6 @@ _mdb_fini(void) { GElf_Sym sym; - if (mdb_lookup_by_obj("ip", "winit", &sym) == 0) + if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0) mdb_qops_remove(&ip_qops, (uintptr_t)sym.st_value); } diff --git a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c index e5498db180..869c58c6e0 100644 --- a/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c +++ b/usr/src/cmd/mdb/common/modules/mdb_ks/mdb_ks.c @@ -53,6 +53,7 @@ #include <sys/ddi_impldefs.h> #include <sys/refstr_impl.h> #include <sys/cpuvar.h> +#include <sys/dlpi.h> #include <errno.h> #include <vm/seg_vn.h> @@ -1484,3 +1485,105 @@ mdb_read_refstr(uintptr_t refstr_addr, char *str, size_t nbytes) return (mdb_readstr(str, nbytes, (uintptr_t)r->rs_string)); } + +/* + * Chase an mblk list by b_next and return the length. 
+ */ +int +mdb_mblk_count(const mblk_t *mb) +{ + int count; + mblk_t mblk; + + if (mb == NULL) + return (0); + + count = 1; + while (mb->b_next != NULL) { + count++; + if (mdb_vread(&mblk, sizeof (mblk), (uintptr_t)mb->b_next) == + -1) + break; + mb = &mblk; + } + return (count); +} + +/* + * Write the given MAC address as a printable string in the usual colon- + * separated format. Assumes that buflen is at least 2. + */ +void +mdb_mac_addr(const uint8_t *addr, size_t alen, char *buf, size_t buflen) +{ + int slen; + + if (alen == 0 || buflen < 4) { + (void) strcpy(buf, "?"); + return; + } + for (;;) { + /* + * If there are more MAC address bytes available, but we won't + * have any room to print them, then add "..." to the string + * instead. See below for the 'magic number' explanation. + */ + if ((alen == 2 && buflen < 6) || (alen > 2 && buflen < 7)) { + (void) strcpy(buf, "..."); + break; + } + slen = mdb_snprintf(buf, buflen, "%02x", *addr++); + buf += slen; + if (--alen == 0) + break; + *buf++ = ':'; + buflen -= slen + 1; + /* + * At this point, based on the first 'if' statement above, + * either alen == 1 and buflen >= 3, or alen > 1 and + * buflen >= 4. The first case leaves room for the final "xx" + * number and trailing NUL byte. The second leaves room for at + * least "...". Thus the apparently 'magic' numbers chosen for + * that statement. + */ + } +} + +/* + * Produce a string that represents a DLPI primitive, or NULL if no such string + * is possible. 
+ */ +const char * +mdb_dlpi_prim(int prim) +{ + switch (prim) { + case DL_INFO_REQ: return ("DL_INFO_REQ"); + case DL_INFO_ACK: return ("DL_INFO_ACK"); + case DL_ATTACH_REQ: return ("DL_ATTACH_REQ"); + case DL_DETACH_REQ: return ("DL_DETACH_REQ"); + case DL_BIND_REQ: return ("DL_BIND_REQ"); + case DL_BIND_ACK: return ("DL_BIND_ACK"); + case DL_UNBIND_REQ: return ("DL_UNBIND_REQ"); + case DL_OK_ACK: return ("DL_OK_ACK"); + case DL_ERROR_ACK: return ("DL_ERROR_ACK"); + case DL_ENABMULTI_REQ: return ("DL_ENABMULTI_REQ"); + case DL_DISABMULTI_REQ: return ("DL_DISABMULTI_REQ"); + case DL_PROMISCON_REQ: return ("DL_PROMISCON_REQ"); + case DL_PROMISCOFF_REQ: return ("DL_PROMISCOFF_REQ"); + case DL_UNITDATA_REQ: return ("DL_UNITDATA_REQ"); + case DL_UNITDATA_IND: return ("DL_UNITDATA_IND"); + case DL_UDERROR_IND: return ("DL_UDERROR_IND"); + case DL_PHYS_ADDR_REQ: return ("DL_PHYS_ADDR_REQ"); + case DL_PHYS_ADDR_ACK: return ("DL_PHYS_ADDR_ACK"); + case DL_SET_PHYS_ADDR_REQ: return ("DL_SET_PHYS_ADDR_REQ"); + case DL_NOTIFY_REQ: return ("DL_NOTIFY_REQ"); + case DL_NOTIFY_ACK: return ("DL_NOTIFY_ACK"); + case DL_NOTIFY_IND: return ("DL_NOTIFY_IND"); + case DL_CAPABILITY_REQ: return ("DL_CAPABILITY_REQ"); + case DL_CAPABILITY_ACK: return ("DL_CAPABILITY_ACK"); + case DL_CONTROL_REQ: return ("DL_CONTROL_REQ"); + case DL_CONTROL_ACK: return ("DL_CONTROL_ACK"); + case DL_PASSIVE_REQ: return ("DL_PASSIVE_REQ"); + default: return (NULL); + } +} diff --git a/usr/src/cmd/mdb/intel/amd64/arp/Makefile b/usr/src/cmd/mdb/intel/amd64/arp/Makefile new file mode 100644 index 0000000000..82804206f8 --- /dev/null +++ b/usr/src/cmd/mdb/intel/amd64/arp/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = arp.so +MDBTGT = kvm + +MODSRCS = arp.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.amd64 +include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/intel/ia32/arp/Makefile b/usr/src/cmd/mdb/intel/ia32/arp/Makefile new file mode 100644 index 0000000000..b64c3de8ea --- /dev/null +++ b/usr/src/cmd/mdb/intel/ia32/arp/Makefile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = arp.so +MDBTGT = kvm + +MODSRCS = arp.c + +include ../../../../Makefile.cmd +include ../../Makefile.ia32 +include ../../../Makefile.module diff --git a/usr/src/cmd/mdb/sparc/v9/arp/Makefile b/usr/src/cmd/mdb/sparc/v9/arp/Makefile new file mode 100644 index 0000000000..0e0b290b84 --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v9/arp/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = arp.so +MDBTGT = kvm + +MODSRCS = arp.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.sparcv9 +include ../../../Makefile.module diff --git a/usr/src/lib/libc/port/gen/ffs.c b/usr/src/common/util/ffs.c index 19da6ceb22..b37c661b82 100644 --- a/usr/src/lib/libc/port/gen/ffs.c +++ b/usr/src/common/util/ffs.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,34 +18,39 @@ * * CDDL HEADER END */ + /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - +/* + * Common implementation of ffs for kernel, mdb, and libc. Note that mdb + * renames ffs into mdb_ffs to avoid user-space clashes with the signature of + * ffs(3C). 
+ */ +#if defined(_KERNEL) || defined(ffs) +#include <sys/int_types.h> +#define arg_t uintmax_t +#else #pragma weak ffs = _ffs - +#define arg_t int #include "synonyms.h" -#include <sys/types.h> -#include <string.h> +#endif int -ffs(int field) +ffs(arg_t bits) { - int idx = 1; + int i; - if (field == 0) + if (bits == 0) return (0); - for (;;) { - if (field & 1) - return (idx); - field >>= 1; - ++idx; + for (i = 1; ; i++, bits >>= 1) { + if (bits & 1) + break; } + return (i); } diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 3e758a94b2..4293c30a14 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -90,6 +90,7 @@ COMOBJS= \ bcopy.o \ bsearch.o \ bzero.o \ + ffs.o \ qsort.o \ strtol.o \ strtoul.o @@ -388,7 +389,6 @@ PORTGEN= \ fattach.o \ fdetach.o \ fdopendir.o \ - ffs.o \ fmtmsg.o \ ftime.o \ ftok.o \ diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 9560631ae8..d90e6d05f8 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -88,6 +88,7 @@ COMOBJS= \ bcopy.o \ bsearch.o \ bzero.o \ + ffs.o \ qsort.o \ strtol.o \ strtoul.o @@ -417,7 +418,6 @@ PORTGEN= \ fattach.o \ fdetach.o \ fdopendir.o \ - ffs.o \ fmtmsg.o \ ftime.o \ ftok.o \ diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile index e7f51d287d..2d23d8087c 100644 --- a/usr/src/lib/libc/sparc/Makefile +++ b/usr/src/lib/libc/sparc/Makefile @@ -107,6 +107,7 @@ COMOBJS= \ bcopy.o \ bzero.o \ bsearch.o \ + ffs.o \ memccpy.o \ qsort.o \ strtol.o \ @@ -440,7 +441,6 @@ PORTGEN= \ fattach.o \ fdetach.o \ fdopendir.o \ - ffs.o \ fmtmsg.o \ ftime.o \ ftok.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile index 81de2b2bb9..df5eb2f5e3 100644 --- a/usr/src/lib/libc/sparcv9/Makefile +++ b/usr/src/lib/libc/sparcv9/Makefile @@ -112,6 +112,7 @@ COMOBJS= \ bcopy.o \ bsearch.o \ bzero.o \ + ffs.o \ memccpy.o \ qsort.o \ 
strtol.o \ @@ -405,7 +406,6 @@ PORTGEN= \ fattach.o \ fdetach.o \ fdopendir.o \ - ffs.o \ fmtmsg.o \ ftime.o \ ftok.o \ diff --git a/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h b/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h index 64df2431de..b509917dff 100644 --- a/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h +++ b/usr/src/lib/libdhcpagent/common/dhcpagent_ipc.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -170,6 +169,7 @@ typedef enum { INIT, /* nothing done yet */ SELECTING, /* sent DISCOVER, waiting for OFFERs */ REQUESTING, /* sent REQUEST, waiting for ACK/NAK */ + PRE_BOUND, /* have ACK, setting up interface */ BOUND, /* have a valid lease */ RENEWING, /* have lease, but trying to renew */ REBINDING, /* have lease, but trying to rebind */ diff --git a/usr/src/lib/libdhcpagent/common/dhcpagent_util.c b/usr/src/lib/libdhcpagent/common/dhcpagent_util.c index 18189a14cf..eab4a98edb 100644 --- a/usr/src/lib/libdhcpagent/common/dhcpagent_util.c +++ b/usr/src/lib/libdhcpagent/common/dhcpagent_util.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -61,6 +60,7 @@ dhcp_state_to_string(DHCPSTATE state) "INIT", "SELECTING", "REQUESTING", + "PRE_BOUND", "BOUND", "RENEWING", "REBINDING", diff --git a/usr/src/lib/libinetcfg/Makefile.com b/usr/src/lib/libinetcfg/Makefile.com index 1eb8a4aee4..d0d667091a 100644 --- a/usr/src/lib/libinetcfg/Makefile.com +++ b/usr/src/lib/libinetcfg/Makefile.com @@ -18,7 +18,6 @@ # # CDDL HEADER END # -# # Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -28,8 +27,7 @@ LIBRARY = libinetcfg.a VERS = .1 OBJECTS = inetcfg.o \ - inetcfg_nic.o \ - inetcfg_dad.o + inetcfg_nic.o include ../../Makefile.lib @@ -45,11 +43,6 @@ $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) CFLAGS += $(CCVERBOSE) CPPFLAGS += -I$(SRCDIR) -D_REENTRANT -# -# Turn off argument list checking because inetcfg_dad.c is written using -# libxnet and the rest is compiled to use libsocket (yikes!) -# -LINTFLAGS += -erroff=E_INCONS_ARG_DECL2 -erroff=E_INCONS_ARG_USED2 .KEEP_STATE: diff --git a/usr/src/lib/libinetcfg/common/inetcfg.c b/usr/src/lib/libinetcfg/common/inetcfg.c index 5a79b36cc8..62b118b894 100644 --- a/usr/src/lib/libinetcfg/common/inetcfg.c +++ b/usr/src/lib/libinetcfg/common/inetcfg.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -37,15 +36,14 @@ #include <sys/sockio.h> #include <sys/types.h> #include <sys/socket.h> +#include <net/route.h> #include <netinet/in.h> #include <inet/ip.h> #include <arpa/inet.h> -#include <netdb.h> #include <libintl.h> #include <inetcfg.h> #include "inetcfg_nic.h" -#include "inetcfg_dad.h" #define ICFG_FAMILY(handle) handle->ifh_interface.if_protocol @@ -60,6 +58,12 @@ #define ICFG_LOGICAL_SEP ':' /* + * Maximum amount of time (in milliseconds) to wait for Duplicate Address + * Detection to complete in the kernel. + */ +#define DAD_WAIT_TIME 5000 + +/* * Note: must be kept in sync with error codes in <inetcfg.h> */ static char *errmsgs[ICFG_NERR] = { @@ -670,13 +674,67 @@ icfg_get_tunnel_upper(icfg_handle_t handle, int *protocol) } /* + * Any time that flags are changed on an interface where either the new or the + * existing flags have IFF_UP set, we'll get at least one RTM_IFINFO message to + * announce the flag status. Typically, there are two such messages: one + * saying that the interface is going down, and another saying that it's coming + * back up. + * + * We wait here for that second message, which can take one of two forms: + * either IFF_UP or IFF_DUPLICATE. If something's amiss with the kernel, + * though, we don't wait forever. (Note that IFF_DUPLICATE is a high-order + * bit, and we can't see it in the routing socket messages.) 
+ */ +static int +dad_wait(icfg_handle_t handle, int rtsock) +{ + struct pollfd fds[1]; + union { + struct if_msghdr ifm; + char buf[1024]; + } msg; + int index; + int retv; + uint64_t flags; + hrtime_t starttime, now; + + fds[0].fd = rtsock; + fds[0].events = POLLIN; + fds[0].revents = 0; + + if ((retv = icfg_get_index(handle, &index)) != ICFG_SUCCESS) + return (retv); + + starttime = gethrtime(); + for (;;) { + now = gethrtime(); + now = (now - starttime) / 1000000; + if (now >= DAD_WAIT_TIME) + break; + if (poll(fds, 1, DAD_WAIT_TIME - (int)now) <= 0) + break; + if (read(rtsock, &msg, sizeof (msg)) <= 0) + break; + if (msg.ifm.ifm_type != RTM_IFINFO) + continue; + /* Note that ifm_index is just 16 bits */ + if (index == msg.ifm.ifm_index && (msg.ifm.ifm_flags & IFF_UP)) + return (ICFG_SUCCESS); + if ((retv = icfg_get_flags(handle, &flags)) != ICFG_SUCCESS) + return (retv); + if (flags & IFF_DUPLICATE) + return (ICFG_DAD_FOUND); + } + return (ICFG_DAD_FAILED); +} + +/* * Sets the flags for the interface represented by the 'handle' * argument to the value contained in the 'flags' argument. * - * If the interface is an IPv6 interface and the new flags value - * would transition the interface from "down" to "up", then - * duplicate address detection is performed and succeeds only if - * the no duplicate address is detected. + * If the new flags value will transition the interface from "down" to "up," + * then duplicate address detection is performed by the kernel. This routine + * waits to get the outcome of that test. * * Returns: ICFG_SUCCESS, ICFG_DAD_FOUND, ICFG_DAD_FAILED or ICFG_FAILURE. 
*/ @@ -686,48 +744,39 @@ icfg_set_flags(icfg_handle_t handle, uint64_t flags) struct lifreq lifr; uint64_t oflags; int ret; + int rtsock; (void) strlcpy(lifr.lifr_name, handle->ifh_interface.if_name, sizeof (lifr.lifr_name)); lifr.lifr_addr.ss_family = ICFG_FAMILY(handle); + if ((ret = icfg_get_flags(handle, &oflags)) != ICFG_SUCCESS) + return (ret); + if (oflags == flags) + return (ICFG_SUCCESS); + /* - * If we are transitioning an IPv6 interface from being down - * to being up and a local address is set, then we must perform - * duplicate address detection. + * Any time flags are changed on an interface that has IFF_UP set, + * you'll get a routing socket message. We care about the status, + * though, only when the new flags are marked "up." */ - if ((ICFG_FAMILY(handle) == AF_INET6) && - (!(flags & IFF_NOLOCAL)) && (flags & IFF_UP)) { - /* - * Get the old flags - */ - if ((ret = icfg_get_flags(handle, &oflags)) != ICFG_SUCCESS) { - return (ret); - } - - if (!(oflags & IFF_UP)) { - struct sockaddr_in6 *sin6; - - if (ioctl(handle->ifh_sock, SIOCGLIFADDR, - (caddr_t)&lifr) < 0) { - return (ICFG_FAILURE); - } - - sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; - - ret = dad_test(handle, oflags, sin6); - if (ret != ICFG_SUCCESS) { - return (ret); - } - } - } + rtsock = (flags & IFF_UP) ? 
+ socket(PF_ROUTE, SOCK_RAW, ICFG_FAMILY(handle)) : -1; lifr.lifr_flags = flags; if (ioctl(handle->ifh_sock, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { + if (rtsock != -1) + (void) close(rtsock); return (ICFG_FAILURE); } - return (ICFG_SUCCESS); + if (rtsock == -1) { + return (ICFG_SUCCESS); + } else { + ret = dad_wait(handle, rtsock); + (void) close(rtsock); + return (ret); + } } /* @@ -945,6 +994,7 @@ icfg_set_addr(icfg_handle_t handle, const struct sockaddr *addr, struct lifreq lifr; uint64_t flags; int ret; + int rtsock; (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); if ((ret = to_sockaddr_storage(ICFG_FAMILY(handle), addr, addrlen, @@ -953,32 +1003,33 @@ icfg_set_addr(icfg_handle_t handle, const struct sockaddr *addr, } /* - * Need to do duplicate address detection for IPv6 + * Need to do check on duplicate address detection results if the + * interface is up. */ - if (ICFG_FAMILY(handle) == AF_INET6) { - if ((ret = icfg_get_flags(handle, &flags)) != ICFG_SUCCESS) { - return (ret); - } - - if (flags & IFF_UP) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &lifr.lifr_addr; - ret = dad_test(handle, flags, sin6); - if (ret != ICFG_SUCCESS) { - return (ret); - } - } + if ((ret = icfg_get_flags(handle, &flags)) != ICFG_SUCCESS) { + return (ret); } + rtsock = (flags & IFF_UP) ? 
+ socket(PF_ROUTE, SOCK_RAW, ICFG_FAMILY(handle)) : -1; + (void) strlcpy(lifr.lifr_name, handle->ifh_interface.if_name, sizeof (lifr.lifr_name)); lifr.lifr_addr.ss_family = ICFG_FAMILY(handle); if (ioctl(handle->ifh_sock, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { + if (rtsock != -1) + (void) close(rtsock); return (ICFG_FAILURE); } - return (ICFG_SUCCESS); + if (rtsock == -1) { + return (ICFG_SUCCESS); + } else { + ret = dad_wait(handle, rtsock); + (void) close(rtsock); + return (ret); + } } /* diff --git a/usr/src/lib/libinetcfg/common/inetcfg_dad.c b/usr/src/lib/libinetcfg/common/inetcfg_dad.c deleted file mode 100644 index 926f116ee7..0000000000 --- a/usr/src/lib/libinetcfg/common/inetcfg_dad.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * This module uses the ancillary data feature that is made available - * though the UNIX 98 standards version of the Socket interface. This - * interface is normally accessed via libxnet. 
However, to use libxnet, - * this library would have to be compiled with _XOPEN_SOURCE=500 and - * __EXTENSIONS__. Unfortunately, this makes linting both the library - * and its consumers impractical. Therefore, this module is itself compiled - * for use with the UNIX 98 version of the Socket interface and the - * xnet versions of the Socket interfaces are called directly. - * Hopefully, our Socket implementation will one day support the ancillary - * data feature directly and this hack will no longer be needed. In the - * meantime, changes to this file should be made with the knowledge that the - * data types used by this module may differ in defintion fron the same data - * types in the other modules. - */ -#define _XOPEN_SOURCE 500 -#define __EXTENSIONS__ 1 - -#include <stdio.h> -#include <errno.h> -#include <assert.h> -#include <string.h> -#include <unistd.h> -#include <stropts.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <netinet/icmp6.h> -#include <netinet/ip.h> -#include <inetcfg.h> - -#define IPV6_MAX_HOPS 255 - -static int dup_addr_detect_transmits = 1; - -static struct in6_addr all_nodes_mcast = { { 0xff, 0x2, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x1 } }; - -static struct in6_addr solicited_prefix = { { 0xff, 0x2, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x1, - 0xff, 0x0, 0x0, 0x0 } }; - -extern int __xnet_socket(int family, int type, int protocol); -extern int __xnet_recvmsg(int sock, struct msghdr *msg, int flags); -extern int __xnet_sendto(int sock, const void *buf, size_t len, - int flags, const struct sockaddr *addr, socklen_t addrlen); - -/* - * Verifies that all options have a non-zero length and that - * the options fit within the total length of the packet (optlen). - * - * Returns: _B_TRUE if valid, _B_FALSE otherwise. 
- */ -static boolean_t -dad_verify_optlen(struct nd_opt_hdr *opt, ssize_t optlen) -{ - assert(opt != NULL); - assert(optlen > 0); - - while (optlen > 0) { - if ((opt->nd_opt_len == 0)) { - return (_B_FALSE); - } - optlen -= 8 * opt->nd_opt_len; - if (optlen < 0) { - return (_B_FALSE); - } - opt = (struct nd_opt_hdr *)((char *)opt + - 8 * opt->nd_opt_len); - } - return (_B_TRUE); -} - -/* - * Returns a pointer to the specified option buffer. - * - * Returns: A pointer to the option buffer or NULL if not found. - */ -static void * -dad_find_ancillary(struct msghdr *msg, int cmsg_type) -{ - struct cmsghdr *cmsg; - - assert(msg != NULL); - - for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - if (cmsg->cmsg_level == IPPROTO_IPV6 && - cmsg->cmsg_type == cmsg_type) { - return (CMSG_DATA(cmsg)); - } - } - - return (NULL); -} - -/* - * Receives an ICMP packet and tests it to see if it indicates that - * testaddr is a duplicate address. This routine returns ICFG_SUCCESS - * if no duplicate address is detected. If an unexpected error is - * encountered receiving the packet, then ICFG_FAILURE is returned. - * And of course ICFG_DAD_FOUND is returned if a duplicate address - * is detected. - * - * Returns: ICFG_SUCCESS, ICFG_FAILURE or ICFG_DAD_FOUND. 
- */ -static int -dad_receive(int sock, struct sockaddr_in6 *testaddr, int ifindex) -{ - struct sockaddr_in6 from; - struct icmp6_hdr *icmp; - struct nd_neighbor_solicit *ns; - struct nd_neighbor_advert *na; - static uint64_t in_packet[(IP_MAXPACKET + 1)/8]; - static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8]; - ssize_t len; - struct msghdr msg; - struct iovec iov; - void *opt; - uint_t hoplimit; - struct in6_addr dst; - int rcv_ifindex; - - iov.iov_base = (char *)in_packet; - iov.iov_len = sizeof (in_packet); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_name = (struct sockaddr *)&from; - msg.msg_namelen = sizeof (from); - msg.msg_control = ancillary_data; - msg.msg_controllen = sizeof (ancillary_data); - - if ((len = __xnet_recvmsg(sock, &msg, 0)) < 0) { - /* Error was encountered - return failure */ - return (ICFG_FAILURE); - } - - if (len == 0) { - /* Ignore zero length messages */ - return (ICFG_SUCCESS); - } - - if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) { - /* Ignore packets > 64k or control buffers that don't fit */ - return (ICFG_SUCCESS); - } - - icmp = (struct icmp6_hdr *)in_packet; - - if (len < ICMP6_MINLEN) { - /* Ignore packet if it is too small to be icmp */ - return (ICFG_SUCCESS); - } - - opt = dad_find_ancillary(&msg, IPV6_HOPLIMIT); - if (opt == NULL) { - /* Unknown hoplimit - must drop */ - return (ICFG_SUCCESS); - } - hoplimit = *(uint_t *)opt; - - opt = dad_find_ancillary(&msg, IPV6_PKTINFO); - if (opt == NULL) { - /* Unknown destination address - must drop */ - return (ICFG_SUCCESS); - } - dst = ((struct in6_pktinfo *)opt)->ipi6_addr; - rcv_ifindex = ((struct in6_pktinfo *)opt)->ipi6_ifindex; - - opt = dad_find_ancillary(&msg, IPV6_RTHDR); - if (opt != NULL) { - /* Can't allow routing headers in ND messages */ - return (ICFG_SUCCESS); - } - - /* - * We're only interested in neighbor solicitations (someone - * else soliciting for the same address) and advertisements. - * We must verify each. 
In either case, we assume that the - * kernel verified the AH (if present) and the ICMP checksum. - */ - switch (icmp->icmp6_type) { - case ND_NEIGHBOR_SOLICIT: - - if (hoplimit != IPV6_MAX_HOPS) { - /* Packet came from different subnet */ - return (ICFG_SUCCESS); - } - - if (icmp->icmp6_code != 0) { - /* There are no codes for neighbor solicitations */ - return (ICFG_SUCCESS); - } - - if (len < sizeof (struct nd_neighbor_solicit)) { - /* Packet is too small */ - return (ICFG_SUCCESS); - } - - ns = (struct nd_neighbor_solicit *)icmp; - if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target)) { - /* NS target was multicast */ - return (ICFG_SUCCESS); - } - - if (len > sizeof (struct nd_neighbor_solicit)) { - /* - * A neighbor solicitation packet has the form - * of a header directly followed by options. - */ - if (!dad_verify_optlen((struct nd_opt_hdr *)&ns[1], - len - sizeof (struct nd_neighbor_solicit))) { - /* Invalid options */ - return (ICFG_SUCCESS); - } - } - - if (!IN6_IS_ADDR_UNSPECIFIED(&from.sin6_addr)) { - /* Sender is doing address resolution */ - return (ICFG_SUCCESS); - } - - if (rcv_ifindex != ifindex) { - /* Packet not received on test interface */ - return (ICFG_SUCCESS); - } - - if (!IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr, - &ns->nd_ns_target)) { - /* NS wasn't for test address */ - return (ICFG_SUCCESS); - } - - return (ICFG_DAD_FOUND); - - case ND_NEIGHBOR_ADVERT: - - if (hoplimit != IPV6_MAX_HOPS) { - /* Packet came from different subnet */ - return (ICFG_SUCCESS); - } - - if (icmp->icmp6_code != 0) { - /* There are no codes for neighbor advertisements */ - return (ICFG_SUCCESS); - } - - if (len < sizeof (struct nd_neighbor_advert)) { - /* Packet is too small */ - return (ICFG_SUCCESS); - } - - na = (struct nd_neighbor_advert *)icmp; - if (IN6_IS_ADDR_MULTICAST(&na->nd_na_target)) { - /* NA target was multicast */ - return (ICFG_SUCCESS); - } - - if (IN6_IS_ADDR_MULTICAST(&dst) && - (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { - /* Dest was 
multicast and solicited flag not zero */ - return (ICFG_SUCCESS); - } - - if (len > sizeof (struct nd_neighbor_advert)) { - /* - * A neighbor advertisement packet has the form - * of a header directly followed by options. - */ - if (!dad_verify_optlen((struct nd_opt_hdr *)&na[1], - len - sizeof (struct nd_neighbor_advert))) { - return (ICFG_SUCCESS); - } - } - - if (!IN6_ARE_ADDR_EQUAL(&testaddr->sin6_addr, - &na->nd_na_target)) { - /* NA wasn't for test address */ - return (ICFG_SUCCESS); - } - return (ICFG_DAD_FOUND); - - default: - return (ICFG_SUCCESS); - } -} - -/* - * Sends a DAD neighbor solicitation packet. Assumes the socket has been - * configured correctly (i.e., an IPV6_UNSPEC_SRC and an IPV6_BOUND_IF have - * been done by the caller, etc.). - * - * Returns: ICFG_SUCCESS or ICFG_FAILURE. - */ -static int -dad_send_probe(int sock, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc) -{ - static uint64_t outpack[(IP_MAXPACKET + 1)/8]; - struct nd_neighbor_solicit *ns = (struct nd_neighbor_solicit *)outpack; - int packetlen = 0; - int cc; - - ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; - ns->nd_ns_code = 0; - ns->nd_ns_cksum = 0; - ns->nd_ns_reserved = 0; - ns->nd_ns_target = testaddr->sin6_addr; - packetlen = sizeof (struct nd_neighbor_solicit); - cc = __xnet_sendto(sock, (char *)outpack, packetlen, 0, - (struct sockaddr *)solicited_mc, sizeof (*solicited_mc)); - if (cc != packetlen) { - return (ICFG_FAILURE); - } - - return (ICFG_SUCCESS); -} - -/* - * Build a solicited node multicast address for a given address. - */ -static void -in6_solmulti_addr(struct in6_addr *addr, struct in6_addr *multi) -{ - int i; - - *multi = solicited_prefix; - for (i = 13; i < 16; i++) { - multi->s6_addr[i] = addr->s6_addr[i]; - } -} - -/* - * Loops sending DAD probes and polling for responses. - * - * Returns: ICFG_SUCCESS, ICFG_FAILURE, ICFG_DAD_FOUND or ICFG_DAD_FAILED. 
- */ -static int -dad_loop(int sock, struct sockaddr_in6 *testaddr, - struct sockaddr_in6 *solicited_mc, int ifindex, int retrans_timer) -{ - int time_left; /* In milliseconds */ - struct timeval starttime; - struct timeval curtime; - struct pollfd fds; - int i; - int ret; - - /* - * Perform duplicate address detection sequence - * 1. Send a neighbor solicitation with an unspecified source - * address to the solicited node MC address with the testaddr - * being the target. - * 2. Wait for up to retrans_timer milliseconds for either a - * neighbor advertisement (sent to all-nodes) or a DAD neighbor - * solicitation for the testaddr. - * 3. Perform step 1 and 2 dup_addr_detect_transmits times. - */ - for (i = 0; i < dup_addr_detect_transmits; i++) { - ret = dad_send_probe(sock, testaddr, solicited_mc); - if (ret != ICFG_SUCCESS) { - return (ret); - } - - /* - * Track time to make sure total wait is retrans_timer - * even though random packet will awake poll. - */ - (void) gettimeofday(&starttime, NULL); - /* CONSTCOND */ - while (1) { - (void) gettimeofday(&curtime, NULL); - time_left = retrans_timer - - (curtime.tv_sec - starttime.tv_sec) * 1000 - - (curtime.tv_usec - starttime.tv_usec) / 1000; - - if (time_left <= 0) { - break; - } - fds.fd = sock; - fds.events = POLLIN; - - switch (poll(&fds, 1, time_left)) { - case -1: - return (ICFG_FAILURE); - case 0: - /* Need loop will break */ - break; - case 1: - if (fds.revents & POLLIN) { - ret = dad_receive(sock, testaddr, - ifindex); - if (ret != ICFG_SUCCESS) { - return (ret); - } - } - break; - default: - return (ICFG_DAD_FAILED); - } - } - } - return (ICFG_SUCCESS); -} - -/* - * Configures a socket for DAD. - * - * Returns: ICFG_SUCCESS or ICFG_FAILURE. - */ -static int -dad_configure_socket(int sock, int ifindex, struct sockaddr_in6 *solicited_mc) -{ - struct ipv6_mreq v6mcastr; - int hops = IPV6_MAX_HOPS; - int on = 1; - int off = 0; - - /* - * IPV6_BOUND_PIF prevents load spreading from happening. 
If we - * just do IPV6_BOUND_IF, the packet can go out on a different - * interface other than "ifindex", if interface is part of - * a group. In that case, we will get back the copy of NS that - * we sent and think it is a duplicate(Switch loops back the - * copy on all interfaces other than the one we sent the packet on). - */ - if (setsockopt(sock, IPPROTO_IPV6, IPV6_BOUND_PIF, (char *)&ifindex, - sizeof (ifindex)) < 0) { - return (ICFG_FAILURE); - } - - if (setsockopt(sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, - (char *)&hops, sizeof (hops)) < 0) { - return (ICFG_FAILURE); - } - - if (setsockopt(sock, IPPROTO_IPV6, IPV6_UNSPEC_SRC, - (char *)&on, sizeof (on)) < 0) { - return (ICFG_FAILURE); - } - - if (setsockopt(sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, - (char *)&off, sizeof (off)) < 0) { - return (ICFG_FAILURE); - } - - /* - * Enable receipt of ancillary data - */ - if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, - (char *)&on, sizeof (on)) < 0) { - return (ICFG_FAILURE); - } - if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVPKTINFO, - (char *)&on, sizeof (on)) < 0) { - return (ICFG_FAILURE); - } - if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVRTHDR, - (char *)&on, sizeof (on)) < 0) { - return (ICFG_FAILURE); - } - - /* - * Join the solicited node multicast address and all-nodes. - */ - v6mcastr.ipv6mr_multiaddr = solicited_mc->sin6_addr; - v6mcastr.ipv6mr_interface = ifindex; - - if (setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, - (char *)&v6mcastr, sizeof (v6mcastr)) < 0) { - return (ICFG_FAILURE); - } - - v6mcastr.ipv6mr_multiaddr = all_nodes_mcast; - v6mcastr.ipv6mr_interface = ifindex; - - if (setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, - (char *)&v6mcastr, sizeof (v6mcastr)) < 0) { - return (ICFG_FAILURE); - } - return (ICFG_SUCCESS); -} - -/* - * Performs duplicate address detection. - * - * Returns: ICFG_SUCCESS, ICFG_FAILURE, ICFG_DAD_FOUND or ICFG_DAD_FAILED. - * - * Note: the state of the interface name is unchanged. 
- */ -int -dad_test(icfg_handle_t handle, uint64_t flags, struct sockaddr_in6 *testaddr) -{ - struct sockaddr_in6 solicited_mc; - lif_ifinfo_req_t linkinfo; - int retrans_timer = ND_RETRANS_TIMER; - int ifindex; - int sock; - int syserr = 0; - int restore_ret; - int ret; - - /* - * Check the address assigned to the interface. - * Skip the check if IFF_NOLOCAL, IFF_NONUD, IFF_ANYCAST, or - * IFF_LOOPBACK. Note that IFF_NONUD turns of both NUD and DAD. - * DAD is not possible if not IFF_MULTICAST. - */ - if (flags & (IFF_NOLOCAL|IFF_LOOPBACK|IFF_NONUD|IFF_ANYCAST) || - !(flags & IFF_MULTICAST)) { - return (ICFG_SUCCESS); - } - - /* - * If the address is all zeroes, then just return success. - */ - if (IN6_IS_ADDR_UNSPECIFIED(&testaddr->sin6_addr)) { - return (ICFG_SUCCESS); - } - - /* - * Determine interface index (for IPV6_BOUND_PIF) and - * save the flag values so they can be restored on return. - */ - if ((ret = icfg_get_index(handle, &ifindex)) != ICFG_SUCCESS) { - return (ret); - } - - if ((ret = icfg_get_linkinfo(handle, &linkinfo)) != ICFG_SUCCESS) { - return (ret); - } - - if (linkinfo.lir_reachretrans != 0) { - retrans_timer = linkinfo.lir_reachretrans; - } - - /* - * Set NOLOCAL and UP flags. - * This prevents the use of the interface except when the user binds - * to unspecified IPv6 address, and sends to a link local multicast - * address. - */ - ret = icfg_set_flags(handle, flags | IFF_NOLOCAL | IFF_UP); - if (ret != ICFG_SUCCESS) { - return (ret); - } - - /* - * Extract the address and determine the solicited node multicast - * address to use. - */ - (void) memset(&solicited_mc, 0, sizeof (solicited_mc)); - solicited_mc.sin6_family = AF_INET6; - in6_solmulti_addr(&testaddr->sin6_addr, &solicited_mc.sin6_addr); - - /* - * Get a socket to use to send and receive neighbor solicitations - * for DAD. Also used for ioctls below. 
- */ - if ((sock = __xnet_socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0) { - syserr = errno; - ret = ICFG_FAILURE; - goto restore; - } - - ret = dad_configure_socket(sock, ifindex, &solicited_mc); - if (ret != ICFG_SUCCESS) { - syserr = errno; - (void) close(sock); - goto restore; - } - - ret = dad_loop(sock, testaddr, &solicited_mc, ifindex, - retrans_timer); - if (ret == ICFG_FAILURE) { - syserr = errno; - } - (void) close(sock); - -restore: - /* Restore flags */ - if ((restore_ret = icfg_set_flags(handle, flags)) != ICFG_SUCCESS) { - if (ret == ICFG_SUCCESS) { - syserr = errno; - ret = restore_ret; - } - } - - if (ret == ICFG_FAILURE) { - errno = syserr; - } - - return (ret); -} diff --git a/usr/src/lib/libinetcfg/common/inetcfg_dad.h b/usr/src/lib/libinetcfg/common/inetcfg_dad.h deleted file mode 100644 index b65fbb8b2a..0000000000 --- a/usr/src/lib/libinetcfg/common/inetcfg_dad.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2002 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _INETCFG_DAD_H -#define _INETCFG_DAD_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <netinet/in.h> -#include <inetcfg.h> - -#ifdef __cplusplus -extern "C" { -#endif - -extern int dad_test(icfg_handle_t, uint64_t, struct sockaddr_in6 *); - -#ifdef __cplusplus -} -#endif - -#endif /* _INETCFG_DAD_H */ diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_i386 b/usr/src/pkgdefs/SUNWmdb/prototype_i386 index 7a1fb04fd3..1fa3d16b03 100644 --- a/usr/src/pkgdefs/SUNWmdb/prototype_i386 +++ b/usr/src/pkgdefs/SUNWmdb/prototype_i386 @@ -47,6 +47,7 @@ f none usr/platform/i86pc/lib/mdb/kvm/pcplusmp.so 555 root sys f none usr/platform/i86pc/lib/mdb/kvm/uppc.so 555 root sys f none usr/platform/i86pc/lib/mdb/kvm/unix.so 555 root sys d none usr/lib/mdb/kvm/amd64 755 root sys +f none usr/lib/mdb/kvm/amd64/arp.so 555 root sys f none usr/lib/mdb/kvm/amd64/audiosup.so 555 root sys f none usr/lib/mdb/kvm/amd64/cpc.so 555 root sys f none usr/lib/mdb/kvm/amd64/crypto.so 555 root sys @@ -71,6 +72,7 @@ f none usr/lib/mdb/kvm/amd64/sppp.so 555 root sys f none usr/lib/mdb/kvm/amd64/ufs.so 555 root sys f none usr/lib/mdb/kvm/amd64/uhci.so 555 root sys f none usr/lib/mdb/kvm/amd64/usba.so 555 root sys +f none usr/lib/mdb/kvm/arp.so 555 root sys f none usr/lib/mdb/kvm/audiosup.so 555 root sys f none usr/lib/mdb/kvm/cpc.so 555 root sys f none usr/lib/mdb/kvm/crypto.so 555 root sys diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_sparc b/usr/src/pkgdefs/SUNWmdb/prototype_sparc index a5b29115a6..f67cdbdcc8 100644 --- a/usr/src/pkgdefs/SUNWmdb/prototype_sparc +++ b/usr/src/pkgdefs/SUNWmdb/prototype_sparc @@ -38,6 +38,7 @@ d none usr/bin/sparcv9 755 root bin f none usr/bin/sparcv9/mdb 555 root bin l none usr/bin/sparcv9/adb=../../../usr/bin/sparcv9/mdb d none usr/lib/mdb/kvm/sparcv9 755 root sys +f none usr/lib/mdb/kvm/sparcv9/arp.so 555 root sys f none usr/lib/mdb/kvm/sparcv9/audiosup.so 555 root sys f none usr/lib/mdb/kvm/sparcv9/cpc.so 555 root sys f none 
usr/lib/mdb/kvm/sparcv9/crypto.so 555 root sys diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_i386 b/usr/src/pkgdefs/SUNWmdbr/prototype_i386 index bd9ea07f75..d6a2291873 100644 --- a/usr/src/pkgdefs/SUNWmdbr/prototype_i386 +++ b/usr/src/pkgdefs/SUNWmdbr/prototype_i386 @@ -28,6 +28,7 @@ !include prototype_com d none kernel/kmdb/amd64 755 root sys +f none kernel/kmdb/amd64/arp 555 root sys f none kernel/kmdb/amd64/audiosup 555 root sys f none kernel/kmdb/amd64/cpc 555 root sys f none kernel/kmdb/amd64/crypto 555 root sys @@ -52,6 +53,7 @@ f none kernel/kmdb/amd64/sppp 555 root sys f none kernel/kmdb/amd64/ufs 555 root sys f none kernel/kmdb/amd64/uhci 555 root sys f none kernel/kmdb/amd64/usba 555 root sys +f none kernel/kmdb/arp 555 root sys f none kernel/kmdb/audiosup 555 root sys f none kernel/kmdb/cpc 555 root sys f none kernel/kmdb/crypto 555 root sys diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc index 154c47733f..4b57aaacbf 100644 --- a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc +++ b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc @@ -28,6 +28,7 @@ !include prototype_com # d none kernel/kmdb/sparcv9 755 root sys +f none kernel/kmdb/sparcv9/arp 555 root sys f none kernel/kmdb/sparcv9/audiosup 555 root sys f none kernel/kmdb/sparcv9/cpc 555 root sys f none kernel/kmdb/sparcv9/crypto 555 root sys diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index fa5d0c132d..ef97e42257 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -131,6 +131,7 @@ GENUNIX_OBJS += \ fdbuffer.o \ fdsync.o \ fem.o \ + ffs.o \ fio.o \ flock.o \ fm.o \ diff --git a/usr/src/uts/common/inet/arp.h b/usr/src/uts/common/inet/arp.h index c773d6354b..71fd056afc 100644 --- a/usr/src/uts/common/inet/arp.h +++ b/usr/src/uts/common/inet/arp.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, 
Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1992,1997-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -30,10 +29,18 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/types.h> + #ifdef __cplusplus extern "C" { #endif +/* + * Warning: the interfaces described in this file are private to the + * implementation. They may change at any time without notice and are not + * documented. Do not depend on them. + */ + #define ARP_REQUEST 1 #define ARP_RESPONSE 2 #define RARP_REQUEST 3 @@ -41,52 +48,40 @@ extern "C" { #define AR_IOCTL (((unsigned)'A' & 0xFF)<<8) #define CMD_IN_PROGRESS 0x10000 -/* - * The following ARP commands are private, and not part of a supported - * interface. They are subject to change without notice in any release. - */ + #define AR_ENTRY_ADD (AR_IOCTL + 1) #define AR_ENTRY_DELETE (AR_IOCTL + 2) #define AR_ENTRY_QUERY (AR_IOCTL + 3) -#define AR_XMIT_REQUEST (AR_IOCTL + 4) -#define AR_XMIT_TEMPLATE (AR_IOCTL + 5) #define AR_ENTRY_SQUERY (AR_IOCTL + 6) #define AR_MAPPING_ADD (AR_IOCTL + 7) #define AR_CLIENT_NOTIFY (AR_IOCTL + 8) #define AR_INTERFACE_UP (AR_IOCTL + 9) #define AR_INTERFACE_DOWN (AR_IOCTL + 10) -#define AR_XMIT_RESPONSE (AR_IOCTL + 11) #define AR_INTERFACE_ON (AR_IOCTL + 12) #define AR_INTERFACE_OFF (AR_IOCTL + 13) #define AR_DLPIOP_DONE (AR_IOCTL + 14) -#define AR_ENTRY_LLAQUERY (AR_IOCTL + 15) /* * This is not an ARP command per se, it is used to interface between * ARP and IP during close. 
*/ #define AR_ARP_CLOSING (AR_IOCTL + 16) +#define AR_ARP_EXTEND (AR_IOCTL + 17) -/* - * The following ACE flags are private, and not part of a supported - * interface. They are subject to change without notice in any release. - */ -#define ACE_F_PERMANENT 0x1 -#define ACE_F_PUBLISH 0x2 -#define ACE_F_DYING 0x4 -#define ACE_F_RESOLVED 0x8 +/* Both ace_flags and area_flags; must also modify arp.c in mdb */ +#define ACE_F_PERMANENT 0x0001 +#define ACE_F_PUBLISH 0x0002 +#define ACE_F_DYING 0x0004 +#define ACE_F_RESOLVED 0x0008 /* Using bit mask extraction from target address */ -#define ACE_F_MAPPING 0x10 -#define ACE_F_MYADDR 0x20 /* Strong check for duplicate MACs */ - -/* ARP Cmd Table entry */ -typedef struct arct_s { - pfi_t arct_pfi; - uint32_t arct_cmd; - int arct_min_len; - uint32_t arct_flags; - int arct_priv_req; /* Privilege required for this cmd */ - const char *arct_txt; -} arct_t; +#define ACE_F_MAPPING 0x0010 +#define ACE_F_MYADDR 0x0020 /* IP claims to own this address */ +#define ACE_F_UNVERIFIED 0x0040 /* DAD not yet complete */ +#define ACE_F_AUTHORITY 0x0080 /* check for duplicate MACs */ +#define ACE_F_DEFEND 0x0100 /* single transmit (area_flags only) */ +#define ACE_F_OLD 0x0200 /* should revalidate when IP asks */ +#define ACE_F_FAST 0x0400 /* fast probe enabled */ +#define ACE_F_DELAYED 0x0800 /* rescheduled on arp_defend_rate */ +#define ACE_F_DAD_ABORTED 0x1000 /* DAD was aborted on link down */ /* ARP Command Structures */ @@ -98,12 +93,6 @@ typedef struct ar_cmd_s { } arc_t; /* - * The following ARP command structures are private, and not - * part of a supported interface. They are subject to change - * without notice in any release. - */ - -/* * NOTE: when using area_t for an AR_ENTRY_SQUERY, the area_hw_addr_offset * field isn't what you might think. 
See comments in ip_multi.c where * the routine ill_create_squery() is called, and also in the routine @@ -196,13 +185,10 @@ typedef struct ar_client_notify_s { } arcn_t; /* Client Notification Codes */ -/* - * The following Client Notification codes are private, and not - * part of a supported interface. They are subject to change - * without notice in any release. - */ #define AR_CN_BOGON 1 #define AR_CN_ANNOUNCE 2 +#define AR_CN_READY 3 /* DAD complete; address usable */ +#define AR_CN_FAILED 4 /* DAD failed; address unusable */ /* ARP Header */ typedef struct arh_s { diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c index fd7d086933..17c81b9513 100644 --- a/usr/src/uts/common/inet/arp/arp.c +++ b/usr/src/uts/common/inet/arp/arp.c @@ -28,8 +28,6 @@ /* AR - Address Resolution Protocol */ -#define ARP_DEBUG - #include <sys/types.h> #include <sys/stream.h> #include <sys/stropts.h> @@ -47,6 +45,9 @@ #include <sys/strsun.h> #include <sys/policy.h> #include <sys/ethernet.h> +#include <sys/zone.h> +#include <sys/random.h> +#include <sys/sdt.h> #include <inet/common.h> #include <inet/optcom.h> @@ -56,24 +57,52 @@ #include <net/if.h> #include <inet/arp.h> #include <netinet/ip6.h> +#include <netinet/arp.h> #include <inet/ip.h> #include <inet/ip_ire.h> +#include <inet/ip_ndp.h> #include <inet/mib2.h> #include <inet/arp_impl.h> -#ifdef ARP_DEBUG -#define arp0dbg(a) printf a -#define arp1dbg(a) if (arp_debug) printf a -#define arp2dbg(a) if (arp_debug > 1) printf a -#define arp3dbg(a) if (arp_debug > 2) printf a -#else -#define arp0dbg(a) /* */ -#define arp1dbg(a) /* */ -#define arp2dbg(a) /* */ -#define arp3dbg(a) /* */ -#endif +/* + * ARP entry life time and design notes + * ------------------------------------ + * + * ARP entries (ACEs) must last at least as long as IP knows about a given + * MAC-IP translation (i.e., as long as the IRE cache entry exists). 
It's ok + * if the ARP entry lasts longer, but not ok if it is removed before the IP + * entry. The reason for this is that if ARP doesn't have an entry, we will be + * unable to detect the difference between an ARP broadcast that represents no + * change (same, known address of sender) and one that represents a change (new + * address for existing entry). In the former case, we must not notify IP, or + * we can suffer hurricane attack. In the latter case, we must notify IP, or + * IP will drift out of sync with the network. + * + * Note that IP controls the lifetime of entries, not ARP. + * + * We don't attempt to reconfirm aging entries. If the system is no longer + * talking to a given peer, then it doesn't matter if we have the right mapping + * for that peer. It would be possible to send queries on aging entries that + * are active, but this isn't done. + */ + +/* + * This is used when scanning for "old" (least recently broadcast) ACEs. We + * don't want to have to walk the list for every single one, so we gather up + * batches at a time. + */ +#define ACE_RESCHED_LIST_LEN 8 + +typedef struct { + arl_t *art_arl; + uint_t art_naces; + ace_t *art_aces[ACE_RESCHED_LIST_LEN]; +} ace_resched_t; #define ACE_RESOLVED(ace) ((ace)->ace_flags & ACE_F_RESOLVED) +#define ACE_NONPERM(ace) \ + (((ace)->ace_flags & (ACE_F_RESOLVED | ACE_F_PERMANENT)) == \ + ACE_F_RESOLVED) #define AR_DEF_XMIT_INTERVAL 500 /* time in milliseconds */ #define AR_LL_HDR_SLACK 32 /* Leave the lower layer some room */ @@ -82,6 +111,13 @@ #define AR_DRAINING (void *)0x11 /* + * The IPv4 Link Local address space is special; we do extra duplicate checking + * there, as the entire assignment mechanism rests on random numbers. + */ +#define IS_IPV4_LL_SPACE(ptr) (((uchar_t *)ptr)[0] == 169 && \ + ((uchar_t *)ptr)[1] == 254) + +/* * Check if the command needs to be enqueued by seeing if there are other * commands ahead of us or if some DLPI response is being awaited. 
Usually * there would be an enqueued command in the latter case, however if the @@ -94,33 +130,9 @@ (mp->b_prev != AR_DRAINING && (arl->arl_queue != NULL || \ arl->arl_dlpi_pending != DL_PRIM_INVAL)) -/* Ugly check to determine whether the module below is IP */ -#define MODULE_BELOW_IS_IP(q) \ - ((WR(q)->q_next != NULL && WR(q)->q_next->q_next != NULL) && \ - (strcmp(WR(q)->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0)) - -/* ARP Cache Entry */ -typedef struct ace_s { - struct ace_s *ace_next; /* Hash chain next pointer */ - struct ace_s **ace_ptpn; /* Pointer to previous next */ - struct arl_s *ace_arl; /* Associated arl */ - uint32_t ace_proto; /* Protocol for this ace */ - uint32_t ace_flags; - uchar_t *ace_proto_addr; - uint32_t ace_proto_addr_length; - uchar_t *ace_proto_mask; /* Mask for matching addr */ - uchar_t *ace_proto_extract_mask; /* For mappings */ - uchar_t *ace_hw_addr; - uint32_t ace_hw_addr_length; - uint32_t ace_hw_extract_start; /* For mappings */ - mblk_t *ace_mp; /* mblk we are in */ - uint32_t ace_query_count; - mblk_t *ace_query_mp; /* Head of outstanding query chain */ - int ace_publish_count; -} ace_t; - #define ACE_EXTERNAL_FLAGS_MASK \ -(ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR) + (ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR | \ + ACE_F_AUTHORITY) #define ARH_FIXED_LEN 8 @@ -165,8 +177,8 @@ static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr, uchar_t *proto_extract_mask, uint32_t hw_extract_start, uint32_t flags); static void ar_ce_delete(ace_t *ace); -static void ar_ce_delete_per_arl(ace_t *ace, arl_t *arl); -static ace_t **ar_ce_hash(uint32_t proto, uchar_t *proto_addr, +static void ar_ce_delete_per_arl(ace_t *ace, void *arg); +static ace_t **ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length); static ace_t *ar_ce_lookup(arl_t *arl, uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length); @@ -175,14 +187,12 @@ static ace_t 
*ar_ce_lookup_entry(arl_t *arl, uint32_t proto, static ace_t *ar_ce_lookup_from_area(mblk_t *mp, ace_t *matchfn()); static ace_t *ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length); -static int ar_ce_report(queue_t *q, mblk_t *mp, caddr_t data, cred_t *cr); -static void ar_ce_report1(ace_t *ace, uchar_t *mp_arg); -static void ar_ce_resolve(ace_t *ace, uchar_t *hw_addr, +static boolean_t ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length); -static void ar_ce_walk(pfi_t pfi, void *arg1); +static void ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1); static void ar_cleanup(void); -static void ar_client_notify(arl_t *arl, mblk_t *mp, int code); +static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code); static int ar_close(queue_t *q); static int ar_cmd_dispatch(queue_t *q, mblk_t *mp); static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size); @@ -215,7 +225,7 @@ static int ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static boolean_t ar_param_register(arpparam_t *arppa, int cnt); static int ar_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static int ar_query_delete(ace_t *ace, uchar_t *ar); +static void ar_query_delete(ace_t *ace, void *ar); static void ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, uint32_t proto_addr_len); static clock_t ar_query_xmit(ace_t *ace, ace_t *src_ace); @@ -227,25 +237,16 @@ static int ar_slifname(queue_t *q, mblk_t *mp); static int ar_set_ppa(queue_t *q, mblk_t *mp); static int ar_snmp_msg(queue_t *q, mblk_t *mp_orig); static void ar_snmp_msg2(ace_t *, void *); -static void ar_timer_init(queue_t *q); -static int ar_trash(ace_t *ace, uchar_t *arg); static void ar_wput(queue_t *q, mblk_t *mp); static void ar_wsrv(queue_t *q); static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, - uint32_t plen, uchar_t *haddr1, uchar_t *paddr1, - uchar_t *haddr2, uchar_t *paddr2); -static int 
ar_xmit_request(queue_t *q, mblk_t *mp); -static int ar_xmit_response(queue_t *q, mblk_t *mp); + uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1, + const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr); static uchar_t *ar_snmp_msg_element(mblk_t **, uchar_t *, size_t); static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd, boolean_t); static mblk_t *ar_cmd_dequeue(arl_t *arl); -#if 0 -static void show_ace(char *str, ace_t *ace); -static void show_arp(char *str, mblk_t *mp); -#endif - /* * All of these are alterable, within the min/max values given, * at run time. arp_publish_interval and arp_publish_count are @@ -256,16 +257,34 @@ static void show_arp(char *str, mblk_t *mp); */ static arpparam_t arp_param_arr[] = { /* min max value name */ - { 0, 10, 0, "arp_debug"}, { 30000, 3600000, 300000, "arp_cleanup_interval"}, { 1000, 20000, 2000, "arp_publish_interval"}, { 1, 20, 5, "arp_publish_count"}, + { 0, 20000, 1000, "arp_probe_delay"}, + { 10, 20000, 1500, "arp_probe_interval"}, + { 0, 20, 3, "arp_probe_count"}, + { 0, 20000, 100, "arp_fastprobe_delay"}, + { 10, 20000, 150, "arp_fastprobe_interval"}, + { 0, 20, 3, "arp_fastprobe_count"}, + { 0, 3600000, 300000, "arp_defend_interval"}, + { 0, 20000, 100, "arp_defend_rate"}, + { 0, 3600000, 15000, "arp_broadcast_interval"}, + { 5, 86400, 3600, "arp_defend_period"} }; -#define arp_debug arp_param_arr[0].arp_param_value -#define arp_timer_interval arp_param_arr[1].arp_param_value -#define arp_publish_interval arp_param_arr[2].arp_param_value -#define arp_publish_count arp_param_arr[3].arp_param_value +#define arp_cleanup_interval arp_param_arr[0].arp_param_value +#define arp_publish_interval arp_param_arr[1].arp_param_value +#define arp_publish_count arp_param_arr[2].arp_param_value +#define arp_probe_delay arp_param_arr[3].arp_param_value +#define arp_probe_interval arp_param_arr[4].arp_param_value +#define arp_probe_count arp_param_arr[5].arp_param_value +#define 
arp_fastprobe_delay arp_param_arr[6].arp_param_value +#define arp_fastprobe_interval arp_param_arr[7].arp_param_value +#define arp_fastprobe_count arp_param_arr[8].arp_param_value +#define arp_defend_interval arp_param_arr[9].arp_param_value +#define arp_defend_rate arp_param_arr[10].arp_param_value +#define arp_broadcast_interval arp_param_arr[11].arp_param_value +#define arp_defend_period arp_param_arr[12].arp_param_value static struct module_info info = { 0, "arp", 0, INFPSZ, 512, 128 @@ -289,27 +308,24 @@ static arl_t *arl_g_head; /* ARL List Head */ /* * TODO: we need a better mechanism to set the ARP hardware type since - * the DLPI mac type does not include enough prodefined values. + * the DLPI mac type does not include enough predefined values. */ static ar_m_t ar_m_tbl[] = { - { DL_CSMACD, 1, -2, 6}, /* 802.3 */ - { DL_TPB, 6, -2, 6}, /* 802.4 */ - { DL_TPR, 6, -2, 6}, /* 802.5 */ - { DL_METRO, 6, -2, 6}, /* 802.6 */ - { DL_ETHER, 1, -2, 6}, /* Ethernet */ - { DL_FDDI, 1, -2, 6}, /* FDDI */ - { DL_IB, 32, -2, 20}, /* Infiniband */ - { DL_OTHER, 1, -2, 6}, /* unknown */ + { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ + { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */ + { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ + { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ + { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ + { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ + { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ + { DL_OTHER, ARPHRD_ETHER, -2, 6}, /* unknown */ }; /* ARP Cache Entry Hash Table */ -static ace_t *ar_ce_hash_tbl[256]; +static ace_t *ar_ce_hash_tbl[ARP_HASH_SIZE]; static ace_t *ar_ce_mask_entries; /* proto_mask not all ones */ -static mblk_t *ar_timer_mp; /* garbage collection timer */ -static queue_t *ar_timer_queue; /* queue for garbage collection */ - /* * Note that all routines which need to queue the message for later * processing have to be ioctl_aware to be able to queue the complete message. 
@@ -318,6 +334,16 @@ static queue_t *ar_timer_queue; /* queue for garbage collection */ #define ARF_IOCTL_AWARE 0x1 /* Arp command can come down as M_IOCTL */ #define ARF_ONLY_CMD 0x2 /* Command is exclusive to ARP */ +/* ARP Cmd Table entry */ +typedef struct arct_s { + int (*arct_pfi)(queue_t *, mblk_t *); + uint32_t arct_cmd; + int arct_min_len; + uint32_t arct_flags; + int arct_priv_req; /* Privilege required for this cmd */ + const char *arct_txt; +} arct_t; + static arct_t ar_cmd_tbl[] = { { ar_entry_add, AR_ENTRY_ADD, sizeof (area_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_ADD" }, @@ -327,10 +353,6 @@ static arct_t ar_cmd_tbl[] = { ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_QUERY" }, { ar_entry_squery, AR_ENTRY_SQUERY, sizeof (area_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_SQUERY" }, - { ar_xmit_request, AR_XMIT_REQUEST, sizeof (areq_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_XMIT_REQUEST" }, - { ar_xmit_response, AR_XMIT_RESPONSE, sizeof (areq_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_XMIT_RESPONSE" }, { ar_mapping_add, AR_MAPPING_ADD, sizeof (arma_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_MAPPING_ADD" }, { ar_interface_up, AR_INTERFACE_UP, sizeof (arc_t), @@ -372,7 +394,7 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, if ((flags & ~ACE_EXTERNAL_FLAGS_MASK) || arl == NULL) return (EINVAL); if (flags & ACE_F_MYADDR) - flags |= ACE_F_PUBLISH; + flags |= ACE_F_PUBLISH | ACE_F_AUTHORITY; if (!hw_addr && hw_addr_len == 0) { if (flags == ACE_F_PERMANENT) { /* Not publish */ @@ -398,6 +420,17 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, return (EINVAL); if (!proto_extract_mask && (flags & ACE_F_MAPPING)) return (EINVAL); + + /* + * If the underlying link doesn't have reliable up/down notification or + * if we're working with the IPv4 169.254.0.0/16 Link Local Address + * space, then don't use the fast timers. Otherwise, use them. 
+ */ + if (arl->arl_notifies && + !(proto == IP_ARP_PROTO_TYPE && IS_IPV4_LL_SPACE(proto_addr))) { + flags |= ACE_F_FAST; + } + /* * Allocate the timer block to hold the ace. * (ace + proto_addr + proto_addr_mask + proto_extract_mask + hw_addr) @@ -425,15 +458,15 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, * subnet structure, if, for example, there are BSD4.2 systems lurking. */ ace->ace_proto_mask = dst; - if (proto_mask) { + if (proto_mask != NULL) { bcopy(proto_mask, dst, proto_addr_len); dst += proto_addr_len; } else { - while (proto_addr_len--) + while (proto_addr_len-- > 0) *dst++ = (uchar_t)~0; } - if (proto_extract_mask) { + if (proto_extract_mask != NULL) { ace->ace_proto_extract_mask = dst; bcopy(proto_extract_mask, dst, ace->ace_proto_addr_length); dst += ace->ace_proto_addr_length; @@ -443,21 +476,22 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, ace->ace_hw_extract_start = hw_extract_start; ace->ace_hw_addr_length = hw_addr_len; ace->ace_hw_addr = dst; - if (hw_addr) { + if (hw_addr != NULL) { bcopy(hw_addr, dst, hw_addr_len); dst += hw_addr_len; } ace->ace_arl = arl; ace->ace_flags = flags; - ace->ace_publish_count = arp_publish_count; + if (ar_mask_all_ones(ace->ace_proto_mask, ace->ace_proto_addr_length)) { acep = ar_ce_hash(ace->ace_proto, ace->ace_proto_addr, ace->ace_proto_addr_length); - } else + } else { acep = &ar_ce_mask_entries; - if ((ace->ace_next = *acep) != 0) + } + if ((ace->ace_next = *acep) != NULL) ace->ace_next->ace_ptpn = &ace->ace_next; *acep = ace; ace->ace_ptpn = acep; @@ -488,9 +522,9 @@ ar_ce_delete(ace_t *ace) * that is going away. 
*/ static void -ar_ce_delete_per_arl(ace_t *ace, arl_t *arl) +ar_ce_delete_per_arl(ace_t *ace, void *arl) { - if (ace != NULL && ace->ace_arl == arl) { + if (ace->ace_arl == arl) { ace->ace_flags &= ~ACE_F_PERMANENT; ar_ce_delete(ace); } @@ -498,9 +532,10 @@ ar_ce_delete_per_arl(ace_t *ace, arl_t *arl) /* Cache entry hash routine, based on protocol and protocol address. */ static ace_t ** -ar_ce_hash(uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length) +ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, + uint32_t proto_addr_length) { - uchar_t *up = proto_addr; + const uchar_t *up = proto_addr; unsigned int hval = proto; int len = proto_addr_length; @@ -647,194 +682,170 @@ ar_ce_lookup_permanent(uint32_t proto, uchar_t *proto_addr, } /* - * Pass a cache report back out via NDD. - * TODO: Right now this report assumes IP proto address formatting. - */ -/* ARGSUSED */ -static int -ar_ce_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cr) -{ - (void) mi_mpprintf(mp, - "ifname proto addr proto mask hardware addr flags"); - /* abcdefgh xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx xx:xx:xx:xx:xx:xx */ - ar_ce_walk((pfi_t)ar_ce_report1, mp); - return (0); -} - -/* - * Add a single line to the ARP Cache Entry Report. - * TODO: Right now this report assumes IP proto address formatting. + * ar_ce_resolve is called when a response comes in to an outstanding request. + * Returns 'true' if the address has changed and we need to tell the client. + * (We don't need to tell the client if there's still an outstanding query.) 
*/ -static void -ar_ce_report1(ace_t *ace, uchar_t *mp_arg) +static boolean_t +ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length) { - static uchar_t zero_array[8]; - uint32_t flags = ace->ace_flags; - mblk_t *mp = (mblk_t *)mp_arg; - uchar_t *p = ace->ace_proto_addr; - uchar_t *h = ace->ace_hw_addr; - uchar_t *m = ace->ace_proto_mask; - const char *name = "unknown"; - - if (ace->ace_arl != NULL) - name = ace->ace_arl->arl_name; - if (p == NULL) - p = zero_array; - if (h == NULL) - h = zero_array; - if (m == NULL) - m = zero_array; - (void) mi_mpprintf(mp, - "%8s %03d.%03d.%03d.%03d " - "%03d.%03d.%03d.%03d %02x:%02x:%02x:%02x:%02x:%02x", - name, - p[0] & 0xFF, p[1] & 0xFF, p[2] & 0xFF, p[3] & 0xFF, - m[0] & 0xFF, m[1] & 0xFF, m[2] & 0xFF, m[3] & 0xFF, - h[0] & 0xFF, h[1] & 0xFF, h[2] & 0xFF, h[3] & 0xFF, - h[4] & 0xFF, h[5] & 0xFF); - if (flags & ACE_F_PERMANENT) - (void) mi_mpprintf_nr(mp, " PERM"); - if (flags & ACE_F_PUBLISH) - (void) mi_mpprintf_nr(mp, " PUBLISH"); - if (flags & ACE_F_DYING) - (void) mi_mpprintf_nr(mp, " DYING"); - if (!(flags & ACE_F_RESOLVED)) - (void) mi_mpprintf_nr(mp, " UNRESOLVED"); - if (flags & ACE_F_MAPPING) - (void) mi_mpprintf_nr(mp, " MAPPING"); - if (flags & ACE_F_MYADDR) - (void) mi_mpprintf_nr(mp, " MYADDR"); -} + boolean_t hwchanged; -/* - * ar_ce_resolve is called when a response comes in to an outstanding - * request. - */ -static void -ar_ce_resolve(ace_t *ace, uchar_t *hw_addr, uint32_t hw_addr_length) -{ if (hw_addr_length == ace->ace_hw_addr_length) { - if (ace->ace_hw_addr) + ASSERT(ace->ace_hw_addr != NULL); + hwchanged = bcmp(hw_addr, ace->ace_hw_addr, + hw_addr_length) != 0; + if (hwchanged) bcopy(hw_addr, ace->ace_hw_addr, hw_addr_length); /* - * ar_query_reply() blows away soft entries. - * Do not call it unless something is waiting. + * No need to bother with ar_query_reply if no queries are + * waiting. 
*/ ace->ace_flags |= ACE_F_RESOLVED; - if (ace->ace_query_mp) + if (ace->ace_query_mp != NULL) ar_query_reply(ace, 0, NULL, (uint32_t)0); + else if (hwchanged) + return (B_TRUE); } + return (B_FALSE); } /* * There are 2 functions performed by this function. * 1. Resolution of unresolved entries and update of resolved entries. - * 2. Detection of hosts with (duplicate) our own IP address + * 2. Detection of nodes with our own IP address (duplicates). + * + * This is complicated by ill groups. We don't currently have knowledge of ill + * groups, so we can't distinguish between a packet that comes in on one of the + * arls that's part of the group versus one that's on an unrelated arl. Thus, + * we take a conservative approach. If the arls match, then we update resolved + * and unresolved entries alike. If they don't match, then we update only + * unresolved entries. * - * Resolution of unresolved entries and update of resolved entries. + * For all entries, we first check to see if this is a duplicate (probable + * loopback) message. If so, then just ignore it. * - * case A. The packet has been received on the same interface as this ace's - * arl. We blindly call ar_ce_resolve(). The relevant checks for duplicate - * detection (ACE_F_MYADDR) and trying to update published entries have - * already happened in ar_rput(). Both resolved and unresolved entries are - * updated now. This allows a published entry to be updated by an arp - * request, from the node for which we are a proxy arp server, as for eg. - * when a mobile node returns home. + * Next, check to see if the entry has completed DAD. If not, then we've + * failed, because someone is already using the address. Notify IP of the DAD + * failure and remove the broken ace. * - * case B. The interface on which the packet arrived does not match the - * ace's arl. In this case we update only unresolved entries. - * Look whether we have an unresolved entry for src_paddr and if so - * resolve it. 
We need to look at all the aces that matches the - * src_haddr because with ill groups we could have unresolved ace - * across the whole group. As we don't have knowledge of groups, - * look across all of them. Note that this logic does not update published - * arp entries, as for eg. when we proxy arp across 2 subnets with - * differing subnet masks. + * Next, we check if we're the authority for this address. If so, then it's + * time to defend it, because the other node is a duplicate. Report it as a + * 'bogon' and let IP decide how to defend. * - * Detection of hosts with (duplicate) our own IP address. + * Finally, if it's unresolved or if the arls match, we just update the MAC + * address. This allows a published 'static' entry to be updated by an ARP + * request from the node for which we're a proxy ARP server -- e.g., when a + * mobile node returns home. If the address has changed, then tell IP. * - * case A is handled in ar_rput(). case B is handled here. We return AR_BOGON, - * if we detect duplicate, and caller will send BOGON message to IP. - * If hme0 and hme1 are in a IPMP group. hme1 will receive broadcast arp - * packets sent from hme0. Both IP address and Hardware address of the - * packet match the ace. So we return AR_LOOPBACK. + * Note that this logic does not update published ARP entries for mismatched + * arls, as for example when we proxy arp across 2 subnets with differing + * subnet masks. * * Return Values below */ -#define AR_NORMAL 1 /* Usual return value. */ -#define AR_LOOPBACK 2 /* Our own broadcast arp packet was received */ -#define AR_BOGON 3 /* Another host has our IP addr. */ +#define AR_NOTFOUND 1 /* No matching ace found in cache */ +#define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */ +#define AR_LOOPBACK 3 /* Our own arp packet was received */ +#define AR_BOGON 4 /* Another host has our IP addr. 
*/ +#define AR_FAILED 5 /* Duplicate Address Detection has failed */ +#define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ static int -ar_ce_resolve_all(arl_t *arl, uint32_t proto, uchar_t *src_haddr, - uint32_t hlen, uchar_t *src_paddr, uint32_t plen) +ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr, + uint32_t hlen, const uchar_t *src_paddr, uint32_t plen) { ace_t *ace; ace_t *ace_next; + int i1; + const uchar_t *paddr; + uchar_t *ace_addr; + uchar_t *mask; + int retv = AR_NOTFOUND; ace = *ar_ce_hash(proto, src_paddr, plen); for (; ace != NULL; ace = ace_next) { + /* ar_ce_resolve may delete the ace; fetch next pointer now */ ace_next = ace->ace_next; - if (ace->ace_proto_addr_length == plen && - ace->ace_proto == proto) { - int i1 = plen; - uchar_t *ace_addr = ace->ace_proto_addr; - uchar_t *mask = ace->ace_proto_mask; + if (ace->ace_proto_addr_length != plen || + ace->ace_proto != proto) { + continue; + } - /* - * Note that the ace_proto_mask is applied to the - * proto_addr before comparing to the ace_addr. - */ - do { - if (--i1 < 0) { - /* - * Limit updating across other - * ills to unresolved entries only. - * We don't want to inadvertently - * update published entries or our - * own entries. - */ - if ((ace->ace_arl == arl) || - (!ACE_RESOLVED(ace))) { - ar_ce_resolve(ace, src_haddr, hlen); - } else { - /* - * If both IP addr and hardware - * address match our's then this - * is a broadcast packet emitted by - * one of our interfaces, reflected - * by the switch, and received on - * another interface. We return - * AR_LOOPBACK. If only IP addr. - * matches our's then some other node - * is using our IP addr, return - * AR_BOGON. 
- */ - if (ace->ace_flags & ACE_F_MYADDR) { - if (bcmp(ace->ace_hw_addr, - src_haddr, - ace->ace_hw_addr_length) != 0) { - return (AR_BOGON); - } else { - return (AR_LOOPBACK); - } - - } - } + /* + * Note that the ace_proto_mask is applied to the proto_addr + * before comparing to the ace_addr. + */ + paddr = src_paddr; + i1 = plen; + ace_addr = ace->ace_proto_addr; + mask = ace->ace_proto_mask; + while (--i1 >= 0) { + if ((*paddr++ & *mask++) != *ace_addr++) break; - } - } while ((src_paddr[i1] & mask[i1]) == ace_addr[i1]); + } + if (i1 >= 0) + continue; + + /* + * If both IP addr and hardware address match what we already + * have, then this is a broadcast packet emitted by one of our + * interfaces, reflected by the switch and received on another + * interface. We return AR_LOOPBACK. + */ + if ((ace->ace_flags & ACE_F_MYADDR) && + hlen == ace->ace_hw_addr_length && + bcmp(ace->ace_hw_addr, src_haddr, + ace->ace_hw_addr_length) == 0) { + return (AR_LOOPBACK); + } + + /* + * If the entry is unverified, then we've just verified that + * someone else already owns this address, because this is a + * message with the same protocol address but different + * hardware address. + */ + if (ace->ace_flags & ACE_F_UNVERIFIED) { + ar_ce_delete(ace); + return (AR_FAILED); + } + + /* + * If the IP address matches ours and we're authoritative for + * this entry, then some other node is using our IP addr, so + * return AR_BOGON. Also reset the transmit count to zero so + * that, if we're currently in initial announcement mode, we + * switch back to the lazier defense mode. Knowing that + * there's at least one duplicate out there, we ought not + * blindly announce. + */ + if (ace->ace_flags & ACE_F_AUTHORITY) { + ace->ace_xmit_count = 0; + return (AR_BOGON); + } + + /* + * Limit updating across other ills to unresolved + * entries only. We don't want to inadvertently update + * published entries. 
+ */ + if (ace->ace_arl == arl || !ACE_RESOLVED(ace)) { + if (ar_ce_resolve(ace, src_haddr, hlen)) + retv = AR_CHANGED; + else if (retv == AR_NOTFOUND) + retv = AR_MERGED; } } - return (AR_NORMAL); + return (retv); } /* Pass arg1 to the pfi supplied, along with each ace in existence. */ static void -ar_ce_walk(pfi_t pfi, void *arg1) +ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1) { ace_t *ace; ace_t *ace1; @@ -870,7 +881,7 @@ ar_cleanup(void) * DEV (i.e. ARL). */ static void -ar_client_notify(arl_t *arl, mblk_t *mp, int code) +ar_client_notify(const arl_t *arl, mblk_t *mp, int code) { ar_t *ar = ((ar_t *)arl->arl_rq->q_ptr)->ar_arl_ip_assoc; arcn_t *arcn; @@ -904,6 +915,39 @@ ar_client_notify(arl_t *arl, mblk_t *mp, int code) putnext(ar->ar_wq, mp1); } +/* + * Send a delete-notify message down to IP. We've determined that IP doesn't + * have a cache entry for the IP address itself, but it may have other cache + * entries with the same hardware address, and we don't want to see those grow + * stale. (The alternative is sending down updates for every ARP message we + * get that doesn't match an existing ace. That's much more expensive than an + * occasional delete and reload.) + */ +static void +ar_delete_notify(const ace_t *ace) +{ + const arl_t *arl = ace->ace_arl; + mblk_t *mp; + size_t len; + arh_t *arh; + + len = sizeof (*arh) + 2 * ace->ace_proto_addr_length; + mp = allocb(len, BPRI_MED); + if (mp == NULL) + return; + arh = (arh_t *)mp->b_rptr; + mp->b_wptr = (uchar_t *)arh + len; + U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware); + U16_TO_BE16(ace->ace_proto, arh->arh_proto); + arh->arh_hlen = 0; + arh->arh_plen = ace->ace_proto_addr_length; + U16_TO_BE16(ARP_RESPONSE, arh->arh_operation); + bcopy(ace->ace_proto_addr, arh + 1, ace->ace_proto_addr_length); + bcopy(ace->ace_proto_addr, (uchar_t *)(arh + 1) + + ace->ace_proto_addr_length, ace->ace_proto_addr_length); + ar_client_notify(arl, mp, AR_CN_ANNOUNCE); +} + /* ARP module close routine. 
*/ static int ar_close(queue_t *q) @@ -926,7 +970,7 @@ ar_close(queue_t *q) * an ack. This helps to make sure that messages * that are currently being sent up by IP are not lost. */ - if (MODULE_BELOW_IS_IP(q)) { + if (ar->ar_on_ill_stream) { mp1 = allocb(sizeof (arc_t), BPRI_MED); if (mp1 != NULL) { DB_TYPE(mp1) = M_CTL; @@ -963,7 +1007,7 @@ ar_close(queue_t *q) * If this is the control stream for an arl, delete anything * hanging off our arl. */ - ar_ce_walk((pfi_t)ar_ce_delete_per_arl, arl); + ar_ce_walk(ar_ce_delete_per_arl, arl); /* Free any messages waiting for a bind_ack */ /* Get the arl out of the chain. */ for (arlp = &arl_g_head; arlp[0]; arlp = &arlp[0]->arl_next) { @@ -984,21 +1028,6 @@ ar_close(queue_t *q) ar->ar_arl_ip_assoc->ar_arl_ip_assoc = NULL; ar->ar_arl_ip_assoc = NULL; } - if (WR(q) == ar_timer_queue) { - /* We were using this one for the garbage collection timer. */ - for (arl = arl_g_head; arl; arl = arl->arl_next) - if (arl->arl_rq != q) - break; - if (arl) { - ar_timer_queue = arl->arl_wq; - /* Ask mi_timer to switch to the new queue. */ - mi_timer(ar_timer_queue, ar_timer_mp, -2); - } else { - mi_timer_free(ar_timer_mp); - ar_timer_mp = NULL; - ar_timer_queue = NULL; - } - } cr = ar->ar_credp; /* mi_close_comm frees the instance data. */ (void) mi_close_comm(&ar_g_head, q); @@ -1067,7 +1096,8 @@ ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig) if (arct->arct_flags & ARF_IOCTL_AWARE) mp = mp_orig; - arp2dbg(("ar_cmd_dispatch: %s\n", arct->arct_txt)); + DTRACE_PROBE3(cmd_dispatch, queue_t *, q, mblk_t *, mp, + arct_t *, arct); return (*arct->arct_pfi)(q, mp); } @@ -1104,31 +1134,25 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size) static void ar_dlpi_send(arl_t *arl, mblk_t *mp) { - mblk_t **mpp; - union DL_primitives *dlp; - ASSERT(arl != NULL); - ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); - dlp = (union DL_primitives *)mp->b_rptr; if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { + mblk_t **mpp; + /* Must queue message. 
Tail insertion */ mpp = &arl->arl_dlpi_deferred; while (*mpp != NULL) mpp = &((*mpp)->b_next); - - arp1dbg(("ar_dlpi_send: deferring DLPI message arl %p %x\n", - (void *)arl, dlp->dl_primitive)); - *mpp = mp; + + DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp); return; } - arp1dbg(("ar_dlpi_send: sending DLPI message arl %p %x\n", (void *)arl, - dlp->dl_primitive)); - - arl->arl_dlpi_pending = dlp->dl_primitive; + arl->arl_dlpi_pending = + ((union DL_primitives *)mp->b_rptr)->dl_primitive; + DTRACE_PROBE2(dlpi_send, arl_t *, arl, mblk_t *, mp); putnext(arl->arl_wq, mp); } @@ -1141,16 +1165,16 @@ ar_dlpi_send(arl_t *arl, mblk_t *mp) static void ar_dlpi_done(arl_t *arl, t_uscalar_t prim) { - mblk_t *mp; - union DL_primitives *dlp; + mblk_t *mp; if (arl->arl_dlpi_pending != prim) { - arp0dbg(("ar_dlpi_done: spurious response arl %p\n", - (void *)arl)); + DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl, + t_uscalar_t, prim); return; } if ((mp = arl->arl_dlpi_deferred) == NULL) { + DTRACE_PROBE2(dlpi_done_idle, arl_t *, arl, t_uscalar_t, prim); arl->arl_dlpi_pending = DL_PRIM_INVAL; ar_cmd_done(arl); return; @@ -1160,12 +1184,10 @@ ar_dlpi_done(arl_t *arl, t_uscalar_t prim) mp->b_next = NULL; ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); - dlp = (union DL_primitives *)mp->b_rptr; - arp1dbg(("ar_dlpi_done: sending DLPI message arl %p %x\n", - (void *)arl, dlp->dl_primitive)); - - arl->arl_dlpi_pending = dlp->dl_primitive; + arl->arl_dlpi_pending = + ((union DL_primitives *)mp->b_rptr)->dl_primitive; + DTRACE_PROBE2(dlpi_done_next, arl_t *, arl, mblk_t *, mp); putnext(arl->arl_wq, mp); } @@ -1268,8 +1290,8 @@ ar_cmd_done(arl_t *arl) done: if (dlpi_op_done_mp != NULL) { - arp1dbg(("ar_dlpi_done: ardlpiopdone arl %p to q %p err %d\n", - (void *)arl, (void *)dlpi_op_done_q, err)); + DTRACE_PROBE3(cmd_done_next, arl_t *, arl, + queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp); putnext(dlpi_op_done_q, dlpi_op_done_mp); } } @@ -1295,9 +1317,6 @@ static 
void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd, boolean_t tail_insert) { - arp1dbg(("ar_cmd_enqueue: arl %p from q %p cmd %d \n", (void *)arl, - (void *)q, cmd)); - mp->b_queue = q; if (arl->arl_queue == NULL) { ASSERT(arl->arl_queue_tail == NULL); @@ -1336,6 +1355,38 @@ ar_cmd_dequeue(arl_t *arl) } /* + * Standard ACE timer handling: compute 'fuzz' around a central value or from 0 + * up to a value, and then set the timer. The randomization is necessary to + * prevent groups of systems from falling into synchronization on the network + * and producing ARP packet storms. + */ +static void +ace_set_timer(ace_t *ace, boolean_t initial_time) +{ + clock_t intv, rnd, frac; + + (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd)); + /* Note that clock_t is signed; must chop off bits */ + rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1; + intv = ace->ace_xmit_interval; + if (initial_time) { + /* Set intv to be anywhere in the [1 .. intv] range */ + if (intv <= 0) + intv = 1; + else + intv = (rnd % intv) + 1; + } else { + /* Compute 'frac' as 20% of the configured interval */ + if ((frac = intv / 5) <= 1) + frac = 2; + /* Set intv randomly in the range [intv-frac .. intv+frac] */ + if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0) + intv = 1; + } + mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, intv); +} + +/* * Process entry add requests from external messages. * It is also called by ip_rput_dlpi_writer() through * ipif_resolver_up() to change hardware address when @@ -1355,6 +1406,8 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) arl_t *arl; mblk_t *mp = mp_orig; int err; + uint_t aflags; + boolean_t unverified; /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) @@ -1366,16 +1419,32 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. 
*/ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_add: enqueue cmd on q %p \n", (void *)q)); + DTRACE_PROBE3(eadd_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_ADD, B_TRUE); return (EINPROGRESS); } mp_orig->b_prev = NULL; area = (area_t *)mp->b_rptr; - /* If this is a replacement, ditch the original. */ - if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != 0) + aflags = area->area_flags; + + /* + * If this is a replacement, ditch the original, but remember the + * duplicate address detection state. If it's a new entry, then we're + * obligated to do duplicate address detection now. + */ + if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != NULL) { + unverified = (ace->ace_flags & ACE_F_UNVERIFIED) != 0; ar_ce_delete(ace); + } else { + unverified = (aflags & ACE_F_PUBLISH) != 0; + } + + /* Allow client to request DAD restart */ + if (aflags & ACE_F_UNVERIFIED) + unverified = B_TRUE; + /* Extract parameters from the message. 
*/ hw_addr_len = area->area_hw_addr_length; hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len); @@ -1384,29 +1453,31 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) proto_addr_len); proto_mask = mi_offset_paramc(mp, area->area_proto_mask_offset, proto_addr_len); - if (!proto_mask) + if (proto_mask == NULL) { + DTRACE_PROBE2(eadd_bad_mask, arl_t *, arl, area_t *, area); return (EINVAL); + } err = ar_ce_create( arl, - area->area_proto, - hw_addr, - hw_addr_len, - proto_addr, - proto_addr_len, - proto_mask, - NULL, - (uint32_t)0, - area->area_flags & ~ACE_F_MAPPING); - if (err) + area->area_proto, + hw_addr, + hw_addr_len, + proto_addr, + proto_addr_len, + proto_mask, + NULL, + (uint32_t)0, + aflags & ~ACE_F_MAPPING & ~ACE_F_UNVERIFIED & ~ACE_F_DEFEND); + if (err != 0) { + DTRACE_PROBE3(eadd_create_failed, arl_t *, arl, area_t *, area, + int, err); return (err); - if (area->area_flags & ACE_F_PUBLISH) { - /* - * Transmit an arp request for this address to flush stale - * information froma arp caches. 
- */ + } + + if (aflags & ACE_F_PUBLISH) { if (hw_addr == NULL || hw_addr_len == 0) { hw_addr = arl->arl_hw_addr; - } else if (area->area_flags & ACE_F_MYADDR) { + } else if (aflags & ACE_F_MYADDR) { /* * If hardware address changes, then make sure * that the hardware address and hardware @@ -1422,23 +1493,79 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len); ASSERT(ace != NULL); - ar_xmit(arl, ARP_REQUEST, area->area_proto, proto_addr_len, - hw_addr, proto_addr, arl->arl_arp_addr, - proto_addr); + + if (ace->ace_flags & ACE_F_FAST) { + ace->ace_xmit_count = arp_fastprobe_count; + ace->ace_xmit_interval = arp_fastprobe_delay; + } else { + ace->ace_xmit_count = arp_probe_count; + ace->ace_xmit_interval = arp_probe_delay; + } + + /* + * If the user has disabled duplicate address detection for + * this kind of interface (fast or slow) by setting the probe + * count to zero, then pretend as if we've verified the + * address, and go right to address defense mode. + */ + if (ace->ace_xmit_count == 0) + unverified = B_FALSE; /* - * If MYADDR is set - it is not a proxy arp entry. In that - * case we send more than one copy, so that if this is - * a case of failover, we send out multiple entries in case - * the switch is very slow. + * If we need to do duplicate address detection, then kick that + * off. Otherwise, send out a gratuitous ARP message in order + * to update everyone's caches with the new hardware address. */ - if ((area->area_flags & ACE_F_MYADDR) && - ace->ace_publish_count != 0 && arp_publish_interval != 0) { - /* Account for the xmit we just did */ - ace->ace_publish_count--; - if (ace->ace_publish_count != 0) { - mi_timer(arl->arl_wq, ace->ace_mp, - arp_publish_interval); + if (unverified) { + ace->ace_flags |= ACE_F_UNVERIFIED; + if (ace->ace_xmit_interval == 0) { + /* + * User has configured us to send the first + * probe right away. Do so, and set up for + * the subsequent probes. 
+ */ + DTRACE_PROBE2(eadd_probe, ace_t *, ace, + area_t *, area); + ar_xmit(arl, ARP_REQUEST, area->area_proto, + proto_addr_len, hw_addr, NULL, NULL, + proto_addr, NULL); + ace->ace_xmit_count--; + ace->ace_xmit_interval = + (ace->ace_flags & ACE_F_FAST) ? + arp_fastprobe_interval : + arp_probe_interval; + ace_set_timer(ace, B_FALSE); + } else { + DTRACE_PROBE2(eadd_delay, ace_t *, ace, + area_t *, area); + /* Regular delay before initial probe */ + ace_set_timer(ace, B_TRUE); + } + } else { + DTRACE_PROBE2(eadd_announce, ace_t *, ace, + area_t *, area); + ar_xmit(arl, ARP_REQUEST, area->area_proto, + proto_addr_len, hw_addr, proto_addr, + arl->arl_arp_addr, proto_addr, NULL); + ace->ace_last_bcast = ddi_get_lbolt(); + + /* + * If AUTHORITY is set, it is not just a proxy arp + * entry; we believe we're the authority for this + * entry. In that case, and if we're not just doing + * one-off defense of the address, we send more than + * one copy, so that if this is an IPMP failover, we'll + * still have a good chance of updating everyone even + * when there's a packet loss or two. + */ + if ((aflags & ACE_F_AUTHORITY) && + !(aflags & ACE_F_DEFEND) && + arp_publish_count > 0) { + /* Account for the xmit we just did */ + ace->ace_xmit_count = arp_publish_count - 1; + ace->ace_xmit_interval = arp_publish_interval; + if (ace->ace_xmit_count > 0) + ace_set_timer(ace, B_FALSE); } } } @@ -1463,7 +1590,8 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_delete: enqueue on q %p\n", (void *)q)); + DTRACE_PROBE3(edel_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_DELETE, B_TRUE); return (EINPROGRESS); } @@ -1474,7 +1602,13 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig) * match first. 
*/ ace = ar_ce_lookup_from_area(mp, ar_ce_lookup); - if (ace) { + if (ace != NULL) { + /* + * If it's a permanent entry, then the client is the one who + * told us to delete it, so there's no reason to notify. + */ + if (ACE_NONPERM(ace)) + ar_delete_notify(ace); ar_ce_delete(ace); return (0); } @@ -1511,6 +1645,7 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) } arl = ar_ll_lookup_from_mp(mp); if (arl == NULL) { + DTRACE_PROBE2(query_no_arl, queue_t *, q, mblk_t *, mp); err = EINVAL; goto err_ret; } @@ -1518,7 +1653,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_query: enqueue on q %p\n", (void *)q)); + DTRACE_PROBE3(query_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_QUERY, B_TRUE); return (EINPROGRESS); } @@ -1528,7 +1664,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) proto_addr_len = areq->areq_target_addr_length; proto_addr = mi_offset_paramc(mp, areq->areq_target_addr_offset, proto_addr_len); - if (proto_addr == 0) { + if (proto_addr == NULL) { + DTRACE_PROBE1(query_illegal_address, areq_t *, areq); err = EINVAL; goto err_ret; } @@ -1538,9 +1675,22 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) if (areq->areq_xmit_interval == 0) areq->areq_xmit_interval = AR_DEF_XMIT_INTERVAL; ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len); - if (ace) { + if (ace != NULL && (ace->ace_flags & ACE_F_OLD)) { + /* + * This is a potentially stale entry that IP's asking about. + * Since IP is asking, it must not have an answer anymore, + * either due to periodic ARP flush or due to SO_DONTROUTE. + * Rather than go forward with what we've got, restart + * resolution. + */ + DTRACE_PROBE2(query_stale_ace, ace_t *, ace, areq_t *, areq); + ar_ce_delete(ace); + ace = NULL; + } + if (ace != NULL) { mblk_t **mpp; uint32_t count = 0; + /* * There is already a cache entry. 
This means there is either * a permanent entry, or address resolution is in progress. @@ -1550,6 +1700,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) */ for (mpp = &ace->ace_query_mp; mpp[0]; mpp = &mpp[0]->b_next) { if (++count > areq->areq_max_buffered) { + DTRACE_PROBE2(query_overflow, ace_t *, ace, + areq_t *, areq); mp->b_prev = NULL; err = EALREADY; goto err_ret; @@ -1562,6 +1714,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) * If a query was already queued up, then we must not * have an answer yet. */ + DTRACE_PROBE2(query_in_progress, ace_t *, ace, + areq_t *, areq); return (EINPROGRESS); } if (ACE_RESOLVED(ace)) { @@ -1572,6 +1726,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) */ mblk_t *mp1; + DTRACE_PROBE2(query_resolved, ace_t *, ace, + areq_t *, areq); mp1 = dupmsg(mp); ar_query_reply(ace, 0, proto_addr, proto_addr_len); freemsg(mp1); @@ -1579,22 +1735,28 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) } if (ace->ace_flags & ACE_F_MAPPING) { /* Should never happen */ - arp0dbg(("ar_entry_query: unresolved mapping\n")); + DTRACE_PROBE2(query_unresolved_mapping, ace_t *, ace, + areq_t *, areq); mpp[0] = mp->b_next; err = ENXIO; goto err_ret; } if (arl->arl_xmit_template == NULL) { /* Can't get help if we don't know how. */ + DTRACE_PROBE2(query_no_template, ace_t *, ace, + areq_t *, areq); mpp[0] = NULL; mp->b_prev = NULL; err = ENXIO; goto err_ret; } + DTRACE_PROBE2(query_unresolved, ace_t, ace, areq_t *, areq); } else { /* No ace yet. Make one now. (This is the common case.) 
*/ if (areq->areq_xmit_count == 0 || arl->arl_xmit_template == NULL) { + DTRACE_PROBE2(query_template, arl_t *, arl, + areq_t *, areq); mp->b_prev = NULL; err = ENXIO; goto err_ret; @@ -1607,6 +1769,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) areq->areq_sender_addr_offset, areq->areq_sender_addr_length); if (sender_addr == NULL) { + DTRACE_PROBE2(query_no_sender, arl_t *, arl, + areq_t *, areq); mp->b_prev = NULL; err = EINVAL; goto err_ret; @@ -1615,14 +1779,18 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) proto_addr, proto_addr_len, NULL, NULL, (uint32_t)0, areq->areq_flags); - if (err) { + if (err != 0) { + DTRACE_PROBE3(query_create_failed, arl_t *, arl, + areq_t *, areq, int, err); mp->b_prev = NULL; goto err_ret; } ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len); - if (!ace || ace->ace_query_mp) { + if (ace == NULL || ace->ace_query_mp != NULL) { /* Shouldn't happen! */ + DTRACE_PROBE3(query_lookup_failed, arl_t *, arl, + areq_t *, areq, ace_t *, ace); mp->b_prev = NULL; err = ENXIO; goto err_ret; @@ -1637,10 +1805,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr, areq->areq_sender_addr_length); if (src_ace == NULL) { - printf("ar_entry_query: Could not find the ace for " - "source address %d.%d.%d.%d\n", - sender_addr[0], sender_addr[1], sender_addr[2], - sender_addr[3]); + DTRACE_PROBE3(query_source_missing, arl_t *, arl, + areq_t *, areq, ace_t *, ace); ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); /* * ar_query_reply has already freed the mp. 
@@ -1659,7 +1825,9 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) areq->areq_proto, proto_addr, proto_addr_len); if (dst_ace != NULL && ACE_RESOLVED(dst_ace)) { - ar_ce_resolve(ace, dst_ace->ace_hw_addr, + DTRACE_PROBE3(query_other_arl, arl_t *, arl, + areq_t *, areq, ace_t *, dst_ace); + (void) ar_ce_resolve(ace, dst_ace->ace_hw_addr, dst_ace->ace_hw_addr_length); return (EINPROGRESS); } @@ -1701,7 +1869,8 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_squery: enqueue on q %p\n", (void *)q)); + DTRACE_PROBE3(squery_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_SQUERY, B_TRUE); return (EINPROGRESS); } @@ -1714,13 +1883,17 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) proto_addr_len); hw_addr_len = area->area_hw_addr_length; hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len); - if (!proto_addr || !hw_addr) + if (proto_addr == NULL || hw_addr == NULL) { + DTRACE_PROBE1(squery_illegal_address, area_t *, area); return (EINVAL); + } ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len); - if (!ace) + if (ace == NULL) { return (ENXIO); - if (hw_addr_len < ace->ace_hw_addr_length) + } + if (hw_addr_len < ace->ace_hw_addr_length) { return (EINVAL); + } if (ACE_RESOLVED(ace)) { /* Got it, prepare the response. */ ASSERT(area->area_hw_addr_length == ace->ace_hw_addr_length); @@ -1736,8 +1909,9 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) if (mp == mp_orig) { /* Non-ioctl case */ /* TODO: change message type? 
*/ - arp1dbg(("ar_entry_squery: qreply\n")); DB_TYPE(mp) = M_CTL; /* Caught by ip_wput */ + DTRACE_PROBE3(squery_reply, queue_t *, q, mblk_t *, mp, + arl_t *, arl); qreply(q, mp); return (EINPROGRESS); } @@ -1751,10 +1925,9 @@ ar_interface_down(queue_t *q, mblk_t *mp) { arl_t *arl; - arp1dbg(("ar_interface_down q %p\n", (void *)q)); arl = ar_ll_lookup_from_mp(mp); - if ((arl == NULL) || (arl->arl_closing)) { - arp1dbg(("ar_interface_down: no arl q %p \n", (void *)q)); + if (arl == NULL || arl->arl_closing) { + DTRACE_PROBE2(down_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); } @@ -1762,6 +1935,8 @@ ar_interface_down(queue_t *q, mblk_t *mp) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp, arl)) { + DTRACE_PROBE3(down_enqueued, queue_t *, q, mblk_t *, mp, + arl_t *, arl); ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_TRUE); return (EINPROGRESS); } @@ -1784,7 +1959,7 @@ ar_interface_down(queue_t *q, mblk_t *mp) ASSERT(arl->arl_state == ARL_S_UP); /* Free all arp entries for this interface */ - ar_ce_walk((pfi_t)ar_ce_delete_per_arl, arl); + ar_ce_walk(ar_ce_delete_per_arl, arl); ar_ll_down(arl); /* Return EINPROGRESS so that ar_rput does not free the 'mp' */ @@ -1801,10 +1976,9 @@ ar_interface_up(queue_t *q, mblk_t *mp) int err; mblk_t *mp1; - arp1dbg(("ar_interface_up q %p\n", (void *)q)); arl = ar_ll_lookup_from_mp(mp); - if ((arl == NULL) || (arl->arl_closing)) { - arp1dbg(("ar_interface_up: no arl %p\n", (void *)q)); + if (arl == NULL || arl->arl_closing) { + DTRACE_PROBE2(up_no_arl, queue_t *, q, mblk_t *, mp); err = EINVAL; goto done; } @@ -1813,6 +1987,8 @@ ar_interface_up(queue_t *q, mblk_t *mp) * Newly received commands from clients go to the tail of the queue. 
*/ if (CMD_NEEDS_QUEUEING(mp, arl)) { + DTRACE_PROBE3(up_enqueued, queue_t *, q, mblk_t *, mp, + arl_t *, arl); ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_TRUE); return (EINPROGRESS); } @@ -1843,9 +2019,10 @@ done: mp1 = ar_alloc(AR_DLPIOP_DONE, err); if (mp1 != NULL) { - arp1dbg(("ar_interface_up: send resp err %d q %p\n", - err, (void *)q)); - putnext(WR(q), mp1); + q = WR(q); + DTRACE_PROBE3(up_send_err, queue_t *, q, mblk_t *, mp1, + int, err); + putnext(q, mp1); } return (err); } @@ -1860,13 +2037,13 @@ ar_interface_on(queue_t *q, mblk_t *mp) { arl_t *arl; - arp1dbg(("ar_interface_on\n")); arl = ar_ll_lookup_from_mp(mp); if (arl == NULL) { - arp1dbg(("ar_interface_on: no arl\n")); + DTRACE_PROBE2(on_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); } /* Turn off the IFF_NOARP flag and activate ARP */ + DTRACE_PROBE3(on_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl); arl->arl_flags = 0; return (0); } @@ -1881,13 +2058,13 @@ ar_interface_off(queue_t *q, mblk_t *mp) { arl_t *arl; - arp1dbg(("ar_interface_off\n")); arl = ar_ll_lookup_from_mp(mp); if (arl == NULL) { - arp1dbg(("ar_interface_off: no arl\n")); + DTRACE_PROBE2(off_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); } /* Turn on the IFF_NOARP flag and deactivate ARP */ + DTRACE_PROBE3(off_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl); arl->arl_flags = ARL_F_NOARP; return (0); } @@ -1978,6 +2155,7 @@ ar_ll_init(ar_t *ar, mblk_t *mp) arl->arl_wq = ar->ar_wq; arl->arl_dlpi_pending = DL_PRIM_INVAL; + arl->arl_link_up = B_TRUE; ar->ar_arl = arl; } @@ -2127,8 +2305,6 @@ ar_ll_down(arl_t *arl) mblk_t *mp; ar_t *ar; - arp1dbg(("ar_ll_down arl %p\n", (void *)arl)); - ASSERT(arl->arl_state == ARL_S_UP); /* Let's break the association between an ARL and IP instance */ @@ -2163,8 +2339,7 @@ ar_ll_up(arl_t *arl) mblk_t *detach_mp = NULL; mblk_t *unbind_mp = NULL; mblk_t *info_mp = NULL; - - arp1dbg(("ar_ll_up arl %p \n", (void *)arl)); + mblk_t *notify_mp = NULL; ASSERT(arl->arl_state == ARL_S_DOWN); @@ 
-2197,6 +2372,12 @@ ar_ll_up(arl_t *arl) if (unbind_mp == NULL) goto bad; + notify_mp = ar_dlpi_comm(DL_NOTIFY_REQ, sizeof (dl_notify_req_t)); + if (notify_mp == NULL) + goto bad; + ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = + DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; + arl->arl_state = ARL_S_PENDING; if (arl->arl_provider_style == DL_STYLE2) { ar_dlpi_send(arl, attach_mp); @@ -2206,18 +2387,16 @@ ar_ll_up(arl_t *arl) ar_dlpi_send(arl, info_mp); ar_dlpi_send(arl, bind_mp); arl->arl_unbind_mp = unbind_mp; + ar_dlpi_send(arl, notify_mp); return (0); + bad: - if (attach_mp != NULL) - freemsg(attach_mp); - if (bind_mp != NULL) - freemsg(bind_mp); - if (detach_mp != NULL) - freemsg(detach_mp); - if (unbind_mp != NULL) - freemsg(unbind_mp); - if (info_mp != NULL) - freemsg(info_mp); + freemsg(attach_mp); + freemsg(bind_mp); + freemsg(detach_mp); + freemsg(unbind_mp); + freemsg(info_mp); + freemsg(notify_mp); return (ENOMEM); } @@ -2237,7 +2416,6 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) uint32_t hw_extract_start; arl_t *arl; - arp1dbg(("ar_mapping_add\n")); /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; @@ -2248,14 +2426,15 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. 
*/ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_mapping_add: enqueue on %p q\n", (void *)q)); + DTRACE_PROBE3(madd_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_MAPPING_ADD, B_TRUE); return (EINPROGRESS); } mp_orig->b_prev = NULL; arma = (arma_t *)mp->b_rptr; - if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != 0) + if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != NULL) ar_ce_delete(ace); hw_addr_len = arma->arma_hw_addr_length; hw_addr = mi_offset_paramc(mp, arma->arma_hw_addr_offset, hw_addr_len); @@ -2267,8 +2446,8 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) proto_extract_mask = mi_offset_paramc(mp, arma->arma_proto_extract_mask_offset, proto_addr_len); hw_extract_start = arma->arma_hw_mapping_start; - if (!proto_mask || !proto_extract_mask) { - arp0dbg(("ar_mapping_add: not masks\n")); + if (proto_mask == NULL || proto_extract_mask == NULL) { + DTRACE_PROBE2(madd_illegal_mask, arl_t *, arl, arpa_t *, arma); return (EINVAL); } return (ar_ce_create( @@ -2327,6 +2506,7 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ar_t *ar; int err; queue_t *tmp_q; + mblk_t *mp; TRACE_1(TR_FAC_ARP, TR_ARP_OPEN, "arp_open: q %p", q); @@ -2335,10 +2515,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (0); } /* Load up the Named Dispatch tables, if not already done. */ - if (!ar_g_nd && - (!nd_load(&ar_g_nd, "arp_cache_report", ar_ce_report, NULL, - NULL) || - !ar_param_register(arp_param_arr, A_CNT(arp_param_arr)))) { + if (ar_g_nd == NULL && + !ar_param_register(arp_param_arr, A_CNT(arp_param_arr))) { ar_cleanup(); return (ENOMEM); } @@ -2362,8 +2540,6 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) crhold(credp); ar->ar_credp = credp; - if (!ar_timer_mp) - ar_timer_init(q); /* * Probe for the DLPI info if we are not pushed on IP. Wait for * the reply. 
In case of error call ar_close() which will take @@ -2371,6 +2547,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * as freeing the arl, restarting the timer on a different queue etc. */ if (strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0) { + arc_t *arc; + /* * We are pushed directly on top of IP. There is no need to * send down a DL_INFO_REQ. Return success. This could @@ -2378,7 +2556,25 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * or a stream corresponding to an open of /dev/arp * (i.e. <arp-IP> stream). Note that we don't support * pushing some module in between arp and IP. + * + * Tell IP, though, that we're an extended implementation, so + * it knows to expect a DAD response after bringing an + * interface up. Old ATM drivers won't do this, and IP will + * just bring the interface up immediately. */ + ar->ar_on_ill_stream = (q->q_next->q_next != NULL); + if (!ar->ar_on_ill_stream) + return (0); + mp = allocb(sizeof (arc_t), BPRI_MED); + if (mp == NULL) { + (void) ar_close(RD(q)); + return (ENOMEM); + } + DB_TYPE(mp) = M_CTL; + arc = (arc_t *)mp->b_rptr; + mp->b_wptr = mp->b_rptr + sizeof (arc_t); + arc->arc_cmd = AR_ARP_EXTEND; + putnext(q, mp); return (0); } tmp_q = q; @@ -2390,8 +2586,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "ip") == 0) { /* - * We don't support pushing ARP arbitrarily on an - * IP driver stream. ARP has to be pushed directly above IP + * We don't support pushing ARP arbitrarily on an IP driver + * stream. ARP has to be pushed directly above IP. */ (void) ar_close(RD(q)); return (ENOTSUP); @@ -2400,8 +2596,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * Send down a DL_INFO_REQ so we can find out what we are * talking to. 
*/ - mblk_t *mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t)); - if (!mp) { + mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t)); + if (mp == NULL) { (void) ar_close(RD(q)); return (ENOMEM); } @@ -2547,19 +2743,18 @@ ar_plink_send(queue_t *q, mblk_t *mp) * ar_ce_walk routine to delete any outstanding queries for an ar that is * going away. */ -static int -ar_query_delete(ace_t *ace, uchar_t *ar) +static void +ar_query_delete(ace_t *ace, void *arg) { + ar_t *ar = arg; mblk_t **mpp = &ace->ace_query_mp; - mblk_t *mp = mpp[0]; + mblk_t *mp; - if (!mp) - return (0); - do { + while ((mp = *mpp) != NULL) { /* The response queue was stored in the query b_prev. */ - if ((queue_t *)mp->b_prev == ((ar_t *)ar)->ar_wq || - (queue_t *)mp->b_prev == ((ar_t *)ar)->ar_rq) { - mpp[0] = mp->b_next; + if ((queue_t *)mp->b_prev == ar->ar_wq || + (queue_t *)mp->b_prev == ar->ar_rq) { + *mpp = mp->b_next; if (DB_TYPE(mp) == M_PROTO && *(uint32_t *)mp->b_rptr == AR_ENTRY_QUERY) { BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); @@ -2568,8 +2763,7 @@ ar_query_delete(ace_t *ace, uchar_t *ar) } else { mpp = &mp->b_next; } - } while ((mp = mpp[0]) != 0); - return (0); + } } /* @@ -2614,11 +2808,11 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, } /* Complete the response based on how the request arrived. */ if (DB_TYPE(mp) == M_IOCTL) { - struct iocblk *ioc = - (struct iocblk *)mp->b_rptr; + struct iocblk *ioc = (struct iocblk *)mp->b_rptr; + ioc->ioc_error = ret_val; - DB_TYPE(mp) = M_IOCACK; if (ret_val != 0) { + DB_TYPE(mp) = M_IOCNAK; ioc->ioc_count = 0; putnext(q, mp); continue; @@ -2627,6 +2821,7 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, * Return the xmit template out with the successful * IOCTL. */ + DB_TYPE(mp) = M_IOCACK; ioc->ioc_count = template->b_wptr - template->b_rptr; /* Remove the areq mblk from the IOCTL. 
*/ areq_mp = mp->b_cont; @@ -2680,12 +2875,23 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, mp->b_cont = template; putnext(q, mp); } + /* - * Unless we are responding from a permanent cache entry, delete - * the ace. + * Unless we are responding from a permanent cache entry, start the + * cleanup timer or (on error) delete the entry. */ if (!(ace->ace_flags & (ACE_F_PERMANENT | ACE_F_DYING))) { - ar_ce_delete(ace); + if (!ACE_RESOLVED(ace) || arl->arl_xmit_template == NULL) { + /* + * No need to notify IP here, because the entry was + * never resolved, so IP can't have any cached copies + * of the address. + */ + ar_ce_delete(ace); + } else { + mi_timer(arl->arl_wq, ace->ace_mp, + arp_cleanup_interval); + } } } @@ -2726,10 +2932,26 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace) src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr, areq->areq_sender_addr_length); if (src_ace == NULL) { - printf("ar_query_xmit: Could not find the ace\n"); + DTRACE_PROBE3(xmit_no_source, ace_t *, ace, + areq_t *, areq, uchar_t *, sender_addr); return (0); } } + + /* + * If we haven't yet finished duplicate address checking on this source + * address, then do *not* use it on the wire. Doing so will corrupt + * the world's caches. Just allow the timer to restart. Note that + * duplicate address checking will eventually complete one way or the + * other, so this cannot go on "forever." + */ + if (src_ace->ace_flags & ACE_F_UNVERIFIED) { + DTRACE_PROBE2(xmit_source_unverified, ace_t *, ace, + ace_t *, src_ace); + areq->areq_xmit_count++; + return (areq->areq_xmit_interval); + } + /* * Transmit on src_arl. We should transmit on src_arl. Otherwise * the switch will send back a copy on other interfaces of the @@ -2737,9 +2959,12 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace) * address + hardware address, ARP will treat this as a bogon. 
*/ src_arl = src_ace->ace_arl; + DTRACE_PROBE3(xmit_send, ace_t *, ace, ace_t *, src_ace, + areq_t *, areq); ar_xmit(src_arl, ARP_REQUEST, areq->areq_proto, areq->areq_sender_addr_length, src_arl->arl_hw_addr, sender_addr, - src_arl->arl_arp_addr, proto_addr); + src_arl->arl_arp_addr, proto_addr, NULL); + src_ace->ace_last_bcast = ddi_get_lbolt(); return (areq->areq_xmit_interval); } @@ -2758,11 +2983,10 @@ ar_rput(queue_t *q, mblk_t *mp) int op; uint32_t plen; uint32_t proto; - ace_t *src_ace; uchar_t *src_haddr; uchar_t *src_paddr; - dl_unitdata_ind_t *dlui; - boolean_t hwaddr_changed = B_TRUE; + boolean_t is_probe; + int i; TRACE_1(TR_FAC_ARP, TR_ARP_RPUT_START, "arp_rput_start: q %p", q); @@ -2817,34 +3041,36 @@ ar_rput(queue_t *q, mblk_t *mp) return; case M_PCPROTO: case M_PROTO: + if (MBLKL(mp) >= sizeof (dl_unitdata_ind_t) && + ((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive == + DL_UNITDATA_IND) { + arl = ((ar_t *)q->q_ptr)->ar_arl; + if (arl != NULL) { + /* Real messages from the wire! */ + break; + } + putnext(q, mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "default"); + return; + } err = ar_cmd_dispatch(q, mp); switch (err) { case ENOENT: + /* Miscellaneous DLPI messages get shuffled off. */ + ar_rput_dlpi(q, mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "proto/dlpi"); break; case EINPROGRESS: TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "proto"); - return; + break; default: inet_freemsg(mp); - return; - } - if ((mp->b_wptr - mp->b_rptr) < sizeof (dl_unitdata_ind_t) || - ((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive - != DL_UNITDATA_IND) { - /* Miscellaneous DLPI messages get shuffled off. */ - ar_rput_dlpi(q, mp); - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "proto/dlpi"); - return; - } - /* DL_UNITDATA_IND */ - arl = ((ar_t *)q->q_ptr)->ar_arl; - if (arl != NULL) { - /* Real messages from the wire! 
*/ break; } - /* FALLTHRU */ + return; default: putnext(q, mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, @@ -2867,15 +3093,14 @@ ar_rput(queue_t *q, mblk_t *mp) * followed by an ARP packet. We do some initial checks and then * get to work. */ - dlui = (dl_unitdata_ind_t *)mp->b_rptr; mp1 = mp->b_cont; - if (!mp1) { + if (mp1 == NULL) { freemsg(mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "baddlpi"); return; } - if (!OK_32PTR(mp1->b_rptr) || mp1->b_cont) { + if (mp1->b_cont != NULL) { /* No fooling around with funny messages. */ if (!pullupmsg(mp1, -1)) { freemsg(mp); @@ -2885,22 +3110,33 @@ ar_rput(queue_t *q, mblk_t *mp) } } arh = (arh_t *)mp1->b_rptr; - hlen = (uint32_t)arh->arh_hlen & 0xFF; - plen = (uint32_t)arh->arh_plen & 0xFF; - if ((mp1->b_wptr - mp1->b_rptr) - < (ARH_FIXED_LEN + hlen + hlen + plen + plen)) { + hlen = arh->arh_hlen; + plen = arh->arh_plen; + if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) { freemsg(mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "short"); return; } - if (hlen == 0 || plen == 0) { - arp1dbg(("ar_rput: bogus arh\n")); + /* + * hlen 0 is used for RFC 1868 UnARP. + * + * Note that the rest of the code checks that hlen is what we expect + * for this hardware address type, so might as well discard packets + * here that don't match. + */ + if ((hlen > 0 && hlen != arl->arl_hw_addr_length) || plen == 0) { + DTRACE_PROBE2(rput_bogus, arl_t *, arl, mblk_t *, mp1); freemsg(mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "hlenzero/plenzero"); return; } + /* + * Historically, Solaris has been lenient about hardware type numbers. + * We should check here, but don't. 
+ */ + DTRACE_PROBE2(rput_normal, arl_t *, arl, arh_t *, arh); proto = (uint32_t)BE16_TO_U16(arh->arh_proto); src_haddr = (uchar_t *)arh; src_haddr = &src_haddr[ARH_FIXED_LEN]; @@ -2908,191 +3144,255 @@ ar_rput(queue_t *q, mblk_t *mp) dst_paddr = &src_haddr[hlen + plen + hlen]; op = BE16_TO_U16(arh->arh_operation); - /* Now see if we have a cache entry for the source address. */ - src_ace = ar_ce_lookup_entry(arl, proto, src_paddr, plen); + /* Determine if this is just a probe */ + for (i = 0; i < plen; i++) + if (src_paddr[i] != 0) + break; + is_probe = i >= plen; + /* - * If so, and it is the entry for one of our IP addresses, - * we really don't expect to see this packet, so pretend we didn't. - * Tell IP that we received a bogon. - * - * If is a "published" (proxy arp) entry we can receive requests - * FROM the node but we should never see an ARP_RESPONSE. In this case - * we process the response but also inform IP. + * RFC 826: first check if the <protocol, sender protocol address> is + * in the cache, if there is a sender protocol address. Note that this + * step also handles resolutions based on source. 
*/ - if (src_ace) { - if (src_ace->ace_flags & ACE_F_MYADDR) { - freeb(mp); - ar_client_notify(arl, mp1, AR_CN_BOGON); - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "pubentry"); - return; - } - if ((src_ace->ace_flags & ACE_F_PUBLISH) && - op == ARP_RESPONSE) { - mblk_t *mp2; - - mp2 = copymsg(mp1); - if (mp2 != NULL) - ar_client_notify(arl, mp2, AR_CN_BOGON); - } - if (src_ace->ace_hw_addr_length == hlen && - bcmp(src_ace->ace_hw_addr, src_haddr, hlen) == 0) { - hwaddr_changed = B_FALSE; - } + if (is_probe) + err = AR_NOTFOUND; + else + err = ar_ce_resolve_all(arl, proto, src_haddr, hlen, src_paddr, + plen); + switch (err) { + case AR_BOGON: + ar_client_notify(arl, mp1, AR_CN_BOGON); + mp1 = NULL; + break; + case AR_FAILED: + ar_client_notify(arl, mp1, AR_CN_FAILED); + mp1 = NULL; + break; + case AR_LOOPBACK: + DTRACE_PROBE2(rput_loopback, arl_t *, arl, arh_t *, arh); + freemsg(mp1); + mp1 = NULL; + break; } - switch (op) { - case ARP_REQUEST: - /* - * If we know the answer, and it is "published", send out - * the response. - */ - dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen); - if (dst_ace && (dst_ace->ace_flags & ACE_F_PUBLISH) && - ACE_RESOLVED(dst_ace)) { - ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen, - dst_ace->ace_hw_addr, dst_ace->ace_proto_addr, - src_haddr, src_paddr); - } + if (mp1 == NULL) { + freeb(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "unneeded"); + return; + } + + /* + * Now look up the destination address. By RFC 826, we ignore the + * packet at this step if the target isn't one of our addresses. This + * is true even if the target is something we're trying to resolve and + * the packet is a response. + * + * Note that in order to do this correctly, we need to know when to + * notify IP of a change implied by the source address of the ARP + * message. That implies that the local ARP table has entries for all + * of the resolved entries cached in the client. 
This is why we must + * notify IP when we delete a resolved entry and we know that IP may + * have cached answers. + */ + dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen); + if (dst_ace == NULL || !ACE_RESOLVED(dst_ace) || + !(dst_ace->ace_flags & ACE_F_PUBLISH)) { /* - * Now fall through to the response side, and add a cache entry - * for the sender so we will have it when we need it. + * Let the client know if the source mapping has changed, even + * if the destination provides no useful information for the + * client. */ - /* FALLTHRU */ - case ARP_RESPONSE: + if (err == AR_CHANGED) + ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); + else + freemsg(mp1); + freeb(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "nottarget"); + return; + } + + /* + * If the target is unverified by DAD, then one of two things is true: + * either it's someone else claiming this address (on a probe or an + * announcement) or it's just a regular request. The former is + * failure, but a regular request is not. + */ + if (dst_ace->ace_flags & ACE_F_UNVERIFIED) { /* - * With ill groups, we need to look for request across - * all the ills in the group. The request itself may - * not be queued on this arl. See ar_query_xmit() for - * details. + * Check for a reflection. Some misbehaving bridges will + * reflect our own transmitted packets back to us. */ - err = ar_ce_resolve_all(arl, proto, src_haddr, hlen, - src_paddr, plen); - if (err == AR_BOGON) { - /* - * Some other host has our IP address. Send a - * BOGON message to IP. 
- */ + if (hlen == dst_ace->ace_hw_addr_length && + bcmp(src_haddr, dst_ace->ace_hw_addr, hlen) == 0) { + DTRACE_PROBE3(rput_probe_reflected, arl_t *, arl, + arh_t *, arh, ace_t *, dst_ace); freeb(mp); - ar_client_notify(arl, mp1, AR_CN_BOGON); + freemsg(mp1); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "pubentry"); + "arp_rput_end: q %p (%S)", q, "reflection"); return; } + if (is_probe || op == ARP_RESPONSE) { + ar_client_notify(arl, mp1, AR_CN_FAILED); + ar_ce_delete(dst_ace); + } else if (err == AR_CHANGED) { + ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); + } else { + DTRACE_PROBE3(rput_request_unverified, arl_t *, arl, + arh_t *, arh, ace_t *, dst_ace); + freemsg(mp1); + } + freeb(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "unverified"); + return; + } + + /* + * If it's a request, then we reply to this, and if we think the + * sender's unknown, then we create an entry to avoid unnecessary ARPs. + * The design assumption is that someone ARPing us is likely to send us + * a packet soon, and that we'll want to reply to it. + */ + if (op == ARP_REQUEST) { + const uchar_t *dstaddr = src_haddr; + clock_t now; - if ((err != AR_LOOPBACK) && (src_ace == NULL)) { + /* + * This implements periodic address defense based on a modified + * version of the RFC 3927 requirements. Instead of sending a + * broadcasted reply every time, as demanded by the RFC, we + * send at most one broadcast reply per arp_broadcast_interval. + */ + now = ddi_get_lbolt(); + if ((now - dst_ace->ace_last_bcast) > + MSEC_TO_TICK(arp_broadcast_interval)) { + DTRACE_PROBE3(rput_bcast_reply, arl_t *, arl, + arh_t *, arh, ace_t *, dst_ace); + dst_ace->ace_last_bcast = now; + dstaddr = arl->arl_arp_addr; /* - * We may need this one sooner or later. The AR_LOOPBACK - * check above ensures, that we don't create arp - * entries for our own IP address, on another arl. + * If this is one of the long-suffering entries, then + * pull it out now. 
It no longer needs separate + * defense, because we're doing now that with this + * broadcasted reply. */ - (void) ar_ce_create(arl, proto, src_haddr, hlen, - src_paddr, plen, NULL, - NULL, (uint32_t)0, - (uint32_t)0); + dst_ace->ace_flags &= ~ACE_F_DELAYED; } - /* Let's see if this is a system ARPing itself. */ - do { - if (*src_paddr++ != *dst_paddr++) - break; - } while (--plen); - if (plen == 0) { - /* - * An ARP message with identical src and dst - * protocol addresses. This guy is trying to - * tell us something that our clients might - * find interesting.Essentially such packets are - * sent when a m/c comes up or changes its h/w - * address, so before notifying our client check the - * h/w address if there is a cache entry and notify - * the client only if the addresses differ. - */ - if (hwaddr_changed) { - freeb(mp); - ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); - } else { - /* Just discard it. */ - freemsg(mp); - } - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "duplicate"); - return; + ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen, + dst_ace->ace_hw_addr, dst_ace->ace_proto_addr, + src_haddr, src_paddr, dstaddr); + if (!is_probe && err == AR_NOTFOUND && + ar_ce_create(arl, proto, src_haddr, hlen, src_paddr, plen, + NULL, NULL, 0, 0) == 0) { + ace_t *ace; + + ace = ar_ce_lookup(arl, proto, src_paddr, plen); + ASSERT(ace != NULL); + mi_timer(arl->arl_wq, ace->ace_mp, + arp_cleanup_interval); } + } + if (err == AR_CHANGED) { + freeb(mp); + ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "reqchange"); + } else { + freemsg(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "end"); + } +} + +static void +ar_ce_restart_dad(ace_t *ace, void *arl) +{ + if ((ace->ace_arl == arl) && + (ace->ace_flags & (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) == + (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) { /* - * A broadcast response may also be interesting. 
+ * Slight cheat here: we don't use the initial probe delay + * in this obscure case. */ - if (op == ARP_RESPONSE && dlui->dl_group_address) { - freeb(mp); - ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); - return; + if (ace->ace_flags & ACE_F_FAST) { + ace->ace_xmit_count = arp_fastprobe_count; + ace->ace_xmit_interval = arp_fastprobe_interval; + } else { + ace->ace_xmit_count = arp_probe_count; + ace->ace_xmit_interval = arp_probe_interval; } - break; - default: - break; + ace->ace_flags &= ~ACE_F_DAD_ABORTED; + ace_set_timer(ace, B_FALSE); } - freemsg(mp); - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "end"); } /* DLPI messages, other than DL_UNITDATA_IND are handled here. */ static void ar_rput_dlpi(queue_t *q, mblk_t *mp) { - ar_t *ar = (ar_t *)q->q_ptr; + ar_t *ar = q->q_ptr; arl_t *arl = ar->ar_arl; - dl_bind_ack_t *dlba; - dl_error_ack_t *dlea; - dl_ok_ack_t *dloa; - dl_uderror_ind_t *dluei; - char *err_str; + union DL_primitives *dlp; + const char *err_str; - if ((mp->b_wptr - mp->b_rptr) < sizeof (dloa->dl_primitive)) { + if (MBLKL(mp) < sizeof (dlp->dl_primitive)) { putnext(q, mp); return; } - dloa = (dl_ok_ack_t *)mp->b_rptr; - dlea = (dl_error_ack_t *)dloa; - switch (dloa->dl_primitive) { + dlp = (union DL_primitives *)mp->b_rptr; + switch (dlp->dl_primitive) { case DL_ERROR_ACK: - switch (dlea->dl_error_primitive) { + /* + * ce is confused about how DLPI works, so we have to interpret + * an "error" on DL_NOTIFY_ACK (which we never could have sent) + * as really meaning an error on DL_NOTIFY_REQ. + * + * Note that supporting DL_NOTIFY_REQ is optional, so printing + * out an error message on the console isn't warranted except + * for debug. 
+ */ + if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK || + dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) { + ar_dlpi_done(arl, DL_NOTIFY_REQ); + freemsg(mp); + return; + } + err_str = dlpi_prim_str(dlp->error_ack.dl_error_primitive); + DTRACE_PROBE2(rput_dl_error, arl_t *, arl, + dl_error_ack_t *, &dlp->error_ack); + switch (dlp->error_ack.dl_error_primitive) { case DL_UNBIND_REQ: if (arl->arl_provider_style == DL_STYLE1) arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_UNBIND_REQ); - err_str = "DL_UNBIND_REQ"; break; case DL_DETACH_REQ: + case DL_BIND_REQ: arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_DETACH_REQ); - err_str = "DL_DETACH_REQ"; break; case DL_ATTACH_REQ: - ar_dlpi_done(arl, DL_ATTACH_REQ); - err_str = "DL_ATTACH_REQ"; - break; - case DL_BIND_REQ: - arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_BIND_REQ); - err_str = "DL_BIND_REQ"; break; default: - err_str = "?"; - break; + /* If it's anything else, we didn't send it. */ + putnext(q, mp); + return; } - arp0dbg(("ar_rput_dlpi: " - "%s (%d) failed, dl_errno %d, dl_unix_errno %d\n", - err_str, (int)dlea->dl_error_primitive, - (int)dlea->dl_errno, (int)dlea->dl_unix_errno)); + ar_dlpi_done(arl, dlp->error_ack.dl_error_primitive); (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "ar_rput_dlpi: %s failed, dl_errno %d, dl_unix_errno %d", - err_str, dlea->dl_errno, dlea->dl_unix_errno); + err_str, dlp->error_ack.dl_errno, + dlp->error_ack.dl_unix_errno); break; case DL_INFO_ACK: /* * We have a response back from the driver. Go set up transmit * defaults. 
*/ + DTRACE_PROBE2(rput_dl_info, arl_t *, arl, + dl_info_ack_t *, &dlp->info_ack); if (arl != NULL) { ar_ll_set_defaults(arl, mp); ar_dlpi_done(arl, DL_INFO_REQ); @@ -3103,48 +3403,75 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) qenable(WR(q)); break; case DL_OK_ACK: - arp1dbg(("ar_rput_dlpi: arl %p DL_OK_ACK for %d\n", - (void *)arl, dloa->dl_correct_primitive)); - switch (dloa->dl_correct_primitive) { + DTRACE_PROBE2(rput_dl_ok, arl_t *, arl, + dl_ok_ack_t *, &dlp->ok_ack); + switch (dlp->ok_ack.dl_correct_primitive) { case DL_UNBIND_REQ: if (arl->arl_provider_style == DL_STYLE1) arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_UNBIND_REQ); break; case DL_DETACH_REQ: arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_DETACH_REQ); break; case DL_ATTACH_REQ: - ar_dlpi_done(arl, DL_ATTACH_REQ); break; + default: + putnext(q, mp); + return; } + ar_dlpi_done(arl, dlp->ok_ack.dl_correct_primitive); + break; + case DL_NOTIFY_ACK: + DTRACE_PROBE2(rput_dl_notify, arl_t *, arl, + dl_notify_ack_t *, &dlp->notify_ack); + /* + * We mostly care about interface-up transitions, as this is + * when we need to redo duplicate address detection. 
+ */ + arl->arl_notifies = + (dlp->notify_ack.dl_notifications & DL_NOTE_LINK_UP) != 0; + ar_dlpi_done(arl, DL_NOTIFY_REQ); break; case DL_BIND_ACK: - arp1dbg(("ar_rput: DL_BIND_ACK arl %p\n", (void *)arl)); - dlba = (dl_bind_ack_t *)dloa; + DTRACE_PROBE2(rput_dl_bind, arl_t *, arl, + dl_bind_ack_t *, &dlp->bind_ack); if (arl->arl_sap_length < 0) - bcopy((char *)dlba + dlba->dl_addr_offset, + bcopy((char *)dlp + dlp->bind_ack.dl_addr_offset, arl->arl_hw_addr, arl->arl_hw_addr_length); else - bcopy((char *)dlba + dlba->dl_addr_offset + + bcopy((char *)dlp + dlp->bind_ack.dl_addr_offset + arl->arl_sap_length, arl->arl_hw_addr, arl->arl_hw_addr_length); arl->arl_state = ARL_S_UP; ar_dlpi_done(arl, DL_BIND_REQ); break; + case DL_NOTIFY_IND: + DTRACE_PROBE2(rput_dl_notify_ind, arl_t *, arl, + dl_notify_ind_t *, &dlp->notify_ind); + switch (dlp->notify_ind.dl_notification) { + case DL_NOTE_LINK_UP: + arl->arl_link_up = B_TRUE; + ar_ce_walk(ar_ce_restart_dad, arl); + break; + case DL_NOTE_LINK_DOWN: + arl->arl_link_up = B_FALSE; + break; + } + break; case DL_UDERROR_IND: - dluei = (dl_uderror_ind_t *)dloa; + DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl, + dl_uderror_ind_t *, &dlp->uderror_ind); (void) mi_strlog(q, 1, SL_ERROR | SL_TRACE, "ar_rput_dlpi: " "DL_UDERROR_IND, dl_dest_addr_length %d dl_errno %d", - dluei->dl_dest_addr_length, dluei->dl_errno); + dlp->uderror_ind.dl_dest_addr_length, + dlp->uderror_ind.dl_errno); putnext(q, mp); return; default: - arp1dbg(("ar_rput_dlpi: default, primitive %d\n", - (int)dloa->dl_primitive)); + DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, + union DL_primitives *, dlp); putnext(q, mp); return; } @@ -3158,14 +3485,12 @@ ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr, uchar_t *mask, *to; int len; - if (!ace->ace_hw_addr) - return; + ASSERT(ace->ace_hw_addr != NULL); bcopy(ace->ace_hw_addr, addrpos, ace->ace_hw_addr_length); if (ace->ace_flags & ACE_F_MAPPING && proto_addr != NULL && ace->ace_proto_extract_mask) { /* 
careful */ - arp1dbg(("ar_set_address: MAPPING\n")); len = MIN((int)ace->ace_hw_addr_length - ace->ace_hw_extract_start, proto_addr_len); @@ -3179,14 +3504,15 @@ ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr, static int ar_slifname(queue_t *q, mblk_t *mp_orig) { - ar_t *ar = (ar_t *)q->q_ptr; + ar_t *ar = q->q_ptr; arl_t *arl = ar->ar_arl; struct lifreq *lifr; mblk_t *mp = mp_orig; + arl_t *old_arl; + mblk_t *ioccpy; + struct iocblk *iocp; - arp1dbg(("ar_slifname\n")); - - if (MODULE_BELOW_IS_IP(q)) { + if (ar->ar_on_ill_stream) { /* * This command is for IP, since it is coming down * the <arp-IP-driver> stream. Return ENOENT so that @@ -3197,37 +3523,71 @@ ar_slifname(queue_t *q, mblk_t *mp_orig) /* We handle both M_IOCTL and M_PROTO messages */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - if (!q->q_next || arl == NULL) { + if (q->q_next == NULL || arl == NULL) { /* * If the interface was just opened and * the info ack has not yet come back from the driver */ - arp1dbg(("ar_slifname no arl - queued\n")); + DTRACE_PROBE2(slifname_no_arl, queue_t *, q, + mblk_t *, mp_orig); (void) putq(q, mp_orig); return (EINPROGRESS); } - if (arl->arl_name[0] != '\0') + + if (MBLKL(mp) < sizeof (struct lifreq)) { + DTRACE_PROBE2(slifname_malformed, queue_t *, q, + mblk_t *, mp); + } + + if (arl->arl_name[0] != '\0') { + DTRACE_PROBE1(slifname_already, arl_t *, arl); return (EALREADY); + } - lifr = (struct lifreq *)(mp->b_rptr); + lifr = (struct lifreq *)mp->b_rptr; - if (strlen(lifr->lifr_name) >= LIFNAMSIZ) + if (strlen(lifr->lifr_name) >= LIFNAMSIZ) { + DTRACE_PROBE2(slifname_bad_name, arl_t *, arl, + struct lifreq *, lifr); return (ENXIO); + } /* Check whether the name is already in use. 
*/ - if (ar_ll_lookup_by_name(lifr->lifr_name)) { - arp1dbg(("ar_slifname: %s exists\n", lifr->lifr_name)); + + old_arl = ar_ll_lookup_by_name(lifr->lifr_name); + if (old_arl != NULL) { + DTRACE_PROBE2(slifname_exists, arl_t *, arl, arl_t *, old_arl); return (EEXIST); } + + /* Make a copy of the message so we can send it downstream. */ + if ((ioccpy = allocb(sizeof (struct iocblk), BPRI_MED)) == NULL || + (ioccpy->b_cont = copymsg(mp)) == NULL) { + if (ioccpy != NULL) + freeb(ioccpy); + return (ENOMEM); + } + (void) strlcpy(arl->arl_name, lifr->lifr_name, sizeof (arl->arl_name)); /* The ppa is sent down by ifconfig */ arl->arl_ppa = lifr->lifr_ppa; - arp1dbg(("ar_slifname: name is now %s, ppa %d\n", arl->arl_name, - arl->arl_ppa)); /* Chain in the new arl. */ arl->arl_next = arl_g_head; arl_g_head = arl; + DTRACE_PROBE1(slifname_set, arl_t *, arl); + + /* + * Send along a copy of the ioctl; this is just for hitbox. Use + * M_CTL to avoid confusing anyone else who might be listening. + */ + DB_TYPE(ioccpy) = M_CTL; + iocp = (struct iocblk *)ioccpy->b_rptr; + bzero(iocp, sizeof (*iocp)); + iocp->ioc_cmd = SIOCSLIFNAME; + iocp->ioc_count = msgsize(ioccpy->b_cont); + ioccpy->b_wptr = (uchar_t *)(iocp + 1); + putnext(arl->arl_wq, ioccpy); return (0); } @@ -3239,10 +3599,9 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) int ppa; char *cp; mblk_t *mp = mp_orig; + arl_t *old_arl; - arp1dbg(("ar_set_ppa\n")); - - if (MODULE_BELOW_IS_IP(q)) { + if (ar->ar_on_ill_stream) { /* * This command is for IP, since it is coming down * the <arp-IP-driver> stream. Return ENOENT so that @@ -3254,35 +3613,40 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - if (!q->q_next || arl == NULL) { + if (q->q_next == NULL || arl == NULL) { /* * If the interface was just opened and * the info ack has not yet come back from the driver. 
*/ - arp1dbg(("ar_set_ppa: no arl - queued\n")); + DTRACE_PROBE2(setppa_no_arl, queue_t *, q, + mblk_t *, mp_orig); (void) putq(q, mp_orig); return (EINPROGRESS); } - if (arl->arl_name[0] != '\0') + if (arl->arl_name[0] != '\0') { + DTRACE_PROBE1(setppa_already, arl_t *, arl); return (EALREADY); + } do { q = q->q_next; - } while (q->q_next); + } while (q->q_next != NULL); cp = q->q_qinfo->qi_minfo->mi_idname; ppa = *(int *)(mp->b_rptr); (void) snprintf(arl->arl_name, sizeof (arl->arl_name), "%s%d", cp, ppa); - if (ar_ll_lookup_by_name(arl->arl_name) != NULL) { - arp1dbg(("ar_set_ppa: %s busy\n", arl->arl_name)); + + old_arl = ar_ll_lookup_by_name(arl->arl_name); + if (old_arl != NULL) { + DTRACE_PROBE2(setppa_exists, arl_t *, arl, arl_t *, old_arl); /* Make it a null string again */ arl->arl_name[0] = '\0'; return (EBUSY); } - arp1dbg(("ar_set_ppa: %d\n", ppa)); arl->arl_ppa = ppa; + DTRACE_PROBE1(setppa_done, arl_t *, arl); /* Chain in the new arl. */ arl->arl_next = arl_g_head; arl_g_head = arl; @@ -3357,10 +3721,8 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig) * this is an ipNetToMediaTable msg from IP that needs (unique) * arp cache entries appended... 
*/ - if ((mpdata = mp->b_cont) == NULL) { - arp0dbg(("ar_snmp_msg: b_cont == NULL for MIB2_IP msg\n")); + if ((mpdata = mp->b_cont) == NULL) return (EINVAL); - } ar_snmp_hash_tbl = ar_create_snmp_hash(mpdata); @@ -3368,7 +3730,7 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig) args.m2a_hashb = ar_snmp_hash_tbl; args.m2a_mpdata = NULL; args.m2a_mptail = NULL; - ar_ce_walk((pfi_t)ar_snmp_msg2, &args); + ar_ce_walk(ar_snmp_msg2, &args); mi_free(ar_snmp_hash_tbl); /* @@ -3478,7 +3840,7 @@ ar_snmp_msg2(ace_t *ace, void *arg) m2ap->m2a_mpdata = allocb(sizeof (mib2_ipNetToMediaEntry_t), BPRI_HI); if (m2ap->m2a_mpdata == NULL) { - arp1dbg(("ar_snmp_msg2:allocb failed\n")); + DTRACE_PROBE(snmp_allocb_failure); return; } } @@ -3498,30 +3860,6 @@ ar_snmp_msg2(ace_t *ace, void *arg) (char *)&ntme, sizeof (ntme)); } -/* Start up the garbage collection timer on the queue provided. */ -static void -ar_timer_init(queue_t *q) -{ - if (ar_timer_mp) - return; - ar_timer_mp = mi_timer_alloc(0); - if (!ar_timer_mp) - return; - ar_timer_queue = q; - mi_timer(ar_timer_queue, ar_timer_mp, arp_timer_interval); -} - -/* ar_ce_walk routine to trash all non-permanent resolved entries. */ -/* ARGSUSED */ -static int -ar_trash(ace_t *ace, uchar_t *arg) -{ - if ((ace->ace_flags & (ACE_F_RESOLVED|ACE_F_PERMANENT)) == - ACE_F_RESOLVED) - ar_ce_delete(ace); - return (0); -} - /* Write side put procedure. 
*/ static void ar_wput(queue_t *q, mblk_t *mp) @@ -3579,11 +3917,14 @@ ar_wput(queue_t *q, mblk_t *mp) break; } ioc = (struct iocblk *)mp->b_rptr; - ioc->ioc_error = err; - if ((mp1 = mp->b_cont) != 0) - ioc->ioc_count = msgdsize(mp1); - else - ioc->ioc_count = 0; + if (err != 0) + ioc->ioc_error = err; + if (ioc->ioc_error != 0) { + DB_TYPE(mp) = M_IOCNAK; + freemsg(mp->b_cont); + mp->b_cont = NULL; + } + ioc->ioc_count = msgdsize(mp->b_cont); qreply(q, mp); TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END, "arp_wput_end: q %p (%S)", q, "ioctl"); @@ -3660,6 +4001,117 @@ ar_wput(queue_t *q, mblk_t *mp) "arp_wput_end: q %p (%S)", q, "end"); } +static boolean_t +arp_say_ready(ace_t *ace) +{ + mblk_t *mp; + arl_t *arl; + arh_t *arh; + uchar_t *cp; + + arl = ace->ace_arl; + mp = allocb(sizeof (*arh) + 2 * (arl->arl_hw_addr_length + + ace->ace_proto_addr_length), BPRI_MED); + if (mp == NULL) { + /* skip a beat on allocation trouble */ + ace->ace_xmit_count = 1; + ace_set_timer(ace, B_FALSE); + return (B_FALSE); + } + /* Tell IP address is now usable */ + arh = (arh_t *)mp->b_rptr; + U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware); + U16_TO_BE16(ace->ace_proto, arh->arh_proto); + arh->arh_hlen = arl->arl_hw_addr_length; + arh->arh_plen = ace->ace_proto_addr_length; + U16_TO_BE16(ARP_REQUEST, arh->arh_operation); + cp = (uchar_t *)(arh + 1); + bcopy(ace->ace_hw_addr, cp, arl->arl_hw_addr_length); + cp += arl->arl_hw_addr_length; + bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length); + cp += ace->ace_proto_addr_length; + bcopy(ace->ace_hw_addr, cp, arl->arl_hw_addr_length); + cp += arl->arl_hw_addr_length; + bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length); + cp += ace->ace_proto_addr_length; + mp->b_wptr = cp; + ar_client_notify(arl, mp, AR_CN_READY); + DTRACE_PROBE1(ready, ace_t *, ace); + return (B_TRUE); +} + +/* + * Pick the longest-waiting aces for defense. 
+ */ +static void +ace_reschedule(ace_t *ace, void *arg) +{ + ace_resched_t *art = arg; + ace_t **aces; + ace_t **acemax; + ace_t *atemp; + + if (ace->ace_arl != art->art_arl) + return; + /* + * Only published entries that are ready for announcement are eligible. + */ + if ((ace->ace_flags & (ACE_F_PUBLISH | ACE_F_UNVERIFIED | ACE_F_DYING | + ACE_F_DELAYED)) != ACE_F_PUBLISH) { + return; + } + if (art->art_naces < ACE_RESCHED_LIST_LEN) { + art->art_aces[art->art_naces++] = ace; + } else { + aces = art->art_aces; + acemax = aces + ACE_RESCHED_LIST_LEN; + for (; aces < acemax; aces++) { + if ((*aces)->ace_last_bcast > ace->ace_last_bcast) { + atemp = *aces; + *aces = ace; + ace = atemp; + } + } + } +} + +/* + * Reschedule the ARP defense of any long-waiting ACEs. It's assumed that this + * doesn't happen very often (if at all), and thus it needn't be highly + * optimized. (Note, though, that it's actually O(N) complexity, because the + * outer loop is bounded by a constant rather than by the length of the list.) + */ +static void +arl_reschedule(arl_t *arl) +{ + ace_resched_t art; + int i; + ace_t *ace; + + i = arl->arl_defend_count; + arl->arl_defend_count = 0; + /* If none could be sitting around, then don't reschedule */ + if (i < arp_defend_rate) { + DTRACE_PROBE1(reschedule_none, arl_t *, arl); + return; + } + art.art_arl = arl; + while (arl->arl_defend_count < arp_defend_rate) { + art.art_naces = 0; + ar_ce_walk(ace_reschedule, &art); + for (i = 0; i < art.art_naces; i++) { + ace = art.art_aces[i]; + ace->ace_flags |= ACE_F_DELAYED; + ace_set_timer(ace, B_FALSE); + if (++arl->arl_defend_count >= arp_defend_rate) + break; + } + if (art.art_naces < ACE_RESCHED_LIST_LEN) + break; + } + DTRACE_PROBE1(reschedule, arl_t *, arl); +} + /* * Write side service routine. 
The only action here is delivery of transmit * timer events and delayed messages while waiting for the info_ack (ar_arl @@ -3668,8 +4120,9 @@ ar_wput(queue_t *q, mblk_t *mp) static void ar_wsrv(queue_t *q) { - ace_t *ace; - mblk_t *mp; + ace_t *ace; + arl_t *arl; + mblk_t *mp; clock_t ms; TRACE_1(TR_FAC_ARP, TR_ARP_WSRV_START, @@ -3680,39 +4133,135 @@ ar_wsrv(queue_t *q) case M_PCSIG: if (!mi_timer_valid(mp)) continue; - if (mp == ar_timer_mp) { - /* Garbage collection time. */ - ar_ce_walk(ar_trash, NULL); - mi_timer(ar_timer_queue, ar_timer_mp, - arp_timer_interval); + ace = (ace_t *)mp->b_rptr; + if (ace->ace_flags & ACE_F_DYING) continue; + arl = ace->ace_arl; + if (ace->ace_flags & ACE_F_UNVERIFIED) { + ASSERT(ace->ace_flags & ACE_F_PUBLISH); + ASSERT(ace->ace_query_mp == NULL); + /* + * If the link is down, give up for now. IP + * will give us the go-ahead to try again when + * the link restarts. + */ + if (!arl->arl_link_up) { + DTRACE_PROBE1(timer_link_down, + ace_t *, ace); + ace->ace_flags |= ACE_F_DAD_ABORTED; + continue; + } + if (ace->ace_xmit_count > 0) { + DTRACE_PROBE1(timer_probe, + ace_t *, ace); + ace->ace_xmit_count--; + ar_xmit(arl, ARP_REQUEST, + ace->ace_proto, + ace->ace_proto_addr_length, + ace->ace_hw_addr, NULL, NULL, + ace->ace_proto_addr, NULL); + ace_set_timer(ace, B_FALSE); + continue; + } + if (!arp_say_ready(ace)) + continue; + DTRACE_PROBE1(timer_ready, ace_t *, ace); + ace->ace_xmit_interval = arp_publish_interval; + ace->ace_xmit_count = arp_publish_count; + if (ace->ace_xmit_count == 0) + ace->ace_xmit_count++; + ace->ace_flags &= ~ACE_F_UNVERIFIED; } - ace = (ace_t *)mp->b_rptr; - if (ace->ace_flags & (ACE_F_PUBLISH | ACE_F_MYADDR)) { + if (ace->ace_flags & ACE_F_PUBLISH) { + clock_t now; + + /* + * If an hour has passed, then free up the + * entries that need defense by rescheduling + * them. 
+ */ + now = ddi_get_lbolt(); + if (arp_defend_rate > 0 && + now - arl->arl_defend_start > + SEC_TO_TICK(arp_defend_period)) { + arl->arl_defend_start = now; + arl_reschedule(arl); + } /* * Finish the job that we started in - * ar_entry_add. + * ar_entry_add. When we get to zero + * announcement retransmits left, switch to + * address defense. */ ASSERT(ace->ace_query_mp == NULL); - ASSERT(ace->ace_publish_count != 0); - ace->ace_publish_count--; - ar_xmit(ace->ace_arl, ARP_REQUEST, + if (ace->ace_xmit_count > 0) { + ace->ace_xmit_count--; + DTRACE_PROBE1(timer_announce, + ace_t *, ace); + } else if (ace->ace_flags & ACE_F_DELAYED) { + /* + * This guy was rescheduled as one of + * the really old entries needing + * on-going defense. Let him through + * now. + */ + DTRACE_PROBE1(timer_send_delayed, + ace_t *, ace); + ace->ace_flags &= ~ACE_F_DELAYED; + } else if (arp_defend_rate > 0 && + (arl->arl_defend_count >= arp_defend_rate || + ++arl->arl_defend_count >= + arp_defend_rate)) { + /* + * If we're no longer allowed to send + * unbidden defense messages, then just + * wait for rescheduling. + */ + DTRACE_PROBE1(timer_excess_defense, + ace_t *, ace); + ace_set_timer(ace, B_FALSE); + continue; + } else { + DTRACE_PROBE1(timer_defend, + ace_t *, ace); + } + ar_xmit(arl, ARP_REQUEST, ace->ace_proto, ace->ace_proto_addr_length, ace->ace_hw_addr, ace->ace_proto_addr, - ace->ace_arl->arl_arp_addr, - ace->ace_proto_addr); - if (ace->ace_publish_count != 0 && - arp_publish_interval != 0) { - mi_timer(ace->ace_arl->arl_wq, - ace->ace_mp, - arp_publish_interval); - } + arl->arl_arp_addr, + ace->ace_proto_addr, NULL); + ace->ace_last_bcast = now; + if (ace->ace_xmit_count == 0) + ace->ace_xmit_interval = + arp_defend_interval; + if (ace->ace_xmit_interval != 0) + ace_set_timer(ace, B_FALSE); continue; } - if (!ace->ace_query_mp) + + /* + * If this is a non-permanent (regular) resolved ARP + * entry, then it's now time to check if it can be + * retired. 
As an optimization, we check with IP + * first, and just restart the timer if the address is + * still in use. + */ + if (ACE_NONPERM(ace)) { + if (ace->ace_proto == IP_ARP_PROTO_TYPE && + ndp_lookup_ipaddr(*(ipaddr_t *) + ace->ace_proto_addr)) { + ace->ace_flags |= ACE_F_OLD; + mi_timer(arl->arl_wq, ace->ace_mp, + arp_cleanup_interval); + } else { + ar_delete_notify(ace); + ar_ce_delete(ace); + } continue; + } + /* * ar_query_xmit returns the number of milliseconds to * wait following this transmit. If the number of @@ -3721,6 +4270,7 @@ ar_wsrv(queue_t *q) * we complete the operation with a failure indication. * Otherwise, we restart the timer. */ + ASSERT(ace->ace_query_mp != NULL); ms = ar_query_xmit(ace, NULL); if (ms == 0) ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); @@ -3739,43 +4289,50 @@ ar_wsrv(queue_t *q) /* ar_xmit is called to transmit an ARP Request or Response. */ static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, - uchar_t *haddr1, uchar_t *paddr1, uchar_t *haddr2, uchar_t *paddr2) + const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, + const uchar_t *paddr2, const uchar_t *dstaddr) { arh_t *arh; - char *cp; - uint32_t hlen = arl->arl_hw_addr_length; + uint8_t *cp; + uint_t hlen; mblk_t *mp; - if (arl->arl_flags & ARL_F_NOARP) { - /* IFF_NOARP flag is set. 
Do not send an arp request */ + /* IFF_NOARP flag is set or interface down: do not send arp messages */ + if ((arl->arl_flags & ARL_F_NOARP) || !arl->arl_link_up) return; - } mp = arl->arl_xmit_template; - if (!mp || !(mp = copyb(mp))) + if (mp == NULL || (mp = copyb(mp)) == NULL) return; + hlen = arl->arl_hw_addr_length; mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) + plen + plen, BPRI_MED); - if (!mp->b_cont) { + if (mp->b_cont == NULL) { freeb(mp); return; } + + /* Get the L2 destination address for the message */ + if (haddr2 == NULL) + dstaddr = arl->arl_arp_addr; + else if (dstaddr == NULL) + dstaddr = haddr2; + /* * Figure out where the target hardware address goes in the * DL_UNITDATA_REQ header, and copy it in. */ - - cp = (char *)mi_offset_param(mp, arl->arl_xmit_template_addr_offset, - hlen); - if (!cp) { + cp = mi_offset_param(mp, arl->arl_xmit_template_addr_offset, hlen); + ASSERT(cp != NULL); + if (cp == NULL) { freemsg(mp); return; } - bcopy(haddr2, cp, hlen); + bcopy(dstaddr, cp, hlen); /* Fill in the ARP header. */ - cp = (char *)mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); - mp->b_cont->b_rptr = (uchar_t *)cp; + cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); + mp->b_cont->b_rptr = cp; arh = (arh_t *)cp; U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware); U16_TO_BE16(proto, arh->arh_proto); @@ -3785,13 +4342,19 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, cp += ARH_FIXED_LEN; bcopy(haddr1, cp, hlen); cp += hlen; - bcopy(paddr1, cp, plen); + if (paddr1 == NULL) + bzero(cp, plen); + else + bcopy(paddr1, cp, plen); cp += plen; - bcopy(haddr2, cp, hlen); + if (haddr2 == NULL) + bzero(cp, hlen); + else + bcopy(haddr2, cp, hlen); cp += hlen; bcopy(paddr2, cp, plen); cp += plen; - mp->b_cont->b_wptr = (uchar_t *)cp; + mp->b_cont->b_wptr = cp; /* Ship it out. 
*/ if (canputnext(arl->arl_wq)) putnext(arl->arl_wq, mp); @@ -3799,209 +4362,6 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, freemsg(mp); } -/* - * Handle an external request to broadcast an ARP request. This is used - * by configuration programs to broadcast a request advertising our own - * hardware and protocol addresses. - */ -static int -ar_xmit_request(queue_t *q, mblk_t *mp_orig) -{ - areq_t *areq; - arl_t *arl; - uchar_t *sender; - uint32_t sender_length; - uchar_t *target; - uint32_t target_length; - mblk_t *mp = mp_orig; - - /* We handle both M_IOCTL and M_PROTO messages. */ - if (DB_TYPE(mp) == M_IOCTL) - mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); - if (arl == NULL) - return (EINVAL); - /* - * Newly received commands from clients go to the tail of the queue. - */ - if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_xmit_request: enqueue on q %p\n", (void *)q)); - ar_cmd_enqueue(arl, mp_orig, q, AR_XMIT_REQUEST, B_TRUE); - return (EINPROGRESS); - } - mp_orig->b_prev = NULL; - - areq = (areq_t *)mp->b_rptr; - sender_length = areq->areq_sender_addr_length; - sender = mi_offset_param(mp, areq->areq_sender_addr_offset, - sender_length); - target_length = areq->areq_target_addr_length; - target = mi_offset_param(mp, areq->areq_target_addr_offset, - target_length); - if (!sender || !target) - return (EINVAL); - ar_xmit(arl, ARP_REQUEST, areq->areq_proto, sender_length, - arl->arl_hw_addr, sender, arl->arl_arp_addr, target); - return (0); -} - -/* - * Handle an external request to broadcast an ARP response. This is used - * by configuration programs to broadcast a response advertising our own - * hardware and protocol addresses. - */ -static int -ar_xmit_response(queue_t *q, mblk_t *mp_orig) -{ - areq_t *areq; - arl_t *arl; - uchar_t *sender; - uint32_t sender_length; - uchar_t *target; - uint32_t target_length; - mblk_t *mp = mp_orig; - - /* We handle both M_IOCTL and M_PROTO messages. 
*/ - if (DB_TYPE(mp) == M_IOCTL) - mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); - if (arl == NULL) - return (EINVAL); - /* - * Newly received commands from clients go to the tail of the queue. - */ - if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_xmit_response: enqueue on q %p \n", (void *)q)); - ar_cmd_enqueue(arl, mp_orig, q, AR_XMIT_RESPONSE, B_TRUE); - return (EINPROGRESS); - } - mp_orig->b_prev = NULL; - - areq = (areq_t *)mp->b_rptr; - sender_length = areq->areq_sender_addr_length; - sender = mi_offset_param(mp, areq->areq_sender_addr_offset, - sender_length); - target_length = areq->areq_target_addr_length; - target = mi_offset_param(mp, areq->areq_target_addr_offset, - target_length); - if (!sender || !target) - return (EINVAL); - ar_xmit(arl, ARP_RESPONSE, areq->areq_proto, sender_length, - arl->arl_hw_addr, sender, arl->arl_arp_addr, target); - return (0); -} - -#if 0 -/* - * Debug routine to display a particular ARP Cache Entry with an - * accompanying text message. 
- */ -static void -show_ace(char *msg, ace_t *ace) -{ - if (msg) - printf("%s", msg); - printf("ace 0x%p:\n", ace); - printf("\tace_next 0x%p, ace_ptpn 0x%p, ace_arl 0x%p\n", - ace->ace_next, ace->ace_ptpn, ace->ace_arl); - printf("\tace_proto %x, ace_flags %x\n", ace->ace_proto, - ace->ace_flags); - if (ace->ace_proto_addr && ace->ace_proto_addr_length) - printf("\tace_proto_addr %x %x %x %x, len %d\n", - ace->ace_proto_addr[0], ace->ace_proto_addr[1], - ace->ace_proto_addr[2], ace->ace_proto_addr[3], - ace->ace_proto_addr_length); - if (ace->ace_proto_mask) - printf("\tace_proto_mask %x %x %x %x\n", - ace->ace_proto_mask[0], ace->ace_proto_mask[1], - ace->ace_proto_mask[2], ace->ace_proto_mask[3]); - if (ace->ace_hw_addr && ace->ace_hw_addr_length) - printf("\tace_hw_addr %x %x %x %x %x %x, len %d\n", - ace->ace_hw_addr[0], ace->ace_hw_addr[1], - ace->ace_hw_addr[2], ace->ace_hw_addr[3], - ace->ace_hw_addr[4], ace->ace_hw_addr[5], - ace->ace_hw_addr_length); - printf("\tace_mp 0x%p\n", ace->ace_mp); - printf("\tace_query_count %d, ace_query_mp 0x%x\n", - ace->ace_query_count, ace->ace_query_mp); -} - -/* Debug routine to display an ARP packet with an accompanying text message. 
*/ -static void -show_arp(char *msg, mblk_t *mp) -{ - uchar_t *up = mp->b_rptr; - int len; - int hlen = up[4] & 0xFF; - char fmt[64]; - char buf[128]; - char *op; - int plen = up[5] & 0xFF; - uint_t proto; - - if (msg && *msg) - printf("%s", msg); - len = mp->b_wptr - up; - if (len < 8) { - printf("ARP packet of %d bytes too small\n", len); - return; - } - switch (BE16_TO_U16(&up[6])) { - case ARP_REQUEST: - op = "ARP request"; - break; - case ARP_RESPONSE: - op = "ARP response"; - break; - case RARP_REQUEST: - op = "RARP request"; - break; - case RARP_RESPONSE: - op = "RARP response"; - break; - default: - op = "unknown"; - break; - } - proto = (uint_t)BE16_TO_U16(&up[2]); - printf("len %d, hardware %d, proto %d, hlen %d, plen %d, op %s\n", - len, (int)BE16_TO_U16(up), proto, hlen, plen, op); - if (len < (8 + hlen + hlen + plen + plen)) - printf("ARP packet of %d bytes too small!\n", len); - up += 8; - - (void) mi_sprintf(fmt, "sender hardware address %%%dM\n", hlen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - up += hlen; - if (proto == 0x800) { - printf("sender proto address %d.%d.%d.%d\n", - up[0] & 0xFF, up[1] & 0xFF, up[2] & 0xFF, - up[3] & 0xFF); - } else { - (void) mi_sprintf(fmt, "sender proto address %%%dM\n", plen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - } - up += plen; - - (void) mi_sprintf(fmt, "target hardware address %%%dM\n", hlen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - up += hlen; - if (proto == 0x800) { - printf("target proto address %d.%d.%d.%d\n", - up[0] & 0xFF, up[1] & 0xFF, up[2] & 0xFF, - up[3] & 0xFF); - } else { - (void) mi_sprintf(fmt, "target proto address %%%dM\n", plen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - } - up += plen; -} -#endif - static mblk_t * ar_alloc(uint32_t cmd, int err) { diff --git a/usr/src/uts/common/inet/arp_impl.h b/usr/src/uts/common/inet/arp_impl.h index 84756488f8..e87fc69ab3 100644 --- a/usr/src/uts/common/inet/arp_impl.h +++ b/usr/src/uts/common/inet/arp_impl.h @@ 
-36,6 +36,10 @@ extern "C" { #include <sys/types.h> #include <sys/stream.h> +#include <net/if.h> + +/* ARP kernel hash size; used for mdb support */ +#define ARP_HASH_SIZE 256 /* ARL Structure, one per link level device */ typedef struct arl_s { @@ -43,7 +47,6 @@ typedef struct arl_s { queue_t *arl_rq; /* Read queue pointer */ queue_t *arl_wq; /* Write queue pointer */ t_uscalar_t arl_ppa; /* DL_ATTACH parameter */ - t_scalar_t arl_mac_sap; uchar_t *arl_arp_addr; /* multicast address to use */ uchar_t *arl_hw_addr; /* Our hardware address */ uint32_t arl_hw_addr_length; @@ -56,8 +59,6 @@ typedef struct arl_s { mblk_t *arl_unbind_mp; mblk_t *arl_detach_mp; t_uscalar_t arl_provider_style; /* From DL_INFO_ACK */ - mblk_t *arl_dlpiop_done; /* DLPI opertion done */ - queue_t *arl_ip_pending_queue; /* Pending queue */ mblk_t *arl_queue; /* Queued commands head */ mblk_t *arl_queue_tail; /* Queued commands tail */ uint32_t arl_flags; /* Used for IFF_NOARP */ @@ -65,7 +66,12 @@ typedef struct arl_s { mblk_t *arl_dlpi_deferred; /* Deferred DLPI messages */ uint_t arl_state; /* lower interface state */ char *arl_data; /* address data pointer */ - uint32_t arl_closing : 1; + clock_t arl_defend_start; /* start of 1-hour period */ + uint_t arl_defend_count; /* # of unbidden broadcasts */ + uint_t + arl_closing : 1, /* stream is closing */ + arl_notifies : 1, /* handles DL_NOTE_LINK */ + arl_link_up : 1; /* DL_NOTE status */ } arl_t; #define ARL_F_NOARP 0x01 @@ -81,9 +87,32 @@ typedef struct ar_s { arl_t *ar_arl; /* Associated arl */ cred_t *ar_credp; /* Credentials associated w/ open */ struct ar_s *ar_arl_ip_assoc; /* ARL - IP association */ - uint32_t ar_ip_acked_close : 1; /* IP has acked the close */ + uint32_t + ar_ip_acked_close : 1, /* IP has acked the close */ + ar_on_ill_stream : 1; /* Module below is IP */ } ar_t; +/* ARP Cache Entry */ +typedef struct ace_s { + struct ace_s *ace_next; /* Hash chain next pointer */ + struct ace_s **ace_ptpn; /* Pointer to previous 
next */ + struct arl_s *ace_arl; /* Associated arl */ + uint32_t ace_proto; /* Protocol for this ace */ + uint32_t ace_flags; + uchar_t *ace_proto_addr; + uint32_t ace_proto_addr_length; + uchar_t *ace_proto_mask; /* Mask for matching addr */ + uchar_t *ace_proto_extract_mask; /* For mappings */ + uchar_t *ace_hw_addr; + uint32_t ace_hw_addr_length; + uint32_t ace_hw_extract_start; /* For mappings */ + mblk_t *ace_mp; /* mblk we are in */ + mblk_t *ace_query_mp; /* outstanding query chain */ + clock_t ace_last_bcast; /* last broadcast Response */ + clock_t ace_xmit_interval; + int ace_xmit_count; +} ace_t; + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/inet/ip.h b/usr/src/uts/common/inet/ip.h index a732b92585..67af1bf688 100644 --- a/usr/src/uts/common/inet/ip.h +++ b/usr/src/uts/common/inet/ip.h @@ -39,11 +39,9 @@ extern "C" { #include <inet/mib2.h> #include <inet/nd.h> #include <sys/atomic.h> -#include <sys/socket.h> #include <net/if_dl.h> #include <net/if.h> #include <netinet/ip.h> -#include <sys/dlpi.h> #include <netinet/igmp.h> #ifdef _KERNEL @@ -1284,7 +1282,9 @@ typedef struct ipif_s { ipif_replace_zero : 1, /* Replacement for zero */ ipif_was_up : 1, /* ipif was up before */ - ipif_pad_to_31 : 28; + ipif_addr_ready : 1, /* DAD is done */ + ipif_was_dup : 1, /* DAD had failed */ + ipif_pad_to_31 : 26; int ipif_orig_ifindex; /* ifindex before SLIFFAILOVER */ uint_t ipif_seqid; /* unique index across all ills */ @@ -1295,6 +1295,7 @@ typedef struct ipif_s { uint_t ipif_saved_ire_cnt; zoneid_t ipif_zoneid; /* zone ID number */ + timeout_id_t ipif_recovery_id; /* Timer for DAD recovery */ #ifdef ILL_DEBUG #define IP_TR_HASH_MAX 64 th_trace_t *ipif_trace[IP_TR_HASH_MAX]; @@ -1740,6 +1741,7 @@ typedef struct ill_s { uint_t ill_ipif_up_count; /* Number of IPIFs currently up. */ uint_t ill_max_frag; /* Max IDU from DLPI. */ char *ill_name; /* Our name. */ + uint_t ill_ipif_dup_count; /* Number of duplicate addresses. 
*/ uint_t ill_name_length; /* Name length, incl. terminator. */ char *ill_ndd_name; /* Name + ":ip?_forwarding" for NDD. */ uint_t ill_net_type; /* IRE_IF_RESOLVER/IRE_IF_NORESOLVER. */ @@ -1807,7 +1809,9 @@ typedef struct ill_s { ill_dl_up : 1, ill_up_ipifs : 1, - ill_pad_to_bit_31 : 20; + ill_note_link : 1, /* supports link-up notification */ + + ill_pad_to_bit_31 : 19; /* Following bit fields protected by ill_lock */ uint_t @@ -1818,7 +1822,8 @@ typedef struct ill_s { ill_arp_bringup_pending : 1, ill_mtu_userspecified : 1, /* SIOCSLNKINFO has set the mtu */ - ill_pad_bit_31 : 26; + ill_arp_extend : 1, /* ARP has DAD extensions */ + ill_pad_bit_31 : 25; /* * Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'. @@ -2501,12 +2506,8 @@ typedef struct ire_s { /* source ip-addr of incoming packet */ clock_t ire_last_used_time; /* Last used time */ struct ire_s *ire_fastpath; /* Pointer to next ire in fastpath */ - zoneid_t ire_zoneid; /* for local address discrimination */ tsol_ire_gw_secattr_t *ire_gw_secattr; /* gateway security attributes */ -#ifdef IRE_DEBUG - th_trace_t *ire_trace[IP_TR_HASH_MAX]; - boolean_t ire_trace_disable; /* True when alloc fails */ -#endif + zoneid_t ire_zoneid; /* for local address discrimination */ /* * ire's that are embedded inside mblk_t and sent to the external * resolver use the ire_stq_ifindex to track the ifindex of the @@ -2514,6 +2515,12 @@ typedef struct ire_s { * for cleanup in the esbfree routine when arp failure occurs */ uint_t ire_stq_ifindex; + uint_t ire_defense_count; /* number of ARP conflicts */ + uint_t ire_defense_time; /* last time defended (secs) */ +#ifdef IRE_DEBUG + th_trace_t *ire_trace[IP_TR_HASH_MAX]; + boolean_t ire_trace_disable; /* True when alloc fails */ +#endif } ire_t; /* IPv4 compatiblity macros */ @@ -2822,23 +2829,37 @@ extern int ipv6_forward; extern vmem_t *ip_minor_arena; #define ip_respond_to_address_mask_broadcast ip_param_arr[0].ip_param_value +#define ip_g_resp_to_echo_bcast 
ip_param_arr[1].ip_param_value +#define ip_g_resp_to_echo_mcast ip_param_arr[2].ip_param_value +#define ip_g_resp_to_timestamp ip_param_arr[3].ip_param_value +#define ip_g_resp_to_timestamp_bcast ip_param_arr[4].ip_param_value #define ip_g_send_redirects ip_param_arr[5].ip_param_value +#define ip_g_forward_directed_bcast ip_param_arr[6].ip_param_value #define ip_debug ip_param_arr[7].ip_param_value #define ip_mrtdebug ip_param_arr[8].ip_param_value #define ip_timer_interval ip_param_arr[9].ip_param_value #define ip_ire_arp_interval ip_param_arr[10].ip_param_value +#define ip_ire_redir_interval ip_param_arr[11].ip_param_value #define ip_def_ttl ip_param_arr[12].ip_param_value +#define ip_forward_src_routed ip_param_arr[13].ip_param_value #define ip_wroff_extra ip_param_arr[14].ip_param_value +#define ip_ire_pathmtu_interval ip_param_arr[15].ip_param_value +#define ip_icmp_return ip_param_arr[16].ip_param_value #define ip_path_mtu_discovery ip_param_arr[17].ip_param_value #define ip_ignore_delete_time ip_param_arr[18].ip_param_value +#define ip_ignore_redirect ip_param_arr[19].ip_param_value #define ip_output_queue ip_param_arr[20].ip_param_value #define ip_broadcast_ttl ip_param_arr[21].ip_param_value #define ip_icmp_err_interval ip_param_arr[22].ip_param_value #define ip_icmp_err_burst ip_param_arr[23].ip_param_value #define ip_reass_queue_bytes ip_param_arr[24].ip_param_value +#define ip_strict_dst_multihoming ip_param_arr[25].ip_param_value #define ip_addrs_per_if ip_param_arr[26].ip_param_value #define ipsec_override_persocket_policy ip_param_arr[27].ip_param_value #define icmp_accept_clear_messages ip_param_arr[28].ip_param_value +#define igmp_accept_clear_messages ip_param_arr[29].ip_param_value + +/* IPv6 configuration knobs */ #define delay_first_probe_time ip_param_arr[30].ip_param_value #define max_unicast_solicit ip_param_arr[31].ip_param_value #define ipv6_def_hops ip_param_arr[32].ip_param_value @@ -2850,6 +2871,7 @@ extern vmem_t *ip_minor_arena; 
#define ipv6_strict_dst_multihoming ip_param_arr[38].ip_param_value #define ip_ire_reclaim_fraction ip_param_arr[39].ip_param_value #define ipsec_policy_log_interval ip_param_arr[40].ip_param_value +#define pim_accept_clear_messages ip_param_arr[41].ip_param_value #define ip_ndp_unsolicit_interval ip_param_arr[42].ip_param_value #define ip_ndp_unsolicit_count ip_param_arr[43].ip_param_value #define ipv6_ignore_home_address_opt ip_param_arr[44].ip_param_value @@ -2857,8 +2879,14 @@ extern vmem_t *ip_minor_arena; #define ip_multirt_resolution_interval ip_param_arr[46].ip_param_value #define ip_multirt_ttl ip_param_arr[47].ip_param_value #define ip_multidata_outbound ip_param_arr[48].ip_param_value +#define ip_ndp_defense_interval ip_param_arr[49].ip_param_value +#define ip_max_temp_idle ip_param_arr[50].ip_param_value +#define ip_max_temp_defend ip_param_arr[51].ip_param_value +#define ip_max_defend ip_param_arr[52].ip_param_value +#define ip_defend_interval ip_param_arr[53].ip_param_value +#define ip_dup_recovery ip_param_arr[54].ip_param_value #ifdef DEBUG -#define ipv6_drop_inbound_icmpv6 ip_param_arr[49].ip_param_value +#define ipv6_drop_inbound_icmpv6 ip_param_arr[55].ip_param_value #else #define ipv6_drop_inbound_icmpv6 0 #endif @@ -2934,6 +2962,9 @@ extern uint32_t ipsechw_debug; #define ip3dbg(a) /* */ #endif /* IP_DEBUG */ +/* Default MAC-layer address string length for mac_colon_addr */ +#define MAC_STR_LEN 128 + struct ipsec_out_s; extern const char *dlpi_prim_str(int); @@ -2945,6 +2976,7 @@ extern void ill_frag_timer_start(ill_t *); extern mblk_t *ip_carve_mp(mblk_t **, ssize_t); extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); extern char *ip_dot_addr(ipaddr_t, char *); +extern const char *mac_colon_addr(const uint8_t *, size_t, char *, size_t); extern void ip_lwput(queue_t *, mblk_t *); extern boolean_t icmp_err_rate_limit(void); extern void icmp_time_exceeded(queue_t *, mblk_t *, uint8_t); diff --git a/usr/src/uts/common/inet/ip/ip.c 
b/usr/src/uts/common/inet/ip/ip.c index 036748e78c..7872cfd212 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -956,6 +956,12 @@ static ipparam_t lcl_param_arr[] = { { 1000, 60000, 1000, "ip_multirt_resolution_interval" }, { 0, 255, 1, "ip_multirt_ttl" }, { 0, 1, 1, "ip_multidata_outbound" }, + { 0, 3600000, 300000, "ip_ndp_defense_interval" }, + { 0, 999999, 60*60*24, "ip_max_temp_idle" }, + { 0, 1000, 1, "ip_max_temp_defend" }, + { 0, 1000, 3, "ip_max_defend" }, + { 0, 999999, 30, "ip_defend_interval" }, + { 0, 3600000, 300000, "ip_dup_recovery" }, #ifdef DEBUG { 0, 1, 0, "ip6_drop_inbound_icmpv6" }, #endif @@ -1022,65 +1028,6 @@ int ip_g_forward = IP_FORWARD_DEFAULT; int ipv6_forward = IP_FORWARD_DEFAULT; -/* Following line is external, and in ip.h. Normally marked with * *. */ -#define ip_respond_to_address_mask_broadcast ip_param_arr[0].ip_param_value -#define ip_g_resp_to_echo_bcast ip_param_arr[1].ip_param_value -#define ip_g_resp_to_echo_mcast ip_param_arr[2].ip_param_value -#define ip_g_resp_to_timestamp ip_param_arr[3].ip_param_value -#define ip_g_resp_to_timestamp_bcast ip_param_arr[4].ip_param_value -#define ip_g_send_redirects ip_param_arr[5].ip_param_value -#define ip_g_forward_directed_bcast ip_param_arr[6].ip_param_value -#define ip_debug ip_param_arr[7].ip_param_value /* */ -#define ip_mrtdebug ip_param_arr[8].ip_param_value /* */ -#define ip_timer_interval ip_param_arr[9].ip_param_value /* */ -#define ip_ire_arp_interval ip_param_arr[10].ip_param_value /* */ -#define ip_ire_redir_interval ip_param_arr[11].ip_param_value -#define ip_def_ttl ip_param_arr[12].ip_param_value -#define ip_forward_src_routed ip_param_arr[13].ip_param_value -#define ip_wroff_extra ip_param_arr[14].ip_param_value -#define ip_ire_pathmtu_interval ip_param_arr[15].ip_param_value -#define ip_icmp_return ip_param_arr[16].ip_param_value -#define ip_path_mtu_discovery ip_param_arr[17].ip_param_value /* */ -#define ip_ignore_delete_time 
ip_param_arr[18].ip_param_value /* */ -#define ip_ignore_redirect ip_param_arr[19].ip_param_value -#define ip_output_queue ip_param_arr[20].ip_param_value -#define ip_broadcast_ttl ip_param_arr[21].ip_param_value -#define ip_icmp_err_interval ip_param_arr[22].ip_param_value -#define ip_icmp_err_burst ip_param_arr[23].ip_param_value -#define ip_reass_queue_bytes ip_param_arr[24].ip_param_value -#define ip_strict_dst_multihoming ip_param_arr[25].ip_param_value -#define ip_addrs_per_if ip_param_arr[26].ip_param_value -#define ipsec_override_persocket_policy ip_param_arr[27].ip_param_value /* */ -#define icmp_accept_clear_messages ip_param_arr[28].ip_param_value -#define igmp_accept_clear_messages ip_param_arr[29].ip_param_value - -/* IPv6 configuration knobs */ -#define delay_first_probe_time ip_param_arr[30].ip_param_value -#define max_unicast_solicit ip_param_arr[31].ip_param_value -#define ipv6_def_hops ip_param_arr[32].ip_param_value -#define ipv6_icmp_return ip_param_arr[33].ip_param_value -#define ipv6_forward_src_routed ip_param_arr[34].ip_param_value -#define ipv6_resp_echo_mcast ip_param_arr[35].ip_param_value -#define ipv6_send_redirects ip_param_arr[36].ip_param_value -#define ipv6_ignore_redirect ip_param_arr[37].ip_param_value -#define ipv6_strict_dst_multihoming ip_param_arr[38].ip_param_value -#define ip_ire_reclaim_fraction ip_param_arr[39].ip_param_value -#define ipsec_policy_log_interval ip_param_arr[40].ip_param_value -#define pim_accept_clear_messages ip_param_arr[41].ip_param_value -#define ip_ndp_unsolicit_interval ip_param_arr[42].ip_param_value -#define ip_ndp_unsolicit_count ip_param_arr[43].ip_param_value -#define ipv6_ignore_home_address_opt ip_param_arr[44].ip_param_value -#define ip_policy_mask ip_param_arr[45].ip_param_value -#define ip_multirt_resolution_interval ip_param_arr[46].ip_param_value -#define ip_multirt_ttl ip_param_arr[47].ip_param_value -#define ip_multidata_outbound ip_param_arr[48].ip_param_value -#ifdef DEBUG -#define 
ipv6_drop_inbound_icmpv6 ip_param_arr[49].ip_param_value -#else -#define ipv6_drop_inbound_icmpv6 0 -#endif - - /* * Table of IP ioctls encoding the various properties of the ioctl and * indexed based on the last byte of the ioctl command. Occasionally there @@ -1516,28 +1463,33 @@ struct module_info ip_mod_info = { IP_MOD_ID, IP_MOD_NAME, 1, INFPSZ, 65536, 1024 }; -static struct qinit rinit = { +/* + * Duplicate static symbols within a module confuses mdb; so we avoid the + * problem by making the symbols here distinct from those in udp.c. + */ + +static struct qinit iprinit = { (pfi_t)ip_rput, NULL, ip_open, ip_close, NULL, &ip_mod_info }; -static struct qinit winit = { +static struct qinit ipwinit = { (pfi_t)ip_wput, (pfi_t)ip_wsrv, ip_open, ip_close, NULL, &ip_mod_info }; -static struct qinit lrinit = { +static struct qinit iplrinit = { (pfi_t)ip_lrput, NULL, ip_open, ip_close, NULL, &ip_mod_info }; -static struct qinit lwinit = { +static struct qinit iplwinit = { (pfi_t)ip_lwput, NULL, ip_open, ip_close, NULL, &ip_mod_info }; struct streamtab ipinfo = { - &rinit, &winit, &lrinit, &lwinit + &iprinit, &ipwinit, &iplrinit, &iplwinit }; #ifdef DEBUG @@ -3782,6 +3734,204 @@ icmp_unreachable(queue_t *q, mblk_t *mp, uint8_t code) } /* + * Attempt to start recovery of an IPv4 interface that's been shut down as a + * duplicate. As long as someone else holds the address, the interface will + * stay down. When that conflict goes away, the interface is brought back up. + * This is done so that accidental shutdowns of addresses aren't made + * permanent. Your server will recover from a failure. + * + * For DHCP, recovery is not done in the kernel. Instead, it's handled by a + * user space process (dhcpagent). + * + * Recovery completes if ARP reports that the address is now ours (via + * AR_CN_READY). In that case, we go to ip_arp_excl to finish the operation. + * + * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 
+ */ +static void +ipif_dup_recovery(void *arg) +{ + ipif_t *ipif = arg; + ill_t *ill = ipif->ipif_ill; + mblk_t *arp_add_mp; + mblk_t *arp_del_mp; + area_t *area; + + ipif->ipif_recovery_id = 0; + + if (ill->ill_arp_closing || !(ipif->ipif_flags & IPIF_DUPLICATE) || + (ipif->ipif_flags & IPIF_POINTOPOINT)) { + /* No reason to try to bring this address back. */ + return; + } + + if ((arp_add_mp = ipif_area_alloc(ipif)) == NULL) + goto alloc_fail; + + if (ipif->ipif_arp_del_mp == NULL) { + if ((arp_del_mp = ipif_ared_alloc(ipif)) == NULL) + goto alloc_fail; + ipif->ipif_arp_del_mp = arp_del_mp; + } + + /* Setting the 'unverified' flag restarts DAD */ + area = (area_t *)arp_add_mp->b_rptr; + area->area_flags = ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MYADDR | + ACE_F_UNVERIFIED; + putnext(ill->ill_rq, arp_add_mp); + return; + +alloc_fail: + /* On allocation failure, just restart the timer */ + freemsg(arp_add_mp); + if (ip_dup_recovery > 0) { + ipif->ipif_recovery_id = timeout(ipif_dup_recovery, ipif, + MSEC_TO_TICK(ip_dup_recovery)); + } +} + +/* + * This is for exclusive changes due to ARP. Either tear down an interface due + * to AR_CN_FAILED and AR_CN_BOGON, or bring one up for successful recovery. 
+ */ +/* ARGSUSED */ +static void +ip_arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) +{ + ill_t *ill = rq->q_ptr; + arh_t *arh; + ipaddr_t src; + ipif_t *ipif; + char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ + char hbuf[MAC_STR_LEN]; + char sbuf[INET_ADDRSTRLEN]; + const char *failtype; + boolean_t bring_up; + + switch (((arcn_t *)mp->b_rptr)->arcn_code) { + case AR_CN_READY: + failtype = NULL; + bring_up = B_TRUE; + break; + case AR_CN_FAILED: + failtype = "in use"; + bring_up = B_FALSE; + break; + default: + failtype = "claimed"; + bring_up = B_FALSE; + break; + } + + arh = (arh_t *)mp->b_cont->b_rptr; + bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); + + /* Handle failures due to probes */ + if (src == 0) { + bcopy((char *)&arh[1] + 2 * arh->arh_hlen + IP_ADDR_LEN, &src, + IP_ADDR_LEN); + } + + (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); + (void) mac_colon_addr((uint8_t *)(arh + 1), arh->arh_hlen, hbuf, + sizeof (hbuf)); + (void) ip_dot_addr(src, sbuf); + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + + if ((ipif->ipif_flags & IPIF_POINTOPOINT) || + ipif->ipif_lcl_addr != src) { + continue; + } + + /* + * If we failed on a recovery probe, then restart the timer to + * try again later. + */ + if (!bring_up && (ipif->ipif_flags & IPIF_DUPLICATE) && + !(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && + ill->ill_net_type == IRE_IF_RESOLVER && + ip_dup_recovery > 0 && ipif->ipif_recovery_id == 0) { + ipif->ipif_recovery_id = timeout(ipif_dup_recovery, + ipif, MSEC_TO_TICK(ip_dup_recovery)); + continue; + } + + /* + * If what we're trying to do has already been done, then do + * nothing. 
+ */ + if (bring_up == ((ipif->ipif_flags & IPIF_UP) != 0)) + continue; + + if (ipif->ipif_id != 0) { + (void) snprintf(ibuf + ill->ill_name_length - 1, + sizeof (ibuf) - ill->ill_name_length + 1, ":%d", + ipif->ipif_id); + } + if (failtype == NULL) { + cmn_err(CE_NOTE, "recovered address %s on %s", sbuf, + ibuf); + } else { + cmn_err(CE_WARN, "%s has duplicate address %s (%s " + "by %s); disabled", ibuf, sbuf, failtype, hbuf); + } + + if (bring_up) { + ASSERT(ill->ill_dl_up); + /* + * Free up the ARP delete message so we can allocate + * a fresh one through the normal path. + */ + freemsg(ipif->ipif_arp_del_mp); + ipif->ipif_arp_del_mp = NULL; + if (ipif_resolver_up(ipif, Res_act_initial) != + EINPROGRESS) { + ipif->ipif_addr_ready = 1; + (void) ipif_up_done(ipif); + } + continue; + } + + mutex_enter(&ill->ill_lock); + ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); + ipif->ipif_flags |= IPIF_DUPLICATE; + ill->ill_ipif_dup_count++; + mutex_exit(&ill->ill_lock); + /* + * Already exclusive on the ill; no need to handle deferred + * processing here. + */ + (void) ipif_down(ipif, NULL, NULL); + ipif_down_tail(ipif); + if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && + ill->ill_net_type == IRE_IF_RESOLVER && + ip_dup_recovery > 0) { + ipif->ipif_recovery_id = timeout(ipif_dup_recovery, + ipif, MSEC_TO_TICK(ip_dup_recovery)); + } + } + freemsg(mp); +} + +/* ARGSUSED */ +static void +ip_arp_defend(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) +{ + ill_t *ill = rq->q_ptr; + arh_t *arh; + ipaddr_t src; + ipif_t *ipif; + + arh = (arh_t *)mp->b_cont->b_rptr; + bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + if ((ipif->ipif_flags & IPIF_UP) && ipif->ipif_lcl_addr == src) + (void) ipif_resolver_up(ipif, Res_act_defend); + } + freemsg(mp); +} + +/* * News from ARP. 
ARP sends notification of interesting events down * to its clients using M_CTL messages with the interesting ARP packet * attached via b_cont. @@ -3796,15 +3946,14 @@ ip_arp_news(queue_t *q, mblk_t *mp) { arcn_t *arcn; arh_t *arh; - char *cp1; - uchar_t *cp2; ire_t *ire = NULL; - int i1; - char hbuf[128]; - char sbuf[16]; + char hbuf[MAC_STR_LEN]; + char sbuf[INET_ADDRSTRLEN]; ipaddr_t src; in6_addr_t v6src; boolean_t isv6 = B_FALSE; + ipif_t *ipif; + ill_t *ill; if ((mp->b_wptr - mp->b_rptr) < sizeof (arcn_t) || !mp->b_cont) { if (q->q_next) { @@ -3827,25 +3976,37 @@ ip_arp_news(queue_t *q, mblk_t *mp) return; } + ill = q->q_ptr; + arcn = (arcn_t *)mp->b_rptr; switch (arcn->arcn_code) { case AR_CN_BOGON: /* * Someone is sending ARP packets with a source protocol - * address which we have published. Either they are - * pretending to be us, or we have been asked to proxy - * for a machine that can do fine for itself, or two - * different machines are providing proxy service for the - * same protocol address, or something. We try and do - * something appropriate here. - */ - cp2 = (uchar_t *)&arh[1]; - cp1 = hbuf; - *cp1 = '\0'; - for (i1 = arh->arh_hlen; i1--; cp1 += 3) - (void) sprintf(cp1, "%02x:", *cp2++ & 0xff); - if (cp1 != hbuf) - cp1[-1] = '\0'; + * address that we have published and for which we believe our + * entry is authoritative and (when ill_arp_extend is set) + * verified to be unique on the network. + * + * The ARP module internally handles the cases where the sender + * is just probing (for DAD) and where the hardware address of + * a non-authoritative entry has changed. Thus, these are the + * real conflicts, and we have to do resolution. + * + * We back away quickly from the address if it's from DHCP or + * otherwise temporary and hasn't been used recently (or at + * all). 
We'd like to include "deprecated" addresses here as + * well (as there's no real reason to defend something we're + * discarding), but IPMP "reuses" this flag to mean something + * other than the standard meaning. + * + * If the ARP module above is not extended (meaning that it + * doesn't know how to defend the address), then we just log + * the problem as we always did and continue on. It's not + * right, but there's little else we can do, and those old ATM + * users are going away anyway. + */ + (void) mac_colon_addr((uint8_t *)(arh + 1), arh->arh_hlen, + hbuf, sizeof (hbuf)); (void) ip_dot_addr(src, sbuf); if (isv6) ire = ire_cache_lookup_v6(&v6src, ALL_ZONES, NULL); @@ -3853,16 +4014,78 @@ ip_arp_news(queue_t *q, mblk_t *mp) ire = ire_cache_lookup(src, ALL_ZONES, NULL); if (ire != NULL && IRE_IS_LOCAL(ire)) { - cmn_err(CE_WARN, - "IP: Hardware address '%s' trying" - " to be our address %s!", - hbuf, sbuf); - } else { - cmn_err(CE_WARN, - "IP: Proxy ARP problem? " - "Hardware address '%s' thinks it is %s", - hbuf, sbuf); + uint32_t now; + uint32_t maxage; + clock_t lused; + uint_t maxdefense; + uint_t defs; + + /* + * First, figure out if this address hasn't been used + * in a while. If it hasn't, then it's a better + * candidate for abandoning. + */ + ipif = ire->ire_ipif; + ASSERT(ipif != NULL); + now = gethrestime_sec(); + maxage = now - ire->ire_create_time; + if (maxage > ip_max_temp_idle) + maxage = ip_max_temp_idle; + lused = drv_hztousec(ddi_get_lbolt() - + ire->ire_last_used_time) / MICROSEC + 1; + if (lused >= maxage && (ipif->ipif_flags & + (IPIF_DHCPRUNNING | IPIF_TEMPORARY))) + maxdefense = ip_max_temp_defend; + else + maxdefense = ip_max_defend; + + /* + * Now figure out how many times we've defended + * ourselves. Ignore defenses that happened long in + * the past. 
+ */ + mutex_enter(&ire->ire_lock); + if ((defs = ire->ire_defense_count) > 0 && + now - ire->ire_defense_time > ip_defend_interval) { + ire->ire_defense_count = defs = 0; + } + ire->ire_defense_count++; + ire->ire_defense_time = now; + mutex_exit(&ire->ire_lock); + ill_refhold(ill); + ire_refrele(ire); + + /* + * If we've defended ourselves too many times already, + * then give up and tear down the interface(s) using + * this address. Otherwise, defend by sending out a + * gratuitous ARP. + */ + if (defs >= maxdefense && ill->ill_arp_extend) { + (void) qwriter_ip(NULL, ill, q, mp, + ip_arp_excl, CUR_OP, B_FALSE); + } else { + cmn_err(CE_WARN, + "node %s is using our IP address %s on %s", + hbuf, sbuf, ill->ill_name); + /* + * If this is an old (ATM) ARP module, then + * don't try to defend the address. Remain + * compatible with the old behavior. Defend + * only with new ARP. + */ + if (ill->ill_arp_extend) { + (void) qwriter_ip(NULL, ill, q, mp, + ip_arp_defend, CUR_OP, B_FALSE); + } else { + ill_refrele(ill); + } + } + return; } + cmn_err(CE_WARN, + "proxy ARP problem? Node '%s' is using %s on %s", + hbuf, sbuf, ill->ill_name); if (ire != NULL) ire_refrele(ire); break; @@ -3884,53 +4107,79 @@ ip_arp_news(queue_t *q, mblk_t *mp) ire_walk_v6(ire_delete_cache_gw_v6, (char *)&v6src, ALL_ZONES); } - break; + } else { + nce_hw_map_t hwm; + + /* + * ARP gives us a copy of any packet where it thinks + * the address has changed, so that we can update our + * caches. We're responsible for caching known answers + * in the current design. We check whether the + * hardware address really has changed in all of our + * entries that have cached this mapping, and if so, we + * blow them away. This way we will immediately pick + * up the rare case of a host changing hardware + * address. 
+ */ + if (src == 0) + break; + hwm.hwm_addr = src; + hwm.hwm_hwlen = arh->arh_hlen; + hwm.hwm_hwaddr = (uchar_t *)(arh + 1); + ndp_walk_common(&ndp4, NULL, + (pfi_t)nce_delete_hw_changed, &hwm, ALL_ZONES); } - /* - * ARP gives us a copy of any broadcast packet with identical - * sender and receiver protocol address, in - * case we want to intuit something from it. Such a packet - * usually means that a machine has just come up on the net. - * If we have an IRE_CACHE, we blow it away. This way we will - * immediately pick up the rare case of a host changing - * hardware address. ip_ire_clookup_and_delete achieves this. - * - * The address in "src" may be an entry for a router. - * (Default router, or non-default router.) If - * that's true, then any off-net IRE_CACHE entries - * that go through the router with address "src" - * must be clobbered. Use ire_walk to achieve this - * goal. - * - * It should be possible to determine if the address - * in src is or is not for a router. This way, - * the ire_walk() isn't called all of the time here. - * Do not pass 'src' value of 0 to ire_delete_cache_gw, - * as it would remove all IRE_CACHE entries for onlink - * destinations. All onlink destinations have - * ire_gateway_addr == 0. - * - * - * The ip_ire_clookup_and_delete() call deletes - * the nce and all relevant ire cache entries that - * are associated with that nce. - * The ire_walk_v4->ire_delete_cache_gw() call - * will delete the appropriate redirect ires. 
- */ - if ((ip_ire_clookup_and_delete(src, NULL) || - (ire = ire_ftable_lookup(src, 0, 0, 0, NULL, NULL, NULL, - 0, NULL, MATCH_IRE_DSTONLY)) != NULL) && src != 0) { - ire_walk_v4(ire_delete_cache_gw, (char *)&src, - ALL_ZONES); - } - /* From ire_ftable_lookup */ - if (ire != NULL) - ire_refrele(ire); break; - default: - if (ire != NULL) + case AR_CN_READY: + /* No external v6 resolver has a contract to use this */ + if (isv6) + break; + /* If the link is down, we'll retry this later */ + if (!(ill->ill_phyint->phyint_flags & PHYI_RUNNING)) + break; + ipif = ipif_lookup_addr(src, ill, ALL_ZONES, NULL, NULL, + NULL, NULL); + if (ipif != NULL) { + /* + * If this is a duplicate recovery, then we now need to + * go exclusive to bring this thing back up. + */ + if ((ipif->ipif_flags & (IPIF_UP|IPIF_DUPLICATE)) == + IPIF_DUPLICATE) { + ipif_refrele(ipif); + ill_refhold(ill); + (void) qwriter_ip(NULL, ill, q, mp, + ip_arp_excl, CUR_OP, B_FALSE); + return; + } + /* + * If this is the first notice that this address is + * ready, then let the user know now. + */ + if ((ipif->ipif_flags & IPIF_UP) && + !ipif->ipif_addr_ready) { + ipif_mask_reply(ipif); + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + sctp_update_ipif(ipif, SCTP_IPIF_UP); + } + ipif->ipif_addr_ready = 1; + ipif_refrele(ipif); + } + ire = ire_cache_lookup(src, ALL_ZONES, MBLK_GETLABEL(mp)); + if (ire != NULL) { + ire->ire_defense_count = 0; ire_refrele(ire); + } break; + case AR_CN_FAILED: + /* No external v6 resolver has a contract to use this */ + if (isv6) + break; + ill_refhold(ill); + (void) qwriter_ip(NULL, ill, q, mp, ip_arp_excl, CUR_OP, + B_FALSE); + return; } freemsg(mp); } @@ -5598,25 +5847,57 @@ dlpi_err_str(int err) * Debug formatting routine. Returns a character string representation of the * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address * in the form of a ipaddr_t and calls ip_dot_saddr with a pointer. 
+ * + * Once the ndd table-printing interfaces are removed, this can be changed to + * standard dotted-decimal form. */ char * ip_dot_addr(ipaddr_t addr, char *buf) { - return (ip_dot_saddr((uchar_t *)&addr, buf)); + uint8_t *ap = (uint8_t *)&addr; + + (void) mi_sprintf(buf, "%03d.%03d.%03d.%03d", + ap[0] & 0xFF, ap[1] & 0xFF, ap[2] & 0xFF, ap[3] & 0xFF); + return (buf); } /* - * Debug formatting routine. Returns a character string representation of the - * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address - * as a pointer. The "xxx" parts including left zero padding so the final - * string will fit easily in tables. It would be nice to take a padding - * length argument instead. + * Write the given MAC address as a printable string in the usual colon- + * separated format. */ -static char * -ip_dot_saddr(uchar_t *addr, char *buf) +const char * +mac_colon_addr(const uint8_t *addr, size_t alen, char *buf, size_t buflen) { - (void) mi_sprintf(buf, "%03d.%03d.%03d.%03d", - addr[0] & 0xFF, addr[1] & 0xFF, addr[2] & 0xFF, addr[3] & 0xFF); + char *bp; + + if (alen == 0 || buflen < 4) + return ("?"); + bp = buf; + for (;;) { + /* + * If there are more MAC address bytes available, but we won't + * have any room to print them, then add "..." to the string + * instead. See below for the 'magic number' explanation. + */ + if ((alen == 2 && buflen < 6) || (alen > 2 && buflen < 7)) { + (void) strcpy(bp, "..."); + break; + } + (void) sprintf(bp, "%02x", *addr++); + bp += 2; + if (--alen == 0) + break; + *bp++ = ':'; + buflen -= 3; + /* + * At this point, based on the first 'if' statement above, + * either alen == 1 and buflen >= 3, or alen > 1 and + * buflen >= 4. The first case leaves room for the final "xx" + * number and trailing NUL byte. The second leaves room for at + * least "...". Thus the apparently 'magic' numbers chosen for + * that statement. 
+ */ + } return (buf); } @@ -9315,8 +9596,8 @@ ip_setqinfo(queue_t *q, minor_t minor, boolean_t bump_mib) } else { if (bump_mib) BUMP_MIB(&ip_mib, ipOutSwitchIPv6); - q->q_qinfo = &rinit; - WR(q)->q_qinfo = &winit; + q->q_qinfo = &iprinit; + WR(q)->q_qinfo = &ipwinit; (Q_TO_CONN(q))->conn_pkt_isv6 = B_FALSE; } @@ -14891,7 +15172,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) * v6 interfaces. * Unlike ARP which has to do another bind * and attach, once we get here we are - * done withh NDP. Except in the case of + * done with NDP. Except in the case of * ILLF_XRESOLV, in which case we send an * AR_INTERFACE_UP to the external resolver. * If all goes well, the ioctl will complete @@ -14910,7 +15191,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) mutex_exit(&connp->conn_lock); if (success) { err = ipif_resolver_up(ipif, - B_FALSE); + Res_act_initial); if (err == EINPROGRESS) { freemsg(mp); return; @@ -14939,7 +15220,7 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) mutex_exit(&ill->ill_lock); mutex_exit(&connp->conn_lock); if (success) { - err = ipif_resolver_up(ipif, B_FALSE); + err = ipif_resolver_up(ipif, Res_act_initial); if (err == EINPROGRESS) { freemsg(mp); return; @@ -15061,13 +15342,13 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) /* * IPv4 ARP case * - * Set B_TRUE, as we only want + * Set Res_act_move, as we only want * ipif_resolver_up to send an * AR_ENTRY_ADD request up to * ARP. 
*/ err = ipif_resolver_up(ipif, - B_TRUE); + Res_act_move); if (err) { ip1dbg(( "ip_rput_dlpi_writer: " @@ -15204,10 +15485,11 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) phyint_t *phyint = ill->ill_phyint; uint64_t new_phyint_flags; boolean_t changed = B_FALSE; + boolean_t went_up; + went_up = notify->dl_notification == DL_NOTE_LINK_UP; mutex_enter(&phyint->phyint_lock); - new_phyint_flags = - (notify->dl_notification == DL_NOTE_LINK_UP) ? + new_phyint_flags = went_up ? phyint->phyint_flags | PHYI_RUNNING : phyint->phyint_flags & ~PHYI_RUNNING; if (new_phyint_flags != phyint->phyint_flags) { @@ -15216,18 +15498,12 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) } mutex_exit(&phyint->phyint_lock); /* - * If the flags have changed, send a message to - * the routing socket. + * ill_restart_dad handles the DAD restart and routing + * socket notification logic. */ if (changed) { - if (phyint->phyint_illv4 != NULL) { - ip_rts_ifmsg( - phyint->phyint_illv4->ill_ipif); - } - if (phyint->phyint_illv6 != NULL) { - ip_rts_ifmsg( - phyint->phyint_illv6->ill_ipif); - } + ill_restart_dad(phyint->phyint_illv4, went_up); + ill_restart_dad(phyint->phyint_illv6, went_up); } break; } @@ -15274,15 +15550,14 @@ ip_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) */ break; } - case DL_NOTIFY_ACK: - /* - * Don't really need to check for what notifications - * are supported; we'll process what gets sent upstream, - * and we know it'll be something we support changing - * based on our DL_NOTIFY_REQ. 
- */ + case DL_NOTIFY_ACK: { + dl_notify_ack_t *noteack = (dl_notify_ack_t *)mp->b_rptr; + + if (noteack->dl_notifications & DL_NOTE_LINK_UP) + ill->ill_note_link = 1; ill_dlpi_done(ill, DL_NOTIFY_REQ); break; + } case DL_PHYS_ADDR_ACK: { /* * We should have an IOCTL waiting on this when request @@ -16198,7 +16473,7 @@ ip_fanout_proto_again(mblk_t *ipsec_mp, ill_t *ill, ill_t *recv_ill, ire_t *ire) rput_flags |= (IN6_IS_ADDR_MULTICAST(v6dstp) ? IP6_IN_LLMCAST : 0); ip_rput_data_v6(ill->ill_rq, ill, ipsec_mp, ip6h, rput_flags, - NULL); + NULL, NULL); } if (ill_need_rele) ill_refrele(ill); @@ -25801,6 +26076,17 @@ nak: freemsg(mp); } return; + case AR_ARP_EXTEND: + /* + * The ARP module above us is capable of duplicate + * address detection. Old ATM drivers will not send + * this message. + */ + ASSERT(q->q_next != NULL); + ill = (ill_t *)q->q_ptr; + ill->ill_arp_extend = B_TRUE; + freemsg(mp); + return; default: break; } @@ -27308,7 +27594,7 @@ static void ip_multirt_bad_mtu(ire_t *ire, uint32_t max_frag) { hrtime_t current = gethrtime(); - char buf[16]; + char buf[INET_ADDRSTRLEN]; /* Convert interval in ms to hrtime in ns */ if (multirt_bad_mtu_last_time + diff --git a/usr/src/uts/common/inet/ip/ip6.c b/usr/src/uts/common/inet/ip/ip6.c index b96a6a24ef..29afe371f7 100644 --- a/usr/src/uts/common/inet/ip/ip6.c +++ b/usr/src/uts/common/inet/ip/ip6.c @@ -102,6 +102,9 @@ #include <rpc/pmap_prot.h> +/* Temporary; for CR 6451644 work-around */ +#include <sys/ethernet.h> + extern squeue_func_t ip_input_proc; /* @@ -326,7 +329,7 @@ struct qinit winit_ipv6 = { */ static void icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, - boolean_t mctl_present, uint_t flags, zoneid_t zoneid) + boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) { icmp6_t *icmp6; ip6_t *ip6h; @@ -603,7 +606,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, if (mctl_present) freeb(first_mp); /* XXX may wish to pass first_mp up to ndp_input 
someday. */ - ndp_input(ill, mp); + ndp_input(ill, mp, dl_mp); return; case ND_NEIGHBOR_ADVERT: @@ -612,7 +615,7 @@ icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, if (mctl_present) freeb(first_mp); /* XXX may wish to pass first_mp up to ndp_input someday. */ - ndp_input(ill, mp); + ndp_input(ill, mp, dl_mp); return; case ND_REDIRECT: { @@ -5910,26 +5913,6 @@ ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, } goto err_ret; } - /* Use any ipif for source */ - for (src_ipif = dst_ill->ill_ipif; src_ipif != NULL; - src_ipif = src_ipif->ipif_next) { - if ((src_ipif->ipif_flags & IPIF_UP) && - IN6_IS_ADDR_UNSPECIFIED( - &src_ipif->ipif_v6src_addr)) - break; - } - if (src_ipif == NULL) { - if (ip_debug > 2) { - /* ip1dbg */ - pr_addr_dbg("ip_newroute_ipif_v6: " - "no src for dst %s\n ", - AF_INET6, v6dstp); - printf("ip_newroute_ipif_v6: if %s" - "(UNSPEC_SRC)\n", - dst_ill->ill_name); - } - goto err_ret; - } src_ipif = ipif; ipif_refhold(src_ipif); } @@ -6602,7 +6585,7 @@ bad_opt: */ static void ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, - ill_t *ill, uint_t flags, mblk_t *hada_mp) + ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) { ip6_rthdr0_t *rthdr; uint_t ehdrlen; @@ -6678,7 +6661,7 @@ ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, B_FALSE, B_FALSE); return; } - ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); + ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); return; hada_drop: /* IPsec kstats: bean counter? 
*/ @@ -6692,12 +6675,15 @@ hada_drop: static void ip_rput_v6(queue_t *q, mblk_t *mp) { - mblk_t *mp1, *first_mp, *hada_mp = NULL; + mblk_t *first_mp; + mblk_t *hada_mp = NULL; ip6_t *ip6h; - boolean_t ll_multicast = B_FALSE, mctl_present = B_FALSE; + boolean_t ll_multicast = B_FALSE; + boolean_t mctl_present = B_FALSE; ill_t *ill; struct iocblk *iocp; uint_t flags = 0; + mblk_t *dl_mp; ill = (ill_t *)q->q_ptr; if (ill->ill_state_flags & ILL_CONDEMNED) { @@ -6719,9 +6705,59 @@ ip_rput_v6(queue_t *q, mblk_t *mp) } } + dl_mp = NULL; switch (mp->b_datap->db_type) { - case M_DATA: + case M_DATA: { + int hlen; + uchar_t *ucp; + struct ether_header *eh; + dl_unitdata_ind_t *dui; + + /* + * This is a work-around for CR 6451644, a bug in Nemo. It + * should be removed when that problem is fixed. + */ + if (ill->ill_mactype == DL_ETHER && + (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && + (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && + ucp[-2] == (IP6_DL_SAP >> 8)) { + if (hlen >= sizeof (struct ether_vlan_header) && + ucp[-5] == 0 && ucp[-6] == 0x81) + ucp -= sizeof (struct ether_vlan_header); + else + ucp -= sizeof (struct ether_header); + /* + * If it's a group address, then fabricate a + * DL_UNITDATA_IND message. 
+ */ + if ((ll_multicast = (ucp[0] & 1)) != 0 && + (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, + BPRI_HI)) != NULL) { + eh = (struct ether_header *)ucp; + dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; + DB_TYPE(dl_mp) = M_PROTO; + dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; + dui->dl_primitive = DL_UNITDATA_IND; + dui->dl_dest_addr_length = 8; + dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; + dui->dl_src_addr_length = 8; + dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + + 8; + dui->dl_group_address = 1; + ucp = (uchar_t *)(dui + 1); + if (ill->ill_sap_length > 0) + ucp += ill->ill_sap_length; + bcopy(&eh->ether_dhost, ucp, 6); + bcopy(&eh->ether_shost, ucp + 8, 6); + ucp = (uchar_t *)(dui + 1); + if (ill->ill_sap_length < 0) + ucp += 8 + ill->ill_sap_length; + bcopy(&eh->ether_type, ucp, 2); + bcopy(&eh->ether_type, ucp + 8, 2); + } + } break; + } case M_PROTO: case M_PCPROTO: @@ -6734,10 +6770,10 @@ ip_rput_v6(queue_t *q, mblk_t *mp) #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) ll_multicast = dlur->dl_group_address; #undef dlur - /* Ditch the DLPI header. */ - mp1 = mp; + /* Save the DLPI header. 
*/ + dl_mp = mp; mp = mp->b_cont; - freeb(mp1); + dl_mp->b_cont = NULL; break; case M_BREAK: panic("ip_rput_v6: got an M_BREAK"); @@ -6772,7 +6808,7 @@ ip_rput_v6(queue_t *q, mblk_t *mp) mutex_exit(&ill->ill_lock); qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); return; - case M_CTL: { + case M_CTL: if ((MBLKL(mp) > sizeof (int)) && ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); @@ -6781,7 +6817,6 @@ ip_rput_v6(queue_t *q, mblk_t *mp) } putnext(q, mp); return; - } case M_IOCNAK: iocp = (struct iocblk *)mp->b_rptr; switch (iocp->ioc_cmd) { @@ -6824,8 +6859,8 @@ ip_rput_v6(queue_t *q, mblk_t *mp) mp1 = copymsg(mp); freemsg(mp); if (mp1 == NULL) { - BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); - return; + first_mp = NULL; + goto discard; } mp = mp1; } @@ -6841,10 +6876,8 @@ ip_rput_v6(queue_t *q, mblk_t *mp) if (!OK_32PTR((uchar_t *)ip6h) || (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { if (!pullupmsg(mp, IPV6_HDR_LEN)) { - BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); ip1dbg(("ip_rput_v6: pullupmsg failed\n")); - freemsg(first_mp); - return; + goto discard; } ip6h = (ip6_t *)mp->b_rptr; } @@ -6857,31 +6890,32 @@ ip_rput_v6(queue_t *q, mblk_t *mp) * TODO: Avoid this check for e.g. connected TCP sockets */ if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { - BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); - freemsg(first_mp); - return; + goto discard; } if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { - BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); ip1dbg(("ip_rput_v6: pkt with loopback src")); - freemsg(first_mp); - return; + goto discard; } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { - BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); ip1dbg(("ip_rput_v6: pkt with loopback dst")); - freemsg(first_mp); - return; + goto discard; } flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); - ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp); + ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); } else { BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); - BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); - freemsg(first_mp); + goto discard; } + freemsg(dl_mp); + return; + +discard: + if (dl_mp != NULL) + freeb(dl_mp); + freemsg(first_mp); + BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); } /* @@ -7080,10 +7114,14 @@ ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, * actually arrived on. We need to remember this when saving the * input interface index into potential IPV6_PKTINFO data in * ip_add_info_v6(). + * + * This routine doesn't free dl_mp; that's the caller's responsibility on + * return. (Note that the callers are complex enough that there's no tail + * recursion here anyway.) */ void ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, - uint_t flags, mblk_t *hada_mp) + uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) { ire_t *ire = NULL; queue_t *rq; @@ -7939,14 +7977,15 @@ tcp_fanout: continue; icmp_inbound_v6(q, first_mp1, ill, hdr_len, mctl_present, 0, - ilm->ilm_zoneid); + ilm->ilm_zoneid, dl_mp); } ILM_WALKER_RELE(ill); } else { first_mp1 = ip_copymsg(first_mp); if (first_mp1 != NULL) icmp_inbound_v6(q, first_mp1, ill, - hdr_len, mctl_present, 0, zoneid); + hdr_len, mctl_present, 0, zoneid, + dl_mp); } } /* FALLTHRU */ @@ -8181,7 +8220,7 @@ tcp_fanout: return; } ip_process_rthdr(q, mp, ip6h, rthdr, ill, - flags, hada_mp); + flags, hada_mp, dl_mp); return; } used = ehdrlen; @@ -10253,8 +10292,7 @@ send_from_ill: &ip6h->ip6_src, ill, zoneid); } } - if (ill != NULL) - ill_refrele(ill); + ill_refrele(ill); return; } if (need_decref) { @@ -10284,8 +10322,7 @@ send_from_ill: } if (mp == NULL) { BUMP_MIB(mibptr, ipv6OutDiscards); - if (ill != NULL) - ill_refrele(ill); + ill_refrele(ill); return; } ip6i = (ip6i_t *)mp->b_rptr; @@ -10333,8 +10370,7 @@ send_from_ill: ip_newroute_v6(q, first_mp, v6dstp, 
&ip6h->ip6_src, ill, zoneid); } - if (ill != NULL) - ill_refrele(ill); + ill_refrele(ill); return; notv6: @@ -10553,7 +10589,8 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, continue; icmp_inbound_v6(q, first_mp1, ill, hdr_length, mctl_present, - IP6_NO_IPPOLICY, ilm->ilm_zoneid); + IP6_NO_IPPOLICY, ilm->ilm_zoneid, + NULL); } ILM_WALKER_RELE(ill); } else { @@ -10561,7 +10598,8 @@ ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, if (first_mp1 != NULL) icmp_inbound_v6(q, first_mp1, ill, hdr_length, mctl_present, - IP6_NO_IPPOLICY, ire->ire_zoneid); + IP6_NO_IPPOLICY, ire->ire_zoneid, + NULL); } } /* FALLTHRU */ diff --git a/usr/src/uts/common/inet/ip/ip6_if.c b/usr/src/uts/common/inet/ip/ip6_if.c index 2749b6b439..313d0bbdca 100644 --- a/usr/src/uts/common/inet/ip/ip6_if.c +++ b/usr/src/uts/common/inet/ip/ip6_if.c @@ -1317,12 +1317,12 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change) * ND not supported on XRESOLV interfaces. If ND support (multicast) * added later, take out this check. 
*/ - if (ill->ill_flags & ILLF_XRESOLV) - return (0); - - if (IN6_IS_ADDR_UNSPECIFIED(addr) || - (!(ill->ill_net_type & IRE_INTERFACE))) + if ((ill->ill_flags & ILLF_XRESOLV) || + IN6_IS_ADDR_UNSPECIFIED(addr) || + (!(ill->ill_net_type & IRE_INTERFACE))) { + ipif->ipif_addr_ready = 1; return (0); + } /* * Need to setup multicast mapping only when the first @@ -1374,7 +1374,7 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change) &ipv6_all_zeros, 0, flags, - ND_REACHABLE, + ND_PROBE, /* Causes Duplicate Address Detection to run */ &nce, NULL, NULL); @@ -1382,6 +1382,11 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change) case 0: ip1dbg(("ipif_ndp_up: NCE created for %s\n", ill->ill_name)); + ipif->ipif_addr_ready = 1; + break; + case EINPROGRESS: + ip1dbg(("ipif_ndp_up: running DAD now for %s\n", + ill->ill_name)); break; case EEXIST: NCE_REFRELE(nce); @@ -1401,6 +1406,9 @@ ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change) } return (err); } + } else { + /* No local NCE for this entry */ + ipif->ipif_addr_ready = 1; } if (nce != NULL) NCE_REFRELE(nce); @@ -1625,7 +1633,8 @@ ip_addr_xor_v6(const in6_addr_t *a1, const in6_addr_t *a2, in6_addr_t *res) #define IPIF_VALID_IPV6_SOURCE(ipif) \ (((ipif)->ipif_flags & IPIF_UP) && \ - !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST))) + !((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) && \ + (ipif)->ipif_addr_ready) /* source address candidate */ typedef struct candidate { @@ -3001,9 +3010,12 @@ ipif_up_done_v6(ipif_t *ipif) } } + if (ipif->ipif_addr_ready) { + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + sctp_update_ipif(ipif, SCTP_IPIF_UP); + } - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); if (ipif_saved_irep != NULL) { kmem_free(ipif_saved_irep, ipif_saved_ire_cnt * sizeof (ire_t *)); @@ -3011,7 +3023,6 @@ ipif_up_done_v6(ipif_t *ipif) if (src_ipif_held) ipif_refrele(src_ipif); - sctp_update_ipif(ipif, 
SCTP_IPIF_UP); return (0); bad: diff --git a/usr/src/uts/common/inet/ip/ip_if.c b/usr/src/uts/common/inet/ip/ip_if.c index 179c1dd7ce..8edf1bb113 100644 --- a/usr/src/uts/common/inet/ip/ip_if.c +++ b/usr/src/uts/common/inet/ip/ip_if.c @@ -157,10 +157,8 @@ static void ipif_check_bcast_ires(ipif_t *test_ipif); static void ipif_down_delete_ire(ire_t *ire, char *ipif); static void ipif_delete_cache_ire(ire_t *, char *); static int ipif_logical_down(ipif_t *ipif, queue_t *q, mblk_t *mp); -static void ipif_down_tail(ipif_t *ipif); static void ipif_free(ipif_t *ipif); static void ipif_free_tail(ipif_t *ipif); -static void ipif_mask_reply(ipif_t *); static void ipif_mtu_change(ire_t *ire, char *ipif_arg); static void ipif_multicast_down(ipif_t *ipif); static void ipif_recreate_interface_routes(ipif_t *old_ipif, ipif_t *ipif); @@ -180,6 +178,7 @@ static int ill_arp_off(ill_t *ill); static int ill_arp_on(ill_t *ill); static void ill_delete_interface_type(ill_if_t *); static int ill_dl_up(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q); +static void ill_dl_down(ill_t *ill); static void ill_down(ill_t *ill); static void ill_downi(ire_t *ire, char *ill_arg); static void ill_downi_mrtun_srcif(ire_t *ire, char *ill_arg); @@ -671,6 +670,20 @@ ill_arp_alloc(ill_t *ill, uchar_t *template, caddr_t addr) return (mp); } +mblk_t * +ipif_area_alloc(ipif_t *ipif) +{ + return (ill_arp_alloc(ipif->ipif_ill, (uchar_t *)&ip_area_template, + (char *)&ipif->ipif_lcl_addr)); +} + +mblk_t * +ipif_ared_alloc(ipif_t *ipif) +{ + return (ill_arp_alloc(ipif->ipif_ill, (uchar_t *)&ip_ared_template, + (char *)&ipif->ipif_lcl_addr)); +} + /* * Completely vaporize a lower level tap and all associated interfaces. 
* ill_delete is called only out of ip_close when the device control @@ -751,6 +764,19 @@ ill_delete(ill_t *ill) rw_exit(&ill_g_usesrc_lock); } +static void +ipif_non_duplicate(ipif_t *ipif) +{ + ill_t *ill = ipif->ipif_ill; + mutex_enter(&ill->ill_lock); + if (ipif->ipif_flags & IPIF_DUPLICATE) { + ipif->ipif_flags &= ~IPIF_DUPLICATE; + ASSERT(ill->ill_ipif_dup_count > 0); + ill->ill_ipif_dup_count--; + } + mutex_exit(&ill->ill_lock); +} + /* * ill_delete_tail is called from ip_modclose after all references * to the closing ill are gone. The wait is done in ip_modclose @@ -761,8 +787,14 @@ ill_delete_tail(ill_t *ill) mblk_t **mpp; ipif_t *ipif; - for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + ipif_non_duplicate(ipif); ipif_down_tail(ipif); + } + + ASSERT(ill->ill_ipif_dup_count == 0 && + ill->ill_arp_down_mp == NULL && + ill->ill_arp_del_mapping_mp == NULL); /* * If polling capability is enabled (which signifies direct @@ -1489,8 +1521,10 @@ ipif_all_down_tail(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) ipif_t *ipif; ASSERT(IAM_WRITER_IPSQ(ipsq)); - for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + ipif_non_duplicate(ipif); ipif_down_tail(ipif); + } ill_down_tail(ill); freemsg(mp); ipsq->ipsq_current_ipif = NULL; @@ -5645,8 +5679,10 @@ ipif_is_quiescent(ipif_t *ipif) } ill = ipif->ipif_ill; - if (ill->ill_ipif_up_count != 0 || ill->ill_logical_down) + if (ill->ill_ipif_up_count != 0 || ill->ill_ipif_dup_count != 0 || + ill->ill_logical_down) { return (B_TRUE); + } /* This is the last ipif going down or being deleted on this ill */ if (ill->ill_ire_cnt != 0 || ill->ill_refcnt != 0) { @@ -9144,6 +9180,8 @@ ip_sioctl_arp_common(ill_t *ill, queue_t *q, mblk_t *mp, sin_t *sin, area->area_flags |= ACE_F_PERMANENT; if (flags & ATF_PUBL) area->area_flags |= ACE_F_PUBLISH; + if (flags & 
ATF_AUTHORITY) + area->area_flags |= ACE_F_AUTHORITY; /* * Up to ARP it goes. The response will come @@ -10118,6 +10156,8 @@ errack: *flagsp |= ATF_PERM; if (area->area_flags & ACE_F_PUBLISH) *flagsp |= ATF_PUBL; + if (area->area_flags & ACE_F_AUTHORITY) + *flagsp |= ATF_AUTHORITY; if (area->area_hw_addr_length != 0) { *flagsp |= ATF_COM; /* @@ -10524,10 +10564,11 @@ ip_sioctl_removeif(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, if (ipif->ipif_refcnt == 0 && ipif->ipif_ire_cnt == 0) { mutex_exit(&ill->ill_lock); mutex_exit(&connp->conn_lock); + ipif_non_duplicate(ipif); ipif_down_tail(ipif); ipif_free_tail(ipif); return (0); - } + } success = ipsq_pending_mp_add(connp, ipif, CONNP_TO_WQ(connp), mp, IPIF_FREE); mutex_exit(&ill->ill_lock); @@ -10565,6 +10606,7 @@ ip_sioctl_removeif_restart(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, ASSERT(IAM_WRITER_IPIF(ipif)); ASSERT(ipif->ipif_state_flags & IPIF_CONDEMNED); + ipif_non_duplicate(ipif); ipif_down_tail(ipif); ipif_free_tail(ipif); @@ -10682,10 +10724,19 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ipaddr_t addr; sin6_t *sin6; int err = 0; + ill_t *ill = ipif->ipif_ill; + boolean_t need_dl_down; + boolean_t need_arp_down; ip1dbg(("ip_sioctl_addr_tail(%s:%u %p)\n", - ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); + ill->ill_name, ipif->ipif_id, (void *)ipif)); ASSERT(IAM_WRITER_IPIF(ipif)); + + /* Must cancel any pending timer before taking the ill_lock */ + if (ipif->ipif_recovery_id != 0) + (void) untimeout(ipif->ipif_recovery_id); + ipif->ipif_recovery_id = 0; + if (ipif->ipif_isv6) { sin6 = (sin6_t *)sin; v6addr = sin6->sin6_addr; @@ -10693,17 +10744,37 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, addr = sin->sin_addr.s_addr; IN6_IPADDR_TO_V4MAPPED(addr, &v6addr); } - mutex_enter(&ipif->ipif_ill->ill_lock); + mutex_enter(&ill->ill_lock); ipif->ipif_v6lcl_addr = v6addr; if (ipif->ipif_flags & (IPIF_ANYCAST | IPIF_NOLOCAL)) { 
ipif->ipif_v6src_addr = ipv6_all_zeros; } else { ipif->ipif_v6src_addr = v6addr; } + ipif->ipif_addr_ready = 0; + + /* + * If the interface was previously marked as a duplicate, then since + * we've now got a "new" address, it should no longer be considered a + * duplicate -- even if the "new" address is the same as the old one. + * Note that if all ipifs are down, we may have a pending ARP down + * event to handle. This is because we want to recover from duplicates + * and thus delay tearing down ARP until the duplicates have been + * removed or disabled. + */ + need_dl_down = need_arp_down = B_FALSE; + if (ipif->ipif_flags & IPIF_DUPLICATE) { + need_arp_down = !need_up; + ipif->ipif_flags &= ~IPIF_DUPLICATE; + if (--ill->ill_ipif_dup_count == 0 && !need_up && + ill->ill_ipif_up_count == 0 && ill->ill_dl_up) { + need_dl_down = B_TRUE; + } + } - if ((ipif->ipif_isv6) && IN6_IS_ADDR_6TO4(&v6addr) && - (!ipif->ipif_ill->ill_is_6to4tun)) { - queue_t *wqp = ipif->ipif_ill->ill_wq; + if (ipif->ipif_isv6 && IN6_IS_ADDR_6TO4(&v6addr) && + !ill->ill_is_6to4tun) { + queue_t *wqp = ill->ill_wq; /* * The local address of this interface is a 6to4 address, @@ -10719,7 +10790,7 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, if (wqp->q_next->q_qinfo->qi_minfo->mi_idnum == TUN6TO4_MODID) { /* set for use in IP */ - ipif->ipif_ill->ill_is_6to4tun = 1; + ill->ill_is_6to4tun = 1; break; } wqp = wqp->q_next; @@ -10728,7 +10799,7 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, } ipif_set_default(ipif); - mutex_exit(&ipif->ipif_ill->ill_lock); + mutex_exit(&ill->ill_lock); if (need_up) { /* @@ -10748,6 +10819,11 @@ ip_sioctl_addr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, sctp_update_ipif(ipif, SCTP_IPIF_UPDATE); } + if (need_dl_down) + ill_dl_down(ill); + if (need_arp_down) + ipif_arp_down(ipif); + return (err); } @@ -10872,9 +10948,17 @@ ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, in6_addr_t 
v6addr; ill_t *ill = ipif->ipif_ill; int err = 0; + boolean_t need_dl_down; + boolean_t need_arp_down; + + ip1dbg(("ip_sioctl_dstaddr_tail(%s:%u %p)\n", ill->ill_name, + ipif->ipif_id, (void *)ipif)); + + /* Must cancel any pending timer before taking the ill_lock */ + if (ipif->ipif_recovery_id != 0) + (void) untimeout(ipif->ipif_recovery_id); + ipif->ipif_recovery_id = 0; - ip1dbg(("ip_sioctl_dstaddr_tail(%s:%u %p)\n", - ipif->ipif_ill->ill_name, ipif->ipif_id, (void *)ipif)); if (ipif->ipif_isv6) { sin6_t *sin6; @@ -10898,7 +10982,24 @@ ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, ipif->ipif_flags |= IPIF_POINTOPOINT; ipif->ipif_flags &= ~IPIF_BROADCAST; if (ipif->ipif_isv6) - ipif->ipif_ill->ill_flags |= ILLF_NONUD; + ill->ill_flags |= ILLF_NONUD; + } + + /* + * If the interface was previously marked as a duplicate, then since + * we've now got a "new" address, it should no longer be considered a + * duplicate -- even if the "new" address is the same as the old one. + * Note that if all ipifs are down, we may have a pending ARP down + * event to handle. + */ + need_dl_down = need_arp_down = B_FALSE; + if (ipif->ipif_flags & IPIF_DUPLICATE) { + need_arp_down = !need_up; + ipif->ipif_flags &= ~IPIF_DUPLICATE; + if (--ill->ill_ipif_dup_count == 0 && !need_up && + ill->ill_ipif_up_count == 0 && ill->ill_dl_up) { + need_dl_down = B_TRUE; + } } /* Set the new address. 
*/ @@ -10918,6 +11019,12 @@ ip_sioctl_dstaddr_tail(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, */ err = ipif_up(ipif, q, mp); } + + if (need_dl_down) + ill_dl_down(ill); + + if (need_arp_down) + ipif_arp_down(ipif); return (err); } @@ -12917,47 +13024,45 @@ void ipif_arp_down(ipif_t *ipif) { mblk_t *mp; + ill_t *ill = ipif->ipif_ill; - ip1dbg(("ipif_arp_down(%s:%u)\n", - ipif->ipif_ill->ill_name, ipif->ipif_id)); + ip1dbg(("ipif_arp_down(%s:%u)\n", ill->ill_name, ipif->ipif_id)); ASSERT(IAM_WRITER_IPIF(ipif)); /* Delete the mapping for the local address */ mp = ipif->ipif_arp_del_mp; if (mp != NULL) { - ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n", - dlpi_prim_str(*(int *)mp->b_rptr), *(int *)mp->b_rptr, - ipif->ipif_ill->ill_name, ipif->ipif_id)); - putnext(ipif->ipif_ill->ill_rq, mp); + ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n", + *(unsigned *)mp->b_rptr, ill->ill_name, ipif->ipif_id)); + putnext(ill->ill_rq, mp); ipif->ipif_arp_del_mp = NULL; } /* - * If this is the last ipif that is going down, we need - * to clean up ARP completely. + * If this is the last ipif that is going down and there are no + * duplicate addresses we may yet attempt to re-probe, then we need to + * clean up ARP completely. 
*/ - if (ipif->ipif_ill->ill_ipif_up_count == 0) { + if (ill->ill_ipif_up_count == 0 && ill->ill_ipif_dup_count == 0) { /* Send up AR_INTERFACE_DOWN message */ - mp = ipif->ipif_ill->ill_arp_down_mp; + mp = ill->ill_arp_down_mp; if (mp != NULL) { - ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n", - dlpi_prim_str(*(int *)mp->b_rptr), - *(int *)mp->b_rptr, ipif->ipif_ill->ill_name, + ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n", + *(unsigned *)mp->b_rptr, ill->ill_name, ipif->ipif_id)); - putnext(ipif->ipif_ill->ill_rq, mp); - ipif->ipif_ill->ill_arp_down_mp = NULL; + putnext(ill->ill_rq, mp); + ill->ill_arp_down_mp = NULL; } /* Tell ARP to delete the multicast mappings */ - mp = ipif->ipif_ill->ill_arp_del_mapping_mp; + mp = ill->ill_arp_del_mapping_mp; if (mp != NULL) { - ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n", - dlpi_prim_str(*(int *)mp->b_rptr), - *(int *)mp->b_rptr, ipif->ipif_ill->ill_name, + ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n", + *(unsigned *)mp->b_rptr, ill->ill_name, ipif->ipif_id)); - putnext(ipif->ipif_ill->ill_rq, mp); - ipif->ipif_ill->ill_arp_del_mapping_mp = NULL; + putnext(ill->ill_rq, mp); + ill->ill_arp_del_mapping_mp = NULL; } } } @@ -13000,9 +13105,8 @@ ipif_arp_setup_multicast(ipif_t *ipif, mblk_t **arp_add_mapping_mp) */ mp = ill->ill_arp_del_mapping_mp; if (mp != NULL) { - ip1dbg(("ipif_arp_down: %s (%u) for %s:%u\n", - dlpi_prim_str(*(int *)mp->b_rptr), - *(int *)mp->b_rptr, ill->ill_name, ipif->ipif_id)); + ip1dbg(("ipif_arp_down: arp cmd %x for %s:%u\n", + *(unsigned *)mp->b_rptr, ill->ill_name, ipif->ipif_id)); putnext(ill->ill_rq, mp); ill->ill_arp_del_mapping_mp = NULL; } @@ -13077,6 +13181,7 @@ ipif_arp_setup_multicast(ipif_t *ipif, mblk_t **arp_add_mapping_mp) return (0); } ASSERT(add_mp != NULL && del_mp != NULL); + ASSERT(ill->ill_arp_del_mapping_mp == NULL); ill->ill_arp_del_mapping_mp = del_mp; if (arp_add_mapping_mp != NULL) { /* The caller just wants the mblks allocated */ @@ -13095,15 +13200,18 @@ 
ipif_arp_setup_multicast(ipif_t *ipif, mblk_t **arp_add_mapping_mp) * though it only sets up the resolver for v6 * if it's an xresolv interface (one using an external resolver). * Honors ILLF_NOARP. - * The boolean value arp_just_publish, if B_TRUE, indicates that - * it only needs to send an AR_ENTRY_ADD message up to ARP for - * IPv4 interfaces. Currently, B_TRUE is only set when this - * function is called by ip_rput_dlpi_writer() to handle - * asynchronous hardware address change notification. + * The enumerated value res_act is used to tune the behavior. + * If set to Res_act_initial, then we set up all the resolver + * structures for a new interface. If set to Res_act_move, then + * we just send an AR_ENTRY_ADD message up to ARP for IPv4 + * interfaces; this is called by ip_rput_dlpi_writer() to handle + * asynchronous hardware address change notification. If set to + * Res_act_defend, then we tell ARP that it needs to send a single + * gratuitous message in defense of the address. * Returns error on failure. */ int -ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish) +ipif_resolver_up(ipif_t *ipif, enum ip_resolver_action res_act) { caddr_t addr; mblk_t *arp_up_mp = NULL; @@ -13116,22 +13224,43 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish) uchar_t *area_p = NULL; uchar_t *ared_p = NULL; int err = ENOMEM; + boolean_t was_dup; ip1dbg(("ipif_resolver_up(%s:%u) flags 0x%x\n", - ipif->ipif_ill->ill_name, ipif->ipif_id, - (uint_t)ipif->ipif_flags)); + ill->ill_name, ipif->ipif_id, (uint_t)ipif->ipif_flags)); ASSERT(IAM_WRITER_IPIF(ipif)); - if ((ill->ill_net_type != IRE_IF_RESOLVER) || - (ill->ill_isv6 && !(ill->ill_flags & ILLF_XRESOLV))) { + was_dup = B_FALSE; + if (res_act == Res_act_initial) { + ipif->ipif_addr_ready = 0; + /* + * We're bringing an interface up here. There's no way that we + * should need to shut down ARP now. 
+ */ + mutex_enter(&ill->ill_lock); + if (ipif->ipif_flags & IPIF_DUPLICATE) { + ipif->ipif_flags &= ~IPIF_DUPLICATE; + ill->ill_ipif_dup_count--; + was_dup = B_TRUE; + } + mutex_exit(&ill->ill_lock); + } + if (ipif->ipif_recovery_id != 0) + (void) untimeout(ipif->ipif_recovery_id); + ipif->ipif_recovery_id = 0; + if (ill->ill_net_type != IRE_IF_RESOLVER) { + ipif->ipif_addr_ready = 1; return (0); } + /* NDP will set the ipif_addr_ready flag when it's ready */ + if (ill->ill_isv6 && !(ill->ill_flags & ILLF_XRESOLV)) + return (0); if (ill->ill_isv6) { /* * External resolver for IPv6 */ - ASSERT(!arp_just_publish); + ASSERT(res_act == Res_act_initial); if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) { addr = (caddr_t)&ipif->ipif_v6lcl_addr; area_p = (uchar_t *)&ip6_area_template; @@ -13149,7 +13278,8 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish) err = EINVAL; goto failed; } else { - if (ill->ill_ipif_up_count == 0) + if (ill->ill_ipif_up_count == 0 && + ill->ill_ipif_dup_count == 0 && !was_dup) ill->ill_arp_bringup_pending = 1; mutex_exit(&ill->ill_lock); } @@ -13164,17 +13294,19 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish) * Add an entry for the local address in ARP only if it * is not UNNUMBERED and the address is not INADDR_ANY. */ - if (((ipif->ipif_flags & IPIF_UNNUMBERED) == 0) && area_p != NULL) { + if (!(ipif->ipif_flags & IPIF_UNNUMBERED) && area_p != NULL) { + area_t *area; + /* Now ask ARP to publish our address. */ arp_add_mp = ill_arp_alloc(ill, area_p, addr); if (arp_add_mp == NULL) goto failed; - if (arp_just_publish) { + area = (area_t *)arp_add_mp->b_rptr; + if (res_act != Res_act_initial) { /* * Copy the new hardware address and length into * arp_add_mp to be sent to ARP. 
*/ - area_t *area = (area_t *)arp_add_mp->b_rptr; area->area_hw_addr_length = ill->ill_phys_addr_length; bcopy((char *)ill->ill_phys_addr, @@ -13182,10 +13314,20 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish) area->area_hw_addr_length); } - ((area_t *)arp_add_mp->b_rptr)->area_flags = - ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MYADDR; + area->area_flags = ACE_F_PERMANENT | ACE_F_PUBLISH | + ACE_F_MYADDR; + + if (res_act == Res_act_defend) { + area->area_flags |= ACE_F_DEFEND; + /* + * If we're just defending our address now, then + * there's no need to set up ARP multicast mappings. + * The publish command is enough. + */ + goto done; + } - if (arp_just_publish) + if (res_act != Res_act_initial) goto arp_setup_multicast; /* @@ -13197,15 +13339,17 @@ ipif_resolver_up(ipif_t *ipif, boolean_t arp_just_publish) goto failed; } else { - if (arp_just_publish) + if (res_act != Res_act_initial) goto done; } /* * Need to bring up ARP or setup multicast mapping only * when the first interface is coming UP. */ - if (ill->ill_ipif_up_count != 0) + if (ill->ill_ipif_up_count != 0 || ill->ill_ipif_dup_count != 0 || + was_dup) { goto done; + } /* * Allocate an ARP down message (to be saved) and an ARP up @@ -13236,7 +13380,7 @@ arp_setup_multicast: ASSERT(arp_add_mapping_mp != NULL); } -done:; +done: if (arp_del_mp != NULL) { ASSERT(ipif->ipif_arp_del_mp == NULL); ipif->ipif_arp_del_mp = arp_del_mp; @@ -13251,41 +13395,48 @@ done:; } if (arp_up_mp != NULL) { ip1dbg(("ipif_resolver_up: ARP_UP for %s:%u\n", - ipif->ipif_ill->ill_name, ipif->ipif_id)); + ill->ill_name, ipif->ipif_id)); putnext(ill->ill_rq, arp_up_mp); } if (arp_add_mp != NULL) { ip1dbg(("ipif_resolver_up: ARP_ADD for %s:%u\n", - ipif->ipif_ill->ill_name, ipif->ipif_id)); + ill->ill_name, ipif->ipif_id)); + /* + * If it's an extended ARP implementation, then we'll wait to + * hear that DAD has finished before using the interface. 
+ */ + if (!ill->ill_arp_extend) + ipif->ipif_addr_ready = 1; putnext(ill->ill_rq, arp_add_mp); + } else { + ipif->ipif_addr_ready = 1; } if (arp_add_mapping_mp != NULL) { ip1dbg(("ipif_resolver_up: MAPPING_ADD for %s:%u\n", - ipif->ipif_ill->ill_name, ipif->ipif_id)); + ill->ill_name, ipif->ipif_id)); putnext(ill->ill_rq, arp_add_mapping_mp); } - if (arp_just_publish) + if (res_act != Res_act_initial) return (0); if (ill->ill_flags & ILLF_NOARP) err = ill_arp_off(ill); else err = ill_arp_on(ill); - if (err) { + if (err != 0) { ip0dbg(("ipif_resolver_up: arp_on/off failed %d\n", err)); freemsg(ipif->ipif_arp_del_mp); - if (arp_down_mp != NULL) - freemsg(ill->ill_arp_down_mp); - if (ill->ill_arp_del_mapping_mp != NULL) - freemsg(ill->ill_arp_del_mapping_mp); + freemsg(ill->ill_arp_down_mp); + freemsg(ill->ill_arp_del_mapping_mp); ipif->ipif_arp_del_mp = NULL; ill->ill_arp_down_mp = NULL; ill->ill_arp_del_mapping_mp = NULL; return (err); } - return (ill->ill_ipif_up_count != 0 ? 0 : EINPROGRESS); + return ((ill->ill_ipif_up_count != 0 || was_dup || + ill->ill_ipif_dup_count != 0) ? 0 : EINPROGRESS); -failed:; +failed: ip1dbg(("ipif_resolver_up: FAILED\n")); freemsg(arp_add_mp); freemsg(arp_del_mp); @@ -13297,6 +13448,143 @@ failed:; } /* + * This routine restarts IPv4 duplicate address detection (DAD) when a link has + * just gone back up. + */ +static void +ipif_arp_start_dad(ipif_t *ipif) +{ + ill_t *ill = ipif->ipif_ill; + mblk_t *arp_add_mp; + area_t *area; + + if (ill->ill_net_type != IRE_IF_RESOLVER || ill->ill_arp_closing || + (ipif->ipif_flags & IPIF_UNNUMBERED) || + ipif->ipif_lcl_addr == INADDR_ANY || + (arp_add_mp = ill_arp_alloc(ill, (uchar_t *)&ip_area_template, + (char *)&ipif->ipif_lcl_addr)) == NULL) { + /* + * If we can't contact ARP for some reason, that's not really a + * problem. Just send out the routing socket notification that + * DAD completion would have done, and continue. 
+ */ + ipif_mask_reply(ipif); + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + sctp_update_ipif(ipif, SCTP_IPIF_UP); + ipif->ipif_addr_ready = 1; + return; + } + + /* Setting the 'unverified' flag restarts DAD */ + area = (area_t *)arp_add_mp->b_rptr; + area->area_flags = ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MYADDR | + ACE_F_UNVERIFIED; + putnext(ill->ill_rq, arp_add_mp); +} + +static void +ipif_ndp_start_dad(ipif_t *ipif) +{ + nce_t *nce; + + nce = ndp_lookup_v6(ipif->ipif_ill, &ipif->ipif_v6lcl_addr, B_FALSE); + if (nce == NULL) + return; + + if (!ndp_restart_dad(nce)) { + /* + * If we can't restart DAD for some reason, that's not really a + * problem. Just send out the routing socket notification that + * DAD completion would have done, and continue. + */ + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + sctp_update_ipif(ipif, SCTP_IPIF_UP); + ipif->ipif_addr_ready = 1; + } + NCE_REFRELE(nce); +} + +/* + * Restart duplicate address detection on all interfaces on the given ill. + * + * This is called when an interface transitions from down to up + * (DL_NOTE_LINK_UP) or up to down (DL_NOTE_LINK_DOWN). + * + * Note that since the underlying physical link has transitioned, we must cause + * at least one routing socket message to be sent here, either via DAD + * completion or just by default on the first ipif. (If we don't do this, then + * in.mpathd will see long delays when doing link-based failure recovery.) + */ +void +ill_restart_dad(ill_t *ill, boolean_t went_up) +{ + ipif_t *ipif; + + if (ill == NULL) + return; + + /* + * If layer two doesn't support duplicate address detection, then just + * send the routing socket message now and be done with it. 
+ */ + if ((ill->ill_isv6 && (ill->ill_flags & ILLF_XRESOLV)) || + (!ill->ill_isv6 && !ill->ill_arp_extend)) { + ip_rts_ifmsg(ill->ill_ipif); + return; + } + + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + if (went_up) { + if (ipif->ipif_flags & IPIF_UP) { + if (ill->ill_isv6) + ipif_ndp_start_dad(ipif); + else + ipif_arp_start_dad(ipif); + } else if (ill->ill_isv6 && + (ipif->ipif_flags & IPIF_DUPLICATE)) { + /* + * For IPv4, the ARP module itself will + * automatically start the DAD process when it + * sees DL_NOTE_LINK_UP. We respond to the + * AR_CN_READY at the completion of that task. + * For IPv6, we must kick off the bring-up + * process now. + */ + ndp_do_recovery(ipif); + } else { + /* + * Unfortunately, the first ipif is "special" + * and represents the underlying ill in the + * routing socket messages. Thus, when this + * one ipif is down, we must still notify so + * that the user knows the IFF_RUNNING status + * change. (If the first ipif is up, then + * we'll handle eventual routing socket + * notification via DAD completion.) + */ + if (ipif == ill->ill_ipif) + ip_rts_ifmsg(ill->ill_ipif); + } + } else { + /* + * After link down, we'll need to send a new routing + * message when the link comes back, so clear + * ipif_addr_ready. + */ + ipif->ipif_addr_ready = 0; + } + } + + /* + * If we've torn down links, then notify the user right away. + */ + if (!went_up) + ip_rts_ifmsg(ill->ill_ipif); +} + +/* * Wakeup all threads waiting to enter the ipsq, and sleeping * on any of the ills in this ipsq. 
The ill_lock of the ill * must be held so that waiters don't miss wakeups @@ -13716,6 +14004,7 @@ ill_down_ipifs(ill_t *ill, mblk_t *mp, int index, boolean_t chk_nofailover) if (!ipif->ipif_isv6) ipif_check_bcast_ires(ipif); (void) ipif_logical_down(ipif, NULL, NULL); + ipif_non_duplicate(ipif); ipif_down_tail(ipif); /* * We don't do ipif_multicast_down for IPv4 in @@ -16658,7 +16947,7 @@ ipif_move(ipif_t *ipif, ill_t *to_ill, queue_t *q, mblk_t *mp, * move. */ rep_ipif->ipif_flags = ipif->ipif_flags | IPIF_NOFAILOVER; - rep_ipif->ipif_flags &= ~IPIF_UP; + rep_ipif->ipif_flags &= ~IPIF_UP & ~IPIF_DUPLICATE; rep_ipif->ipif_replace_zero = B_TRUE; mutex_init(&rep_ipif->ipif_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL); @@ -17796,7 +18085,7 @@ ipif_down(ipif_t *ipif, queue_t *q, mblk_t *mp) return (EINPROGRESS); } -static void +void ipif_down_tail(ipif_t *ipif) { ill_t *ill = ipif->ipif_ill; @@ -17809,11 +18098,10 @@ ipif_down_tail(ipif_t *ipif) * there are other logical units that are up. * This occurs e.g. when we change a "significant" IFF_ flag. */ - if (ipif->ipif_ill->ill_wq != NULL) { - if (!ill->ill_logical_down && (ill->ill_ipif_up_count == 0) && - ill->ill_dl_up) { - ill_dl_down(ill); - } + if (ill->ill_wq != NULL && !ill->ill_logical_down && + ill->ill_ipif_up_count == 0 && ill->ill_ipif_dup_count == 0 && + ill->ill_dl_up) { + ill_dl_down(ill); } ill->ill_logical_down = 0; @@ -17821,7 +18109,7 @@ ipif_down_tail(ipif_t *ipif) * Have to be after removing the routes in ipif_down_delete_ire. 
*/ if (ipif->ipif_isv6) { - if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV) + if (ill->ill_flags & ILLF_XRESOLV) ipif_arp_down(ipif); } else { ipif_arp_down(ipif); @@ -18048,6 +18336,10 @@ ipif_free(ipif_t *ipif) { ASSERT(IAM_WRITER_IPIF(ipif)); + if (ipif->ipif_recovery_id != 0) + (void) untimeout(ipif->ipif_recovery_id); + ipif->ipif_recovery_id = 0; + /* Remove conn references */ reset_conn_ipif(ipif); @@ -18127,6 +18419,9 @@ ipif_free_tail(ipif_t *ipif) rw_exit(&ill_g_lock); mutex_destroy(&ipif->ipif_saved_ire_lock); + + ASSERT(!(ipif->ipif_flags & (IPIF_UP | IPIF_DUPLICATE))); + /* Free the memory. */ mi_free((char *)ipif); } @@ -18344,7 +18639,7 @@ ipif_lookup_on_name(char *name, size_t namelen, boolean_t do_alloc, * but might not make the system manager very popular. (May be called * as writer.) */ -static void +void ipif_mask_reply(ipif_t *ipif) { icmph_t *icmph; @@ -18900,13 +19195,14 @@ ipif_up(ipif_t *ipif, queue_t *q, mblk_t *mp) err = ipif_ndp_up(ipif, &ipif->ipif_v6lcl_addr, B_FALSE); if (err != 0) { - mp = ipsq_pending_mp_get(ipsq, &connp); + if (err != EINPROGRESS) + mp = ipsq_pending_mp_get(ipsq, &connp); return (err); } } /* Now, ARP */ - if ((err = ipif_resolver_up(ipif, B_FALSE)) == - EINPROGRESS) { + err = ipif_resolver_up(ipif, Res_act_initial); + if (err == EINPROGRESS) { /* We will complete it in ip_arp_done */ return (err); } @@ -19455,7 +19751,6 @@ ipif_up_done(ipif_t *ipif) } - /* This is the first interface on this ill */ if (ipif->ipif_ipif_up_count == 1 && !loopback) { /* @@ -19496,14 +19791,7 @@ ipif_up_done(ipif_t *ipif) } } - /* - * This had to be deferred until we had bound. - * tell routing sockets that this interface is up - */ - ip_rts_ifmsg(ipif); - ip_rts_newaddrmsg(RTM_ADD, 0, ipif); - - if (!loopback) { + if (!loopback && ipif->ipif_addr_ready) { /* Broadcast an address mask reply. 
*/ ipif_mask_reply(ipif); } @@ -19513,8 +19801,19 @@ ipif_up_done(ipif_t *ipif) } if (src_ipif_held) ipif_refrele(src_ipif); - /* Let SCTP update the status for this ipif */ - sctp_update_ipif(ipif, SCTP_IPIF_UP); + + /* + * This had to be deferred until we had bound. Tell routing sockets and + * others that this interface is up if it looks like the address has + * been validated. Otherwise, if it isn't ready yet, wait for + * duplicate address detection to do its thing. + */ + if (ipif->ipif_addr_ready) { + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + /* Let SCTP update the status for this ipif */ + sctp_update_ipif(ipif, SCTP_IPIF_UP); + } return (0); bad: @@ -19919,7 +20218,8 @@ retry: /* Always skip NOLOCAL and ANYCAST interfaces */ if (ipif->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) continue; - if (!(ipif->ipif_flags & IPIF_UP)) + if (!(ipif->ipif_flags & IPIF_UP) || + !ipif->ipif_addr_ready) continue; if (ipif->ipif_zoneid != zoneid && ipif->ipif_zoneid != ALL_ZONES) @@ -20700,7 +21000,8 @@ ip_sioctl_slifname(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp, * This ill has not been inserted into the global list. 
* So we are still single threaded and don't need any lock */ - ipif->ipif_flags = lifr->lifr_flags & IFF_LOGINT_FLAGS; + ipif->ipif_flags = lifr->lifr_flags & IFF_LOGINT_FLAGS & + ~IFF_DUPLICATE; ill->ill_flags = lifr->lifr_flags & IFF_PHYINTINST_FLAGS; ill->ill_phyint->phyint_flags = lifr->lifr_flags & IFF_PHYINT_FLAGS; diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c index efdb39b657..2b40b14d08 100644 --- a/usr/src/uts/common/inet/ip/ip_ndp.c +++ b/usr/src/uts/common/inet/ip/ip_ndp.c @@ -28,18 +28,23 @@ #include <sys/types.h> #include <sys/stream.h> #include <sys/stropts.h> +#include <sys/strsun.h> #include <sys/sysmacros.h> #include <sys/errno.h> #include <sys/dlpi.h> #include <sys/socket.h> #include <sys/ddi.h> +#include <sys/sunddi.h> #include <sys/cmn_err.h> #include <sys/debug.h> #include <sys/vtrace.h> #include <sys/kmem.h> #include <sys/zone.h> +#include <sys/ethernet.h> +#include <sys/sdt.h> #include <net/if.h> +#include <net/if_types.h> #include <net/if_dl.h> #include <net/route.h> #include <netinet/in.h> @@ -58,13 +63,22 @@ #include <inet/ip_ndp.h> #include <inet/ipsec_impl.h> #include <inet/ipsec_info.h> +#include <inet/sctp_ip.h> /* * Function names with nce_ prefix are static while function * names with ndp_ prefix are used by rest of the IP. + * + * Lock ordering: + * + * ndp_g_lock -> ill_lock -> nce_lock + * + * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and + * nce_next. Nce_lock protects the contents of the NCE (particularly + * nce_refcnt). 
*/ -static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, +static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, uint32_t ll_addr_len); static void nce_fastpath(nce_t *nce); static void nce_ire_delete(nce_t *nce); @@ -84,7 +98,6 @@ static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); static boolean_t nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, const in6_addr_t *target, int flag); -static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); extern void th_trace_rrecord(th_trace_t *); static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, @@ -131,6 +144,9 @@ ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, return (status); } +/* Non-tunable probe interval, based on link capabilities */ +#define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) + /* * NDP Cache Entry creation routine. * Mapped entries will never do NUD . @@ -148,6 +164,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, mblk_t *mp; mblk_t *template; nce_t **ncep; + int err; boolean_t dropped = B_FALSE; ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); @@ -237,6 +254,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, if (ill->ill_state_flags & ILL_CONDEMNED) { mutex_exit(&ill->ill_lock); freeb(mp); + freeb(template); return (EINVAL); } if ((nce->nce_next = *ncep) != NULL) @@ -251,13 +269,23 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, ill->ill_nce_cnt++; mutex_exit(&ill->ill_lock); - /* - * Before we insert the nce, honor the UNSOL_ADV flag. - * We cannot hold the ndp_g_lock and call nce_xmit - * which does a putnext. 
- */ - if (flags & NCE_F_UNSOL_ADV) { - flags |= NDP_ORIDE; + err = 0; + if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { + mutex_enter(&nce->nce_lock); + mutex_exit(&ndp6.ndp_g_lock); + nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; + mutex_exit(&nce->nce_lock); + dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, + &ipv6_all_zeros, addr, NDP_PROBE); + if (dropped) { + mutex_enter(&nce->nce_lock); + nce->nce_pcnt++; + mutex_exit(&nce->nce_lock); + } + NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); + mutex_enter(&ndp6.ndp_g_lock); + err = EINPROGRESS; + } else if (flags & NCE_F_UNSOL_ADV) { /* * We account for the transmit below by assigning one * less than the ndd variable. Subsequent decrements @@ -273,7 +301,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, B_TRUE, /* use ill_nd_lla */ addr, /* Source and target of the advertisement pkt */ &ipv6_all_hosts_mcast, /* Destination of the packet */ - flags); + NDP_ORIDE); mutex_enter(&nce->nce_lock); if (dropped) nce->nce_unsolicit_count++; @@ -292,7 +320,7 @@ ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, */ if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) nce_fastpath(nce); - return (0); + return (err); } int @@ -609,6 +637,41 @@ nce_ire_delete1(ire_t *ire, char *nce_arg) } /* + * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 
+ */ +boolean_t +ndp_restart_dad(nce_t *nce) +{ + boolean_t started; + boolean_t dropped; + + if (nce == NULL) + return (B_FALSE); + mutex_enter(&nce->nce_lock); + if (nce->nce_state == ND_PROBE) { + mutex_exit(&nce->nce_lock); + started = B_TRUE; + } else if (nce->nce_state == ND_REACHABLE) { + nce->nce_state = ND_PROBE; + nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; + mutex_exit(&nce->nce_lock); + dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, + B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); + if (dropped) { + mutex_enter(&nce->nce_lock); + nce->nce_pcnt++; + mutex_exit(&nce->nce_lock); + } + NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); + started = B_TRUE; + } else { + mutex_exit(&nce->nce_lock); + started = B_FALSE; + } + return (started); +} + +/* * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. * If one is found, the refcnt on the nce will be incremented. */ @@ -804,7 +867,7 @@ ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) } return; } - ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); + ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); if (!is_adv) { /* If this is a SOLICITATION request only */ if (ll_changed) @@ -1381,11 +1444,16 @@ nce_solicit(nce_t *nce, mblk_t *mp) if (ipif != NULL) break; } - if (src_ill == NULL) { - /* May be a forwarding packet */ - src_ill = ill; + /* + * If no relevant ipif can be found, then it's not one of our + * addresses. Reset to :: and let nce_xmit. If an ipif can be + * found, but it's not yet done with DAD verification, then + * just postpone this transmission until later. + */ + if (src_ill == NULL) src = ipv6_all_zeros; - } + else if (!ipif->ipif_addr_ready) + return (ill->ill_reachable_retrans_time); } dst = nce->nce_addr; /* @@ -1394,7 +1462,7 @@ nce_solicit(nce_t *nce, mblk_t *mp) * appropriately. 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&src)) - src_ill = NULL; + src_ill = NULL; nce->nce_rcnt--; mutex_exit(&nce->nce_lock); rw_exit(&ill_g_lock); @@ -1407,8 +1475,350 @@ nce_solicit(nce_t *nce, mblk_t *mp) return (ill->ill_reachable_retrans_time); } +/* + * Attempt to recover an address on an interface that's been marked as a + * duplicate. Because NCEs are destroyed when the interface goes down, there's + * no easy way to just probe the address and have the right thing happen if + * it's no longer in use. Instead, we just bring it up normally and allow the + * regular interface start-up logic to probe for a remaining duplicate and take + * us back down if necessary. + * Neither DHCP nor temporary addresses arrive here; they're excluded by + * ip_ndp_excl. + */ +/* ARGSUSED */ +static void +ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) +{ + ill_t *ill = rq->q_ptr; + ipif_t *ipif; + in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; + + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + /* + * We do not support recovery of proxy ARP'd interfaces, + * because the system lacks a complete proxy ARP mechanism. + */ + if ((ipif->ipif_flags & IPIF_POINTOPOINT) || + !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { + continue; + } + + /* + * If we have already recovered, then ignore. + */ + mutex_enter(&ill->ill_lock); + if (!(ipif->ipif_flags & IPIF_DUPLICATE)) { + mutex_exit(&ill->ill_lock); + continue; + } + + ipif->ipif_flags &= ~IPIF_DUPLICATE; + ill->ill_ipif_dup_count--; + mutex_exit(&ill->ill_lock); + ipif->ipif_was_dup = B_TRUE; + + if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) + (void) ipif_up_done_v6(ipif); + } + freeb(mp); +} + +/* + * Attempt to recover an IPv6 interface that's been shut down as a duplicate. + * As long as someone else holds the address, the interface will stay down. + * When that conflict goes away, the interface is brought back up. 
This is + * done so that accidental shutdowns of addresses aren't made permanent. Your + * server will recover from a failure. + * + * For DHCP and temporary addresses, recovery is not done in the kernel. + * Instead, it's handled by user space processes (dhcpagent and in.ndpd). + * + * This function is entered on a timer expiry; the ID is in ipif_recovery_id. + */ +static void +ipif6_dup_recovery(void *arg) +{ + ipif_t *ipif = arg; + + ipif->ipif_recovery_id = 0; + if (!(ipif->ipif_flags & IPIF_DUPLICATE)) + return; + + /* If the link is down, we'll retry this later */ + if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) + return; + + ndp_do_recovery(ipif); +} + +/* + * Perform interface recovery by forcing the duplicate interfaces up and + * allowing the system to determine which ones should stay up. + * + * Called both by recovery timer expiry and link-up notification. + */ void -ndp_input_solicit(ill_t *ill, mblk_t *mp) +ndp_do_recovery(ipif_t *ipif) +{ + ill_t *ill = ipif->ipif_ill; + mblk_t *mp; + + mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); + if (mp == NULL) { + ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, + ipif, MSEC_TO_TICK(ip_dup_recovery)); + } else { + bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, + sizeof (ipif->ipif_v6lcl_addr)); + ill_refhold(ill); + (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, + CUR_OP, B_FALSE); + } +} + +/* + * Find the solicitation in the given message, and extract printable details + * (MAC and IP addresses) from it. + */ +static nd_neighbor_solicit_t * +ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, + size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) +{ + nd_neighbor_solicit_t *ns; + ip6_t *ip6h; + uchar_t *addr; + int alen; + + alen = 0; + ip6h = (ip6_t *)mp->b_rptr; + if (dl_mp == NULL) { + nd_opt_hdr_t *opt; + int nslen; + + /* + * If it's from the fast-path, then it can't be a probe + * message, and thus must include the source linkaddr option. 
+ * Extract that here. + */ + ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); + nslen = mp->b_wptr - (uchar_t *)ns; + if ((nslen -= sizeof (*ns)) > 0) { + opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, + ND_OPT_SOURCE_LINKADDR); + if (opt != NULL && + opt->nd_opt_len * 8 - sizeof (*opt) >= + ill->ill_nd_lla_len) { + addr = (uchar_t *)(opt + 1); + alen = ill->ill_nd_lla_len; + } + } + /* + * We cheat a bit here for the sake of printing usable log + * messages in the rare case where the reply we got was unicast + * without a source linkaddr option, and the interface is in + * fastpath mode. (Sigh.) + */ + if (alen == 0 && ill->ill_type == IFT_ETHER && + MBLKHEAD(mp) >= sizeof (struct ether_header)) { + struct ether_header *pether; + + pether = (struct ether_header *)((char *)ip6h - + sizeof (*pether)); + addr = pether->ether_shost.ether_addr_octet; + alen = ETHERADDRL; + } + } else { + dl_unitdata_ind_t *dlu; + + dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; + alen = dlu->dl_src_addr_length; + if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && + dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { + addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; + if (ill->ill_sap_length < 0) { + alen += ill->ill_sap_length; + } else { + addr += ill->ill_sap_length; + alen -= ill->ill_sap_length; + } + } + } + if (alen > 0) { + *haddr = addr; + (void) mac_colon_addr(addr, alen, hbuf, hlen); + } else { + *haddr = NULL; + (void) strcpy(hbuf, "?"); + } + ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); + (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); + return (ns); +} + +/* + * This is for exclusive changes due to NDP duplicate address detection + * failure. 
+ */ +/* ARGSUSED */ +static void +ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) +{ + ill_t *ill = rq->q_ptr; + ipif_t *ipif; + char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ + char hbuf[MAC_STR_LEN]; + char sbuf[INET6_ADDRSTRLEN]; + nd_neighbor_solicit_t *ns; + mblk_t *dl_mp = NULL; + uchar_t *haddr; + + if (DB_TYPE(mp) != M_DATA) { + dl_mp = mp; + mp = mp->b_cont; + } + ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, + sizeof (sbuf), &haddr); + if (haddr != NULL && + bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { + /* + * Ignore conflicts generated by misbehaving switches that just + * reflect our own messages back to us. + */ + goto ignore_conflict; + } + (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); + for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { + + if ((ipif->ipif_flags & IPIF_POINTOPOINT) || + !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, + &ns->nd_ns_target)) { + continue; + } + + /* If it's already marked, then don't do anything. */ + if (ipif->ipif_flags & IPIF_DUPLICATE) + continue; + + /* + * If this is a failure during duplicate recovery, then don't + * complain. It may take a long time to recover. 
+ */ + if (!ipif->ipif_was_dup) { + if (ipif->ipif_id != 0) { + (void) snprintf(ibuf + ill->ill_name_length - 1, + sizeof (ibuf) - ill->ill_name_length + 1, + ":%d", ipif->ipif_id); + } + cmn_err(CE_WARN, "%s has duplicate address %s (in " + "use by %s); disabled", ibuf, sbuf, hbuf); + } + mutex_enter(&ill->ill_lock); + ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); + ipif->ipif_flags |= IPIF_DUPLICATE; + ill->ill_ipif_dup_count++; + mutex_exit(&ill->ill_lock); + (void) ipif_down(ipif, NULL, NULL); + ipif_down_tail(ipif); + if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && + ill->ill_net_type == IRE_IF_RESOLVER && + ip_dup_recovery > 0) + ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, + ipif, MSEC_TO_TICK(ip_dup_recovery)); + } +ignore_conflict: + if (dl_mp != NULL) + freeb(dl_mp); + freemsg(mp); +} + +/* + * Handle failure by tearing down the ipifs with the specified address. Note + * that tearing down the ipif also means deleting the nce through ipif_down, so + * it's not possible to do recovery by just restarting the nce timer. Instead, + * we start a timer on the ipif. + */ +static void +ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) +{ + if ((mp = copymsg(mp)) != NULL) { + if (dl_mp == NULL) + dl_mp = mp; + else if ((dl_mp = copyb(dl_mp)) != NULL) + dl_mp->b_cont = mp; + if (dl_mp == NULL) { + freemsg(mp); + } else { + ill_refhold(ill); + (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, + ip_ndp_excl, CUR_OP, B_FALSE); + } + } + ndp_delete(nce); +} + +/* + * Handle a discovered conflict: some other system is advertising that it owns + * one of our IP addresses. We need to defend ourselves, or just shut down the + * interface. 
+ */ +static void +ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) +{ + ipif_t *ipif; + uint32_t now; + uint_t maxdefense; + uint_t defs; + + ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, + NULL, NULL); + if (ipif == NULL) + return; + /* + * First, figure out if this address is disposable. + */ + if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) + maxdefense = ip_max_temp_defend; + else + maxdefense = ip_max_defend; + + /* + * Now figure out how many times we've defended ourselves. Ignore + * defenses that happened long in the past. + */ + now = gethrestime_sec(); + mutex_enter(&nce->nce_lock); + if ((defs = nce->nce_defense_count) > 0 && + now - nce->nce_defense_time > ip_defend_interval) { + nce->nce_defense_count = defs = 0; + } + nce->nce_defense_count++; + nce->nce_defense_time = now; + mutex_exit(&nce->nce_lock); + ipif_refrele(ipif); + + /* + * If we've defended ourselves too many times already, then give up and + * tear down the interface(s) using this address. Otherwise, defend by + * sending out an unsolicited Neighbor Advertisement. + */ + if (defs >= maxdefense) { + ip_ndp_failure(ill, mp, dl_mp, nce); + } else { + char hbuf[MAC_STR_LEN]; + char sbuf[INET6_ADDRSTRLEN]; + uchar_t *haddr; + + (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, + sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); + cmn_err(CE_WARN, "node %s is using our IP address %s on %s", + hbuf, sbuf, ill->ill_name); + (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, + &nce->nce_addr, &ipv6_all_hosts_mcast, NDP_ORIDE); + } +} + +static void +ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) { nd_neighbor_solicit_t *ns; uint32_t hlen = ill->ill_nd_lla_len; @@ -1485,53 +1895,16 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp) if (opt != NULL) { opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); if (opt != NULL) { - /* - * No source link layer address option should - * be present in a valid DAD request. 
- */ - if (IN6_IS_ADDR_UNSPECIFIED(&src)) { - ip1dbg(("ndp_input_solicit: source link-layer " - "address option present with an " - "unspecified source. \n")); - bad_solicit = B_TRUE; - goto done; - } haddr = (uchar_t *)&opt[1]; - if (hlen > opt->nd_opt_len * 8 || + if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || hlen == 0) { + ip1dbg(("ndp_input_advert: bad SLLA\n")); bad_solicit = B_TRUE; goto done; } } } - /* - * haddr can be NULL if no options are present, - * or no Source link layer address is present in, - * recvd NDP options of solicitation message. - */ - if (haddr == NULL) { - nce_t *nnce; - mutex_enter(&ndp6.ndp_g_lock); - nnce = *((nce_t **)NCE_HASH_PTR_V6(src)); - nnce = nce_lookup_addr(ill, &src, nnce); - mutex_exit(&ndp6.ndp_g_lock); - if (nnce == NULL) { - in6_addr_t dst = ipv6_solicited_node_mcast; - - /* Form solicited node multicast address */ - dst.s6_addr32[3] |= src.s6_addr32[3]; - (void) nce_xmit(ill, - ND_NEIGHBOR_SOLICIT, - ill, - B_TRUE, - &target, - &dst, - flag); - bad_solicit = B_TRUE; - goto done; - } - } /* Set override flag, it will be reset later if need be. */ flag |= NDP_ORIDE; if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { @@ -1544,10 +1917,39 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp) * the source is unspecified address. */ if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { - int err = 0; + int err; nce_t *nnce; ASSERT(ill->ill_isv6); + /* + * Regular solicitations *must* include the Source Link-Layer + * Address option. Ignore messages that do not. + */ + if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { + ip1dbg(("ndp_input_solicit: source link-layer address " + "option missing with a specified source.\n")); + bad_solicit = B_TRUE; + goto done; + } + + /* + * This is a regular solicitation. If we're still in the + * process of verifying the address, then don't respond at all + * and don't keep track of the sender. 
+ */ + if (our_nce->nce_state == ND_PROBE) + goto done; + + /* + * If the solicitation doesn't have sender hardware address + * (legal for unicast solicitation), then process without + * installing the return NCE. Either we already know it, or + * we'll be forced to look it up when (and if) we reply to the + * packet. + */ + if (haddr == NULL) + goto no_source; + err = ndp_lookup_then_add(ill, haddr, &src, /* Soliciting nodes address */ @@ -1577,11 +1979,38 @@ ndp_input_solicit(ill_t *ill, mblk_t *mp) err)); goto done; } +no_source: flag |= NDP_SOLICITED; } else { /* - * This is a DAD req, multicast the advertisement - * to the all-nodes address. + * No source link layer address option should be present in a + * valid DAD request. + */ + if (haddr != NULL) { + ip1dbg(("ndp_input_solicit: source link-layer address " + "option present with an unspecified source.\n")); + bad_solicit = B_TRUE; + goto done; + } + if (our_nce->nce_state == ND_PROBE) { + /* + * Internally looped-back probes won't have DLPI + * attached to them. External ones (which are sent by + * multicast) always will. Just ignore our own + * transmissions. + */ + if (dl_mp != NULL) { + /* + * If someone else is probing our address, then + * we've crossed wires. Declare failure. + */ + ip_ndp_failure(ill, mp, dl_mp, our_nce); + } + goto done; + } + /* + * This is a DAD probe. Multicast the advertisement to the + * all-nodes address. 
*/ src = ipv6_all_hosts_mcast; } @@ -1605,7 +2034,7 @@ done: } void -ndp_input_advert(ill_t *ill, mblk_t *mp) +ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) { nd_neighbor_advert_t *na; uint32_t hlen = ill->ill_nd_lla_len; @@ -1639,6 +2068,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp) opt = (nd_opt_hdr_t *)&na[1]; if (!ndp_verify_optlen(opt, len - sizeof (nd_neighbor_advert_t))) { + ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); return; } @@ -1647,8 +2077,9 @@ ndp_input_advert(ill_t *ill, mblk_t *mp) opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); if (opt != NULL) { haddr = (uchar_t *)&opt[1]; - if (hlen > opt->nd_opt_len * 8 || + if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || hlen == 0) { + ip1dbg(("ndp_input_advert: bad SLLA\n")); BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); return; @@ -1676,13 +2107,41 @@ ndp_input_advert(ill_t *ill, mblk_t *mp) /* We have to drop the lock since ndp_process calls put* */ rw_exit(&ill_g_lock); if (dst_nce != NULL) { - if (na->nd_na_flags_reserved & - ND_NA_FLAG_ROUTER) { - dst_nce->nce_flags |= NCE_F_ISROUTER; + if ((dst_nce->nce_flags & NCE_F_PERMANENT) && + dst_nce->nce_state == ND_PROBE) { + /* + * Someone else sent an advertisement for an + * address that we're trying to configure. + * Tear it down. Note that dl_mp might be NULL + * if we're getting a unicast reply. This + * isn't typically done (multicast is the norm + * in response to a probe), but ip_ndp_failure + * will handle the dl_mp == NULL case as well. + */ + ip_ndp_failure(ill, mp, dl_mp, dst_nce); + } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { + /* + * Someone just announced one of our local + * addresses. If it wasn't us, then this is a + * conflict. Defend the address or shut it + * down. 
+ */ + if (dl_mp != NULL && + (haddr == NULL || + nce_cmp_ll_addr(dst_nce, haddr, + ill->ill_nd_lla_len))) { + ip_ndp_conflict(ill, mp, dl_mp, + dst_nce); + } + } else { + if (na->nd_na_flags_reserved & + ND_NA_FLAG_ROUTER) { + dst_nce->nce_flags |= NCE_F_ISROUTER; + } + /* B_TRUE indicates this an advertisement */ + ndp_process(dst_nce, haddr, + na->nd_na_flags_reserved, B_TRUE); } - /* B_TRUE indicates this an advertisement */ - ndp_process(dst_nce, haddr, - na->nd_na_flags_reserved, B_TRUE); NCE_REFRELE(dst_nce); } rw_enter(&ill_g_lock, RW_READER); @@ -1696,7 +2155,7 @@ ndp_input_advert(ill_t *ill, mblk_t *mp) * The checksum has already checked o.k before reaching here. */ void -ndp_input(ill_t *ill, mblk_t *mp) +ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) { icmp6_t *icmp_nd; ip6_t *ip6h; @@ -1747,9 +2206,9 @@ ndp_input(ill_t *ill, mblk_t *mp) goto done; } if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { - ndp_input_solicit(ill, mp); + ndp_input_solicit(ill, mp, dl_mp); } else { - ndp_input_advert(ill, mp); + ndp_input_advert(ill, mp, dl_mp); } done: freemsg(mp); @@ -1758,9 +2217,13 @@ done: /* * nce_xmit is called to form and transmit a ND solicitation or * advertisement ICMP packet. - * If source address is unspecified, appropriate source address - * and link layer address will be chosen here. This function - * *always* sends the link layer option. + * + * If the source address is unspecified and this isn't a probe (used for + * duplicate address detection), an appropriate source address and link layer + * address will be chosen here. The link layer address option is included if + * the source is specified (i.e., all non-probe packets), and omitted (per the + * specification) otherwise. + * * It returns B_FALSE only if it does a successful put() to the * corresponding ill's ill_wq otherwise returns B_TRUE. 
*/ @@ -1792,7 +2255,7 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, */ ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); - if (IN6_IS_ADDR_UNSPECIFIED(sender)) { + if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { ASSERT(operation != ND_NEIGHBOR_ADVERT); /* * Pick a source address for this solicitation, but @@ -1816,7 +2279,10 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, hwaddr_ill = src_ipif->ipif_ill; } - plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; + if (flag & NDP_PROBE) + plen = 0; + else + plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; /* * Always make sure that the NS/NA packets don't get load * spread. This is needed so that the probe packets sent @@ -1842,6 +2308,8 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; ip6i->ip6i_nxt = IPPROTO_RAW; ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; + if (flag & NDP_PROBE) + ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); @@ -1858,7 +2326,8 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, if (operation == ND_NEIGHBOR_SOLICIT) { nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; - opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; + if (!(flag & NDP_PROBE)) + opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; ip6h->ip6_src = *sender; ns->nd_ns_target = *target; if (!(flag & NDP_UNICAST)) { @@ -1870,6 +2339,7 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, } else { nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; + ASSERT(!(flag & NDP_PROBE)); opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; ip6h->ip6_src = *sender; na->nd_na_target = *sender; @@ -1881,12 +2351,16 @@ nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; } - /* Fill in link layer address and option len */ - opt->nd_opt_len = 
(uint8_t)plen; - mutex_enter(&hwaddr_ill->ill_lock); - bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, - &opt[1], hwaddr_ill->ill_nd_lla_len); - mutex_exit(&hwaddr_ill->ill_lock); + + if (!(flag & NDP_PROBE)) { + /* Fill in link layer address and option len */ + opt->nd_opt_len = (uint8_t)plen; + mutex_enter(&hwaddr_ill->ill_lock); + bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : + hwaddr_ill->ill_phys_addr, &opt[1], + hwaddr_ill->ill_nd_lla_len); + mutex_exit(&hwaddr_ill->ill_lock); + } icmp6->icmp6_type = (uint8_t)operation; icmp6->icmp6_code = 0; /* @@ -1950,30 +2424,6 @@ ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) } /* - * convert a link level address of arbitrary length - * to an ascii string. - * The caller *must* have already verified that the string buffer - * is large enough to hold the entire string, including trailing NULL. - */ -static void -lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) -{ - uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ - int i; - size_t len; - - buf[0] = '\0'; - for (i = 0; i < addrlen; i++) { - addrbyte[0] = '\0'; - (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); - len = strlen((const char *)addrbyte); - bcopy(addrbyte, buf, len); - buf = buf + len; - } - *--buf = '\0'; -} - -/* * Add a single line to the NDP Cache Entry Report. */ static void @@ -2013,7 +2463,7 @@ nce_report1(nce_t *nce, uchar_t *mp_arg) if (ill->ill_net_type == IRE_IF_RESOLVER) { size_t addrlen; - uchar_t *addr_buf; + char *addr_buf; dl_unitdata_req_t *dl; mutex_enter(&nce->nce_lock); @@ -2042,12 +2492,10 @@ nce_report1(nce_t *nce, uchar_t *mp_arg) mutex_exit(&nce->nce_lock); return; } - if (ill->ill_flags & ILLF_XRESOLV) - lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, - addr_buf); - else - lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, - addr_buf); + (void) mac_colon_addr((uint8_t *)h, + (ill->ill_flags & ILLF_XRESOLV) ? 
+ dl->dl_dest_addr_length : ill->ill_nd_lla_len, + addr_buf, addrlen); mutex_exit(&nce->nce_lock); (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", ill->ill_name, addr_buf, (uchar_t *)&flags_buf, @@ -2152,48 +2600,108 @@ ndp_timer(void *arg) nce->nce_pcnt--; ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && nce->nce_pcnt >= -1); - if (nce->nce_pcnt == 0) { + if (nce->nce_pcnt > 0) { + /* + * As per RFC2461, the nce gets deleted after + * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. + * Note that the first unicast solicitation is sent + * during the DELAY state. + */ + ip2dbg(("ndp_timer: pcount=%x dst %s\n", + nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, + addrbuf, sizeof (addrbuf)))); + mutex_exit(&nce->nce_lock); + dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, + B_FALSE, &ipv6_all_zeros, &nce->nce_addr, + (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : + NDP_UNICAST); + if (dropped) { + mutex_enter(&nce->nce_lock); + nce->nce_pcnt++; + mutex_exit(&nce->nce_lock); + } + NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); + } else if (nce->nce_pcnt < 0) { + /* No hope, delete the nce */ + nce->nce_state = ND_UNREACHABLE; + mutex_exit(&nce->nce_lock); + if (ip_debug > 2) { + /* ip1dbg */ + pr_addr_dbg("ndp_timer: Delete IRE for" + " dst %s\n", AF_INET6, &nce->nce_addr); + } + ndp_delete(nce); + } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { /* Wait RetransTimer, before deleting the entry */ ip2dbg(("ndp_timer: pcount=%x dst %s\n", nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, addrbuf, sizeof (addrbuf)))); mutex_exit(&nce->nce_lock); + /* Wait one interval before killing */ NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); - } else { + } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { + ipif_t *ipif; + /* - * As per RFC2461, the nce gets deleted after - * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. - * Note that the first unicast solicitation is sent - * during the DELAY state. 
+ * We're done probing, and we can now declare this + * address to be usable. Let IP know that it's ok to + * use. */ - if (nce->nce_pcnt > 0) { - ip2dbg(("ndp_timer: pcount=%x dst %s\n", - nce->nce_pcnt, inet_ntop(AF_INET6, - &nce->nce_addr, - addrbuf, sizeof (addrbuf)))); - mutex_exit(&nce->nce_lock); - dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, - NULL, B_FALSE, &ipv6_all_zeros, - &nce->nce_addr, NDP_UNICAST); - if (dropped) { - mutex_enter(&nce->nce_lock); - nce->nce_pcnt++; - mutex_exit(&nce->nce_lock); + nce->nce_state = ND_REACHABLE; + mutex_exit(&nce->nce_lock); + ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, + ALL_ZONES, NULL, NULL, NULL, NULL); + if (ipif != NULL) { + if (ipif->ipif_was_dup) { + char ibuf[LIFNAMSIZ + 10]; + char sbuf[INET6_ADDRSTRLEN]; + + ipif->ipif_was_dup = B_FALSE; + (void) strlcpy(ibuf, ill->ill_name, + sizeof (ibuf)); + (void) inet_ntop(AF_INET6, + &ipif->ipif_v6lcl_addr, + sbuf, sizeof (sbuf)); + if (ipif->ipif_id != 0) { + (void) snprintf(ibuf + + ill->ill_name_length - 1, + sizeof (ibuf) - + ill->ill_name_length + 1, + ":%d", ipif->ipif_id); + } + cmn_err(CE_NOTE, "recovered address " + "%s on %s", sbuf, ibuf); } - NDP_RESTART_TIMER(nce, - ill->ill_reachable_retrans_time); - } else { - /* No hope, delete the nce */ - nce->nce_state = ND_UNREACHABLE; - mutex_exit(&nce->nce_lock); - if (ip_debug > 2) { - /* ip1dbg */ - pr_addr_dbg("ndp_timer: Delete IRE for" - " dst %s\n", AF_INET6, - &nce->nce_addr); + if ((ipif->ipif_flags & IPIF_UP) && + !ipif->ipif_addr_ready) { + ip_rts_ifmsg(ipif); + ip_rts_newaddrmsg(RTM_ADD, 0, ipif); + sctp_update_ipif(ipif, SCTP_IPIF_UP); } - ndp_delete(nce); + ipif->ipif_addr_ready = 1; + ipif_refrele(ipif); + } + /* Begin defending our new address */ + nce->nce_unsolicit_count = 0; + dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, + B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, + NDP_ORIDE); + if (dropped) { + nce->nce_unsolicit_count = 1; + NDP_RESTART_TIMER(nce, + ip_ndp_unsolicit_interval); + } 
else if (ip_ndp_defense_interval != 0) { + NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); } + } else { + /* + * This is an address we're probing to be our own, but + * the ill is down. Wait until it comes back before + * doing anything, but switch to reachable state so + * that the restart will work. + */ + nce->nce_state = ND_REACHABLE; + mutex_exit(&nce->nce_lock); } NCE_REFRELE(nce); return; @@ -2262,9 +2770,12 @@ ndp_timer(void *arg) break; case ND_REACHABLE : rw_exit(&ill_g_lock); - if (nce->nce_flags & NCE_F_UNSOL_ADV && - nce->nce_unsolicit_count != 0) { - nce->nce_unsolicit_count--; + if (((nce->nce_flags & NCE_F_UNSOL_ADV) && + nce->nce_unsolicit_count != 0) || + ((nce->nce_flags & NCE_F_PERMANENT) && + ip_ndp_defense_interval != 0)) { + if (nce->nce_unsolicit_count > 0) + nce->nce_unsolicit_count--; mutex_exit(&nce->nce_lock); dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, @@ -2272,7 +2783,7 @@ ndp_timer(void *arg) B_FALSE, /* use ill_phys_addr */ &nce->nce_addr, &ipv6_all_hosts_mcast, - nce->nce_flags | NDP_ORIDE); + NDP_ORIDE); if (dropped) { mutex_enter(&nce->nce_lock); nce->nce_unsolicit_count++; @@ -2281,6 +2792,9 @@ ndp_timer(void *arg) if (nce->nce_unsolicit_count != 0) { NDP_RESTART_TIMER(nce, ip_ndp_unsolicit_interval); + } else { + NDP_RESTART_TIMER(nce, + ip_ndp_defense_interval); } } else { mutex_exit(&nce->nce_lock); @@ -2339,7 +2853,7 @@ nce_set_ll(nce_t *nce, uchar_t *ll_addr) } static boolean_t -nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) +nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) { ill_t *ill = nce->nce_ill; uchar_t *ll_offset; @@ -2348,7 +2862,7 @@ nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) if (ll_addr == NULL) return (B_FALSE); ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); - if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) + if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) return (B_TRUE); return (B_FALSE); } @@ -3338,3 +3852,84 @@ 
nce_reinit(nce_t *nce) NCE_REFRELE(nce); return (newnce); } + +/* + * ndp_walk routine to delete all entries that have a given destination or + * gateway address and cached link layer (MAC) address. This is used when ARP + * informs us that a network-to-link-layer mapping may have changed. + */ +void +nce_delete_hw_changed(nce_t *nce, void *arg) +{ + nce_hw_map_t *hwm = arg; + mblk_t *mp; + dl_unitdata_req_t *dlu; + uchar_t *macaddr; + ill_t *ill; + int saplen; + ipaddr_t nce_addr; + + if (nce->nce_state != ND_REACHABLE) + return; + + IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); + if (nce_addr != hwm->hwm_addr) + return; + + mutex_enter(&nce->nce_lock); + if ((mp = nce->nce_res_mp) == NULL) { + mutex_exit(&nce->nce_lock); + return; + } + dlu = (dl_unitdata_req_t *)mp->b_rptr; + macaddr = (uchar_t *)(dlu + 1); + ill = nce->nce_ill; + if ((saplen = ill->ill_sap_length) > 0) + macaddr += saplen; + else + saplen = -saplen; + + /* + * If the hardware address is unchanged, then leave this one alone. + * Note that saplen == abs(saplen) now. + */ + if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && + bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { + mutex_exit(&nce->nce_lock); + return; + } + mutex_exit(&nce->nce_lock); + + DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); + ndp_delete(nce); +} + +/* + * This function verifies whether a given IPv4 address is potentially known to + * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, + * so that it can continue to look for hardware changes on that address. + */ +boolean_t +ndp_lookup_ipaddr(in_addr_t addr) +{ + nce_t *nce; + struct in_addr nceaddr; + + if (addr == INADDR_ANY) + return (B_FALSE); + + mutex_enter(&ndp4.ndp_g_lock); + nce = *(nce_t **)NCE_HASH_PTR_V4(addr); + for (; nce != NULL; nce = nce->nce_next) { + /* Note that only v4 mapped entries are in the table. 
*/ + IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); + if (addr == nceaddr.s_addr && + IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { + /* Single flag check; no lock needed */ + if (!(nce->nce_flags & NCE_F_CONDEMNED)) + break; + } + } + mutex_exit(&ndp4.ndp_g_lock); + return (nce != NULL); +} diff --git a/usr/src/uts/common/inet/ip/ip_squeue.c b/usr/src/uts/common/inet/ip/ip_squeue.c index 033b962b02..417b1580eb 100644 --- a/usr/src/uts/common/inet/ip/ip_squeue.c +++ b/usr/src/uts/common/inet/ip/ip_squeue.c @@ -110,16 +110,14 @@ #include <inet/common.h> #include <inet/ip.h> #include <inet/ip_if.h> -#include <inet/mi.h> #include <inet/nd.h> #include <inet/ipclassifier.h> #include <sys/types.h> #include <sys/conf.h> #include <sys/sunddi.h> -#include <sys/ddi.h> +#include <sys/dlpi.h> #include <sys/squeue_impl.h> - /* * We allow multiple NICs to bind to the same CPU but want to preserve 1 <-> 1 * mapping between squeue and NIC (or Rx ring) for performance reasons so diff --git a/usr/src/uts/common/inet/ip6.h b/usr/src/uts/common/inet/ip6.h index beae955d27..2cffc239b8 100644 --- a/usr/src/uts/common/inet/ip6.h +++ b/usr/src/uts/common/inet/ip6.h @@ -379,7 +379,7 @@ extern void ip_output_v6(void *, mblk_t *, void *, int); extern void ip_xmit_v6(mblk_t *, ire_t *, uint_t, conn_t *, int, struct ipsec_out_s *); extern void ip_rput_data_v6(queue_t *, ill_t *, mblk_t *, ip6_t *, - uint_t, mblk_t *); + uint_t, mblk_t *, mblk_t *); extern void mld_input(queue_t *, mblk_t *, ill_t *); extern void mld_joingroup(ilm_t *); extern void mld_leavegroup(ilm_t *); diff --git a/usr/src/uts/common/inet/ip_if.h b/usr/src/uts/common/inet/ip_if.h index 00b631b9e1..173930e3ee 100644 --- a/usr/src/uts/common/inet/ip_if.h +++ b/usr/src/uts/common/inet/ip_if.h @@ -91,7 +91,7 @@ extern "C" { #define IFF_LOGINT_FLAGS (IFF_UP|IFF_BROADCAST|IFF_POINTOPOINT| \ IFF_UNNUMBERED|IFF_DHCPRUNNING|IFF_PRIVATE|IFF_NOXMIT|IFF_NOLOCAL| \ 
IFF_DEPRECATED|IFF_ADDRCONF|IFF_ANYCAST|IFF_MIPRUNNING|IFF_NOFAILOVER| \ - IFF_PREFERRED|IFF_TEMPORARY|IFF_FIXEDMTU) + IFF_PREFERRED|IFF_TEMPORARY|IFF_FIXEDMTU|IFF_DUPLICATE) #define IPIF_REPL_CHECK(to_ipif, failback_cmd) \ (((to_ipif)->ipif_replace_zero) || ((failback_cmd) && \ @@ -138,14 +138,24 @@ extern "C" { #define IPIF_PREFERRED IFF_PREFERRED /* Prefer as source address */ #define IPIF_TEMPORARY IFF_TEMPORARY /* RFC3041 */ #define IPIF_FIXEDMTU IFF_FIXEDMTU /* set with SIOCSLIFMTU */ +#define IPIF_DUPLICATE IFF_DUPLICATE /* address is in use */ /* Source selection values for ipif_select_source_v6 */ #define RESTRICT_TO_NONE 0x0 /* No restriction in source selection */ #define RESTRICT_TO_GROUP 0x1 /* Restrict to IPMP group */ #define RESTRICT_TO_ILL 0x2 /* Restrict to ILL */ +/* for ipif_resolver_up */ +enum ip_resolver_action { + Res_act_initial, /* initial address establishment */ + Res_act_move, /* address move (IPMP, new DL addr) */ + Res_act_defend /* address defense */ +}; + extern ill_t *illgrp_scheduler(ill_t *); extern mblk_t *ill_arp_alloc(ill_t *, uchar_t *, caddr_t); +extern mblk_t *ipif_area_alloc(ipif_t *); +extern mblk_t *ipif_ared_alloc(ipif_t *); extern void ill_dlpi_done(ill_t *, t_uscalar_t); extern void ill_dlpi_send(ill_t *, mblk_t *); extern mblk_t *ill_dlur_gen(uchar_t *, uint_t, t_uscalar_t, t_scalar_t); @@ -167,6 +177,7 @@ extern time_t ill_frag_timeout(ill_t *, time_t); extern int ill_init(queue_t *, ill_t *); extern int ill_nominate_mcast_rcv(ill_group_t *); extern boolean_t ill_setdefaulttoken(ill_t *); +extern void ill_restart_dad(ill_t *, boolean_t); extern void ill_lock_ills(ill_t **, int); extern mblk_t *ill_pending_mp_get(ill_t *, conn_t **, uint_t); @@ -216,9 +227,10 @@ extern void ipif_refhold_locked(ipif_t *); extern void ipif_refrele(ipif_t *); extern boolean_t ipif_ire_active(ipif_t *); extern void ipif_all_down_tail(ipsq_t *, queue_t *, mblk_t *, void *); -extern int ipif_resolver_up(ipif_t *, boolean_t); +extern int 
ipif_resolver_up(ipif_t *, enum ip_resolver_action); extern int ipif_arp_setup_multicast(ipif_t *, mblk_t **); extern int ipif_down(ipif_t *, queue_t *, mblk_t *); +extern void ipif_down_tail(ipif_t *); extern void ipif_multicast_up(ipif_t *); extern void ipif_ndp_down(ipif_t *); extern int ipif_ndp_up(ipif_t *, const in6_addr_t *, boolean_t); @@ -238,6 +250,7 @@ extern ipif_t *ipif_lookup_on_ifindex(uint_t, boolean_t, zoneid_t, queue_t *, extern ipif_t *ipif_get_next_ipif(ipif_t *curr, ill_t *ill); extern void ipif_ill_refrele_tail(ill_t *ill); extern void ipif_arp_down(ipif_t *ipif); +extern void ipif_mask_reply(ipif_t *); extern int illgrp_insert(ill_group_t **, ill_t *, char *, ill_group_t *, boolean_t); diff --git a/usr/src/uts/common/inet/ip_ndp.h b/usr/src/uts/common/inet/ip_ndp.h index 05edcd3225..4d0dbd5428 100644 --- a/usr/src/uts/common/inet/ip_ndp.h +++ b/usr/src/uts/common/inet/ip_ndp.h @@ -28,6 +28,12 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/mutex.h> +#include <sys/stream.h> +#include <netinet/in.h> +#include <netinet/icmp6.h> +#include <inet/ip.h> + /* * Internal definitions for the kernel implementation of the IPv6 * Neighbor Discovery Protocol (NDP). 
@@ -69,6 +75,8 @@ typedef struct nce_s { struct nce_s *nce_fastpath; /* for fastpath list */ timeout_id_t nce_timeout_id; uchar_t nce_ipversion; /* IPv4(ARP)/IPv6(NDP) version */ + uint_t nce_defense_count; /* number of NDP conflicts */ + uint_t nce_defense_time; /* last time defended (secs) */ #ifdef NCE_DEBUG th_trace_t *nce_trace[IP_TR_HASH_MAX]; boolean_t nce_trace_disable; /* True when alloc fails */ @@ -135,6 +143,7 @@ extern ndp_g_t ndp4, ndp6; #define NDP_ISROUTER 0x2 #define NDP_SOLICITED 0x4 #define NDP_ORIDE 0x8 +#define NDP_PROBE 0x10 /* Number of packets queued in NDP for a neighbor */ #define ND_MAX_Q 4 @@ -219,6 +228,17 @@ typedef struct { int ncr_host; /* Fraction for host entries */ } nce_cache_reclaim_t; +/* + * Structure for nce_delete_hw_changed; specifies an IPv4 address to link-layer + * address mapping. Any route that has a cached copy of a mapping for that + * IPv4 address that doesn't match the given mapping must be purged. + */ +typedef struct { + ipaddr_t hwm_addr; /* IPv4 address */ + uint_t hwm_hwlen; /* Length of hardware address (may be 0) */ + uchar_t *hwm_hwaddr; /* Pointer to new hardware address, if any */ +} nce_hw_map_t; + /* When SAP is greater than zero address appears before SAP */ #define NCE_LL_ADDR_OFFSET(ill) (((ill)->ill_sap_length) < 0 ? 
\ (sizeof (dl_unitdata_req_t)) : \ @@ -276,7 +296,8 @@ extern void ndp_fastpath_flush(nce_t *, char *); extern boolean_t ndp_fastpath_update(nce_t *, void *); extern nd_opt_hdr_t *ndp_get_option(nd_opt_hdr_t *, int, int); extern void ndp_inactive(nce_t *); -extern void ndp_input(ill_t *, mblk_t *); +extern void ndp_input(ill_t *, mblk_t *, mblk_t *); +extern boolean_t ndp_lookup_ipaddr(in_addr_t); extern nce_t *ndp_lookup_v6(ill_t *, const in6_addr_t *, boolean_t); extern nce_t *ndp_lookup_v4(ill_t *, const in_addr_t *, boolean_t); extern int ndp_lookup_then_add(ill_t *, uchar_t *, const void *, @@ -298,6 +319,8 @@ extern void ndp_walk_common(ndp_g_t *, ill_t *, pfi_t, extern int ndp_add(ill_t *, uchar_t *, const void *, const void *, const void *, uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); +extern boolean_t ndp_restart_dad(nce_t *); +extern void ndp_do_recovery(ipif_t *); extern void nce_resolv_failed(nce_t *); extern void arp_resolv_failed(nce_t *); extern void nce_fastpath_list_add(nce_t *); @@ -307,6 +330,7 @@ extern void nce_fastpath_list_dispatch(ill_t *, extern void nce_queue_mp_common(nce_t *, mblk_t *, boolean_t); extern void ndp_flush_qd_mp(nce_t *); extern nce_t *nce_reinit(nce_t *); +extern void nce_delete_hw_changed(nce_t *, void *); #ifdef NCE_DEBUG extern void nce_trace_inactive(nce_t *); diff --git a/usr/src/uts/common/net/if.h b/usr/src/uts/common/net/if.h index 8351c9b33a..f2be9114c5 100644 --- a/usr/src/uts/common/net/if.h +++ b/usr/src/uts/common/net/if.h @@ -163,6 +163,7 @@ struct ifnet { #define IFF_FIXEDMTU 0x1000000000ll /* MTU manually set with SIOCSLIFMTU */ #define IFF_VIRTUAL 0x2000000000ll /* Does not send or receive packets */ +#define IFF_DUPLICATE 0x4000000000ll /* Local address already in use */ /* * The IFF_MULTICAST flag indicates that the network can support the @@ -177,7 +178,7 @@ struct ifnet { (IFF_BROADCAST | IFF_POINTOPOINT | IFF_RUNNING | IFF_PROMISC | \ IFF_MULTICAST | IFF_MULTI_BCAST | IFF_UNNUMBERED | 
IFF_IPV4 | \ IFF_IPV6 | IFF_INACTIVE | IFF_FIXEDMTU | IFF_VIRTUAL | \ - IFF_LOOPBACK | IFF_ALLMULTI) + IFF_LOOPBACK | IFF_ALLMULTI | IFF_DUPLICATE) /* * Output queues (ifp->if_snd) and internetwork datagram level (pup level 1) diff --git a/usr/src/uts/common/net/if_arp.h b/usr/src/uts/common/net/if_arp.h index 7df505c710..9103b1d0b5 100644 --- a/usr/src/uts/common/net/if_arp.h +++ b/usr/src/uts/common/net/if_arp.h @@ -1,5 +1,5 @@ /* - * Copyright 1997-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -15,6 +15,9 @@ #pragma ident "%Z%%M% %I% %E% SMI" /* if_arp.h 1.5 88/08/19 SMI; from UCB 7.1 1/24/86 */ +#include <sys/types.h> +#include <sys/socket.h> + #ifdef __cplusplus extern "C" { #endif @@ -32,7 +35,8 @@ extern "C" { struct arphdr { ushort_t ar_hrd; /* format of hardware address */ #define ARPHRD_ETHER 1 /* ethernet hardware address */ -#define ARPHRD_IB 32 /* IPoIB hardware address */ +#define ARPHRD_IEEE802 6 /* IEEE 802 hardware address */ +#define ARPHRD_IB 32 /* IPoIB hardware address */ ushort_t ar_pro; /* format of protocol address */ uchar_t ar_hln; /* length of hardware address */ uchar_t ar_pln; /* length of protocol address */ @@ -55,6 +59,9 @@ struct arphdr { #endif /* notdef */ }; +/* Maximum hardware and protocol address length */ +#define ARP_MAX_ADDR_LEN 255 + /* * Extended ARP ioctl request */ @@ -72,12 +79,13 @@ struct arpreq { struct sockaddr arp_ha; /* hardware address */ int arp_flags; /* flags */ }; -/* arp_flags and at_flags field values */ +/* arp_flags field values */ #define ATF_INUSE 0x01 /* entry in use */ #define ATF_COM 0x02 /* completed entry (enaddr valid) */ #define ATF_PERM 0x04 /* permanent entry */ #define ATF_PUBL 0x08 /* publish entry (respond for other host) */ #define ATF_USETRAILERS 0x10 /* has requested trailers */ +#define ATF_AUTHORITY 0x20 /* hardware address is authoritative */ /* * This data structure is used by 
kernel protocol modules to register diff --git a/usr/src/uts/common/netinet/arp.h b/usr/src/uts/common/netinet/arp.h index 523f111c00..a3bf0e7761 100644 --- a/usr/src/uts/common/netinet/arp.h +++ b/usr/src/uts/common/netinet/arp.h @@ -1,5 +1,5 @@ /* - * Copyright 1986-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -14,6 +14,10 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/types.h> +#include <sys/ethernet.h> +#include <sys/socket.h> + #ifdef __cplusplus extern "C" { #endif @@ -31,6 +35,7 @@ extern "C" { struct arphdr { ushort_t ar_hrd; /* format of hardware address */ #define ARPHRD_ETHER 1 /* ethernet hardware address */ +#define ARPHRD_IEEE802 6 /* IEEE 802 hardware address */ #define ARPHRD_IB 32 /* IPoIB hardware address */ ushort_t ar_pro; /* format of protocol address */ uchar_t ar_hln; /* length of hardware address */ @@ -54,6 +59,9 @@ struct arphdr { #endif /* notdef */ }; +/* Maximum hardware and protocol address length */ +#define ARP_MAX_ADDR_LEN 255 + /* * Ethernet Address Resolution Protocol. 
* @@ -82,12 +90,13 @@ struct arpreq { struct sockaddr arp_ha; /* hardware address */ int arp_flags; /* flags */ }; -/* arp_flags and at_flags field values */ +/* arp_flags field values */ #define ATF_INUSE 0x01 /* entry in use */ #define ATF_COM 0x02 /* completed entry (enaddr valid) */ #define ATF_PERM 0x04 /* permanent entry */ #define ATF_PUBL 0x08 /* publish entry (respond for other host) */ #define ATF_USETRAILERS 0x10 /* has requested trailers */ +#define ATF_AUTHORITY 0x20 /* hardware address is authoritative */ #ifdef __cplusplus } diff --git a/usr/src/uts/common/os/subr.c b/usr/src/uts/common/os/subr.c index 9c9942ec8c..4753f1152a 100644 --- a/usr/src/uts/common/os/subr.c +++ b/usr/src/uts/common/os/subr.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -218,25 +217,6 @@ umax(uint_t a, uint_t b) #endif /* !_LP64 */ /* - * Return bit position of least significant bit set in mask, - * starting numbering from 1. - */ -int -ffs(long mask) -{ - int i; - - if (mask == 0) - return (0); - for (i = 1; i <= NBBY * sizeof (mask); i++) { - if (mask & 1) - return (i); - mask >>= 1; - } - return (0); -} - -/* * Parse suboptions from a string. * Same as getsubopt(3C). 
*/ diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c index 1709760d74..f16ae44426 100644 --- a/usr/src/uts/common/os/sunddi.c +++ b/usr/src/uts/common/os/sunddi.c @@ -5955,7 +5955,6 @@ ddi_in_panic() int ddi_ffs(long mask) { - extern int ffs(long mask); return (ffs(mask)); } @@ -5970,8 +5969,6 @@ ddi_ffs(long mask) int ddi_fls(long mask) { - extern int ffs(long); - while (mask) { long nx; diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h index 9c34c3f895..c96ea5b4ac 100644 --- a/usr/src/uts/common/sys/systm.h +++ b/usr/src/uts/common/sys/systm.h @@ -230,7 +230,7 @@ int strident_valid(const char *); void strident_canon(char *, size_t); int getsubopt(char **optionsp, char * const *tokens, char **valuep); char *append_subopt(const char *, size_t, char *, const char *); -int ffs(long); +int ffs(uintmax_t); int copyin(const void *, void *, size_t); void copyin_noerr(const void *, void *, size_t); int xcopyin(const void *, void *, size_t); |