diff options
author | carlsonj <none@none> | 2006-08-14 14:10:48 -0700 |
---|---|---|
committer | carlsonj <none@none> | 2006-08-14 14:10:48 -0700 |
commit | 69bb4bb45c98da60d21839c4dc3c01ea1be60585 (patch) | |
tree | 81b0b1f2cca24e6379bc7933ea584bda2861f39f /usr/src/uts/common/inet/arp/arp.c | |
parent | 0173c38a73f34277e0c97a19fedfd25d81ba8380 (diff) | |
download | illumos-joyent-69bb4bb45c98da60d21839c4dc3c01ea1be60585.tar.gz |
PSARC 2005/314 IP Duplicate Address Detection
PSARC 2006/017 Arp Single Entry Display
1248254 NDD doesn't have explicit range checking for ARP NDD variables.
1253974 Please make a "permanent contents" option for arp vs. static
4069191 ace_t::ace_query_count field is not used anywhere.
4157198 ARP cache inconsistency between arp and ip modules.
4396195 ar_rput: ar_cmd_dispatch not necessary for every packet?
4705220 No IPv6 DAD performed during boot
4728609 IPv4 Duplicate Address Detection (DAD) is broken
4971789 Need unsolicited neighbor advertisements when interface comes up
4978063 SO_DONTROUTE option causes ARP traffic for every frame.
4997903 /usr/sbin/arp accepts invalid modifier keywords
6266155 flag on the play: ndp gets its bits confused
6272993 in.ndpd could be free of lint
6273003 arp shouldn't export status via ndd
6360928 ipif_arp_down debug message wrongly expects DLPI
6363393 6281236 causes generation of bogus NS
6454158 need a temporary work-around for 6451644's IPv6 impact
6456379 need ffs(3C)-like function in modapi
--HG--
rename : usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c => deleted_files/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.c
rename : usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h => deleted_files/usr/src/cmd/cmd-inet/sbin/dhcpagent/arp_check.h
rename : usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c => deleted_files/usr/src/cmd/cmd-inet/usr.lib/in.ndpd/dupl_addr.c
rename : usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c => deleted_files/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/dupl_addr.c
rename : usr/src/lib/libc/port/gen/ffs.c => deleted_files/usr/src/lib/libc/port/gen/ffs.c
rename : usr/src/lib/libinetcfg/common/inetcfg_dad.c => deleted_files/usr/src/lib/libinetcfg/common/inetcfg_dad.c
rename : usr/src/lib/libinetcfg/common/inetcfg_dad.h => deleted_files/usr/src/lib/libinetcfg/common/inetcfg_dad.h
Diffstat (limited to 'usr/src/uts/common/inet/arp/arp.c')
-rw-r--r-- | usr/src/uts/common/inet/arp/arp.c | 2112 |
1 files changed, 1236 insertions, 876 deletions
diff --git a/usr/src/uts/common/inet/arp/arp.c b/usr/src/uts/common/inet/arp/arp.c index fd7d086933..17c81b9513 100644 --- a/usr/src/uts/common/inet/arp/arp.c +++ b/usr/src/uts/common/inet/arp/arp.c @@ -28,8 +28,6 @@ /* AR - Address Resolution Protocol */ -#define ARP_DEBUG - #include <sys/types.h> #include <sys/stream.h> #include <sys/stropts.h> @@ -47,6 +45,9 @@ #include <sys/strsun.h> #include <sys/policy.h> #include <sys/ethernet.h> +#include <sys/zone.h> +#include <sys/random.h> +#include <sys/sdt.h> #include <inet/common.h> #include <inet/optcom.h> @@ -56,24 +57,52 @@ #include <net/if.h> #include <inet/arp.h> #include <netinet/ip6.h> +#include <netinet/arp.h> #include <inet/ip.h> #include <inet/ip_ire.h> +#include <inet/ip_ndp.h> #include <inet/mib2.h> #include <inet/arp_impl.h> -#ifdef ARP_DEBUG -#define arp0dbg(a) printf a -#define arp1dbg(a) if (arp_debug) printf a -#define arp2dbg(a) if (arp_debug > 1) printf a -#define arp3dbg(a) if (arp_debug > 2) printf a -#else -#define arp0dbg(a) /* */ -#define arp1dbg(a) /* */ -#define arp2dbg(a) /* */ -#define arp3dbg(a) /* */ -#endif +/* + * ARP entry life time and design notes + * ------------------------------------ + * + * ARP entries (ACEs) must last at least as long as IP knows about a given + * MAC-IP translation (i.e., as long as the IRE cache entry exists). It's ok + * if the ARP entry lasts longer, but not ok if it is removed before the IP + * entry. The reason for this is that if ARP doesn't have an entry, we will be + * unable to detect the difference between an ARP broadcast that represents no + * change (same, known address of sender) and one that represents a change (new + * address for existing entry). In the former case, we must not notify IP, or + * we can suffer hurricane attack. In the latter case, we must notify IP, or + * IP will drift out of sync with the network. + * + * Note that IP controls the lifetime of entries, not ARP. + * + * We don't attempt to reconfirm aging entries. If the system is no longer + * talking to a given peer, then it doesn't matter if we have the right mapping + * for that peer. It would be possible to send queries on aging entries that + * are active, but this isn't done. + */ + +/* + * This is used when scanning for "old" (least recently broadcast) ACEs. We + * don't want to have to walk the list for every single one, so we gather up + * batches at a time. + */ +#define ACE_RESCHED_LIST_LEN 8 + +typedef struct { + arl_t *art_arl; + uint_t art_naces; + ace_t *art_aces[ACE_RESCHED_LIST_LEN]; +} ace_resched_t; #define ACE_RESOLVED(ace) ((ace)->ace_flags & ACE_F_RESOLVED) +#define ACE_NONPERM(ace) \ + (((ace)->ace_flags & (ACE_F_RESOLVED | ACE_F_PERMANENT)) == \ + ACE_F_RESOLVED) #define AR_DEF_XMIT_INTERVAL 500 /* time in milliseconds */ #define AR_LL_HDR_SLACK 32 /* Leave the lower layer some room */ @@ -82,6 +111,13 @@ #define AR_DRAINING (void *)0x11 /* + * The IPv4 Link Local address space is special; we do extra duplicate checking + * there, as the entire assignment mechanism rests on random numbers. + */ +#define IS_IPV4_LL_SPACE(ptr) (((uchar_t *)ptr)[0] == 169 && \ + ((uchar_t *)ptr)[1] == 254) + +/* * Check if the command needs to be enqueued by seeing if there are other * commands ahead of us or if some DLPI response is being awaited. Usually * there would be an enqueued command in the latter case, however if the @@ -94,33 +130,9 @@ (mp->b_prev != AR_DRAINING && (arl->arl_queue != NULL || \ arl->arl_dlpi_pending != DL_PRIM_INVAL)) -/* Ugly check to determine whether the module below is IP */ -#define MODULE_BELOW_IS_IP(q) \ - ((WR(q)->q_next != NULL && WR(q)->q_next->q_next != NULL) && \ - (strcmp(WR(q)->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0)) - -/* ARP Cache Entry */ -typedef struct ace_s { - struct ace_s *ace_next; /* Hash chain next pointer */ - struct ace_s **ace_ptpn; /* Pointer to previous next */ - struct arl_s *ace_arl; /* Associated arl */ - uint32_t ace_proto; /* Protocol for this ace */ - uint32_t ace_flags; - uchar_t *ace_proto_addr; - uint32_t ace_proto_addr_length; - uchar_t *ace_proto_mask; /* Mask for matching addr */ - uchar_t *ace_proto_extract_mask; /* For mappings */ - uchar_t *ace_hw_addr; - uint32_t ace_hw_addr_length; - uint32_t ace_hw_extract_start; /* For mappings */ - mblk_t *ace_mp; /* mblk we are in */ - uint32_t ace_query_count; - mblk_t *ace_query_mp; /* Head of outstanding query chain */ - int ace_publish_count; -} ace_t; - #define ACE_EXTERNAL_FLAGS_MASK \ -(ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR) + (ACE_F_PERMANENT | ACE_F_PUBLISH | ACE_F_MAPPING | ACE_F_MYADDR | \ + ACE_F_AUTHORITY) #define ARH_FIXED_LEN 8 @@ -165,8 +177,8 @@ static int ar_ce_create(arl_t *arl, uint32_t proto, uchar_t *hw_addr, uchar_t *proto_extract_mask, uint32_t hw_extract_start, uint32_t flags); static void ar_ce_delete(ace_t *ace); -static void ar_ce_delete_per_arl(ace_t *ace, arl_t *arl); -static ace_t **ar_ce_hash(uint32_t proto, uchar_t *proto_addr, +static void ar_ce_delete_per_arl(ace_t *ace, void *arg); +static ace_t **ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, uint32_t proto_addr_length); static ace_t *ar_ce_lookup(arl_t *arl, uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length); @@ -175,14 +187,12 @@ static ace_t *ar_ce_lookup_entry(arl_t *arl, uint32_t proto, static ace_t *ar_ce_lookup_from_area(mblk_t *mp, ace_t *matchfn()); static ace_t *ar_ce_lookup_mapping(arl_t *arl, uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length); -static int ar_ce_report(queue_t *q, mblk_t *mp, caddr_t data, cred_t *cr); -static void ar_ce_report1(ace_t *ace, uchar_t *mp_arg); -static void ar_ce_resolve(ace_t *ace, uchar_t *hw_addr, +static boolean_t ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length); -static void ar_ce_walk(pfi_t pfi, void *arg1); +static void ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1); static void ar_cleanup(void); -static void ar_client_notify(arl_t *arl, mblk_t *mp, int code); +static void ar_client_notify(const arl_t *arl, mblk_t *mp, int code); static int ar_close(queue_t *q); static int ar_cmd_dispatch(queue_t *q, mblk_t *mp); static mblk_t *ar_dlpi_comm(t_uscalar_t prim, size_t size); @@ -215,7 +225,7 @@ static int ar_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); static boolean_t ar_param_register(arpparam_t *arppa, int cnt); static int ar_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr); -static int ar_query_delete(ace_t *ace, uchar_t *ar); +static void ar_query_delete(ace_t *ace, void *ar); static void ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, uint32_t proto_addr_len); static clock_t ar_query_xmit(ace_t *ace, ace_t *src_ace); @@ -227,25 +237,16 @@ static int ar_slifname(queue_t *q, mblk_t *mp); static int ar_set_ppa(queue_t *q, mblk_t *mp); static int ar_snmp_msg(queue_t *q, mblk_t *mp_orig); static void ar_snmp_msg2(ace_t *, void *); -static void ar_timer_init(queue_t *q); -static int ar_trash(ace_t *ace, uchar_t *arg); static void ar_wput(queue_t *q, mblk_t *mp); static void ar_wsrv(queue_t *q); static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, - uint32_t plen, uchar_t *haddr1, uchar_t *paddr1, - uchar_t *haddr2, uchar_t *paddr2); -static int ar_xmit_request(queue_t *q, mblk_t *mp); -static int ar_xmit_response(queue_t *q, mblk_t *mp); + uint32_t plen, const uchar_t *haddr1, const uchar_t *paddr1, + const uchar_t *haddr2, const uchar_t *paddr2, const uchar_t *dstaddr); static uchar_t *ar_snmp_msg_element(mblk_t **, uchar_t *, size_t); static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd, boolean_t); static mblk_t *ar_cmd_dequeue(arl_t *arl); -#if 0 -static void show_ace(char *str, ace_t *ace); -static void show_arp(char *str, mblk_t *mp); -#endif - /* * All of these are alterable, within the min/max values given, * at run time. arp_publish_interval and arp_publish_count are @@ -256,16 +257,34 @@ static void show_arp(char *str, mblk_t *mp); */ static arpparam_t arp_param_arr[] = { /* min max value name */ - { 0, 10, 0, "arp_debug"}, { 30000, 3600000, 300000, "arp_cleanup_interval"}, { 1000, 20000, 2000, "arp_publish_interval"}, { 1, 20, 5, "arp_publish_count"}, + { 0, 20000, 1000, "arp_probe_delay"}, + { 10, 20000, 1500, "arp_probe_interval"}, + { 0, 20, 3, "arp_probe_count"}, + { 0, 20000, 100, "arp_fastprobe_delay"}, + { 10, 20000, 150, "arp_fastprobe_interval"}, + { 0, 20, 3, "arp_fastprobe_count"}, + { 0, 3600000, 300000, "arp_defend_interval"}, + { 0, 20000, 100, "arp_defend_rate"}, + { 0, 3600000, 15000, "arp_broadcast_interval"}, + { 5, 86400, 3600, "arp_defend_period"} }; -#define arp_debug arp_param_arr[0].arp_param_value -#define arp_timer_interval arp_param_arr[1].arp_param_value -#define arp_publish_interval arp_param_arr[2].arp_param_value -#define arp_publish_count arp_param_arr[3].arp_param_value +#define arp_cleanup_interval arp_param_arr[0].arp_param_value +#define arp_publish_interval arp_param_arr[1].arp_param_value +#define arp_publish_count arp_param_arr[2].arp_param_value +#define arp_probe_delay arp_param_arr[3].arp_param_value +#define arp_probe_interval arp_param_arr[4].arp_param_value +#define arp_probe_count arp_param_arr[5].arp_param_value +#define arp_fastprobe_delay arp_param_arr[6].arp_param_value +#define arp_fastprobe_interval arp_param_arr[7].arp_param_value +#define arp_fastprobe_count arp_param_arr[8].arp_param_value +#define arp_defend_interval arp_param_arr[9].arp_param_value +#define arp_defend_rate arp_param_arr[10].arp_param_value +#define arp_broadcast_interval arp_param_arr[11].arp_param_value +#define arp_defend_period arp_param_arr[12].arp_param_value static struct module_info info = { 0, "arp", 0, INFPSZ, 512, 128 @@ -289,27 +308,24 @@ static arl_t *arl_g_head; /* ARL List Head */ /* * TODO: we need a better mechanism to set the ARP hardware type since - * the DLPI mac type does not include enough prodefined values. + * the DLPI mac type does not include enough predefined values. */ static ar_m_t ar_m_tbl[] = { - { DL_CSMACD, 1, -2, 6}, /* 802.3 */ - { DL_TPB, 6, -2, 6}, /* 802.4 */ - { DL_TPR, 6, -2, 6}, /* 802.5 */ - { DL_METRO, 6, -2, 6}, /* 802.6 */ - { DL_ETHER, 1, -2, 6}, /* Ethernet */ - { DL_FDDI, 1, -2, 6}, /* FDDI */ - { DL_IB, 32, -2, 20}, /* Infiniband */ - { DL_OTHER, 1, -2, 6}, /* unknown */ + { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ + { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */ + { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ + { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ + { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ + { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ + { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ + { DL_OTHER, ARPHRD_ETHER, -2, 6}, /* unknown */ }; /* ARP Cache Entry Hash Table */ -static ace_t *ar_ce_hash_tbl[256]; +static ace_t *ar_ce_hash_tbl[ARP_HASH_SIZE]; static ace_t *ar_ce_mask_entries; /* proto_mask not all ones */ -static mblk_t *ar_timer_mp; /* garbage collection timer */ -static queue_t *ar_timer_queue; /* queue for garbage collection */ - /* * Note that all routines which need to queue the message for later * processing have to be ioctl_aware to be able to queue the complete message. @@ -318,6 +334,16 @@ static queue_t *ar_timer_queue; /* queue for garbage collection */ #define ARF_IOCTL_AWARE 0x1 /* Arp command can come down as M_IOCTL */ #define ARF_ONLY_CMD 0x2 /* Command is exclusive to ARP */ +/* ARP Cmd Table entry */ +typedef struct arct_s { + int (*arct_pfi)(queue_t *, mblk_t *); + uint32_t arct_cmd; + int arct_min_len; + uint32_t arct_flags; + int arct_priv_req; /* Privilege required for this cmd */ + const char *arct_txt; +} arct_t; + static arct_t ar_cmd_tbl[] = { { ar_entry_add, AR_ENTRY_ADD, sizeof (area_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_ENTRY_ADD" }, @@ -327,10 +353,6 @@ static arct_t ar_cmd_tbl[] = { ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_QUERY" }, { ar_entry_squery, AR_ENTRY_SQUERY, sizeof (area_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_NP, "AR_ENTRY_SQUERY" }, - { ar_xmit_request, AR_XMIT_REQUEST, sizeof (areq_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_XMIT_REQUEST" }, - { ar_xmit_response, AR_XMIT_RESPONSE, sizeof (areq_t), - ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_XMIT_RESPONSE" }, { ar_mapping_add, AR_MAPPING_ADD, sizeof (arma_t), ARF_IOCTL_AWARE | ARF_ONLY_CMD, OP_CONFIG, "AR_MAPPING_ADD" }, { ar_interface_up, AR_INTERFACE_UP, sizeof (arc_t), @@ -372,7 +394,7 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, if ((flags & ~ACE_EXTERNAL_FLAGS_MASK) || arl == NULL) return (EINVAL); if (flags & ACE_F_MYADDR) - flags |= ACE_F_PUBLISH; + flags |= ACE_F_PUBLISH | ACE_F_AUTHORITY; if (!hw_addr && hw_addr_len == 0) { if (flags == ACE_F_PERMANENT) { /* Not publish */ @@ -398,6 +420,17 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, return (EINVAL); if (!proto_extract_mask && (flags & ACE_F_MAPPING)) return (EINVAL); + + /* + * If the underlying link doesn't have reliable up/down notification or + * if we're working with the IPv4 169.254.0.0/16 Link Local Address + * space, then don't use the fast timers. Otherwise, use them. + */ + if (arl->arl_notifies && + !(proto == IP_ARP_PROTO_TYPE && IS_IPV4_LL_SPACE(proto_addr))) { + flags |= ACE_F_FAST; + } + /* * Allocate the timer block to hold the ace. * (ace + proto_addr + proto_addr_mask + proto_extract_mask + hw_addr) @@ -425,15 +458,15 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, * subnet structure, if, for example, there are BSD4.2 systems lurking. */ ace->ace_proto_mask = dst; - if (proto_mask) { + if (proto_mask != NULL) { bcopy(proto_mask, dst, proto_addr_len); dst += proto_addr_len; } else { - while (proto_addr_len--) + while (proto_addr_len-- > 0) *dst++ = (uchar_t)~0; } - if (proto_extract_mask) { + if (proto_extract_mask != NULL) { ace->ace_proto_extract_mask = dst; bcopy(proto_extract_mask, dst, ace->ace_proto_addr_length); dst += ace->ace_proto_addr_length; @@ -443,21 +476,22 @@ ar_ce_create(arl_t *arl, uint_t proto, uchar_t *hw_addr, uint_t hw_addr_len, ace->ace_hw_extract_start = hw_extract_start; ace->ace_hw_addr_length = hw_addr_len; ace->ace_hw_addr = dst; - if (hw_addr) { + if (hw_addr != NULL) { bcopy(hw_addr, dst, hw_addr_len); dst += hw_addr_len; } ace->ace_arl = arl; ace->ace_flags = flags; - ace->ace_publish_count = arp_publish_count; + if (ar_mask_all_ones(ace->ace_proto_mask, ace->ace_proto_addr_length)) { acep = ar_ce_hash(ace->ace_proto, ace->ace_proto_addr, ace->ace_proto_addr_length); - } else + } else { acep = &ar_ce_mask_entries; - if ((ace->ace_next = *acep) != 0) + } + if ((ace->ace_next = *acep) != NULL) ace->ace_next->ace_ptpn = &ace->ace_next; *acep = ace; ace->ace_ptpn = acep; @@ -488,9 +522,9 @@ ar_ce_delete(ace_t *ace) * that is going away. */ static void -ar_ce_delete_per_arl(ace_t *ace, arl_t *arl) +ar_ce_delete_per_arl(ace_t *ace, void *arl) { - if (ace != NULL && ace->ace_arl == arl) { + if (ace->ace_arl == arl) { ace->ace_flags &= ~ACE_F_PERMANENT; ar_ce_delete(ace); } @@ -498,9 +532,10 @@ ar_ce_delete_per_arl(ace_t *ace, arl_t *arl) /* Cache entry hash routine, based on protocol and protocol address. */ static ace_t ** -ar_ce_hash(uint32_t proto, uchar_t *proto_addr, uint32_t proto_addr_length) +ar_ce_hash(uint32_t proto, const uchar_t *proto_addr, + uint32_t proto_addr_length) { - uchar_t *up = proto_addr; + const uchar_t *up = proto_addr; unsigned int hval = proto; int len = proto_addr_length; @@ -647,194 +682,170 @@ ar_ce_lookup_permanent(uint32_t proto, uchar_t *proto_addr, } /* - * Pass a cache report back out via NDD. - * TODO: Right now this report assumes IP proto address formatting. - */ -/* ARGSUSED */ -static int -ar_ce_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *cr) -{ - (void) mi_mpprintf(mp, - "ifname proto addr proto mask hardware addr flags"); - /* abcdefgh xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx xx:xx:xx:xx:xx:xx */ - ar_ce_walk((pfi_t)ar_ce_report1, mp); - return (0); -} - -/* - * Add a single line to the ARP Cache Entry Report. - * TODO: Right now this report assumes IP proto address formatting. + * ar_ce_resolve is called when a response comes in to an outstanding request. + * Returns 'true' if the address has changed and we need to tell the client. + * (We don't need to tell the client if there's still an outstanding query.) */ -static void -ar_ce_report1(ace_t *ace, uchar_t *mp_arg) +static boolean_t +ar_ce_resolve(ace_t *ace, const uchar_t *hw_addr, uint32_t hw_addr_length) { - static uchar_t zero_array[8]; - uint32_t flags = ace->ace_flags; - mblk_t *mp = (mblk_t *)mp_arg; - uchar_t *p = ace->ace_proto_addr; - uchar_t *h = ace->ace_hw_addr; - uchar_t *m = ace->ace_proto_mask; - const char *name = "unknown"; - - if (ace->ace_arl != NULL) - name = ace->ace_arl->arl_name; - if (p == NULL) - p = zero_array; - if (h == NULL) - h = zero_array; - if (m == NULL) - m = zero_array; - (void) mi_mpprintf(mp, - "%8s %03d.%03d.%03d.%03d " - "%03d.%03d.%03d.%03d %02x:%02x:%02x:%02x:%02x:%02x", - name, - p[0] & 0xFF, p[1] & 0xFF, p[2] & 0xFF, p[3] & 0xFF, - m[0] & 0xFF, m[1] & 0xFF, m[2] & 0xFF, m[3] & 0xFF, - h[0] & 0xFF, h[1] & 0xFF, h[2] & 0xFF, h[3] & 0xFF, - h[4] & 0xFF, h[5] & 0xFF); - if (flags & ACE_F_PERMANENT) - (void) mi_mpprintf_nr(mp, " PERM"); - if (flags & ACE_F_PUBLISH) - (void) mi_mpprintf_nr(mp, " PUBLISH"); - if (flags & ACE_F_DYING) - (void) mi_mpprintf_nr(mp, " DYING"); - if (!(flags & ACE_F_RESOLVED)) - (void) mi_mpprintf_nr(mp, " UNRESOLVED"); - if (flags & ACE_F_MAPPING) - (void) mi_mpprintf_nr(mp, " MAPPING"); - if (flags & ACE_F_MYADDR) - (void) mi_mpprintf_nr(mp, " MYADDR"); -} + boolean_t hwchanged; -/* - * ar_ce_resolve is called when a response comes in to an outstanding - * request. - */ -static void -ar_ce_resolve(ace_t *ace, uchar_t *hw_addr, uint32_t hw_addr_length) -{ if (hw_addr_length == ace->ace_hw_addr_length) { - if (ace->ace_hw_addr) + ASSERT(ace->ace_hw_addr != NULL); + hwchanged = bcmp(hw_addr, ace->ace_hw_addr, + hw_addr_length) != 0; + if (hwchanged) bcopy(hw_addr, ace->ace_hw_addr, hw_addr_length); /* - * ar_query_reply() blows away soft entries. - * Do not call it unless something is waiting. + * No need to bother with ar_query_reply if no queries are + * waiting. */ ace->ace_flags |= ACE_F_RESOLVED; - if (ace->ace_query_mp) + if (ace->ace_query_mp != NULL) ar_query_reply(ace, 0, NULL, (uint32_t)0); + else if (hwchanged) + return (B_TRUE); } + return (B_FALSE); } /* * There are 2 functions performed by this function. * 1. Resolution of unresolved entries and update of resolved entries. - * 2. Detection of hosts with (duplicate) our own IP address + * 2. Detection of nodes with our own IP address (duplicates). + * + * This is complicated by ill groups. We don't currently have knowledge of ill + * groups, so we can't distinguish between a packet that comes in on one of the + * arls that's part of the group versus one that's on an unrelated arl. Thus, + * we take a conservative approach. If the arls match, then we update resolved + * and unresolved entries alike. If they don't match, then we update only + * unresolved entries. * - * Resolution of unresolved entries and update of resolved entries. + * For all entries, we first check to see if this is a duplicate (probable + * loopback) message. If so, then just ignore it. * - * case A. The packet has been received on the same interface as this ace's - * arl. We blindly call ar_ce_resolve(). The relevant checks for duplicate - * detection (ACE_F_MYADDR) and trying to update published entries have - * already happened in ar_rput(). Both resolved and unresolved entries are - * updated now. This allows a published entry to be updated by an arp - * request, from the node for which we are a proxy arp server, as for eg. - * when a mobile node returns home. + * Next, check to see if the entry has completed DAD. If not, then we've + * failed, because someone is already using the address. Notify IP of the DAD + * failure and remove the broken ace. * - * case B. The interface on which the packet arrived does not match the - * ace's arl. In this case we update only unresolved entries. - * Look whether we have an unresolved entry for src_paddr and if so - * resolve it. We need to look at all the aces that matches the - * src_haddr because with ill groups we could have unresolved ace - * across the whole group. As we don't have knowledge of groups, - * look across all of them. Note that this logic does not update published - * arp entries, as for eg. when we proxy arp across 2 subnets with - * differing subnet masks. + * Next, we check if we're the authority for this address. If so, then it's + * time to defend it, because the other node is a duplicate. Report it as a + * 'bogon' and let IP decide how to defend. * - * Detection of hosts with (duplicate) our own IP address. + * Finally, if it's unresolved or if the arls match, we just update the MAC + * address. This allows a published 'static' entry to be updated by an ARP + * request from the node for which we're a proxy ARP server -- e.g., when a + * mobile node returns home. If the address has changed, then tell IP. * - * case A is handled in ar_rput(). case B is handled here. We return AR_BOGON, - * if we detect duplicate, and caller will send BOGON message to IP. - * If hme0 and hme1 are in a IPMP group. hme1 will receive broadcast arp - * packets sent from hme0. Both IP address and Hardware address of the - * packet match the ace. So we return AR_LOOPBACK. + * Note that this logic does not update published ARP entries for mismatched + * arls, as for example when we proxy arp across 2 subnets with differing + * subnet masks. * * Return Values below */ -#define AR_NORMAL 1 /* Usual return value. */ -#define AR_LOOPBACK 2 /* Our own broadcast arp packet was received */ -#define AR_BOGON 3 /* Another host has our IP addr. */ +#define AR_NOTFOUND 1 /* No matching ace found in cache */ +#define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */ +#define AR_LOOPBACK 3 /* Our own arp packet was received */ +#define AR_BOGON 4 /* Another host has our IP addr. */ +#define AR_FAILED 5 /* Duplicate Address Detection has failed */ +#define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ static int -ar_ce_resolve_all(arl_t *arl, uint32_t proto, uchar_t *src_haddr, - uint32_t hlen, uchar_t *src_paddr, uint32_t plen) +ar_ce_resolve_all(arl_t *arl, uint32_t proto, const uchar_t *src_haddr, + uint32_t hlen, const uchar_t *src_paddr, uint32_t plen) { ace_t *ace; ace_t *ace_next; + int i1; + const uchar_t *paddr; + uchar_t *ace_addr; + uchar_t *mask; + int retv = AR_NOTFOUND; ace = *ar_ce_hash(proto, src_paddr, plen); for (; ace != NULL; ace = ace_next) { + /* ar_ce_resolve may delete the ace; fetch next pointer now */ ace_next = ace->ace_next; - if (ace->ace_proto_addr_length == plen && - ace->ace_proto == proto) { - int i1 = plen; - uchar_t *ace_addr = ace->ace_proto_addr; - uchar_t *mask = ace->ace_proto_mask; + if (ace->ace_proto_addr_length != plen || + ace->ace_proto != proto) { + continue; + } - /* - * Note that the ace_proto_mask is applied to the - * proto_addr before comparing to the ace_addr. - */ - do { - if (--i1 < 0) { - /* - * Limit updating across other - * ills to unresolved entries only. - * We don't want to inadvertently - * update published entries or our - * own entries. - */ - if ((ace->ace_arl == arl) || - (!ACE_RESOLVED(ace))) { - ar_ce_resolve(ace, src_haddr, hlen); - } else { - /* - * If both IP addr and hardware - * address match our's then this - * is a broadcast packet emitted by - * one of our interfaces, reflected - * by the switch, and received on - * another interface. We return - * AR_LOOPBACK. If only IP addr. - * matches our's then some other node - * is using our IP addr, return - * AR_BOGON. - */ - if (ace->ace_flags & ACE_F_MYADDR) { - if (bcmp(ace->ace_hw_addr, - src_haddr, - ace->ace_hw_addr_length) != 0) { - return (AR_BOGON); - } else { - return (AR_LOOPBACK); - } - - } - } + /* + * Note that the ace_proto_mask is applied to the proto_addr + * before comparing to the ace_addr. + */ + paddr = src_paddr; + i1 = plen; + ace_addr = ace->ace_proto_addr; + mask = ace->ace_proto_mask; + while (--i1 >= 0) { + if ((*paddr++ & *mask++) != *ace_addr++) break; - } - } while ((src_paddr[i1] & mask[i1]) == ace_addr[i1]); + } + if (i1 >= 0) + continue; + + /* + * If both IP addr and hardware address match what we already + * have, then this is a broadcast packet emitted by one of our + * interfaces, reflected by the switch and received on another + * interface. We return AR_LOOPBACK. + */ + if ((ace->ace_flags & ACE_F_MYADDR) && + hlen == ace->ace_hw_addr_length && + bcmp(ace->ace_hw_addr, src_haddr, + ace->ace_hw_addr_length) == 0) { + return (AR_LOOPBACK); + } + + /* + * If the entry is unverified, then we've just verified that + * someone else already owns this address, because this is a + * message with the same protocol address but different + * hardware address. + */ + if (ace->ace_flags & ACE_F_UNVERIFIED) { + ar_ce_delete(ace); + return (AR_FAILED); + } + + /* + * If the IP address matches ours and we're authoritative for + * this entry, then some other node is using our IP addr, so + * return AR_BOGON. Also reset the transmit count to zero so + * that, if we're currently in initial announcement mode, we + * switch back to the lazier defense mode. Knowing that + * there's at least one duplicate out there, we ought not + * blindly announce. + */ + if (ace->ace_flags & ACE_F_AUTHORITY) { + ace->ace_xmit_count = 0; + return (AR_BOGON); + } + + /* + * Limit updating across other ills to unresolved + * entries only. We don't want to inadvertently update + * published entries. + */ + if (ace->ace_arl == arl || !ACE_RESOLVED(ace)) { + if (ar_ce_resolve(ace, src_haddr, hlen)) + retv = AR_CHANGED; + else if (retv == AR_NOTFOUND) + retv = AR_MERGED; } } - return (AR_NORMAL); + return (retv); } /* Pass arg1 to the pfi supplied, along with each ace in existence. */ static void -ar_ce_walk(pfi_t pfi, void *arg1) +ar_ce_walk(void (*pfi)(ace_t *, void *), void *arg1) { ace_t *ace; ace_t *ace1; @@ -870,7 +881,7 @@ ar_cleanup(void) * DEV (i.e. ARL). */ static void -ar_client_notify(arl_t *arl, mblk_t *mp, int code) +ar_client_notify(const arl_t *arl, mblk_t *mp, int code) { ar_t *ar = ((ar_t *)arl->arl_rq->q_ptr)->ar_arl_ip_assoc; arcn_t *arcn; @@ -904,6 +915,39 @@ ar_client_notify(arl_t *arl, mblk_t *mp, int code) putnext(ar->ar_wq, mp1); } +/* + * Send a delete-notify message down to IP. We've determined that IP doesn't + * have a cache entry for the IP address itself, but it may have other cache + * entries with the same hardware address, and we don't want to see those grow + * stale. (The alternative is sending down updates for every ARP message we + * get that doesn't match an existing ace. That's much more expensive than an + * occasional delete and reload.) + */ +static void +ar_delete_notify(const ace_t *ace) +{ + const arl_t *arl = ace->ace_arl; + mblk_t *mp; + size_t len; + arh_t *arh; + + len = sizeof (*arh) + 2 * ace->ace_proto_addr_length; + mp = allocb(len, BPRI_MED); + if (mp == NULL) + return; + arh = (arh_t *)mp->b_rptr; + mp->b_wptr = (uchar_t *)arh + len; + U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware); + U16_TO_BE16(ace->ace_proto, arh->arh_proto); + arh->arh_hlen = 0; + arh->arh_plen = ace->ace_proto_addr_length; + U16_TO_BE16(ARP_RESPONSE, arh->arh_operation); + bcopy(ace->ace_proto_addr, arh + 1, ace->ace_proto_addr_length); + bcopy(ace->ace_proto_addr, (uchar_t *)(arh + 1) + + ace->ace_proto_addr_length, ace->ace_proto_addr_length); + ar_client_notify(arl, mp, AR_CN_ANNOUNCE); +} + /* ARP module close routine. */ static int ar_close(queue_t *q) @@ -926,7 +970,7 @@ ar_close(queue_t *q) * an ack. This helps to make sure that messages * that are currently being sent up by IP are not lost. */ - if (MODULE_BELOW_IS_IP(q)) { + if (ar->ar_on_ill_stream) { mp1 = allocb(sizeof (arc_t), BPRI_MED); if (mp1 != NULL) { DB_TYPE(mp1) = M_CTL; @@ -963,7 +1007,7 @@ ar_close(queue_t *q) * If this is the control stream for an arl, delete anything * hanging off our arl. */ - ar_ce_walk((pfi_t)ar_ce_delete_per_arl, arl); + ar_ce_walk(ar_ce_delete_per_arl, arl); /* Free any messages waiting for a bind_ack */ /* Get the arl out of the chain. */ for (arlp = &arl_g_head; arlp[0]; arlp = &arlp[0]->arl_next) { @@ -984,21 +1028,6 @@ ar_close(queue_t *q) ar->ar_arl_ip_assoc->ar_arl_ip_assoc = NULL; ar->ar_arl_ip_assoc = NULL; } - if (WR(q) == ar_timer_queue) { - /* We were using this one for the garbage collection timer. */ - for (arl = arl_g_head; arl; arl = arl->arl_next) - if (arl->arl_rq != q) - break; - if (arl) { - ar_timer_queue = arl->arl_wq; - /* Ask mi_timer to switch to the new queue. */ - mi_timer(ar_timer_queue, ar_timer_mp, -2); - } else { - mi_timer_free(ar_timer_mp); - ar_timer_mp = NULL; - ar_timer_queue = NULL; - } - } cr = ar->ar_credp; /* mi_close_comm frees the instance data. */ (void) mi_close_comm(&ar_g_head, q); @@ -1067,7 +1096,8 @@ ar_cmd_dispatch(queue_t *q, mblk_t *mp_orig) if (arct->arct_flags & ARF_IOCTL_AWARE) mp = mp_orig; - arp2dbg(("ar_cmd_dispatch: %s\n", arct->arct_txt)); + DTRACE_PROBE3(cmd_dispatch, queue_t *, q, mblk_t *, mp, + arct_t *, arct); return (*arct->arct_pfi)(q, mp); } @@ -1104,31 +1134,25 @@ ar_dlpi_comm(t_uscalar_t prim, size_t size) static void ar_dlpi_send(arl_t *arl, mblk_t *mp) { - mblk_t **mpp; - union DL_primitives *dlp; - ASSERT(arl != NULL); - ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); - dlp = (union DL_primitives *)mp->b_rptr; if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { + mblk_t **mpp; + /* Must queue message. Tail insertion */ mpp = &arl->arl_dlpi_deferred; while (*mpp != NULL) mpp = &((*mpp)->b_next); - - arp1dbg(("ar_dlpi_send: deferring DLPI message arl %p %x\n", - (void *)arl, dlp->dl_primitive)); - *mpp = mp; + + DTRACE_PROBE2(dlpi_defer, arl_t *, arl, mblk_t *, mp); return; } - arp1dbg(("ar_dlpi_send: sending DLPI message arl %p %x\n", (void *)arl, - dlp->dl_primitive)); - - arl->arl_dlpi_pending = dlp->dl_primitive; + arl->arl_dlpi_pending = + ((union DL_primitives *)mp->b_rptr)->dl_primitive; + DTRACE_PROBE2(dlpi_send, arl_t *, arl, mblk_t *, mp); putnext(arl->arl_wq, mp); } @@ -1141,16 +1165,16 @@ ar_dlpi_send(arl_t *arl, mblk_t *mp) static void ar_dlpi_done(arl_t *arl, t_uscalar_t prim) { - mblk_t *mp; - union DL_primitives *dlp; + mblk_t *mp; if (arl->arl_dlpi_pending != prim) { - arp0dbg(("ar_dlpi_done: spurious response arl %p\n", - (void *)arl)); + DTRACE_PROBE2(dlpi_done_unexpected, arl_t *, arl, + t_uscalar_t, prim); return; } if ((mp = arl->arl_dlpi_deferred) == NULL) { + DTRACE_PROBE2(dlpi_done_idle, arl_t *, arl, t_uscalar_t, prim); arl->arl_dlpi_pending = DL_PRIM_INVAL; ar_cmd_done(arl); return; @@ -1160,12 +1184,10 @@ ar_dlpi_done(arl_t *arl, t_uscalar_t prim) mp->b_next = NULL; ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); - dlp = (union DL_primitives *)mp->b_rptr; - arp1dbg(("ar_dlpi_done: sending DLPI message arl %p %x\n", - (void *)arl, dlp->dl_primitive)); - - arl->arl_dlpi_pending = dlp->dl_primitive; + arl->arl_dlpi_pending = + ((union DL_primitives *)mp->b_rptr)->dl_primitive; + DTRACE_PROBE2(dlpi_done_next, arl_t *, arl, mblk_t *, mp); putnext(arl->arl_wq, mp); } @@ -1268,8 +1290,8 @@ ar_cmd_done(arl_t *arl) done: if (dlpi_op_done_mp != NULL) { - arp1dbg(("ar_dlpi_done: ardlpiopdone arl %p to q %p err %d\n", - (void *)arl, (void *)dlpi_op_done_q, err)); + DTRACE_PROBE3(cmd_done_next, arl_t *, arl, + queue_t *, dlpi_op_done_q, mblk_t *, dlpi_op_done_mp); putnext(dlpi_op_done_q, dlpi_op_done_mp); } } @@ -1295,9 +1317,6 @@ static void ar_cmd_enqueue(arl_t *arl, mblk_t *mp, queue_t *q, ushort_t cmd, boolean_t tail_insert) { - arp1dbg(("ar_cmd_enqueue: arl %p from q %p cmd %d \n", (void *)arl, - (void *)q, cmd)); - mp->b_queue = q; if (arl->arl_queue == NULL) { ASSERT(arl->arl_queue_tail == NULL); @@ -1336,6 +1355,38 @@ ar_cmd_dequeue(arl_t *arl) } /* + * Standard ACE timer handling: compute 'fuzz' around a central value or from 0 + * up to a value, and then set the timer. The randomization is necessary to + * prevent groups of systems from falling into synchronization on the network + * and producing ARP packet storms. + */ +static void +ace_set_timer(ace_t *ace, boolean_t initial_time) +{ + clock_t intv, rnd, frac; + + (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd)); + /* Note that clock_t is signed; must chop off bits */ + rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1; + intv = ace->ace_xmit_interval; + if (initial_time) { + /* Set intv to be anywhere in the [1 .. intv] range */ + if (intv <= 0) + intv = 1; + else + intv = (rnd % intv) + 1; + } else { + /* Compute 'frac' as 20% of the configured interval */ + if ((frac = intv / 5) <= 1) + frac = 2; + /* Set intv randomly in the range [intv-frac .. intv+frac] */ + if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0) + intv = 1; + } + mi_timer(ace->ace_arl->arl_wq, ace->ace_mp, intv); +} + +/* * Process entry add requests from external messages. * It is also called by ip_rput_dlpi_writer() through * ipif_resolver_up() to change hardware address when @@ -1355,6 +1406,8 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) arl_t *arl; mblk_t *mp = mp_orig; int err; + uint_t aflags; + boolean_t unverified; /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) @@ -1366,16 +1419,32 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_add: enqueue cmd on q %p \n", (void *)q)); + DTRACE_PROBE3(eadd_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_ADD, B_TRUE); return (EINPROGRESS); } mp_orig->b_prev = NULL; area = (area_t *)mp->b_rptr; - /* If this is a replacement, ditch the original. */ - if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != 0) + aflags = area->area_flags; + + /* + * If this is a replacement, ditch the original, but remember the + * duplicate address detection state. If it's a new entry, then we're + * obligated to do duplicate address detection now. + */ + if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_entry)) != NULL) { + unverified = (ace->ace_flags & ACE_F_UNVERIFIED) != 0; ar_ce_delete(ace); + } else { + unverified = (aflags & ACE_F_PUBLISH) != 0; + } + + /* Allow client to request DAD restart */ + if (aflags & ACE_F_UNVERIFIED) + unverified = B_TRUE; + /* Extract parameters from the message. */ hw_addr_len = area->area_hw_addr_length; hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len); @@ -1384,29 +1453,31 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) proto_addr_len); proto_mask = mi_offset_paramc(mp, area->area_proto_mask_offset, proto_addr_len); - if (!proto_mask) + if (proto_mask == NULL) { + DTRACE_PROBE2(eadd_bad_mask, arl_t *, arl, area_t *, area); return (EINVAL); + } err = ar_ce_create( arl, - area->area_proto, - hw_addr, - hw_addr_len, - proto_addr, - proto_addr_len, - proto_mask, - NULL, - (uint32_t)0, - area->area_flags & ~ACE_F_MAPPING); - if (err) + area->area_proto, + hw_addr, + hw_addr_len, + proto_addr, + proto_addr_len, + proto_mask, + NULL, + (uint32_t)0, + aflags & ~ACE_F_MAPPING & ~ACE_F_UNVERIFIED & ~ACE_F_DEFEND); + if (err != 0) { + DTRACE_PROBE3(eadd_create_failed, arl_t *, arl, area_t *, area, + int, err); return (err); - if (area->area_flags & ACE_F_PUBLISH) { - /* - * Transmit an arp request for this address to flush stale - * information froma arp caches. - */ + } + + if (aflags & ACE_F_PUBLISH) { if (hw_addr == NULL || hw_addr_len == 0) { hw_addr = arl->arl_hw_addr; - } else if (area->area_flags & ACE_F_MYADDR) { + } else if (aflags & ACE_F_MYADDR) { /* * If hardware address changes, then make sure * that the hardware address and hardware @@ -1422,23 +1493,79 @@ ar_entry_add(queue_t *q, mblk_t *mp_orig) ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len); ASSERT(ace != NULL); - ar_xmit(arl, ARP_REQUEST, area->area_proto, proto_addr_len, - hw_addr, proto_addr, arl->arl_arp_addr, - proto_addr); + + if (ace->ace_flags & ACE_F_FAST) { + ace->ace_xmit_count = arp_fastprobe_count; + ace->ace_xmit_interval = arp_fastprobe_delay; + } else { + ace->ace_xmit_count = arp_probe_count; + ace->ace_xmit_interval = arp_probe_delay; + } + + /* + * If the user has disabled duplicate address detection for + * this kind of interface (fast or slow) by setting the probe + * count to zero, then pretend as if we've verified the + * address, and go right to address defense mode. + */ + if (ace->ace_xmit_count == 0) + unverified = B_FALSE; /* - * If MYADDR is set - it is not a proxy arp entry. In that - * case we send more than one copy, so that if this is - * a case of failover, we send out multiple entries in case - * the switch is very slow. + * If we need to do duplicate address detection, then kick that + * off. Otherwise, send out a gratuitous ARP message in order + * to update everyone's caches with the new hardware address. */ - if ((area->area_flags & ACE_F_MYADDR) && - ace->ace_publish_count != 0 && arp_publish_interval != 0) { - /* Account for the xmit we just did */ - ace->ace_publish_count--; - if (ace->ace_publish_count != 0) { - mi_timer(arl->arl_wq, ace->ace_mp, - arp_publish_interval); + if (unverified) { + ace->ace_flags |= ACE_F_UNVERIFIED; + if (ace->ace_xmit_interval == 0) { + /* + * User has configured us to send the first + * probe right away. Do so, and set up for + * the subsequent probes. + */ + DTRACE_PROBE2(eadd_probe, ace_t *, ace, + area_t *, area); + ar_xmit(arl, ARP_REQUEST, area->area_proto, + proto_addr_len, hw_addr, NULL, NULL, + proto_addr, NULL); + ace->ace_xmit_count--; + ace->ace_xmit_interval = + (ace->ace_flags & ACE_F_FAST) ? + arp_fastprobe_interval : + arp_probe_interval; + ace_set_timer(ace, B_FALSE); + } else { + DTRACE_PROBE2(eadd_delay, ace_t *, ace, + area_t *, area); + /* Regular delay before initial probe */ + ace_set_timer(ace, B_TRUE); + } + } else { + DTRACE_PROBE2(eadd_announce, ace_t *, ace, + area_t *, area); + ar_xmit(arl, ARP_REQUEST, area->area_proto, + proto_addr_len, hw_addr, proto_addr, + arl->arl_arp_addr, proto_addr, NULL); + ace->ace_last_bcast = ddi_get_lbolt(); + + /* + * If AUTHORITY is set, it is not just a proxy arp + * entry; we believe we're the authority for this + * entry. In that case, and if we're not just doing + * one-off defense of the address, we send more than + * one copy, so that if this is an IPMP failover, we'll + * still have a good chance of updating everyone even + * when there's a packet loss or two. + */ + if ((aflags & ACE_F_AUTHORITY) && + !(aflags & ACE_F_DEFEND) && + arp_publish_count > 0) { + /* Account for the xmit we just did */ + ace->ace_xmit_count = arp_publish_count - 1; + ace->ace_xmit_interval = arp_publish_interval; + if (ace->ace_xmit_count > 0) + ace_set_timer(ace, B_FALSE); } } } @@ -1463,7 +1590,8 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_delete: enqueue on q %p\n", (void *)q)); + DTRACE_PROBE3(edel_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_DELETE, B_TRUE); return (EINPROGRESS); } @@ -1474,7 +1602,13 @@ ar_entry_delete(queue_t *q, mblk_t *mp_orig) * match first. */ ace = ar_ce_lookup_from_area(mp, ar_ce_lookup); - if (ace) { + if (ace != NULL) { + /* + * If it's a permanent entry, then the client is the one who + * told us to delete it, so there's no reason to notify. + */ + if (ACE_NONPERM(ace)) + ar_delete_notify(ace); ar_ce_delete(ace); return (0); } @@ -1511,6 +1645,7 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) } arl = ar_ll_lookup_from_mp(mp); if (arl == NULL) { + DTRACE_PROBE2(query_no_arl, queue_t *, q, mblk_t *, mp); err = EINVAL; goto err_ret; } @@ -1518,7 +1653,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_query: enqueue on q %p\n", (void *)q)); + DTRACE_PROBE3(query_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_QUERY, B_TRUE); return (EINPROGRESS); } @@ -1528,7 +1664,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) proto_addr_len = areq->areq_target_addr_length; proto_addr = mi_offset_paramc(mp, areq->areq_target_addr_offset, proto_addr_len); - if (proto_addr == 0) { + if (proto_addr == NULL) { + DTRACE_PROBE1(query_illegal_address, areq_t *, areq); err = EINVAL; goto err_ret; } @@ -1538,9 +1675,22 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) if (areq->areq_xmit_interval == 0) areq->areq_xmit_interval = AR_DEF_XMIT_INTERVAL; ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len); - if (ace) { + if (ace != NULL && (ace->ace_flags & ACE_F_OLD)) { + /* + * This is a potentially stale entry that IP's asking about. + * Since IP is asking, it must not have an answer anymore, + * either due to periodic ARP flush or due to SO_DONTROUTE. + * Rather than go forward with what we've got, restart + * resolution. + */ + DTRACE_PROBE2(query_stale_ace, ace_t *, ace, areq_t *, areq); + ar_ce_delete(ace); + ace = NULL; + } + if (ace != NULL) { mblk_t **mpp; uint32_t count = 0; + /* * There is already a cache entry. This means there is either * a permanent entry, or address resolution is in progress. @@ -1550,6 +1700,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) */ for (mpp = &ace->ace_query_mp; mpp[0]; mpp = &mpp[0]->b_next) { if (++count > areq->areq_max_buffered) { + DTRACE_PROBE2(query_overflow, ace_t *, ace, + areq_t *, areq); mp->b_prev = NULL; err = EALREADY; goto err_ret; @@ -1562,6 +1714,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) * If a query was already queued up, then we must not * have an answer yet. */ + DTRACE_PROBE2(query_in_progress, ace_t *, ace, + areq_t *, areq); return (EINPROGRESS); } if (ACE_RESOLVED(ace)) { @@ -1572,6 +1726,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) */ mblk_t *mp1; + DTRACE_PROBE2(query_resolved, ace_t *, ace, + areq_t *, areq); mp1 = dupmsg(mp); ar_query_reply(ace, 0, proto_addr, proto_addr_len); freemsg(mp1); @@ -1579,22 +1735,28 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) } if (ace->ace_flags & ACE_F_MAPPING) { /* Should never happen */ - arp0dbg(("ar_entry_query: unresolved mapping\n")); + DTRACE_PROBE2(query_unresolved_mapping, ace_t *, ace, + areq_t *, areq); mpp[0] = mp->b_next; err = ENXIO; goto err_ret; } if (arl->arl_xmit_template == NULL) { /* Can't get help if we don't know how. */ + DTRACE_PROBE2(query_no_template, ace_t *, ace, + areq_t *, areq); mpp[0] = NULL; mp->b_prev = NULL; err = ENXIO; goto err_ret; } + DTRACE_PROBE2(query_unresolved, ace_t, ace, areq_t *, areq); } else { /* No ace yet. Make one now. (This is the common case.) */ if (areq->areq_xmit_count == 0 || arl->arl_xmit_template == NULL) { + DTRACE_PROBE2(query_template, arl_t *, arl, + areq_t *, areq); mp->b_prev = NULL; err = ENXIO; goto err_ret; @@ -1607,6 +1769,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) areq->areq_sender_addr_offset, areq->areq_sender_addr_length); if (sender_addr == NULL) { + DTRACE_PROBE2(query_no_sender, arl_t *, arl, + areq_t *, areq); mp->b_prev = NULL; err = EINVAL; goto err_ret; @@ -1615,14 +1779,18 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) proto_addr, proto_addr_len, NULL, NULL, (uint32_t)0, areq->areq_flags); - if (err) { + if (err != 0) { + DTRACE_PROBE3(query_create_failed, arl_t *, arl, + areq_t *, areq, int, err); mp->b_prev = NULL; goto err_ret; } ace = ar_ce_lookup(arl, areq->areq_proto, proto_addr, proto_addr_len); - if (!ace || ace->ace_query_mp) { + if (ace == NULL || ace->ace_query_mp != NULL) { /* Shouldn't happen! */ + DTRACE_PROBE3(query_lookup_failed, arl_t *, arl, + areq_t *, areq, ace_t *, ace); mp->b_prev = NULL; err = ENXIO; goto err_ret; @@ -1637,10 +1805,8 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr, areq->areq_sender_addr_length); if (src_ace == NULL) { - printf("ar_entry_query: Could not find the ace for " - "source address %d.%d.%d.%d\n", - sender_addr[0], sender_addr[1], sender_addr[2], - sender_addr[3]); + DTRACE_PROBE3(query_source_missing, arl_t *, arl, + areq_t *, areq, ace_t *, ace); ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); /* * ar_query_reply has already freed the mp. @@ -1659,7 +1825,9 @@ ar_entry_query(queue_t *q, mblk_t *mp_orig) areq->areq_proto, proto_addr, proto_addr_len); if (dst_ace != NULL && ACE_RESOLVED(dst_ace)) { - ar_ce_resolve(ace, dst_ace->ace_hw_addr, + DTRACE_PROBE3(query_other_arl, arl_t *, arl, + areq_t *, areq, ace_t *, dst_ace); + (void) ar_ce_resolve(ace, dst_ace->ace_hw_addr, dst_ace->ace_hw_addr_length); return (EINPROGRESS); } @@ -1701,7 +1869,8 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_entry_squery: enqueue on q %p\n", (void *)q)); + DTRACE_PROBE3(squery_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_ENTRY_SQUERY, B_TRUE); return (EINPROGRESS); } @@ -1714,13 +1883,17 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) proto_addr_len); hw_addr_len = area->area_hw_addr_length; hw_addr = mi_offset_paramc(mp, area->area_hw_addr_offset, hw_addr_len); - if (!proto_addr || !hw_addr) + if (proto_addr == NULL || hw_addr == NULL) { + DTRACE_PROBE1(squery_illegal_address, area_t *, area); return (EINVAL); + } ace = ar_ce_lookup(arl, area->area_proto, proto_addr, proto_addr_len); - if (!ace) + if (ace == NULL) { return (ENXIO); - if (hw_addr_len < ace->ace_hw_addr_length) + } + if (hw_addr_len < ace->ace_hw_addr_length) { return (EINVAL); + } if (ACE_RESOLVED(ace)) { /* Got it, prepare the response. */ ASSERT(area->area_hw_addr_length == ace->ace_hw_addr_length); @@ -1736,8 +1909,9 @@ ar_entry_squery(queue_t *q, mblk_t *mp_orig) if (mp == mp_orig) { /* Non-ioctl case */ /* TODO: change message type? */ - arp1dbg(("ar_entry_squery: qreply\n")); DB_TYPE(mp) = M_CTL; /* Caught by ip_wput */ + DTRACE_PROBE3(squery_reply, queue_t *, q, mblk_t *, mp, + arl_t *, arl); qreply(q, mp); return (EINPROGRESS); } @@ -1751,10 +1925,9 @@ ar_interface_down(queue_t *q, mblk_t *mp) { arl_t *arl; - arp1dbg(("ar_interface_down q %p\n", (void *)q)); arl = ar_ll_lookup_from_mp(mp); - if ((arl == NULL) || (arl->arl_closing)) { - arp1dbg(("ar_interface_down: no arl q %p \n", (void *)q)); + if (arl == NULL || arl->arl_closing) { + DTRACE_PROBE2(down_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); } @@ -1762,6 +1935,8 @@ ar_interface_down(queue_t *q, mblk_t *mp) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp, arl)) { + DTRACE_PROBE3(down_enqueued, queue_t *, q, mblk_t *, mp, + arl_t *, arl); ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_DOWN, B_TRUE); return (EINPROGRESS); } @@ -1784,7 +1959,7 @@ ar_interface_down(queue_t *q, mblk_t *mp) ASSERT(arl->arl_state == ARL_S_UP); /* Free all arp entries for this interface */ - ar_ce_walk((pfi_t)ar_ce_delete_per_arl, arl); + ar_ce_walk(ar_ce_delete_per_arl, arl); ar_ll_down(arl); /* Return EINPROGRESS so that ar_rput does not free the 'mp' */ @@ -1801,10 +1976,9 @@ ar_interface_up(queue_t *q, mblk_t *mp) int err; mblk_t *mp1; - arp1dbg(("ar_interface_up q %p\n", (void *)q)); arl = ar_ll_lookup_from_mp(mp); - if ((arl == NULL) || (arl->arl_closing)) { - arp1dbg(("ar_interface_up: no arl %p\n", (void *)q)); + if (arl == NULL || arl->arl_closing) { + DTRACE_PROBE2(up_no_arl, queue_t *, q, mblk_t *, mp); err = EINVAL; goto done; } @@ -1813,6 +1987,8 @@ ar_interface_up(queue_t *q, mblk_t *mp) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp, arl)) { + DTRACE_PROBE3(up_enqueued, queue_t *, q, mblk_t *, mp, + arl_t *, arl); ar_cmd_enqueue(arl, mp, q, AR_INTERFACE_UP, B_TRUE); return (EINPROGRESS); } @@ -1843,9 +2019,10 @@ done: mp1 = ar_alloc(AR_DLPIOP_DONE, err); if (mp1 != NULL) { - arp1dbg(("ar_interface_up: send resp err %d q %p\n", - err, (void *)q)); - putnext(WR(q), mp1); + q = WR(q); + DTRACE_PROBE3(up_send_err, queue_t *, q, mblk_t *, mp1, + int, err); + putnext(q, mp1); } return (err); } @@ -1860,13 +2037,13 @@ ar_interface_on(queue_t *q, mblk_t *mp) { arl_t *arl; - arp1dbg(("ar_interface_on\n")); arl = ar_ll_lookup_from_mp(mp); if (arl == NULL) { - arp1dbg(("ar_interface_on: no arl\n")); + DTRACE_PROBE2(on_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); } /* Turn off the IFF_NOARP flag and activate ARP */ + DTRACE_PROBE3(on_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl); arl->arl_flags = 0; return (0); } @@ -1881,13 +2058,13 @@ ar_interface_off(queue_t *q, mblk_t *mp) { arl_t *arl; - arp1dbg(("ar_interface_off\n")); arl = ar_ll_lookup_from_mp(mp); if (arl == NULL) { - arp1dbg(("ar_interface_off: no arl\n")); + DTRACE_PROBE2(off_no_arl, queue_t *, q, mblk_t *, mp); return (EINVAL); } /* Turn on the IFF_NOARP flag and deactivate ARP */ + DTRACE_PROBE3(off_intf, queue_t *, q, mblk_t *, mp, arl_t *, arl); arl->arl_flags = ARL_F_NOARP; return (0); } @@ -1978,6 +2155,7 @@ ar_ll_init(ar_t *ar, mblk_t *mp) arl->arl_wq = ar->ar_wq; arl->arl_dlpi_pending = DL_PRIM_INVAL; + arl->arl_link_up = B_TRUE; ar->ar_arl = arl; } @@ -2127,8 +2305,6 @@ ar_ll_down(arl_t *arl) mblk_t *mp; ar_t *ar; - arp1dbg(("ar_ll_down arl %p\n", (void *)arl)); - ASSERT(arl->arl_state == ARL_S_UP); /* Let's break the association between an ARL and IP instance */ @@ -2163,8 +2339,7 @@ ar_ll_up(arl_t *arl) mblk_t *detach_mp = NULL; mblk_t *unbind_mp = NULL; mblk_t *info_mp = NULL; - - arp1dbg(("ar_ll_up arl %p \n", (void *)arl)); + mblk_t *notify_mp = NULL; ASSERT(arl->arl_state == ARL_S_DOWN); @@ -2197,6 +2372,12 @@ ar_ll_up(arl_t *arl) if (unbind_mp == NULL) goto bad; + notify_mp = ar_dlpi_comm(DL_NOTIFY_REQ, sizeof (dl_notify_req_t)); + if (notify_mp == NULL) + goto bad; + ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications = + DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN; + arl->arl_state = ARL_S_PENDING; if (arl->arl_provider_style == DL_STYLE2) { ar_dlpi_send(arl, attach_mp); @@ -2206,18 +2387,16 @@ ar_ll_up(arl_t *arl) ar_dlpi_send(arl, info_mp); ar_dlpi_send(arl, bind_mp); arl->arl_unbind_mp = unbind_mp; + ar_dlpi_send(arl, notify_mp); return (0); + bad: - if (attach_mp != NULL) - freemsg(attach_mp); - if (bind_mp != NULL) - freemsg(bind_mp); - if (detach_mp != NULL) - freemsg(detach_mp); - if (unbind_mp != NULL) - freemsg(unbind_mp); - if (info_mp != NULL) - freemsg(info_mp); + freemsg(attach_mp); + freemsg(bind_mp); + freemsg(detach_mp); + freemsg(unbind_mp); + freemsg(info_mp); + freemsg(notify_mp); return (ENOMEM); } @@ -2237,7 +2416,6 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) uint32_t hw_extract_start; arl_t *arl; - arp1dbg(("ar_mapping_add\n")); /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; @@ -2248,14 +2426,15 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) * Newly received commands from clients go to the tail of the queue. */ if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_mapping_add: enqueue on %p q\n", (void *)q)); + DTRACE_PROBE3(madd_enqueued, queue_t *, q, mblk_t *, mp_orig, + arl_t *, arl); ar_cmd_enqueue(arl, mp_orig, q, AR_MAPPING_ADD, B_TRUE); return (EINPROGRESS); } mp_orig->b_prev = NULL; arma = (arma_t *)mp->b_rptr; - if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != 0) + if ((ace = ar_ce_lookup_from_area(mp, ar_ce_lookup_mapping)) != NULL) ar_ce_delete(ace); hw_addr_len = arma->arma_hw_addr_length; hw_addr = mi_offset_paramc(mp, arma->arma_hw_addr_offset, hw_addr_len); @@ -2267,8 +2446,8 @@ ar_mapping_add(queue_t *q, mblk_t *mp_orig) proto_extract_mask = mi_offset_paramc(mp, arma->arma_proto_extract_mask_offset, proto_addr_len); hw_extract_start = arma->arma_hw_mapping_start; - if (!proto_mask || !proto_extract_mask) { - arp0dbg(("ar_mapping_add: not masks\n")); + if (proto_mask == NULL || proto_extract_mask == NULL) { + DTRACE_PROBE2(madd_illegal_mask, arl_t *, arl, arpa_t *, arma); return (EINVAL); } return (ar_ce_create( @@ -2327,6 +2506,7 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) ar_t *ar; int err; queue_t *tmp_q; + mblk_t *mp; TRACE_1(TR_FAC_ARP, TR_ARP_OPEN, "arp_open: q %p", q); @@ -2335,10 +2515,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) return (0); } /* Load up the Named Dispatch tables, if not already done. */ - if (!ar_g_nd && - (!nd_load(&ar_g_nd, "arp_cache_report", ar_ce_report, NULL, - NULL) || - !ar_param_register(arp_param_arr, A_CNT(arp_param_arr)))) { + if (ar_g_nd == NULL && + !ar_param_register(arp_param_arr, A_CNT(arp_param_arr))) { ar_cleanup(); return (ENOMEM); } @@ -2362,8 +2540,6 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) crhold(credp); ar->ar_credp = credp; - if (!ar_timer_mp) - ar_timer_init(q); /* * Probe for the DLPI info if we are not pushed on IP. Wait for * the reply. In case of error call ar_close() which will take @@ -2371,6 +2547,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * as freeing the arl, restarting the timer on a different queue etc. */ if (strcmp(q->q_next->q_qinfo->qi_minfo->mi_idname, "ip") == 0) { + arc_t *arc; + /* * We are pushed directly on top of IP. There is no need to * send down a DL_INFO_REQ. Return success. This could @@ -2378,7 +2556,25 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * or a stream corresponding to an open of /dev/arp * (i.e. <arp-IP> stream). Note that we don't support * pushing some module in between arp and IP. + * + * Tell IP, though, that we're an extended implementation, so + * it knows to expect a DAD response after bringing an + * interface up. Old ATM drivers won't do this, and IP will + * just bring the interface up immediately. */ + ar->ar_on_ill_stream = (q->q_next->q_next != NULL); + if (!ar->ar_on_ill_stream) + return (0); + mp = allocb(sizeof (arc_t), BPRI_MED); + if (mp == NULL) { + (void) ar_close(RD(q)); + return (ENOMEM); + } + DB_TYPE(mp) = M_CTL; + arc = (arc_t *)mp->b_rptr; + mp->b_wptr = mp->b_rptr + sizeof (arc_t); + arc->arc_cmd = AR_ARP_EXTEND; + putnext(q, mp); return (0); } tmp_q = q; @@ -2390,8 +2586,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) if (strcmp(tmp_q->q_qinfo->qi_minfo->mi_idname, "ip") == 0) { /* - * We don't support pushing ARP arbitrarily on an - * IP driver stream. ARP has to be pushed directly above IP + * We don't support pushing ARP arbitrarily on an IP driver + * stream. ARP has to be pushed directly above IP. */ (void) ar_close(RD(q)); return (ENOTSUP); @@ -2400,8 +2596,8 @@ ar_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) * Send down a DL_INFO_REQ so we can find out what we are * talking to. */ - mblk_t *mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t)); - if (!mp) { + mp = ar_dlpi_comm(DL_INFO_REQ, sizeof (dl_info_req_t)); + if (mp == NULL) { (void) ar_close(RD(q)); return (ENOMEM); } @@ -2547,19 +2743,18 @@ ar_plink_send(queue_t *q, mblk_t *mp) * ar_ce_walk routine to delete any outstanding queries for an ar that is * going away. */ -static int -ar_query_delete(ace_t *ace, uchar_t *ar) +static void +ar_query_delete(ace_t *ace, void *arg) { + ar_t *ar = arg; mblk_t **mpp = &ace->ace_query_mp; - mblk_t *mp = mpp[0]; + mblk_t *mp; - if (!mp) - return (0); - do { + while ((mp = *mpp) != NULL) { /* The response queue was stored in the query b_prev. */ - if ((queue_t *)mp->b_prev == ((ar_t *)ar)->ar_wq || - (queue_t *)mp->b_prev == ((ar_t *)ar)->ar_rq) { - mpp[0] = mp->b_next; + if ((queue_t *)mp->b_prev == ar->ar_wq || + (queue_t *)mp->b_prev == ar->ar_rq) { + *mpp = mp->b_next; if (DB_TYPE(mp) == M_PROTO && *(uint32_t *)mp->b_rptr == AR_ENTRY_QUERY) { BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed); @@ -2568,8 +2763,7 @@ ar_query_delete(ace_t *ace, uchar_t *ar) } else { mpp = &mp->b_next; } - } while ((mp = mpp[0]) != 0); - return (0); + } } /* @@ -2614,11 +2808,11 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, } /* Complete the response based on how the request arrived. */ if (DB_TYPE(mp) == M_IOCTL) { - struct iocblk *ioc = - (struct iocblk *)mp->b_rptr; + struct iocblk *ioc = (struct iocblk *)mp->b_rptr; + ioc->ioc_error = ret_val; - DB_TYPE(mp) = M_IOCACK; if (ret_val != 0) { + DB_TYPE(mp) = M_IOCNAK; ioc->ioc_count = 0; putnext(q, mp); continue; @@ -2627,6 +2821,7 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, * Return the xmit template out with the successful * IOCTL. */ + DB_TYPE(mp) = M_IOCACK; ioc->ioc_count = template->b_wptr - template->b_rptr; /* Remove the areq mblk from the IOCTL. */ areq_mp = mp->b_cont; @@ -2680,12 +2875,23 @@ ar_query_reply(ace_t *ace, int ret_val, uchar_t *proto_addr, mp->b_cont = template; putnext(q, mp); } + /* - * Unless we are responding from a permanent cache entry, delete - * the ace. + * Unless we are responding from a permanent cache entry, start the + * cleanup timer or (on error) delete the entry. */ if (!(ace->ace_flags & (ACE_F_PERMANENT | ACE_F_DYING))) { - ar_ce_delete(ace); + if (!ACE_RESOLVED(ace) || arl->arl_xmit_template == NULL) { + /* + * No need to notify IP here, because the entry was + * never resolved, so IP can't have any cached copies + * of the address. + */ + ar_ce_delete(ace); + } else { + mi_timer(arl->arl_wq, ace->ace_mp, + arp_cleanup_interval); + } } } @@ -2726,10 +2932,26 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace) src_ace = ar_ce_lookup_permanent(areq->areq_proto, sender_addr, areq->areq_sender_addr_length); if (src_ace == NULL) { - printf("ar_query_xmit: Could not find the ace\n"); + DTRACE_PROBE3(xmit_no_source, ace_t *, ace, + areq_t *, areq, uchar_t *, sender_addr); return (0); } } + + /* + * If we haven't yet finished duplicate address checking on this source + * address, then do *not* use it on the wire. Doing so will corrupt + * the world's caches. Just allow the timer to restart. Note that + * duplicate address checking will eventually complete one way or the + * other, so this cannot go on "forever." + */ + if (src_ace->ace_flags & ACE_F_UNVERIFIED) { + DTRACE_PROBE2(xmit_source_unverified, ace_t *, ace, + ace_t *, src_ace); + areq->areq_xmit_count++; + return (areq->areq_xmit_interval); + } + /* * Transmit on src_arl. We should transmit on src_arl. Otherwise * the switch will send back a copy on other interfaces of the @@ -2737,9 +2959,12 @@ ar_query_xmit(ace_t *ace, ace_t *src_ace) * address + hardware address, ARP will treat this as a bogon. */ src_arl = src_ace->ace_arl; + DTRACE_PROBE3(xmit_send, ace_t *, ace, ace_t *, src_ace, + areq_t *, areq); ar_xmit(src_arl, ARP_REQUEST, areq->areq_proto, areq->areq_sender_addr_length, src_arl->arl_hw_addr, sender_addr, - src_arl->arl_arp_addr, proto_addr); + src_arl->arl_arp_addr, proto_addr, NULL); + src_ace->ace_last_bcast = ddi_get_lbolt(); return (areq->areq_xmit_interval); } @@ -2758,11 +2983,10 @@ ar_rput(queue_t *q, mblk_t *mp) int op; uint32_t plen; uint32_t proto; - ace_t *src_ace; uchar_t *src_haddr; uchar_t *src_paddr; - dl_unitdata_ind_t *dlui; - boolean_t hwaddr_changed = B_TRUE; + boolean_t is_probe; + int i; TRACE_1(TR_FAC_ARP, TR_ARP_RPUT_START, "arp_rput_start: q %p", q); @@ -2817,34 +3041,36 @@ ar_rput(queue_t *q, mblk_t *mp) return; case M_PCPROTO: case M_PROTO: + if (MBLKL(mp) >= sizeof (dl_unitdata_ind_t) && + ((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive == + DL_UNITDATA_IND) { + arl = ((ar_t *)q->q_ptr)->ar_arl; + if (arl != NULL) { + /* Real messages from the wire! */ + break; + } + putnext(q, mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "default"); + return; + } err = ar_cmd_dispatch(q, mp); switch (err) { case ENOENT: + /* Miscellaneous DLPI messages get shuffled off. */ + ar_rput_dlpi(q, mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "proto/dlpi"); break; case EINPROGRESS: TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "proto"); - return; + break; default: inet_freemsg(mp); - return; - } - if ((mp->b_wptr - mp->b_rptr) < sizeof (dl_unitdata_ind_t) || - ((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive - != DL_UNITDATA_IND) { - /* Miscellaneous DLPI messages get shuffled off. */ - ar_rput_dlpi(q, mp); - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "proto/dlpi"); - return; - } - /* DL_UNITDATA_IND */ - arl = ((ar_t *)q->q_ptr)->ar_arl; - if (arl != NULL) { - /* Real messages from the wire! */ break; } - /* FALLTHRU */ + return; default: putnext(q, mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, @@ -2867,15 +3093,14 @@ ar_rput(queue_t *q, mblk_t *mp) * followed by an ARP packet. We do some initial checks and then * get to work. */ - dlui = (dl_unitdata_ind_t *)mp->b_rptr; mp1 = mp->b_cont; - if (!mp1) { + if (mp1 == NULL) { freemsg(mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "baddlpi"); return; } - if (!OK_32PTR(mp1->b_rptr) || mp1->b_cont) { + if (mp1->b_cont != NULL) { /* No fooling around with funny messages. */ if (!pullupmsg(mp1, -1)) { freemsg(mp); @@ -2885,22 +3110,33 @@ ar_rput(queue_t *q, mblk_t *mp) } } arh = (arh_t *)mp1->b_rptr; - hlen = (uint32_t)arh->arh_hlen & 0xFF; - plen = (uint32_t)arh->arh_plen & 0xFF; - if ((mp1->b_wptr - mp1->b_rptr) - < (ARH_FIXED_LEN + hlen + hlen + plen + plen)) { + hlen = arh->arh_hlen; + plen = arh->arh_plen; + if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) { freemsg(mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "short"); return; } - if (hlen == 0 || plen == 0) { - arp1dbg(("ar_rput: bogus arh\n")); + /* + * hlen 0 is used for RFC 1868 UnARP. + * + * Note that the rest of the code checks that hlen is what we expect + * for this hardware address type, so might as well discard packets + * here that don't match. + */ + if ((hlen > 0 && hlen != arl->arl_hw_addr_length) || plen == 0) { + DTRACE_PROBE2(rput_bogus, arl_t *, arl, mblk_t *, mp1); freemsg(mp); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, "arp_rput_end: q %p (%S)", q, "hlenzero/plenzero"); return; } + /* + * Historically, Solaris has been lenient about hardware type numbers. + * We should check here, but don't. + */ + DTRACE_PROBE2(rput_normal, arl_t *, arl, arh_t *, arh); proto = (uint32_t)BE16_TO_U16(arh->arh_proto); src_haddr = (uchar_t *)arh; src_haddr = &src_haddr[ARH_FIXED_LEN]; @@ -2908,191 +3144,255 @@ ar_rput(queue_t *q, mblk_t *mp) dst_paddr = &src_haddr[hlen + plen + hlen]; op = BE16_TO_U16(arh->arh_operation); - /* Now see if we have a cache entry for the source address. */ - src_ace = ar_ce_lookup_entry(arl, proto, src_paddr, plen); + /* Determine if this is just a probe */ + for (i = 0; i < plen; i++) + if (src_paddr[i] != 0) + break; + is_probe = i >= plen; + /* - * If so, and it is the entry for one of our IP addresses, - * we really don't expect to see this packet, so pretend we didn't. - * Tell IP that we received a bogon. - * - * If is a "published" (proxy arp) entry we can receive requests - * FROM the node but we should never see an ARP_RESPONSE. In this case - * we process the response but also inform IP. + * RFC 826: first check if the <protocol, sender protocol address> is + * in the cache, if there is a sender protocol address. Note that this + * step also handles resolutions based on source. */ - if (src_ace) { - if (src_ace->ace_flags & ACE_F_MYADDR) { - freeb(mp); - ar_client_notify(arl, mp1, AR_CN_BOGON); - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "pubentry"); - return; - } - if ((src_ace->ace_flags & ACE_F_PUBLISH) && - op == ARP_RESPONSE) { - mblk_t *mp2; - - mp2 = copymsg(mp1); - if (mp2 != NULL) - ar_client_notify(arl, mp2, AR_CN_BOGON); - } - if (src_ace->ace_hw_addr_length == hlen && - bcmp(src_ace->ace_hw_addr, src_haddr, hlen) == 0) { - hwaddr_changed = B_FALSE; - } + if (is_probe) + err = AR_NOTFOUND; + else + err = ar_ce_resolve_all(arl, proto, src_haddr, hlen, src_paddr, + plen); + switch (err) { + case AR_BOGON: + ar_client_notify(arl, mp1, AR_CN_BOGON); + mp1 = NULL; + break; + case AR_FAILED: + ar_client_notify(arl, mp1, AR_CN_FAILED); + mp1 = NULL; + break; + case AR_LOOPBACK: + DTRACE_PROBE2(rput_loopback, arl_t *, arl, arh_t *, arh); + freemsg(mp1); + mp1 = NULL; + break; } - switch (op) { - case ARP_REQUEST: - /* - * If we know the answer, and it is "published", send out - * the response. - */ - dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen); - if (dst_ace && (dst_ace->ace_flags & ACE_F_PUBLISH) && - ACE_RESOLVED(dst_ace)) { - ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen, - dst_ace->ace_hw_addr, dst_ace->ace_proto_addr, - src_haddr, src_paddr); - } + if (mp1 == NULL) { + freeb(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "unneeded"); + return; + } + + /* + * Now look up the destination address. By RFC 826, we ignore the + * packet at this step if the target isn't one of our addresses. This + * is true even if the target is something we're trying to resolve and + * the packet is a response. + * + * Note that in order to do this correctly, we need to know when to + * notify IP of a change implied by the source address of the ARP + * message. That implies that the local ARP table has entries for all + * of the resolved entries cached in the client. This is why we must + * notify IP when we delete a resolved entry and we know that IP may + * have cached answers. + */ + dst_ace = ar_ce_lookup_entry(arl, proto, dst_paddr, plen); + if (dst_ace == NULL || !ACE_RESOLVED(dst_ace) || + !(dst_ace->ace_flags & ACE_F_PUBLISH)) { /* - * Now fall through to the response side, and add a cache entry - * for the sender so we will have it when we need it. + * Let the client know if the source mapping has changed, even + * if the destination provides no useful information for the + * client. */ - /* FALLTHRU */ - case ARP_RESPONSE: + if (err == AR_CHANGED) + ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); + else + freemsg(mp1); + freeb(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "nottarget"); + return; + } + + /* + * If the target is unverified by DAD, then one of two things is true: + * either it's someone else claiming this address (on a probe or an + * announcement) or it's just a regular request. The former is + * failure, but a regular request is not. + */ + if (dst_ace->ace_flags & ACE_F_UNVERIFIED) { /* - * With ill groups, we need to look for request across - * all the ills in the group. The request itself may - * not be queued on this arl. See ar_query_xmit() for - * details. + * Check for a reflection. Some misbehaving bridges will + * reflect our own transmitted packets back to us. */ - err = ar_ce_resolve_all(arl, proto, src_haddr, hlen, - src_paddr, plen); - if (err == AR_BOGON) { - /* - * Some other host has our IP address. Send a - * BOGON message to IP. - */ + if (hlen == dst_ace->ace_hw_addr_length && + bcmp(src_haddr, dst_ace->ace_hw_addr, hlen) == 0) { + DTRACE_PROBE3(rput_probe_reflected, arl_t *, arl, + arh_t *, arh, ace_t *, dst_ace); freeb(mp); - ar_client_notify(arl, mp1, AR_CN_BOGON); + freemsg(mp1); TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "pubentry"); + "arp_rput_end: q %p (%S)", q, "reflection"); return; } + if (is_probe || op == ARP_RESPONSE) { + ar_client_notify(arl, mp1, AR_CN_FAILED); + ar_ce_delete(dst_ace); + } else if (err == AR_CHANGED) { + ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); + } else { + DTRACE_PROBE3(rput_request_unverified, arl_t *, arl, + arh_t *, arh, ace_t *, dst_ace); + freemsg(mp1); + } + freeb(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "unverified"); + return; + } + + /* + * If it's a request, then we reply to this, and if we think the + * sender's unknown, then we create an entry to avoid unnecessary ARPs. + * The design assumption is that someone ARPing us is likely to send us + * a packet soon, and that we'll want to reply to it. + */ + if (op == ARP_REQUEST) { + const uchar_t *dstaddr = src_haddr; + clock_t now; - if ((err != AR_LOOPBACK) && (src_ace == NULL)) { + /* + * This implements periodic address defense based on a modified + * version of the RFC 3927 requirements. Instead of sending a + * broadcasted reply every time, as demanded by the RFC, we + * send at most one broadcast reply per arp_broadcast_interval. + */ + now = ddi_get_lbolt(); + if ((now - dst_ace->ace_last_bcast) > + MSEC_TO_TICK(arp_broadcast_interval)) { + DTRACE_PROBE3(rput_bcast_reply, arl_t *, arl, + arh_t *, arh, ace_t *, dst_ace); + dst_ace->ace_last_bcast = now; + dstaddr = arl->arl_arp_addr; /* - * We may need this one sooner or later. The AR_LOOPBACK - * check above ensures, that we don't create arp - * entries for our own IP address, on another arl. + * If this is one of the long-suffering entries, then + * pull it out now. It no longer needs separate + * defense, because we're doing now that with this + * broadcasted reply. */ - (void) ar_ce_create(arl, proto, src_haddr, hlen, - src_paddr, plen, NULL, - NULL, (uint32_t)0, - (uint32_t)0); + dst_ace->ace_flags &= ~ACE_F_DELAYED; } - /* Let's see if this is a system ARPing itself. */ - do { - if (*src_paddr++ != *dst_paddr++) - break; - } while (--plen); - if (plen == 0) { - /* - * An ARP message with identical src and dst - * protocol addresses. This guy is trying to - * tell us something that our clients might - * find interesting.Essentially such packets are - * sent when a m/c comes up or changes its h/w - * address, so before notifying our client check the - * h/w address if there is a cache entry and notify - * the client only if the addresses differ. - */ - if (hwaddr_changed) { - freeb(mp); - ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); - } else { - /* Just discard it. */ - freemsg(mp); - } - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "duplicate"); - return; + ar_xmit(arl, ARP_RESPONSE, dst_ace->ace_proto, plen, + dst_ace->ace_hw_addr, dst_ace->ace_proto_addr, + src_haddr, src_paddr, dstaddr); + if (!is_probe && err == AR_NOTFOUND && + ar_ce_create(arl, proto, src_haddr, hlen, src_paddr, plen, + NULL, NULL, 0, 0) == 0) { + ace_t *ace; + + ace = ar_ce_lookup(arl, proto, src_paddr, plen); + ASSERT(ace != NULL); + mi_timer(arl->arl_wq, ace->ace_mp, + arp_cleanup_interval); } + } + if (err == AR_CHANGED) { + freeb(mp); + ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "reqchange"); + } else { + freemsg(mp); + TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, + "arp_rput_end: q %p (%S)", q, "end"); + } +} + +static void +ar_ce_restart_dad(ace_t *ace, void *arl) +{ + if ((ace->ace_arl == arl) && + (ace->ace_flags & (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) == + (ACE_F_UNVERIFIED|ACE_F_DAD_ABORTED)) { /* - * A broadcast response may also be interesting. + * Slight cheat here: we don't use the initial probe delay + * in this obscure case. */ - if (op == ARP_RESPONSE && dlui->dl_group_address) { - freeb(mp); - ar_client_notify(arl, mp1, AR_CN_ANNOUNCE); - return; + if (ace->ace_flags & ACE_F_FAST) { + ace->ace_xmit_count = arp_fastprobe_count; + ace->ace_xmit_interval = arp_fastprobe_interval; + } else { + ace->ace_xmit_count = arp_probe_count; + ace->ace_xmit_interval = arp_probe_interval; } - break; - default: - break; + ace->ace_flags &= ~ACE_F_DAD_ABORTED; + ace_set_timer(ace, B_FALSE); } - freemsg(mp); - TRACE_2(TR_FAC_ARP, TR_ARP_RPUT_END, - "arp_rput_end: q %p (%S)", q, "end"); } /* DLPI messages, other than DL_UNITDATA_IND are handled here. */ static void ar_rput_dlpi(queue_t *q, mblk_t *mp) { - ar_t *ar = (ar_t *)q->q_ptr; + ar_t *ar = q->q_ptr; arl_t *arl = ar->ar_arl; - dl_bind_ack_t *dlba; - dl_error_ack_t *dlea; - dl_ok_ack_t *dloa; - dl_uderror_ind_t *dluei; - char *err_str; + union DL_primitives *dlp; + const char *err_str; - if ((mp->b_wptr - mp->b_rptr) < sizeof (dloa->dl_primitive)) { + if (MBLKL(mp) < sizeof (dlp->dl_primitive)) { putnext(q, mp); return; } - dloa = (dl_ok_ack_t *)mp->b_rptr; - dlea = (dl_error_ack_t *)dloa; - switch (dloa->dl_primitive) { + dlp = (union DL_primitives *)mp->b_rptr; + switch (dlp->dl_primitive) { case DL_ERROR_ACK: - switch (dlea->dl_error_primitive) { + /* + * ce is confused about how DLPI works, so we have to interpret + * an "error" on DL_NOTIFY_ACK (which we never could have sent) + * as really meaning an error on DL_NOTIFY_REQ. + * + * Note that supporting DL_NOTIFY_REQ is optional, so printing + * out an error message on the console isn't warranted except + * for debug. + */ + if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK || + dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) { + ar_dlpi_done(arl, DL_NOTIFY_REQ); + freemsg(mp); + return; + } + err_str = dlpi_prim_str(dlp->error_ack.dl_error_primitive); + DTRACE_PROBE2(rput_dl_error, arl_t *, arl, + dl_error_ack_t *, &dlp->error_ack); + switch (dlp->error_ack.dl_error_primitive) { case DL_UNBIND_REQ: if (arl->arl_provider_style == DL_STYLE1) arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_UNBIND_REQ); - err_str = "DL_UNBIND_REQ"; break; case DL_DETACH_REQ: + case DL_BIND_REQ: arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_DETACH_REQ); - err_str = "DL_DETACH_REQ"; break; case DL_ATTACH_REQ: - ar_dlpi_done(arl, DL_ATTACH_REQ); - err_str = "DL_ATTACH_REQ"; - break; - case DL_BIND_REQ: - arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_BIND_REQ); - err_str = "DL_BIND_REQ"; break; default: - err_str = "?"; - break; + /* If it's anything else, we didn't send it. */ + putnext(q, mp); + return; } - arp0dbg(("ar_rput_dlpi: " - "%s (%d) failed, dl_errno %d, dl_unix_errno %d\n", - err_str, (int)dlea->dl_error_primitive, - (int)dlea->dl_errno, (int)dlea->dl_unix_errno)); + ar_dlpi_done(arl, dlp->error_ack.dl_error_primitive); (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, "ar_rput_dlpi: %s failed, dl_errno %d, dl_unix_errno %d", - err_str, dlea->dl_errno, dlea->dl_unix_errno); + err_str, dlp->error_ack.dl_errno, + dlp->error_ack.dl_unix_errno); break; case DL_INFO_ACK: /* * We have a response back from the driver. Go set up transmit * defaults. */ + DTRACE_PROBE2(rput_dl_info, arl_t *, arl, + dl_info_ack_t *, &dlp->info_ack); if (arl != NULL) { ar_ll_set_defaults(arl, mp); ar_dlpi_done(arl, DL_INFO_REQ); @@ -3103,48 +3403,75 @@ ar_rput_dlpi(queue_t *q, mblk_t *mp) qenable(WR(q)); break; case DL_OK_ACK: - arp1dbg(("ar_rput_dlpi: arl %p DL_OK_ACK for %d\n", - (void *)arl, dloa->dl_correct_primitive)); - switch (dloa->dl_correct_primitive) { + DTRACE_PROBE2(rput_dl_ok, arl_t *, arl, + dl_ok_ack_t *, &dlp->ok_ack); + switch (dlp->ok_ack.dl_correct_primitive) { case DL_UNBIND_REQ: if (arl->arl_provider_style == DL_STYLE1) arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_UNBIND_REQ); break; case DL_DETACH_REQ: arl->arl_state = ARL_S_DOWN; - ar_dlpi_done(arl, DL_DETACH_REQ); break; case DL_ATTACH_REQ: - ar_dlpi_done(arl, DL_ATTACH_REQ); break; + default: + putnext(q, mp); + return; } + ar_dlpi_done(arl, dlp->ok_ack.dl_correct_primitive); + break; + case DL_NOTIFY_ACK: + DTRACE_PROBE2(rput_dl_notify, arl_t *, arl, + dl_notify_ack_t *, &dlp->notify_ack); + /* + * We mostly care about interface-up transitions, as this is + * when we need to redo duplicate address detection. + */ + arl->arl_notifies = + (dlp->notify_ack.dl_notifications & DL_NOTE_LINK_UP) != 0; + ar_dlpi_done(arl, DL_NOTIFY_REQ); break; case DL_BIND_ACK: - arp1dbg(("ar_rput: DL_BIND_ACK arl %p\n", (void *)arl)); - dlba = (dl_bind_ack_t *)dloa; + DTRACE_PROBE2(rput_dl_bind, arl_t *, arl, + dl_bind_ack_t *, &dlp->bind_ack); if (arl->arl_sap_length < 0) - bcopy((char *)dlba + dlba->dl_addr_offset, + bcopy((char *)dlp + dlp->bind_ack.dl_addr_offset, arl->arl_hw_addr, arl->arl_hw_addr_length); else - bcopy((char *)dlba + dlba->dl_addr_offset + + bcopy((char *)dlp + dlp->bind_ack.dl_addr_offset + arl->arl_sap_length, arl->arl_hw_addr, arl->arl_hw_addr_length); arl->arl_state = ARL_S_UP; ar_dlpi_done(arl, DL_BIND_REQ); break; + case DL_NOTIFY_IND: + DTRACE_PROBE2(rput_dl_notify_ind, arl_t *, arl, + dl_notify_ind_t *, &dlp->notify_ind); + switch (dlp->notify_ind.dl_notification) { + case DL_NOTE_LINK_UP: + arl->arl_link_up = B_TRUE; + ar_ce_walk(ar_ce_restart_dad, arl); + break; + case DL_NOTE_LINK_DOWN: + arl->arl_link_up = B_FALSE; + break; + } + break; case DL_UDERROR_IND: - dluei = (dl_uderror_ind_t *)dloa; + DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl, + dl_uderror_ind_t *, &dlp->uderror_ind); (void) mi_strlog(q, 1, SL_ERROR | SL_TRACE, "ar_rput_dlpi: " "DL_UDERROR_IND, dl_dest_addr_length %d dl_errno %d", - dluei->dl_dest_addr_length, dluei->dl_errno); + dlp->uderror_ind.dl_dest_addr_length, + dlp->uderror_ind.dl_errno); putnext(q, mp); return; default: - arp1dbg(("ar_rput_dlpi: default, primitive %d\n", - (int)dloa->dl_primitive)); + DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, + union DL_primitives *, dlp); putnext(q, mp); return; } @@ -3158,14 +3485,12 @@ ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr, uchar_t *mask, *to; int len; - if (!ace->ace_hw_addr) - return; + ASSERT(ace->ace_hw_addr != NULL); bcopy(ace->ace_hw_addr, addrpos, ace->ace_hw_addr_length); if (ace->ace_flags & ACE_F_MAPPING && proto_addr != NULL && ace->ace_proto_extract_mask) { /* careful */ - arp1dbg(("ar_set_address: MAPPING\n")); len = MIN((int)ace->ace_hw_addr_length - ace->ace_hw_extract_start, proto_addr_len); @@ -3179,14 +3504,15 @@ ar_set_address(ace_t *ace, uchar_t *addrpos, uchar_t *proto_addr, static int ar_slifname(queue_t *q, mblk_t *mp_orig) { - ar_t *ar = (ar_t *)q->q_ptr; + ar_t *ar = q->q_ptr; arl_t *arl = ar->ar_arl; struct lifreq *lifr; mblk_t *mp = mp_orig; + arl_t *old_arl; + mblk_t *ioccpy; + struct iocblk *iocp; - arp1dbg(("ar_slifname\n")); - - if (MODULE_BELOW_IS_IP(q)) { + if (ar->ar_on_ill_stream) { /* * This command is for IP, since it is coming down * the <arp-IP-driver> stream. Return ENOENT so that @@ -3197,37 +3523,71 @@ ar_slifname(queue_t *q, mblk_t *mp_orig) /* We handle both M_IOCTL and M_PROTO messages */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - if (!q->q_next || arl == NULL) { + if (q->q_next == NULL || arl == NULL) { /* * If the interface was just opened and * the info ack has not yet come back from the driver */ - arp1dbg(("ar_slifname no arl - queued\n")); + DTRACE_PROBE2(slifname_no_arl, queue_t *, q, + mblk_t *, mp_orig); (void) putq(q, mp_orig); return (EINPROGRESS); } - if (arl->arl_name[0] != '\0') + + if (MBLKL(mp) < sizeof (struct lifreq)) { + DTRACE_PROBE2(slifname_malformed, queue_t *, q, + mblk_t *, mp); + } + + if (arl->arl_name[0] != '\0') { + DTRACE_PROBE1(slifname_already, arl_t *, arl); return (EALREADY); + } - lifr = (struct lifreq *)(mp->b_rptr); + lifr = (struct lifreq *)mp->b_rptr; - if (strlen(lifr->lifr_name) >= LIFNAMSIZ) + if (strlen(lifr->lifr_name) >= LIFNAMSIZ) { + DTRACE_PROBE2(slifname_bad_name, arl_t *, arl, + struct lifreq *, lifr); return (ENXIO); + } /* Check whether the name is already in use. */ - if (ar_ll_lookup_by_name(lifr->lifr_name)) { - arp1dbg(("ar_slifname: %s exists\n", lifr->lifr_name)); + + old_arl = ar_ll_lookup_by_name(lifr->lifr_name); + if (old_arl != NULL) { + DTRACE_PROBE2(slifname_exists, arl_t *, arl, arl_t *, old_arl); return (EEXIST); } + + /* Make a copy of the message so we can send it downstream. */ + if ((ioccpy = allocb(sizeof (struct iocblk), BPRI_MED)) == NULL || + (ioccpy->b_cont = copymsg(mp)) == NULL) { + if (ioccpy != NULL) + freeb(ioccpy); + return (ENOMEM); + } + (void) strlcpy(arl->arl_name, lifr->lifr_name, sizeof (arl->arl_name)); /* The ppa is sent down by ifconfig */ arl->arl_ppa = lifr->lifr_ppa; - arp1dbg(("ar_slifname: name is now %s, ppa %d\n", arl->arl_name, - arl->arl_ppa)); /* Chain in the new arl. */ arl->arl_next = arl_g_head; arl_g_head = arl; + DTRACE_PROBE1(slifname_set, arl_t *, arl); + + /* + * Send along a copy of the ioctl; this is just for hitbox. Use + * M_CTL to avoid confusing anyone else who might be listening. + */ + DB_TYPE(ioccpy) = M_CTL; + iocp = (struct iocblk *)ioccpy->b_rptr; + bzero(iocp, sizeof (*iocp)); + iocp->ioc_cmd = SIOCSLIFNAME; + iocp->ioc_count = msgsize(ioccpy->b_cont); + ioccpy->b_wptr = (uchar_t *)(iocp + 1); + putnext(arl->arl_wq, ioccpy); return (0); } @@ -3239,10 +3599,9 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) int ppa; char *cp; mblk_t *mp = mp_orig; + arl_t *old_arl; - arp1dbg(("ar_set_ppa\n")); - - if (MODULE_BELOW_IS_IP(q)) { + if (ar->ar_on_ill_stream) { /* * This command is for IP, since it is coming down * the <arp-IP-driver> stream. Return ENOENT so that @@ -3254,35 +3613,40 @@ ar_set_ppa(queue_t *q, mblk_t *mp_orig) /* We handle both M_IOCTL and M_PROTO messages. */ if (DB_TYPE(mp) == M_IOCTL) mp = mp->b_cont; - if (!q->q_next || arl == NULL) { + if (q->q_next == NULL || arl == NULL) { /* * If the interface was just opened and * the info ack has not yet come back from the driver. */ - arp1dbg(("ar_set_ppa: no arl - queued\n")); + DTRACE_PROBE2(setppa_no_arl, queue_t *, q, + mblk_t *, mp_orig); (void) putq(q, mp_orig); return (EINPROGRESS); } - if (arl->arl_name[0] != '\0') + if (arl->arl_name[0] != '\0') { + DTRACE_PROBE1(setppa_already, arl_t *, arl); return (EALREADY); + } do { q = q->q_next; - } while (q->q_next); + } while (q->q_next != NULL); cp = q->q_qinfo->qi_minfo->mi_idname; ppa = *(int *)(mp->b_rptr); (void) snprintf(arl->arl_name, sizeof (arl->arl_name), "%s%d", cp, ppa); - if (ar_ll_lookup_by_name(arl->arl_name) != NULL) { - arp1dbg(("ar_set_ppa: %s busy\n", arl->arl_name)); + + old_arl = ar_ll_lookup_by_name(arl->arl_name); + if (old_arl != NULL) { + DTRACE_PROBE2(setppa_exists, arl_t *, arl, arl_t *, old_arl); /* Make it a null string again */ arl->arl_name[0] = '\0'; return (EBUSY); } - arp1dbg(("ar_set_ppa: %d\n", ppa)); arl->arl_ppa = ppa; + DTRACE_PROBE1(setppa_done, arl_t *, arl); /* Chain in the new arl. */ arl->arl_next = arl_g_head; arl_g_head = arl; @@ -3357,10 +3721,8 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig) * this is an ipNetToMediaTable msg from IP that needs (unique) * arp cache entries appended... */ - if ((mpdata = mp->b_cont) == NULL) { - arp0dbg(("ar_snmp_msg: b_cont == NULL for MIB2_IP msg\n")); + if ((mpdata = mp->b_cont) == NULL) return (EINVAL); - } ar_snmp_hash_tbl = ar_create_snmp_hash(mpdata); @@ -3368,7 +3730,7 @@ ar_snmp_msg(queue_t *q, mblk_t *mp_orig) args.m2a_hashb = ar_snmp_hash_tbl; args.m2a_mpdata = NULL; args.m2a_mptail = NULL; - ar_ce_walk((pfi_t)ar_snmp_msg2, &args); + ar_ce_walk(ar_snmp_msg2, &args); mi_free(ar_snmp_hash_tbl); /* @@ -3478,7 +3840,7 @@ ar_snmp_msg2(ace_t *ace, void *arg) m2ap->m2a_mpdata = allocb(sizeof (mib2_ipNetToMediaEntry_t), BPRI_HI); if (m2ap->m2a_mpdata == NULL) { - arp1dbg(("ar_snmp_msg2:allocb failed\n")); + DTRACE_PROBE(snmp_allocb_failure); return; } } @@ -3498,30 +3860,6 @@ ar_snmp_msg2(ace_t *ace, void *arg) (char *)&ntme, sizeof (ntme)); } -/* Start up the garbage collection timer on the queue provided. */ -static void -ar_timer_init(queue_t *q) -{ - if (ar_timer_mp) - return; - ar_timer_mp = mi_timer_alloc(0); - if (!ar_timer_mp) - return; - ar_timer_queue = q; - mi_timer(ar_timer_queue, ar_timer_mp, arp_timer_interval); -} - -/* ar_ce_walk routine to trash all non-permanent resolved entries. */ -/* ARGSUSED */ -static int -ar_trash(ace_t *ace, uchar_t *arg) -{ - if ((ace->ace_flags & (ACE_F_RESOLVED|ACE_F_PERMANENT)) == - ACE_F_RESOLVED) - ar_ce_delete(ace); - return (0); -} - /* Write side put procedure. */ static void ar_wput(queue_t *q, mblk_t *mp) @@ -3579,11 +3917,14 @@ ar_wput(queue_t *q, mblk_t *mp) break; } ioc = (struct iocblk *)mp->b_rptr; - ioc->ioc_error = err; - if ((mp1 = mp->b_cont) != 0) - ioc->ioc_count = msgdsize(mp1); - else - ioc->ioc_count = 0; + if (err != 0) + ioc->ioc_error = err; + if (ioc->ioc_error != 0) { + DB_TYPE(mp) = M_IOCNAK; + freemsg(mp->b_cont); + mp->b_cont = NULL; + } + ioc->ioc_count = msgdsize(mp->b_cont); qreply(q, mp); TRACE_2(TR_FAC_ARP, TR_ARP_WPUT_END, "arp_wput_end: q %p (%S)", q, "ioctl"); @@ -3660,6 +4001,117 @@ ar_wput(queue_t *q, mblk_t *mp) "arp_wput_end: q %p (%S)", q, "end"); } +static boolean_t +arp_say_ready(ace_t *ace) +{ + mblk_t *mp; + arl_t *arl; + arh_t *arh; + uchar_t *cp; + + arl = ace->ace_arl; + mp = allocb(sizeof (*arh) + 2 * (arl->arl_hw_addr_length + + ace->ace_proto_addr_length), BPRI_MED); + if (mp == NULL) { + /* skip a beat on allocation trouble */ + ace->ace_xmit_count = 1; + ace_set_timer(ace, B_FALSE); + return (B_FALSE); + } + /* Tell IP address is now usable */ + arh = (arh_t *)mp->b_rptr; + U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware); + U16_TO_BE16(ace->ace_proto, arh->arh_proto); + arh->arh_hlen = arl->arl_hw_addr_length; + arh->arh_plen = ace->ace_proto_addr_length; + U16_TO_BE16(ARP_REQUEST, arh->arh_operation); + cp = (uchar_t *)(arh + 1); + bcopy(ace->ace_hw_addr, cp, arl->arl_hw_addr_length); + cp += arl->arl_hw_addr_length; + bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length); + cp += ace->ace_proto_addr_length; + bcopy(ace->ace_hw_addr, cp, arl->arl_hw_addr_length); + cp += arl->arl_hw_addr_length; + bcopy(ace->ace_proto_addr, cp, ace->ace_proto_addr_length); + cp += ace->ace_proto_addr_length; + mp->b_wptr = cp; + ar_client_notify(arl, mp, AR_CN_READY); + DTRACE_PROBE1(ready, ace_t *, ace); + return (B_TRUE); +} + +/* + * Pick the longest-waiting aces for defense. + */ +static void +ace_reschedule(ace_t *ace, void *arg) +{ + ace_resched_t *art = arg; + ace_t **aces; + ace_t **acemax; + ace_t *atemp; + + if (ace->ace_arl != art->art_arl) + return; + /* + * Only published entries that are ready for announcement are eligible. + */ + if ((ace->ace_flags & (ACE_F_PUBLISH | ACE_F_UNVERIFIED | ACE_F_DYING | + ACE_F_DELAYED)) != ACE_F_PUBLISH) { + return; + } + if (art->art_naces < ACE_RESCHED_LIST_LEN) { + art->art_aces[art->art_naces++] = ace; + } else { + aces = art->art_aces; + acemax = aces + ACE_RESCHED_LIST_LEN; + for (; aces < acemax; aces++) { + if ((*aces)->ace_last_bcast > ace->ace_last_bcast) { + atemp = *aces; + *aces = ace; + ace = atemp; + } + } + } +} + +/* + * Reschedule the ARP defense of any long-waiting ACEs. It's assumed that this + * doesn't happen very often (if at all), and thus it needn't be highly + * optimized. (Note, though, that it's actually O(N) complexity, because the + * outer loop is bounded by a constant rather than by the length of the list.) + */ +static void +arl_reschedule(arl_t *arl) +{ + ace_resched_t art; + int i; + ace_t *ace; + + i = arl->arl_defend_count; + arl->arl_defend_count = 0; + /* If none could be sitting around, then don't reschedule */ + if (i < arp_defend_rate) { + DTRACE_PROBE1(reschedule_none, arl_t *, arl); + return; + } + art.art_arl = arl; + while (arl->arl_defend_count < arp_defend_rate) { + art.art_naces = 0; + ar_ce_walk(ace_reschedule, &art); + for (i = 0; i < art.art_naces; i++) { + ace = art.art_aces[i]; + ace->ace_flags |= ACE_F_DELAYED; + ace_set_timer(ace, B_FALSE); + if (++arl->arl_defend_count >= arp_defend_rate) + break; + } + if (art.art_naces < ACE_RESCHED_LIST_LEN) + break; + } + DTRACE_PROBE1(reschedule, arl_t *, arl); +} + /* * Write side service routine. The only action here is delivery of transmit * timer events and delayed messages while waiting for the info_ack (ar_arl @@ -3668,8 +4120,9 @@ ar_wput(queue_t *q, mblk_t *mp) static void ar_wsrv(queue_t *q) { - ace_t *ace; - mblk_t *mp; + ace_t *ace; + arl_t *arl; + mblk_t *mp; clock_t ms; TRACE_1(TR_FAC_ARP, TR_ARP_WSRV_START, @@ -3680,39 +4133,135 @@ ar_wsrv(queue_t *q) case M_PCSIG: if (!mi_timer_valid(mp)) continue; - if (mp == ar_timer_mp) { - /* Garbage collection time. */ - ar_ce_walk(ar_trash, NULL); - mi_timer(ar_timer_queue, ar_timer_mp, - arp_timer_interval); + ace = (ace_t *)mp->b_rptr; + if (ace->ace_flags & ACE_F_DYING) continue; + arl = ace->ace_arl; + if (ace->ace_flags & ACE_F_UNVERIFIED) { + ASSERT(ace->ace_flags & ACE_F_PUBLISH); + ASSERT(ace->ace_query_mp == NULL); + /* + * If the link is down, give up for now. IP + * will give us the go-ahead to try again when + * the link restarts. + */ + if (!arl->arl_link_up) { + DTRACE_PROBE1(timer_link_down, + ace_t *, ace); + ace->ace_flags |= ACE_F_DAD_ABORTED; + continue; + } + if (ace->ace_xmit_count > 0) { + DTRACE_PROBE1(timer_probe, + ace_t *, ace); + ace->ace_xmit_count--; + ar_xmit(arl, ARP_REQUEST, + ace->ace_proto, + ace->ace_proto_addr_length, + ace->ace_hw_addr, NULL, NULL, + ace->ace_proto_addr, NULL); + ace_set_timer(ace, B_FALSE); + continue; + } + if (!arp_say_ready(ace)) + continue; + DTRACE_PROBE1(timer_ready, ace_t *, ace); + ace->ace_xmit_interval = arp_publish_interval; + ace->ace_xmit_count = arp_publish_count; + if (ace->ace_xmit_count == 0) + ace->ace_xmit_count++; + ace->ace_flags &= ~ACE_F_UNVERIFIED; } - ace = (ace_t *)mp->b_rptr; - if (ace->ace_flags & (ACE_F_PUBLISH | ACE_F_MYADDR)) { + if (ace->ace_flags & ACE_F_PUBLISH) { + clock_t now; + + /* + * If an hour has passed, then free up the + * entries that need defense by rescheduling + * them. + */ + now = ddi_get_lbolt(); + if (arp_defend_rate > 0 && + now - arl->arl_defend_start > + SEC_TO_TICK(arp_defend_period)) { + arl->arl_defend_start = now; + arl_reschedule(arl); + } /* * Finish the job that we started in - * ar_entry_add. + * ar_entry_add. When we get to zero + * announcement retransmits left, switch to + * address defense. */ ASSERT(ace->ace_query_mp == NULL); - ASSERT(ace->ace_publish_count != 0); - ace->ace_publish_count--; - ar_xmit(ace->ace_arl, ARP_REQUEST, + if (ace->ace_xmit_count > 0) { + ace->ace_xmit_count--; + DTRACE_PROBE1(timer_announce, + ace_t *, ace); + } else if (ace->ace_flags & ACE_F_DELAYED) { + /* + * This guy was rescheduled as one of + * the really old entries needing + * on-going defense. Let him through + * now. + */ + DTRACE_PROBE1(timer_send_delayed, + ace_t *, ace); + ace->ace_flags &= ~ACE_F_DELAYED; + } else if (arp_defend_rate > 0 && + (arl->arl_defend_count >= arp_defend_rate || + ++arl->arl_defend_count >= + arp_defend_rate)) { + /* + * If we're no longer allowed to send + * unbidden defense messages, then just + * wait for rescheduling. + */ + DTRACE_PROBE1(timer_excess_defense, + ace_t *, ace); + ace_set_timer(ace, B_FALSE); + continue; + } else { + DTRACE_PROBE1(timer_defend, + ace_t *, ace); + } + ar_xmit(arl, ARP_REQUEST, ace->ace_proto, ace->ace_proto_addr_length, ace->ace_hw_addr, ace->ace_proto_addr, - ace->ace_arl->arl_arp_addr, - ace->ace_proto_addr); - if (ace->ace_publish_count != 0 && - arp_publish_interval != 0) { - mi_timer(ace->ace_arl->arl_wq, - ace->ace_mp, - arp_publish_interval); - } + arl->arl_arp_addr, + ace->ace_proto_addr, NULL); + ace->ace_last_bcast = now; + if (ace->ace_xmit_count == 0) + ace->ace_xmit_interval = + arp_defend_interval; + if (ace->ace_xmit_interval != 0) + ace_set_timer(ace, B_FALSE); continue; } - if (!ace->ace_query_mp) + + /* + * If this is a non-permanent (regular) resolved ARP + * entry, then it's now time to check if it can be + * retired. As an optimization, we check with IP + * first, and just restart the timer if the address is + * still in use. + */ + if (ACE_NONPERM(ace)) { + if (ace->ace_proto == IP_ARP_PROTO_TYPE && + ndp_lookup_ipaddr(*(ipaddr_t *) + ace->ace_proto_addr)) { + ace->ace_flags |= ACE_F_OLD; + mi_timer(arl->arl_wq, ace->ace_mp, + arp_cleanup_interval); + } else { + ar_delete_notify(ace); + ar_ce_delete(ace); + } continue; + } + /* * ar_query_xmit returns the number of milliseconds to * wait following this transmit. If the number of @@ -3721,6 +4270,7 @@ ar_wsrv(queue_t *q) * we complete the operation with a failure indication. * Otherwise, we restart the timer. */ + ASSERT(ace->ace_query_mp != NULL); ms = ar_query_xmit(ace, NULL); if (ms == 0) ar_query_reply(ace, ENXIO, NULL, (uint32_t)0); @@ -3739,43 +4289,50 @@ ar_wsrv(queue_t *q) /* ar_xmit is called to transmit an ARP Request or Response. */ static void ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, - uchar_t *haddr1, uchar_t *paddr1, uchar_t *haddr2, uchar_t *paddr2) + const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, + const uchar_t *paddr2, const uchar_t *dstaddr) { arh_t *arh; - char *cp; - uint32_t hlen = arl->arl_hw_addr_length; + uint8_t *cp; + uint_t hlen; mblk_t *mp; - if (arl->arl_flags & ARL_F_NOARP) { - /* IFF_NOARP flag is set. Do not send an arp request */ + /* IFF_NOARP flag is set or interface down: do not send arp messages */ + if ((arl->arl_flags & ARL_F_NOARP) || !arl->arl_link_up) return; - } mp = arl->arl_xmit_template; - if (!mp || !(mp = copyb(mp))) + if (mp == NULL || (mp = copyb(mp)) == NULL) return; + hlen = arl->arl_hw_addr_length; mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) + plen + plen, BPRI_MED); - if (!mp->b_cont) { + if (mp->b_cont == NULL) { freeb(mp); return; } + + /* Get the L2 destination address for the message */ + if (haddr2 == NULL) + dstaddr = arl->arl_arp_addr; + else if (dstaddr == NULL) + dstaddr = haddr2; + /* * Figure out where the target hardware address goes in the * DL_UNITDATA_REQ header, and copy it in. */ - - cp = (char *)mi_offset_param(mp, arl->arl_xmit_template_addr_offset, - hlen); - if (!cp) { + cp = mi_offset_param(mp, arl->arl_xmit_template_addr_offset, hlen); + ASSERT(cp != NULL); + if (cp == NULL) { freemsg(mp); return; } - bcopy(haddr2, cp, hlen); + bcopy(dstaddr, cp, hlen); /* Fill in the ARP header. */ - cp = (char *)mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); - mp->b_cont->b_rptr = (uchar_t *)cp; + cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); + mp->b_cont->b_rptr = cp; arh = (arh_t *)cp; U16_TO_BE16(arl->arl_arp_hw_type, arh->arh_hardware); U16_TO_BE16(proto, arh->arh_proto); @@ -3785,13 +4342,19 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, cp += ARH_FIXED_LEN; bcopy(haddr1, cp, hlen); cp += hlen; - bcopy(paddr1, cp, plen); + if (paddr1 == NULL) + bzero(cp, plen); + else + bcopy(paddr1, cp, plen); cp += plen; - bcopy(haddr2, cp, hlen); + if (haddr2 == NULL) + bzero(cp, hlen); + else + bcopy(haddr2, cp, hlen); cp += hlen; bcopy(paddr2, cp, plen); cp += plen; - mp->b_cont->b_wptr = (uchar_t *)cp; + mp->b_cont->b_wptr = cp; /* Ship it out. */ if (canputnext(arl->arl_wq)) putnext(arl->arl_wq, mp); @@ -3799,209 +4362,6 @@ ar_xmit(arl_t *arl, uint32_t operation, uint32_t proto, uint32_t plen, freemsg(mp); } -/* - * Handle an external request to broadcast an ARP request. This is used - * by configuration programs to broadcast a request advertising our own - * hardware and protocol addresses. - */ -static int -ar_xmit_request(queue_t *q, mblk_t *mp_orig) -{ - areq_t *areq; - arl_t *arl; - uchar_t *sender; - uint32_t sender_length; - uchar_t *target; - uint32_t target_length; - mblk_t *mp = mp_orig; - - /* We handle both M_IOCTL and M_PROTO messages. */ - if (DB_TYPE(mp) == M_IOCTL) - mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); - if (arl == NULL) - return (EINVAL); - /* - * Newly received commands from clients go to the tail of the queue. - */ - if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_xmit_request: enqueue on q %p\n", (void *)q)); - ar_cmd_enqueue(arl, mp_orig, q, AR_XMIT_REQUEST, B_TRUE); - return (EINPROGRESS); - } - mp_orig->b_prev = NULL; - - areq = (areq_t *)mp->b_rptr; - sender_length = areq->areq_sender_addr_length; - sender = mi_offset_param(mp, areq->areq_sender_addr_offset, - sender_length); - target_length = areq->areq_target_addr_length; - target = mi_offset_param(mp, areq->areq_target_addr_offset, - target_length); - if (!sender || !target) - return (EINVAL); - ar_xmit(arl, ARP_REQUEST, areq->areq_proto, sender_length, - arl->arl_hw_addr, sender, arl->arl_arp_addr, target); - return (0); -} - -/* - * Handle an external request to broadcast an ARP response. This is used - * by configuration programs to broadcast a response advertising our own - * hardware and protocol addresses. - */ -static int -ar_xmit_response(queue_t *q, mblk_t *mp_orig) -{ - areq_t *areq; - arl_t *arl; - uchar_t *sender; - uint32_t sender_length; - uchar_t *target; - uint32_t target_length; - mblk_t *mp = mp_orig; - - /* We handle both M_IOCTL and M_PROTO messages. */ - if (DB_TYPE(mp) == M_IOCTL) - mp = mp->b_cont; - arl = ar_ll_lookup_from_mp(mp); - if (arl == NULL) - return (EINVAL); - /* - * Newly received commands from clients go to the tail of the queue. - */ - if (CMD_NEEDS_QUEUEING(mp_orig, arl)) { - arp1dbg(("ar_xmit_response: enqueue on q %p \n", (void *)q)); - ar_cmd_enqueue(arl, mp_orig, q, AR_XMIT_RESPONSE, B_TRUE); - return (EINPROGRESS); - } - mp_orig->b_prev = NULL; - - areq = (areq_t *)mp->b_rptr; - sender_length = areq->areq_sender_addr_length; - sender = mi_offset_param(mp, areq->areq_sender_addr_offset, - sender_length); - target_length = areq->areq_target_addr_length; - target = mi_offset_param(mp, areq->areq_target_addr_offset, - target_length); - if (!sender || !target) - return (EINVAL); - ar_xmit(arl, ARP_RESPONSE, areq->areq_proto, sender_length, - arl->arl_hw_addr, sender, arl->arl_arp_addr, target); - return (0); -} - -#if 0 -/* - * Debug routine to display a particular ARP Cache Entry with an - * accompanying text message. - */ -static void -show_ace(char *msg, ace_t *ace) -{ - if (msg) - printf("%s", msg); - printf("ace 0x%p:\n", ace); - printf("\tace_next 0x%p, ace_ptpn 0x%p, ace_arl 0x%p\n", - ace->ace_next, ace->ace_ptpn, ace->ace_arl); - printf("\tace_proto %x, ace_flags %x\n", ace->ace_proto, - ace->ace_flags); - if (ace->ace_proto_addr && ace->ace_proto_addr_length) - printf("\tace_proto_addr %x %x %x %x, len %d\n", - ace->ace_proto_addr[0], ace->ace_proto_addr[1], - ace->ace_proto_addr[2], ace->ace_proto_addr[3], - ace->ace_proto_addr_length); - if (ace->ace_proto_mask) - printf("\tace_proto_mask %x %x %x %x\n", - ace->ace_proto_mask[0], ace->ace_proto_mask[1], - ace->ace_proto_mask[2], ace->ace_proto_mask[3]); - if (ace->ace_hw_addr && ace->ace_hw_addr_length) - printf("\tace_hw_addr %x %x %x %x %x %x, len %d\n", - ace->ace_hw_addr[0], ace->ace_hw_addr[1], - ace->ace_hw_addr[2], ace->ace_hw_addr[3], - ace->ace_hw_addr[4], ace->ace_hw_addr[5], - ace->ace_hw_addr_length); - printf("\tace_mp 0x%p\n", ace->ace_mp); - printf("\tace_query_count %d, ace_query_mp 0x%x\n", - ace->ace_query_count, ace->ace_query_mp); -} - -/* Debug routine to display an ARP packet with an accompanying text message. */ -static void -show_arp(char *msg, mblk_t *mp) -{ - uchar_t *up = mp->b_rptr; - int len; - int hlen = up[4] & 0xFF; - char fmt[64]; - char buf[128]; - char *op; - int plen = up[5] & 0xFF; - uint_t proto; - - if (msg && *msg) - printf("%s", msg); - len = mp->b_wptr - up; - if (len < 8) { - printf("ARP packet of %d bytes too small\n", len); - return; - } - switch (BE16_TO_U16(&up[6])) { - case ARP_REQUEST: - op = "ARP request"; - break; - case ARP_RESPONSE: - op = "ARP response"; - break; - case RARP_REQUEST: - op = "RARP request"; - break; - case RARP_RESPONSE: - op = "RARP response"; - break; - default: - op = "unknown"; - break; - } - proto = (uint_t)BE16_TO_U16(&up[2]); - printf("len %d, hardware %d, proto %d, hlen %d, plen %d, op %s\n", - len, (int)BE16_TO_U16(up), proto, hlen, plen, op); - if (len < (8 + hlen + hlen + plen + plen)) - printf("ARP packet of %d bytes too small!\n", len); - up += 8; - - (void) mi_sprintf(fmt, "sender hardware address %%%dM\n", hlen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - up += hlen; - if (proto == 0x800) { - printf("sender proto address %d.%d.%d.%d\n", - up[0] & 0xFF, up[1] & 0xFF, up[2] & 0xFF, - up[3] & 0xFF); - } else { - (void) mi_sprintf(fmt, "sender proto address %%%dM\n", plen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - } - up += plen; - - (void) mi_sprintf(fmt, "target hardware address %%%dM\n", hlen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - up += hlen; - if (proto == 0x800) { - printf("target proto address %d.%d.%d.%d\n", - up[0] & 0xFF, up[1] & 0xFF, up[2] & 0xFF, - up[3] & 0xFF); - } else { - (void) mi_sprintf(fmt, "target proto address %%%dM\n", plen); - (void) mi_sprintf(buf, fmt, up); - printf(buf); - } - up += plen; -} -#endif - static mblk_t * ar_alloc(uint32_t cmd, int err) { |